diff options
| author | vvvv <[email protected]> | 2025-10-08 11:41:14 +0300 |
|---|---|---|
| committer | vvvv <[email protected]> | 2025-10-08 12:20:42 +0300 |
| commit | d73f13cfdb331365ddad0da51ec36e0a3e4cf187 (patch) | |
| tree | 88cab10170ce9aa3389be7f1a09247386dcf5ebd /yql/essentials/public/purecalc | |
| parent | f377d8ad9e0741cd904c1d4934afdf24af517d93 (diff) | |
YQL-20086 public
commit_hash:68b0c2e9c2960587af7d57ecedcb38f4d05890b7
Diffstat (limited to 'yql/essentials/public/purecalc')
78 files changed, 5910 insertions, 6076 deletions
diff --git a/yql/essentials/public/purecalc/common/compile_mkql.cpp b/yql/essentials/public/purecalc/common/compile_mkql.cpp index 7bb95e20754..2a7500927f9 100644 --- a/yql/essentials/public/purecalc/common/compile_mkql.cpp +++ b/yql/essentials/public/purecalc/common/compile_mkql.cpp @@ -90,10 +90,10 @@ NCommon::IMkqlCallableCompiler::TCompiler MakeFolderPathCallableCompiler(const T }; } -} +} // namespace NKikimr::NMiniKQL::TRuntimeNode CompileMkql(const TExprNode::TPtr& exprRoot, TExprContext& exprCtx, - const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, const NKikimr::NMiniKQL::TTypeEnvironment& env, const TUserDataTable& userData, NCommon::TMemoizedTypesMap* typeMemoization) + const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, const NKikimr::NMiniKQL::TTypeEnvironment& env, const TUserDataTable& userData, NCommon::TMemoizedTypesMap* typeMemoization) { NCommon::TMkqlCommonCallableCompiler compiler; @@ -106,11 +106,11 @@ NKikimr::NMiniKQL::TRuntimeNode CompileMkql(const TExprNode::TPtr& exprRoot, TEx // Prepare build context NKikimr::NMiniKQL::TProgramBuilder pgmBuilder(env, funcRegistry); - NCommon::TMkqlBuildContext buildCtx(compiler, pgmBuilder, exprCtx, /*lambdaId*/0, /*args*/{}, typeMemoization); + NCommon::TMkqlBuildContext buildCtx(compiler, pgmBuilder, exprCtx, /*lambdaId*/ 0, /*args*/ {}, typeMemoization); // Build the root MKQL node return NCommon::MkqlBuildExpr(*exprRoot, buildCtx); } -} // NYql::NPureCalc +} // namespace NYql::NPureCalc diff --git a/yql/essentials/public/purecalc/common/compile_mkql.h b/yql/essentials/public/purecalc/common/compile_mkql.h index caba3baa2b9..09991e3d590 100644 --- a/yql/essentials/public/purecalc/common/compile_mkql.h +++ b/yql/essentials/public/purecalc/common/compile_mkql.h @@ -6,15 +6,14 @@ #include <yql/essentials/ast/yql_expr.h> #include <yql/essentials/core/yql_user_data.h> - namespace NYql::NPureCalc { - /** - * Compile expr to mkql byte-code - */ +/** + * Compile expr to mkql byte-code + */ - NKikimr::NMiniKQL::TRuntimeNode CompileMkql(const TExprNode::TPtr& exprRoot, TExprContext& exprCtx, - const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, const NKikimr::NMiniKQL::TTypeEnvironment& env, const TUserDataTable& userData, - NCommon::TMemoizedTypesMap* typeMemoization = nullptr); +NKikimr::NMiniKQL::TRuntimeNode CompileMkql(const TExprNode::TPtr& exprRoot, TExprContext& exprCtx, + const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, const NKikimr::NMiniKQL::TTypeEnvironment& env, const TUserDataTable& userData, + NCommon::TMemoizedTypesMap* typeMemoization = nullptr); -} +} // namespace NYql::NPureCalc diff --git a/yql/essentials/public/purecalc/common/fwd.h b/yql/essentials/public/purecalc/common/fwd.h index 22df90a6b29..dc77d14b96b 100644 --- a/yql/essentials/public/purecalc/common/fwd.h +++ b/yql/essentials/public/purecalc/common/fwd.h @@ -4,53 +4,53 @@ #include <memory> namespace NYql::NPureCalc { - class TCompileError; +class TCompileError; - template <typename> - class IConsumer; +template <typename> +class IConsumer; - template <typename> - class IStream; +template <typename> +class IStream; - class IProgramFactory; +class IProgramFactory; - class IWorkerFactory; +class IWorkerFactory; - class IPullStreamWorkerFactory; +class IPullStreamWorkerFactory; - class IPullListWorkerFactory; +class IPullListWorkerFactory; - class IPushStreamWorkerFactory; +class IPushStreamWorkerFactory; - class IWorker; +class IWorker; - class IPullStreamWorker; +class IPullStreamWorker; - class IPullListWorker; +class IPullListWorker; - class IPushStreamWorker; +class IPushStreamWorker; - class TInputSpecBase; +class TInputSpecBase; - class TOutputSpecBase; +class TOutputSpecBase; - class IProgram; +class IProgram; - template <typename, typename, typename> - class TProgramCommon; +template <typename, typename, typename> +class TProgramCommon; - template <typename, typename> - class TPullStreamProgram; +template <typename, typename> +class TPullStreamProgram; - template <typename, typename> - class TPullListProgram; +template <typename, typename> +class TPullListProgram; - template <typename, typename> - class TPushStreamProgram; +template <typename, typename> +class TPushStreamProgram; - using IProgramFactoryPtr = TIntrusivePtr<IProgramFactory>; - using IWorkerFactoryPtr = std::shared_ptr<IWorkerFactory>; - using IPullStreamWorkerFactoryPtr = std::shared_ptr<IPullStreamWorkerFactory>; - using IPullListWorkerFactoryPtr = std::shared_ptr<IPullListWorkerFactory>; - using IPushStreamWorkerFactoryPtr = std::shared_ptr<IPushStreamWorkerFactory>; -} +using IProgramFactoryPtr = TIntrusivePtr<IProgramFactory>; +using IWorkerFactoryPtr = std::shared_ptr<IWorkerFactory>; +using IPullStreamWorkerFactoryPtr = std::shared_ptr<IPullStreamWorkerFactory>; +using IPullListWorkerFactoryPtr = std::shared_ptr<IPullListWorkerFactory>; +using IPushStreamWorkerFactoryPtr = std::shared_ptr<IPushStreamWorkerFactory>; +} // namespace NYql::NPureCalc diff --git a/yql/essentials/public/purecalc/common/inspect_input.cpp b/yql/essentials/public/purecalc/common/inspect_input.cpp index 9ca56da5dec..7717aac2e04 100644 --- a/yql/essentials/public/purecalc/common/inspect_input.cpp +++ b/yql/essentials/public/purecalc/common/inspect_input.cpp @@ -3,31 +3,31 @@ #include <yql/essentials/core/yql_expr_type_annotation.h> namespace NYql::NPureCalc { - bool TryFetchInputIndexFromSelf(const TExprNode& node, TExprContext& ctx, ui32 inputsCount, ui32& result) { - TIssueScopeGuard issueSope(ctx.IssueManager, [&]() { - return MakeIntrusive<TIssue>(ctx.GetPosition(node.Pos()), TStringBuilder() << "At function: " << node.Content()); - }); +bool TryFetchInputIndexFromSelf(const TExprNode& node, TExprContext& ctx, ui32 inputsCount, ui32& result) { + TIssueScopeGuard issueSope(ctx.IssueManager, [&]() { + return MakeIntrusive<TIssue>(ctx.GetPosition(node.Pos()), TStringBuilder() << "At function: " << node.Content()); + }); - if (!EnsureArgsCount(node, 1, ctx)) { - return false; - } - - if (!EnsureAtom(*node.Child(0), ctx)) { - return false; - } + if (!EnsureArgsCount(node, 1, ctx)) { + return false; + } - if (!TryFromString(node.Child(0)->Content(), result)) { - auto message = TStringBuilder() << "Index " << TString{node.Child(0)->Content()}.Quote() << " isn't UI32"; - ctx.AddError(TIssue(ctx.GetPosition(node.Child(0)->Pos()), std::move(message))); - return false; - } + if (!EnsureAtom(*node.Child(0), ctx)) { + return false; + } - if (result >= inputsCount) { - auto message = TStringBuilder() << "Invalid input index: " << result << " is out of range [0;" << inputsCount << ")"; - ctx.AddError(TIssue(ctx.GetPosition(node.Child(0)->Pos()), std::move(message))); - return false; - } + if (!TryFromString(node.Child(0)->Content(), result)) { + auto message = TStringBuilder() << "Index " << TString{node.Child(0)->Content()}.Quote() << " isn't UI32"; + ctx.AddError(TIssue(ctx.GetPosition(node.Child(0)->Pos()), std::move(message))); + return false; + } - return true; + if (result >= inputsCount) { + auto message = TStringBuilder() << "Invalid input index: " << result << " is out of range [0;" << inputsCount << ")"; + ctx.AddError(TIssue(ctx.GetPosition(node.Child(0)->Pos()), std::move(message))); + return false; } + + return true; } +} // namespace NYql::NPureCalc diff --git a/yql/essentials/public/purecalc/common/inspect_input.h b/yql/essentials/public/purecalc/common/inspect_input.h index 558144865da..46a6a0b4fbe 100644 --- a/yql/essentials/public/purecalc/common/inspect_input.h +++ b/yql/essentials/public/purecalc/common/inspect_input.h @@ -3,5 +3,5 @@ #include <yql/essentials/ast/yql_expr.h> namespace NYql::NPureCalc { - bool TryFetchInputIndexFromSelf(const TExprNode&, TExprContext&, ui32, ui32&); -} +bool TryFetchInputIndexFromSelf(const TExprNode&, TExprContext&, ui32, ui32&); +} // namespace NYql::NPureCalc diff --git a/yql/essentials/public/purecalc/common/interface.h b/yql/essentials/public/purecalc/common/interface.h index 23aa93bdf23..36741ed24d3 100644 --- a/yql/essentials/public/purecalc/common/interface.h +++ b/yql/essentials/public/purecalc/common/interface.h @@ -24,970 +24,970 @@ class ITimeProvider; namespace NKikimr { - namespace NMiniKQL { - class TScopedAlloc; - class IComputationGraph; - class IFunctionRegistry; - class TTypeEnvironment; - class TType; - class TStructType; +namespace NMiniKQL { +class TScopedAlloc; +class IComputationGraph; +class IFunctionRegistry; +class TTypeEnvironment; +class TType; +class TStructType; +} // namespace NMiniKQL +} // namespace NKikimr + +namespace NYql { +namespace NPureCalc { +/** + * SQL or s-expression translation error. + */ +class TCompileError: public yexception { +private: + TString Yql_; + TString Issues_; + +public: + // TODO: maybe accept an actual list of issues here? + // See https://a.yandex-team.ru/arc/review/439403/details#comment-778237 + TCompileError(TString yql, TString issues) + : Yql_(std::move(yql)) + , Issues_(std::move(issues)) + { + } + +public: + /** + * Get the sql query which caused the error (if there is one available). + */ + const TString& GetYql() const { + return Yql_; + } + + /** + * Get detailed description for all errors and warnings that happened during sql translation. + */ + const TString& GetIssues() const { + return Issues_; } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * A generic input stream of objects. + */ +template <typename T> +class IStream { +public: + virtual ~IStream() = default; + +public: + /** + * Pops and returns a next value in the stream. If the stream is finished, should return some sentinel object. + * + * Depending on return type, this function may not transfer object ownership to a user. + * Thus, the stream may manage the returned object * itself. + * That is, the returned object's lifetime may be bound to the input stream lifetime; it may be destroyed + * upon calling Fetch() or upon destroying the stream, whichever happens first. + */ + virtual T Fetch() = 0; +}; + +/** + * Create a new stream which applies the given functor to the elements of the original stream. + */ +template <typename TOld, typename TNew, typename TFunctor> +inline THolder<IStream<TNew>> MapStream(THolder<IStream<TOld>> stream, TFunctor functor) { + return THolder(new NPrivate::TMappingStream<TNew, TOld, TFunctor>(std::move(stream), std::move(functor))); +}; + +/** + * Convert stream of objects into a stream of potentially incompatible objects. + * + * This conversion applies static cast to the output of the original stream. Use with caution! + */ +/// @{ +template < + typename TNew, typename TOld, + std::enable_if_t<!std::is_same<TNew, TOld>::value>* = nullptr> +inline THolder<IStream<TNew>> ConvertStreamUnsafe(THolder<IStream<TOld>> stream) { + return MapStream<TOld, TNew>(std::move(stream), [](TOld x) -> TNew { return static_cast<TNew>(x); }); +} +template <typename T> +inline THolder<IStream<T>> ConvertStreamUnsafe(THolder<IStream<T>> stream) { + return stream; +} +/// @} + +/** + * Convert stream of objects into a stream of compatible objects. + * + * Note: each conversion adds one level of indirection so avoid them if possible. + */ +template <typename TNew, typename TOld, std::enable_if_t<std::is_convertible<TOld, TNew>::value>* = nullptr> +inline THolder<IStream<TNew>> ConvertStream(THolder<IStream<TOld>> stream) { + return ConvertStreamUnsafe<TNew, TOld>(std::move(stream)); } -namespace NYql { - namespace NPureCalc { - /** - * SQL or s-expression translation error. - */ - class TCompileError: public yexception { - private: - TString Yql_; - TString Issues_; - - public: - // TODO: maybe accept an actual list of issues here? - // See https://a.yandex-team.ru/arc/review/439403/details#comment-778237 - TCompileError(TString yql, TString issues) - : Yql_(std::move(yql)) - , Issues_(std::move(issues)) - { - } - - public: - /** - * Get the sql query which caused the error (if there is one available). - */ - const TString& GetYql() const { - return Yql_; - } - - /** - * Get detailed description for all errors and warnings that happened during sql translation. - */ - const TString& GetIssues() const { - return Issues_; - } - }; - - //////////////////////////////////////////////////////////////////////////////////////////////////// - - /** - * A generic input stream of objects. - */ - template <typename T> - class IStream { - public: - virtual ~IStream() = default; - - public: - /** - * Pops and returns a next value in the stream. If the stream is finished, should return some sentinel object. - * - * Depending on return type, this function may not transfer object ownership to a user. - * Thus, the stream may manage the returned object * itself. - * That is, the returned object's lifetime may be bound to the input stream lifetime; it may be destroyed - * upon calling Fetch() or upon destroying the stream, whichever happens first. - */ - virtual T Fetch() = 0; - }; - - /** - * Create a new stream which applies the given functor to the elements of the original stream. - */ - template <typename TOld, typename TNew, typename TFunctor> - inline THolder<IStream<TNew>> MapStream(THolder<IStream<TOld>> stream, TFunctor functor) { - return THolder(new NPrivate::TMappingStream<TNew, TOld, TFunctor>(std::move(stream), std::move(functor))); - }; - - /** - * Convert stream of objects into a stream of potentially incompatible objects. - * - * This conversion applies static cast to the output of the original stream. Use with caution! - */ - /// @{ - template < - typename TNew, typename TOld, - std::enable_if_t<!std::is_same<TNew, TOld>::value>* = nullptr> - inline THolder<IStream<TNew>> ConvertStreamUnsafe(THolder<IStream<TOld>> stream) { - return MapStream<TOld, TNew>(std::move(stream), [](TOld x) -> TNew { return static_cast<TNew>(x); }); - } - template <typename T> - inline THolder<IStream<T>> ConvertStreamUnsafe(THolder<IStream<T>> stream) { - return stream; - } - /// @} - - /** - * Convert stream of objects into a stream of compatible objects. - * - * Note: each conversion adds one level of indirection so avoid them if possible. - */ - template <typename TNew, typename TOld, std::enable_if_t<std::is_convertible<TOld, TNew>::value>* = nullptr> - inline THolder<IStream<TNew>> ConvertStream(THolder<IStream<TOld>> stream) { - return ConvertStreamUnsafe<TNew, TOld>(std::move(stream)); - } +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * A generic push consumer. + */ +template <typename T> +class IConsumer { +public: + virtual ~IConsumer() = default; + +public: + /** + * Feed an object to consumer. + * + * Depending on argument type, the consumer may not take ownership of the passed object; + * in that case it is the caller responsibility to manage the object lifetime after passing it to this method. + * + * The passed object can be destroyed after the consumer returns from this function; the consumer should + * not store pointer to the passed object or the passed object itself without taking all necessary precautions + * to ensure that the pointer or the object stays valid after returning. + */ + virtual void OnObject(T) = 0; + + /** + * Close the consumer and run finalization logic. Calling OnObject after calling this function is an error. + */ + virtual void OnFinish() = 0; +}; + +/** + * Create a new consumer which applies the given functor to objects before . + */ +template <typename TOld, typename TNew, typename TFunctor> +inline THolder<IConsumer<TNew>> MapConsumer(THolder<IConsumer<TOld>> stream, TFunctor functor) { + return THolder(new NPrivate::TMappingConsumer<TNew, TOld, TFunctor>(std::move(stream), std::move(functor))); +}; + +/** + * Convert consumer of objects into a consumer of potentially incompatible objects. + * + * This conversion applies static cast to the input value. Use with caution. + */ +/// @{ +template < + typename TNew, typename TOld, + std::enable_if_t<!std::is_same<TNew, TOld>::value>* = nullptr> +inline THolder<IConsumer<TNew>> ConvertConsumerUnsafe(THolder<IConsumer<TOld>> consumer) { + return MapConsumer<TOld, TNew>(std::move(consumer), [](TNew x) -> TOld { return static_cast<TOld>(x); }); +} +template <typename T> +inline THolder<IConsumer<T>> ConvertConsumerUnsafe(THolder<IConsumer<T>> consumer) { + return consumer; +} +/// @} + +/** + * Convert consumer of objects into a consumer of compatible objects. + * + * Note: each conversion adds one level of indirection so avoid them if possible. + */ +template <typename TNew, typename TOld, std::enable_if_t<std::is_convertible<TNew, TOld>::value>* = nullptr> +inline THolder<IConsumer<TNew>> ConvertConsumer(THolder<IConsumer<TOld>> consumer) { + return ConvertConsumerUnsafe<TNew, TOld>(std::move(consumer)); +} - //////////////////////////////////////////////////////////////////////////////////////////////////// - - /** - * A generic push consumer. - */ - template <typename T> - class IConsumer { - public: - virtual ~IConsumer() = default; - - public: - /** - * Feed an object to consumer. - * - * Depending on argument type, the consumer may not take ownership of the passed object; - * in that case it is the caller responsibility to manage the object lifetime after passing it to this method. - * - * The passed object can be destroyed after the consumer returns from this function; the consumer should - * not store pointer to the passed object or the passed object itself without taking all necessary precautions - * to ensure that the pointer or the object stays valid after returning. - */ - virtual void OnObject(T) = 0; - - /** - * Close the consumer and run finalization logic. Calling OnObject after calling this function is an error. - */ - virtual void OnFinish() = 0; - }; - - /** - * Create a new consumer which applies the given functor to objects before . - */ - template <typename TOld, typename TNew, typename TFunctor> - inline THolder<IConsumer<TNew>> MapConsumer(THolder<IConsumer<TOld>> stream, TFunctor functor) { - return THolder(new NPrivate::TMappingConsumer<TNew, TOld, TFunctor>(std::move(stream), std::move(functor))); - }; - - - /** - * Convert consumer of objects into a consumer of potentially incompatible objects. - * - * This conversion applies static cast to the input value. Use with caution. - */ - /// @{ - template < - typename TNew, typename TOld, - std::enable_if_t<!std::is_same<TNew, TOld>::value>* = nullptr> - inline THolder<IConsumer<TNew>> ConvertConsumerUnsafe(THolder<IConsumer<TOld>> consumer) { - return MapConsumer<TOld, TNew>(std::move(consumer), [](TNew x) -> TOld { return static_cast<TOld>(x); }); - } - template <typename T> - inline THolder<IConsumer<T>> ConvertConsumerUnsafe(THolder<IConsumer<T>> consumer) { - return consumer; - } - /// @} - - /** - * Convert consumer of objects into a consumer of compatible objects. - * - * Note: each conversion adds one level of indirection so avoid them if possible. - */ - template <typename TNew, typename TOld, std::enable_if_t<std::is_convertible<TNew, TOld>::value>* = nullptr> - inline THolder<IConsumer<TNew>> ConvertConsumer(THolder<IConsumer<TOld>> consumer) { - return ConvertConsumerUnsafe<TNew, TOld>(std::move(consumer)); - } +/** + * Create a consumer which holds a non-owning pointer to the given consumer + * and passes all messages to the latter. + */ +template <typename T, typename C> +THolder<NPrivate::TNonOwningConsumer<T, C>> MakeNonOwningConsumer(C consumer) { + return MakeHolder<NPrivate::TNonOwningConsumer<T, C>>(consumer); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * Logging options. + */ +struct TLoggingOptions final { +public: + /// Logging level for messages generated during compilation. + ELogPriority LogLevel; + + /// Where to write log messages. + IOutputStream* LogDestination; + +public: + TLoggingOptions(); + /** + * Set a new logging level. + * + * @return reference to self, to allow method chaining. + */ + TLoggingOptions& SetLogLevel(ELogPriority); + + /** + * Set a new logging destination. + * + * @return reference to self, to allow method chaining. + */ + TLoggingOptions& SetLogDestination(IOutputStream*); +}; + +/** + * General options for program factory. + */ +struct TProgramFactoryOptions final { +public: + /// Path to a directory with compiled UDFs. Leave empty to disable loading external UDFs. + TString UdfsDir; + + /// List of available external resources, e.g. files, UDFs, libraries. + TVector<NUserData::TUserData> UserData; + + /// LLVM settings. Assign "OFF" to disable LLVM, empty string for default settings. + TString LLVMSettings; + + /// Block engine settings. Assign "force" to unconditionally enable + /// it, "disable" for turn it off and "auto" to left the final + /// decision to the platform heuristics. + TString BlockEngineSettings; + + /// Output stream to dump the compiled and optimized expressions. + IOutputStream* ExprOutputStream; + + /// Provider for generic counters which can be used to export statistics from UDFs. + NKikimr::NUdf::ICountersProvider* CountersProvider; + + /// YT Type V3 flags for Skiff/Yson serialization. + ui64 NativeYtTypeFlags; + + /// Seed for deterministic time provider + TMaybe<ui64> DeterministicTimeProviderSeed; + + /// Use special system columns to support tables naming (supports non empty ``TablePath()``/``TableName()``) + bool UseSystemColumns; + + /// Reuse allocated workers + bool UseWorkerPool; + + /// Use Antlr4 parser (for migration) + bool UseAntlr4; + + /// Language version + TLangVersion LangVer; + +public: + TProgramFactoryOptions(); + +public: + /** + * Set language version for queries + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetLanguageVersion(TLangVersion langver); + + /** + * Set a new path to a directory with UDFs. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetUDFsDir(TStringBuf); + + /** + * Add a new library to the UserData list. + * + * @param disposition where the resource resides, e.g. on filesystem, in memory, etc. + * NB: URL disposition is not supported. + * @param name name of the resource. + * @param content depending on disposition, either path to the resource or its content. + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& AddLibrary(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content); + + /** + * Add a new file to the UserData list. + * + * @param disposition where the resource resides, e.g. on filesystem, in memory, etc. + * NB: URL disposition is not supported. + * @param name name of the resource. + * @param content depending on disposition, either path to the resource or its content. + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& AddFile(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content); + + /** + * Add a new UDF to the UserData list. + * + * @param disposition where the resource resides, e.g. on filesystem, in memory, etc. + * NB: URL disposition is not supported. + * @param name name of the resource. + * @param content depending on disposition, either path to the resource or its content. + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& AddUDF(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content); + + /** + * Set new LLVM settings. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetLLVMSettings(TStringBuf llvm_settings); + + /** + * Set new block engine settings. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetBlockEngineSettings(TStringBuf blockEngineSettings); + + /** + * Set the stream to dump the compiled and optimized expressions. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetExprOutputStream(IOutputStream* exprOutputStream); + + /** + * Set new counters provider. Passed pointer should stay alive for as long as the processor factory + * stays alive. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetCountersProvider(NKikimr::NUdf::ICountersProvider* countersProvider); + + /** + * Set new YT Type V3 mode. Deprecated method. Use SetNativeYtTypeFlags instead + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetUseNativeYtTypes(bool useNativeTypes); + + /** + * Set YT Type V3 flags. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetNativeYtTypeFlags(ui64 nativeTypeFlags); + + /** + * Set seed for deterministic time provider. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetDeterministicTimeProviderSeed(TMaybe<ui64> seed); + + /** + * Set new flag whether to allow using system columns or not. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetUseSystemColumns(bool useSystemColumns); + + /** + * Set new flag whether to allow reusing workers or not. + * + * @return reference to self, to allow method chaining. + */ + TProgramFactoryOptions& SetUseWorkerPool(bool useWorkerPool); +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * What exactly are we parsing: SQL or an s-expression. + */ +enum class ETranslationMode { + SQL /* "SQL" */, + SExpr /* "s-expression" */, + Mkql /* "mkql" */, + PG /* PostgreSQL */ +}; + +/** + * A facility for compiling sql and s-expressions and making programs from them. + */ +class IProgramFactory: public TThrRefBase { +protected: + virtual IPullStreamWorkerFactoryPtr MakePullStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0; + virtual IPullListWorkerFactoryPtr MakePullListWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0; + virtual IPushStreamWorkerFactoryPtr MakePushStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0; + +public: + /** + * Add new udf module. It's not specified whether adding new modules will affect existing programs + * (theoretical answer is 'no'). + */ + virtual void AddUdfModule(const TStringBuf&, NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&&) = 0; + // TODO: support setting udf modules via factory options. + + /** + * Set new counters provider, override one that was specified via factory options. Note that existing + * programs will still reference the previous provider. + */ + virtual void SetCountersProvider(NKikimr::NUdf::ICountersProvider*) = 0; + // TODO: support setting providers via factory options. + + template <typename TInputSpec, typename TOutputSpec> + THolder<TPullStreamProgram<TInputSpec, TOutputSpec>> MakePullStreamProgram( + TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1) { + auto workerFactory = MakePullStreamWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion); + return MakeHolder<TPullStreamProgram<TInputSpec, TOutputSpec>>(std::move(inputSpec), std::move(outputSpec), workerFactory); + } - /** - * Create a consumer which holds a non-owning pointer to the given consumer - * and passes all messages to the latter. - */ - template <typename T, typename C> - THolder<NPrivate::TNonOwningConsumer<T, C>> MakeNonOwningConsumer(C consumer) { - return MakeHolder<NPrivate::TNonOwningConsumer<T, C>>(consumer); + template <typename TInputSpec, typename TOutputSpec> + THolder<TPullListProgram<TInputSpec, TOutputSpec>> MakePullListProgram( + TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1) { + auto workerFactory = MakePullListWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion); + return MakeHolder<TPullListProgram<TInputSpec, TOutputSpec>>(std::move(inputSpec), std::move(outputSpec), workerFactory); + } + + template <typename TInputSpec, typename TOutputSpec> + THolder<TPushStreamProgram<TInputSpec, TOutputSpec>> MakePushStreamProgram( + TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1) { + auto workerFactory = MakePushStreamWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion); + return MakeHolder<TPushStreamProgram<TInputSpec, TOutputSpec>>(std::move(inputSpec), std::move(outputSpec), workerFactory); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * A facility for creating workers. Despite being a part of a public API, worker factory is not used directly. + */ +class IWorkerFactory: public std::enable_shared_from_this<IWorkerFactory> { +public: + virtual ~IWorkerFactory() = default; + /** + * Get input column names for specified input that are actually used in the query. + */ + virtual const THashSet<TString>& GetUsedColumns(ui32) const = 0; + /** + * Overload for single-input programs. + */ + virtual const THashSet<TString>& GetUsedColumns() const = 0; + + /** + * Make input type schema for specified input as deduced by program optimizer. This schema is equivalent + * to one provided by input spec up to the order of the fields in structures. + */ + virtual NYT::TNode MakeInputSchema(ui32) const = 0; + /** + * Overload for single-input programs. + */ + virtual NYT::TNode MakeInputSchema() const = 0; + + /** + * Make output type schema as deduced by program optimizer. If output spec provides its own schema, than + * this schema is equivalent to one provided by output spec up to the order of the fields in structures. + */ + /// @{ + /** + * Overload for single-table output programs (i.e. output type is struct). + */ + virtual NYT::TNode MakeOutputSchema() const = 0; + /** + * Overload for multi-table output programs (i.e. output type is variant over tuple). + */ + virtual NYT::TNode MakeOutputSchema(ui32) const = 0; + /** + * Overload for multi-table output programs (i.e. output type is variant over struct). + */ + virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0; + /// @} + + /** + * Make full output schema. For single-output programs returns struct type, for multi-output programs + * returns variant type. + * + * Warning: calling this function may result in extended memory usage for large number of output tables. + */ + virtual NYT::TNode MakeFullOutputSchema() const = 0; + + /** + * Get compilation issues + */ + virtual TIssues GetIssues() const = 0; + + /** + * Get precompiled mkql program + */ + virtual TString GetCompiledProgram() = 0; + + /** + * Return a worker to the factory for possible reuse + */ + virtual void ReturnWorker(IWorker* worker) = 0; +}; + +class TReleaseWorker { +public: + template <class T> + static inline void Destroy(T* t) noexcept { + t->Release(); + } +}; + +template <class T> +using TWorkerHolder = THolder<T, TReleaseWorker>; + +/** + * Factory for generating pull stream workers. + */ +class IPullStreamWorkerFactory: public IWorkerFactory { +public: + /** + * Create a new pull stream worker. + */ + virtual TWorkerHolder<IPullStreamWorker> MakeWorker() = 0; +}; + +/** + * Factory for generating pull list workers. + */ +class IPullListWorkerFactory: public IWorkerFactory { +public: + /** + * Create a new pull list worker. + */ + virtual TWorkerHolder<IPullListWorker> MakeWorker() = 0; +}; + +/** + * Factory for generating push stream workers. + */ +class IPushStreamWorkerFactory: public IWorkerFactory { +public: + /** + * Create a new push stream worker. + */ + virtual TWorkerHolder<IPushStreamWorker> MakeWorker() = 0; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * Worker is a central part of any program instance. It contains current computation state + * (called computation graph) and objects required to work with it, including an allocator for unboxed values. + * + * Usually, users do not interact with workers directly. They use program instance entry points such as streams + * and consumers instead. The only case when one would have to to interact with workers is when implementing + * custom io-specification. + */ +class IWorker { +protected: + friend class TReleaseWorker; + /** + * Cleanup the worker and return to a worker factory for reuse + */ + virtual void Release() = 0; + +public: + virtual ~IWorker() = default; + +public: + /** + * Number of inputs for this program. + */ + virtual ui32 GetInputsCount() const = 0; + + /** + * MiniKQL input struct type of specified input for this program. Type is equivalent to the deduced input + * schema (see IWorker::MakeInputSchema()) + * + * If ``original`` is set to ``true``, returns type without virtual system columns. + */ + virtual const NKikimr::NMiniKQL::TStructType* GetInputType(ui32, bool original = false) const = 0; + /** + * Overload for single-input programs. + */ + virtual const NKikimr::NMiniKQL::TStructType* GetInputType(bool original = false) const = 0; + + /** + * MiniKQL input struct type of the specified input for this program. + * The returned type is the actual type of the specified input node. + */ + virtual const NKikimr::NMiniKQL::TStructType* GetRawInputType(ui32) const = 0; + /** + * Overload for single-input programs. + */ + virtual const NKikimr::NMiniKQL::TStructType* GetRawInputType() const = 0; + + /** + * MiniKQL output struct type for this program. The returned type is equivalent to the deduced output + * schema (see IWorker::MakeFullOutputSchema()). + */ + virtual const NKikimr::NMiniKQL::TType* GetOutputType() const = 0; + + /** + * MiniKQL output struct type for this program. The returned type is + * the actual type of the root node. + */ + virtual const NKikimr::NMiniKQL::TType* GetRawOutputType() const = 0; + + /** + * Make input type schema for specified input as deduced by program optimizer. This schema is equivalent + * to one provided by input spec up to the order of the fields in structures. + */ + virtual NYT::TNode MakeInputSchema(ui32) const = 0; + /** + * Overload for single-input programs. + */ + virtual NYT::TNode MakeInputSchema() const = 0; + + /** + * Make output type schema as deduced by program optimizer. If output spec provides its own schema, than + * this schema is equivalent to one provided by output spec up to the order of the fields in structures. + */ + /// @{ + /** + * Overload for single-table output programs (i.e. output type is struct). + */ + virtual NYT::TNode MakeOutputSchema() const = 0; + /** + * Overload for multi-table output programs (i.e. output type is variant over tuple). + */ + virtual NYT::TNode MakeOutputSchema(ui32) const = 0; + /** + * Overload for multi-table output programs (i.e. output type is variant over struct). + */ + virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0; + /// @} + + /** + * Generates full output schema. For single-output programs returns struct type, for multi-output programs + * returns variant type. + * + * Warning: calling this function may result in extended memory usage for large number of output tables. + */ + virtual NYT::TNode MakeFullOutputSchema() const = 0; + + /** + * Get scoped alloc used in this worker. + */ + virtual NKikimr::NMiniKQL::TScopedAlloc& GetScopedAlloc() = 0; + + /** + * Get computation graph. + */ + virtual NKikimr::NMiniKQL::IComputationGraph& GetGraph() = 0; + + /** + * Get function registry for this worker. + */ + virtual const NKikimr::NMiniKQL::IFunctionRegistry& GetFunctionRegistry() const = 0; + + /** + * Get type environment for this worker. + */ + virtual NKikimr::NMiniKQL::TTypeEnvironment& GetTypeEnvironment() = 0; + + /** + * Get llvm settings for this worker. + */ + virtual const TString& GetLLVMSettings() const = 0; + + /** + * Get YT Type V3 flags + */ + virtual ui64 GetNativeYtTypeFlags() const = 0; + + /** + * Get time provider + */ + virtual ITimeProvider* GetTimeProvider() const = 0; + + /** + * Release all input data from worker state + */ + virtual void Invalidate() = 0; +}; + +/** + * Worker which operates in pull stream mode. + */ +class IPullStreamWorker: public IWorker { +public: + /** + * Set input computation graph node for specified input. The passed unboxed value should be a stream of + * structs. It should be created via the allocator associated with this very worker. + * This function can only be called once for each input. + */ + virtual void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) = 0; + + /** + * Get the output computation graph node. The returned node will be a stream of structs or variants. + * This function cannot be called before setting an input value. + */ + virtual NKikimr::NUdf::TUnboxedValue& GetOutput() = 0; +}; + +/** + * Worker which operates in pull list mode. + */ +class IPullListWorker: public IWorker { +public: + /** + * Set input computation graph node for specified input. The passed unboxed value should be a list of + * structs. It should be created via the allocator associated with this very worker. + * This function can only be called once for each index. + */ + virtual void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) = 0; + + /** + * Get the output computation graph node. The returned node will be a list of structs or variants. + * This function cannot be called before setting an input value. + */ + virtual NKikimr::NUdf::TUnboxedValue& GetOutput() = 0; + + /** + * Get iterator over the output list. + */ + virtual NKikimr::NUdf::TUnboxedValue& GetOutputIterator() = 0; + + /** + * Reset iterator to the beginning of the output list. After calling this function, GetOutputIterator() + * will return a fresh iterator; all previously returned iterators will become invalid. + */ + virtual void ResetOutputIterator() = 0; +}; + +/** + * Worker which operates in push stream mode. + */ +class IPushStreamWorker: public IWorker { +public: + /** + * Set a consumer where the worker will relay its output. This function can only be called once. + */ + virtual void SetConsumer(THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>>) = 0; + + /** + * Push new value to the graph, than feed all new output to the consumer. Values cannot be pushed before + * assigning a consumer. + */ + virtual void Push(NKikimr::NUdf::TUnboxedValue&&) = 0; + + /** + * Send finish event and clear the computation graph. No new values will be accepted. + */ + virtual void OnFinish() = 0; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * Input specifications describe format for program input. They carry information about input data schema + * as well as the knowledge about how to convert input structures into unboxed values (data format which can be + * processed by the YQL runtime). + * + * Input spec defines the arguments of the program's Apply method. For example, a program + * with the protobuf input spec will accept a stream of protobuf messages while a program with the + * yson spec will accept an input stream (binary or text one). + * + * See documentation for input and output spec traits for hints on how to implement a custom specs. + */ +class TInputSpecBase { +protected: + mutable TVector<THashMap<TString, NYT::TNode>> AllVirtualColumns_; + +public: + virtual ~TInputSpecBase() = default; + +public: + /** + * Get input data schemas in YQL format (NB: not a YT format). Each item of the returned vector must + * describe a structure. + * + * Format of each item is approximately this one: + * + * @code + * [ + * 'StructType', + * [ + * ["Field1Name", ["DataType", "Int32"]], + * ["Field2Name", ["DataType", "String"]], + * ... + * ] + * ] + * @endcode + */ + virtual const TVector<NYT::TNode>& GetSchemas() const = 0; + // TODO: make a neat schema builder + + /** + * Get virtual columns for each input. + * + * Key of each mapping is column name, value is data schema in YQL format. + */ + const TVector<THashMap<TString, NYT::TNode>>& GetAllVirtualColumns() const { + if (AllVirtualColumns_.empty()) { + AllVirtualColumns_ = TVector<THashMap<TString, NYT::TNode>>(GetSchemas().size()); } - //////////////////////////////////////////////////////////////////////////////////////////////////// - - /** - * Logging options. - */ - struct TLoggingOptions final { - public: - /// Logging level for messages generated during compilation. - ELogPriority LogLevel; - - /// Where to write log messages. - IOutputStream* LogDestination; - - public: - TLoggingOptions(); - /** - * Set a new logging level. - * - * @return reference to self, to allow method chaining. - */ - TLoggingOptions& SetLogLevel(ELogPriority); - - /** - * Set a new logging destination. - * - * @return reference to self, to allow method chaining. - */ - TLoggingOptions& SetLogDestination(IOutputStream*); - }; - - /** - * General options for program factory. - */ - struct TProgramFactoryOptions final { - public: - /// Path to a directory with compiled UDFs. Leave empty to disable loading external UDFs. - TString UdfsDir; - - /// List of available external resources, e.g. files, UDFs, libraries. - TVector<NUserData::TUserData> UserData; - - /// LLVM settings. Assign "OFF" to disable LLVM, empty string for default settings. - TString LLVMSettings; - - /// Block engine settings. Assign "force" to unconditionally enable - /// it, "disable" for turn it off and "auto" to left the final - /// decision to the platform heuristics. - TString BlockEngineSettings; - - /// Output stream to dump the compiled and optimized expressions. - IOutputStream* ExprOutputStream; - - /// Provider for generic counters which can be used to export statistics from UDFs. - NKikimr::NUdf::ICountersProvider* CountersProvider; - - /// YT Type V3 flags for Skiff/Yson serialization. - ui64 NativeYtTypeFlags; - - /// Seed for deterministic time provider - TMaybe<ui64> DeterministicTimeProviderSeed; - - /// Use special system columns to support tables naming (supports non empty ``TablePath()``/``TableName()``) - bool UseSystemColumns; - - /// Reuse allocated workers - bool UseWorkerPool; - - /// Use Antlr4 parser (for migration) - bool UseAntlr4; - - /// Language version - TLangVersion LangVer; - - public: - TProgramFactoryOptions(); - - public: - /** - * Set language version for queries - * - * @return reference to self, to allow method chaining. - */ - TProgramFactoryOptions& SetLanguageVersion(TLangVersion langver); - - /** - * Set a new path to a directory with UDFs. - * - * @return reference to self, to allow method chaining. - */ - TProgramFactoryOptions& SetUDFsDir(TStringBuf); - - /** - * Add a new library to the UserData list. - * - * @param disposition where the resource resides, e.g. on filesystem, in memory, etc. - * NB: URL disposition is not supported. - * @param name name of the resource. - * @param content depending on disposition, either path to the resource or its content. - * @return reference to self, to allow method chaining. - */ - TProgramFactoryOptions& AddLibrary(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content); - - /** - * Add a new file to the UserData list. - * - * @param disposition where the resource resides, e.g. on filesystem, in memory, etc. - * NB: URL disposition is not supported. - * @param name name of the resource. - * @param content depending on disposition, either path to the resource or its content. - * @return reference to self, to allow method chaining. - */ - TProgramFactoryOptions& AddFile(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content); - - /** - * Add a new UDF to the UserData list. - * - * @param disposition where the resource resides, e.g. on filesystem, in memory, etc. - * NB: URL disposition is not supported. - * @param name name of the resource. - * @param content depending on disposition, either path to the resource or its content. - * @return reference to self, to allow method chaining. - */ - TProgramFactoryOptions& AddUDF(NUserData::EDisposition disposition, TStringBuf name, TStringBuf content); - - /** - * Set new LLVM settings. - * - * @return reference to self, to allow method chaining. - */ - TProgramFactoryOptions& SetLLVMSettings(TStringBuf llvm_settings); - - /** - * Set new block engine settings. - * - * @return reference to self, to allow method chaining. - */ - TProgramFactoryOptions& SetBlockEngineSettings(TStringBuf blockEngineSettings); - - /** - * Set the stream to dump the compiled and optimized expressions. - * - * @return reference to self, to allow method chaining. - */ - TProgramFactoryOptions& SetExprOutputStream(IOutputStream* exprOutputStream); - - /** - * Set new counters provider. Passed pointer should stay alive for as long as the processor factory - * stays alive. - * - * @return reference to self, to allow method chaining. - */ - TProgramFactoryOptions& SetCountersProvider(NKikimr::NUdf::ICountersProvider* countersProvider); - - /** - * Set new YT Type V3 mode. Deprecated method. Use SetNativeYtTypeFlags instead - * - * @return reference to self, to allow method chaining. - */ - TProgramFactoryOptions& SetUseNativeYtTypes(bool useNativeTypes); - - /** - * Set YT Type V3 flags. - * - * @return reference to self, to allow method chaining. - */ - TProgramFactoryOptions& SetNativeYtTypeFlags(ui64 nativeTypeFlags); - - /** - * Set seed for deterministic time provider. - * - * @return reference to self, to allow method chaining. - */ - TProgramFactoryOptions& SetDeterministicTimeProviderSeed(TMaybe<ui64> seed); - - /** - * Set new flag whether to allow using system columns or not. - * - * @return reference to self, to allow method chaining. - */ - TProgramFactoryOptions& SetUseSystemColumns(bool useSystemColumns); - - /** - * Set new flag whether to allow reusing workers or not. - * - * @return reference to self, to allow method chaining. - */ - TProgramFactoryOptions& SetUseWorkerPool(bool useWorkerPool); - }; - - //////////////////////////////////////////////////////////////////////////////////////////////////// - - /** - * What exactly are we parsing: SQL or an s-expression. - */ - enum class ETranslationMode { - SQL /* "SQL" */, - SExpr /* "s-expression" */, - Mkql /* "mkql" */, - PG /* PostgreSQL */ - }; - - /** - * A facility for compiling sql and s-expressions and making programs from them. - */ - class IProgramFactory: public TThrRefBase { - protected: - virtual IPullStreamWorkerFactoryPtr MakePullStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0; - virtual IPullListWorkerFactoryPtr MakePullListWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0; - virtual IPushStreamWorkerFactoryPtr MakePushStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) = 0; - - public: - /** - * Add new udf module. It's not specified whether adding new modules will affect existing programs - * (theoretical answer is 'no'). - */ - virtual void AddUdfModule(const TStringBuf&, NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&&) = 0; - // TODO: support setting udf modules via factory options. - - /** - * Set new counters provider, override one that was specified via factory options. Note that existing - * programs will still reference the previous provider. - */ - virtual void SetCountersProvider(NKikimr::NUdf::ICountersProvider*) = 0; - // TODO: support setting providers via factory options. - - template <typename TInputSpec, typename TOutputSpec> - THolder<TPullStreamProgram<TInputSpec, TOutputSpec>> MakePullStreamProgram( - TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1 - ) { - auto workerFactory = MakePullStreamWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion); - return MakeHolder<TPullStreamProgram<TInputSpec, TOutputSpec>>(std::move(inputSpec), std::move(outputSpec), workerFactory); - } - - template <typename TInputSpec, typename TOutputSpec> - THolder<TPullListProgram<TInputSpec, TOutputSpec>> MakePullListProgram( - TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1 - ) { - auto workerFactory = MakePullListWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion); - return MakeHolder<TPullListProgram<TInputSpec, TOutputSpec>>(std::move(inputSpec), std::move(outputSpec), workerFactory); - } - - template <typename TInputSpec, typename TOutputSpec> - THolder<TPushStreamProgram<TInputSpec, TOutputSpec>> MakePushStreamProgram( - TInputSpec inputSpec, TOutputSpec outputSpec, TString query, ETranslationMode mode = ETranslationMode::SQL, ui16 syntaxVersion = 1 - ) { - auto workerFactory = MakePushStreamWorkerFactory(inputSpec, outputSpec, std::move(query), mode, syntaxVersion); - return MakeHolder<TPushStreamProgram<TInputSpec, TOutputSpec>>(std::move(inputSpec), std::move(outputSpec), workerFactory); - } - }; - - //////////////////////////////////////////////////////////////////////////////////////////////////// - - /** - * A facility for creating workers. Despite being a part of a public API, worker factory is not used directly. - */ - class IWorkerFactory: public std::enable_shared_from_this<IWorkerFactory> { - public: - virtual ~IWorkerFactory() = default; - /** - * Get input column names for specified input that are actually used in the query. - */ - virtual const THashSet<TString>& GetUsedColumns(ui32) const = 0; - /** - * Overload for single-input programs. - */ - virtual const THashSet<TString>& GetUsedColumns() const = 0; - - /** - * Make input type schema for specified input as deduced by program optimizer. This schema is equivalent - * to one provided by input spec up to the order of the fields in structures. - */ - virtual NYT::TNode MakeInputSchema(ui32) const = 0; - /** - * Overload for single-input programs. - */ - virtual NYT::TNode MakeInputSchema() const = 0; - - /** - * Make output type schema as deduced by program optimizer. If output spec provides its own schema, than - * this schema is equivalent to one provided by output spec up to the order of the fields in structures. - */ - /// @{ - /** - * Overload for single-table output programs (i.e. output type is struct). - */ - virtual NYT::TNode MakeOutputSchema() const = 0; - /** - * Overload for multi-table output programs (i.e. output type is variant over tuple). - */ - virtual NYT::TNode MakeOutputSchema(ui32) const = 0; - /** - * Overload for multi-table output programs (i.e. output type is variant over struct). - */ - virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0; - /// @} - - /** - * Make full output schema. For single-output programs returns struct type, for multi-output programs - * returns variant type. - * - * Warning: calling this function may result in extended memory usage for large number of output tables. - */ - virtual NYT::TNode MakeFullOutputSchema() const = 0; - - /** - * Get compilation issues - */ - virtual TIssues GetIssues() const = 0; - - /** - * Get precompiled mkql program - */ - virtual TString GetCompiledProgram() = 0; - - /** - * Return a worker to the factory for possible reuse - */ - virtual void ReturnWorker(IWorker* worker) = 0; - }; - - class TReleaseWorker { - public: - template <class T> - static inline void Destroy(T* t) noexcept { - t->Release(); - } - }; - - template <class T> - using TWorkerHolder = THolder<T, TReleaseWorker>; - - /** - * Factory for generating pull stream workers. - */ - class IPullStreamWorkerFactory: public IWorkerFactory { - public: - /** - * Create a new pull stream worker. - */ - virtual TWorkerHolder<IPullStreamWorker> MakeWorker() = 0; - }; - - /** - * Factory for generating pull list workers. - */ - class IPullListWorkerFactory: public IWorkerFactory { - public: - /** - * Create a new pull list worker. - */ - virtual TWorkerHolder<IPullListWorker> MakeWorker() = 0; - }; - - /** - * Factory for generating push stream workers. - */ - class IPushStreamWorkerFactory: public IWorkerFactory { - public: - /** - * Create a new push stream worker. - */ - virtual TWorkerHolder<IPushStreamWorker> MakeWorker() = 0; - }; - - //////////////////////////////////////////////////////////////////////////////////////////////////// - - /** - * Worker is a central part of any program instance. It contains current computation state - * (called computation graph) and objects required to work with it, including an allocator for unboxed values. - * - * Usually, users do not interact with workers directly. They use program instance entry points such as streams - * and consumers instead. The only case when one would have to to interact with workers is when implementing - * custom io-specification. - */ - class IWorker { - protected: - friend class TReleaseWorker; - /** - * Cleanup the worker and return to a worker factory for reuse - */ - virtual void Release() = 0; - - public: - virtual ~IWorker() = default; - - public: - /** - * Number of inputs for this program. - */ - virtual ui32 GetInputsCount() const = 0; - - /** - * MiniKQL input struct type of specified input for this program. Type is equivalent to the deduced input - * schema (see IWorker::MakeInputSchema()) - * - * If ``original`` is set to ``true``, returns type without virtual system columns. - */ - virtual const NKikimr::NMiniKQL::TStructType* GetInputType(ui32, bool original = false) const = 0; - /** - * Overload for single-input programs. - */ - virtual const NKikimr::NMiniKQL::TStructType* GetInputType(bool original = false) const = 0; - - /** - * MiniKQL input struct type of the specified input for this program. - * The returned type is the actual type of the specified input node. - */ - virtual const NKikimr::NMiniKQL::TStructType* GetRawInputType(ui32) const = 0; - /** - * Overload for single-input programs. - */ - virtual const NKikimr::NMiniKQL::TStructType* GetRawInputType() const = 0; - - /** - * MiniKQL output struct type for this program. The returned type is equivalent to the deduced output - * schema (see IWorker::MakeFullOutputSchema()). - */ - virtual const NKikimr::NMiniKQL::TType* GetOutputType() const = 0; - - /** - * MiniKQL output struct type for this program. The returned type is - * the actual type of the root node. - */ - virtual const NKikimr::NMiniKQL::TType* GetRawOutputType() const = 0; - - /** - * Make input type schema for specified input as deduced by program optimizer. This schema is equivalent - * to one provided by input spec up to the order of the fields in structures. - */ - virtual NYT::TNode MakeInputSchema(ui32) const = 0; - /** - * Overload for single-input programs. - */ - virtual NYT::TNode MakeInputSchema() const = 0; - - /** - * Make output type schema as deduced by program optimizer. If output spec provides its own schema, than - * this schema is equivalent to one provided by output spec up to the order of the fields in structures. - */ - /// @{ - /** - * Overload for single-table output programs (i.e. output type is struct). - */ - virtual NYT::TNode MakeOutputSchema() const = 0; - /** - * Overload for multi-table output programs (i.e. output type is variant over tuple). - */ - virtual NYT::TNode MakeOutputSchema(ui32) const = 0; - /** - * Overload for multi-table output programs (i.e. output type is variant over struct). - */ - virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0; - /// @} - - /** - * Generates full output schema. For single-output programs returns struct type, for multi-output programs - * returns variant type. - * - * Warning: calling this function may result in extended memory usage for large number of output tables. - */ - virtual NYT::TNode MakeFullOutputSchema() const = 0; - - /** - * Get scoped alloc used in this worker. - */ - virtual NKikimr::NMiniKQL::TScopedAlloc& GetScopedAlloc() = 0; - - /** - * Get computation graph. - */ - virtual NKikimr::NMiniKQL::IComputationGraph& GetGraph() = 0; - - /** - * Get function registry for this worker. - */ - virtual const NKikimr::NMiniKQL::IFunctionRegistry& GetFunctionRegistry() const = 0; - - /** - * Get type environment for this worker. - */ - virtual NKikimr::NMiniKQL::TTypeEnvironment& GetTypeEnvironment() = 0; - - /** - * Get llvm settings for this worker. - */ - virtual const TString& GetLLVMSettings() const = 0; - - /** - * Get YT Type V3 flags - */ - virtual ui64 GetNativeYtTypeFlags() const = 0; - - /** - * Get time provider - */ - virtual ITimeProvider* GetTimeProvider() const = 0; - - /** - * Release all input data from worker state - */ - virtual void Invalidate() = 0; - }; - - /** - * Worker which operates in pull stream mode. - */ - class IPullStreamWorker: public IWorker { - public: - /** - * Set input computation graph node for specified input. The passed unboxed value should be a stream of - * structs. It should be created via the allocator associated with this very worker. - * This function can only be called once for each input. - */ - virtual void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) = 0; - - /** - * Get the output computation graph node. The returned node will be a stream of structs or variants. - * This function cannot be called before setting an input value. - */ - virtual NKikimr::NUdf::TUnboxedValue& GetOutput() = 0; - }; - - /** - * Worker which operates in pull list mode. - */ - class IPullListWorker: public IWorker { - public: - /** - * Set input computation graph node for specified input. The passed unboxed value should be a list of - * structs. It should be created via the allocator associated with this very worker. - * This function can only be called once for each index. - */ - virtual void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) = 0; - - /** - * Get the output computation graph node. The returned node will be a list of structs or variants. - * This function cannot be called before setting an input value. - */ - virtual NKikimr::NUdf::TUnboxedValue& GetOutput() = 0; - - /** - * Get iterator over the output list. - */ - virtual NKikimr::NUdf::TUnboxedValue& GetOutputIterator() = 0; - - /** - * Reset iterator to the beginning of the output list. After calling this function, GetOutputIterator() - * will return a fresh iterator; all previously returned iterators will become invalid. - */ - virtual void ResetOutputIterator() = 0; - }; - - /** - * Worker which operates in push stream mode. - */ - class IPushStreamWorker: public IWorker { - public: - /** - * Set a consumer where the worker will relay its output. This function can only be called once. - */ - virtual void SetConsumer(THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>>) = 0; - - /** - * Push new value to the graph, than feed all new output to the consumer. Values cannot be pushed before - * assigning a consumer. - */ - virtual void Push(NKikimr::NUdf::TUnboxedValue&&) = 0; - - /** - * Send finish event and clear the computation graph. No new values will be accepted. - */ - virtual void OnFinish() = 0; - }; - - //////////////////////////////////////////////////////////////////////////////////////////////////// - - /** - * Input specifications describe format for program input. They carry information about input data schema - * as well as the knowledge about how to convert input structures into unboxed values (data format which can be - * processed by the YQL runtime). - * - * Input spec defines the arguments of the program's Apply method. For example, a program - * with the protobuf input spec will accept a stream of protobuf messages while a program with the - * yson spec will accept an input stream (binary or text one). - * - * See documentation for input and output spec traits for hints on how to implement a custom specs. - */ - class TInputSpecBase { - protected: - mutable TVector<THashMap<TString, NYT::TNode>> AllVirtualColumns_; - - public: - virtual ~TInputSpecBase() = default; - - public: - /** - * Get input data schemas in YQL format (NB: not a YT format). Each item of the returned vector must - * describe a structure. - * - * Format of each item is approximately this one: - * - * @code - * [ - * 'StructType', - * [ - * ["Field1Name", ["DataType", "Int32"]], - * ["Field2Name", ["DataType", "String"]], - * ... - * ] - * ] - * @endcode - */ - virtual const TVector<NYT::TNode>& GetSchemas() const = 0; - // TODO: make a neat schema builder - - /** - * Get virtual columns for each input. - * - * Key of each mapping is column name, value is data schema in YQL format. - */ - const TVector<THashMap<TString, NYT::TNode>>& GetAllVirtualColumns() const { - if (AllVirtualColumns_.empty()) { - AllVirtualColumns_ = TVector<THashMap<TString, NYT::TNode>>(GetSchemas().size()); - } - - return AllVirtualColumns_; - } - - virtual bool ProvidesBlocks() const { return false; } - }; - - /** - * Output specifications describe format for program output. Like input specifications, they cary knowledge - * about program output type and how to convert unboxed values into that type. - */ - class TOutputSpecBase { - private: - TMaybe<THashSet<TString>> OutputColumnsFilter_; - - public: - virtual ~TOutputSpecBase() = default; - - public: - /** - * Get output data schema in YQL format (NB: not a YT format). The returned value must describe a structure - * or a variant made of structures for fulti-table outputs (note: not all specs support multi-table output). - * - * See docs for the input spec's GetSchemas(). - * - * Also TNode entity could be returned (NYT::TNode::CreateEntity()), - * in which case output schema would be inferred from query and could be - * obtained by Program::GetOutputSchema() call. - */ - virtual const NYT::TNode& GetSchema() const = 0; - - /** - * Get an output columns filter. - * - * Output columns filter is a set of column names that should be left in the output. All columns that are - * not in this set will not be calculated. Depending on the output schema, they will be either removed - * completely (for optional columns) or filled with defaults (for required columns). - */ - const TMaybe<THashSet<TString>>& GetOutputColumnsFilter() const { - return OutputColumnsFilter_; - } - - /** - * Set new output columns filter. - */ - void SetOutputColumnsFilter(const TMaybe<THashSet<TString>>& outputColumnsFilter) { - OutputColumnsFilter_ = outputColumnsFilter; - } - - virtual bool AcceptsBlocks() const { return false; } - }; - - //////////////////////////////////////////////////////////////////////////////////////////////////// - - /** - * Input spec traits provide information on how to process program input. - * - * Each input spec should create a template specialization for this class, in which it should provide several - * static variables and functions. - * - * For example, a hypothetical example of implementing a JSON input spec would look like this: - * - * @code - * class TJsonInputSpec: public TInputSpecBase { - * // whatever magic you require for this spec - * }; - * - * template <> - * class TInputSpecTraits<TJsonInputSpec> { - * // write here four constants, one typedef and three static functions described below - * }; - * @endcode - * - * @tparam T input spec type. - */ - template <typename T> - struct TInputSpecTraits { - /// Safety flag which should be set to false in all template specializations of this class. Attempt to - /// build a program using a spec with `IsPartial=true` will result in compilation error. - static const constexpr bool IsPartial = true; - - /// Indicates whether this spec supports pull stream mode. - static const constexpr bool SupportPullStreamMode = false; - /// Indicates whether this spec supports pull list mode. - static const constexpr bool SupportPullListMode = false; - /// Indicates whether this spec supports push stream mode. - static const constexpr bool SupportPushStreamMode = false; - - /// For push mode, indicates the return type of the builder's Process function. - using TConsumerType = void; - - /// For pull stream mode, should take an input spec, a pull stream worker and whatever the user passed - /// to the program's Apply function, create an unboxed values with a custom stream implementations - /// and pass it to the worker's SetInput function for each input. - template <typename ...A> - static void PreparePullStreamWorker(const T&, IPullStreamWorker*, A&&...) { - Y_UNREACHABLE(); - } - - /// For pull list mode, should take an input spec, a pull list worker and whatever the user passed - /// to the program's Apply function, create an unboxed values with a custom list implementations - /// and pass it to the worker's SetInput function for each input. - template <typename ...A> - static void PreparePullListWorker(const T&, IPullListWorker*, A&&...) { - Y_UNREACHABLE(); - } - - /// For push stream mode, should take an input spec and a worker and create a consumer which will - /// be returned to the user. The consumer should keep the worker alive until its own destruction. - /// The return type of this function should exactly match the one defined in ConsumerType typedef. - static TConsumerType MakeConsumer(const T&, TWorkerHolder<IPushStreamWorker>) { - Y_UNREACHABLE(); - } - }; - - /** - * Output spec traits provide information on how to process program output. Like with input specs, each output - * spec requires an appropriate template specialization of this class. - * - * @tparam T output spec type. - */ - template <typename T> - struct TOutputSpecTraits { - /// Safety flag which should be set to false in all template specializations of this class. Attempt to - /// build a program using a spec with `IsPartial=false` will result in compilation error. - static const constexpr bool IsPartial = true; - - /// Indicates whether this spec supports pull stream mode. - static const constexpr bool SupportPullStreamMode = false; - /// Indicates whether this spec supports pull list mode. - static const constexpr bool SupportPullListMode = false; - /// Indicates whether this spec supports push stream mode. - static const constexpr bool SupportPushStreamMode = false; - - /// For pull stream mode, indicates the return type of the program's Apply function. - using TPullStreamReturnType = void; - - /// For pull list mode, indicates the return type of the program's Apply function. - using TPullListReturnType = void; - - /// For pull stream mode, should take an output spec and a worker and build a stream which will be returned - /// to the user. The return type of this function must match the one specified in the PullStreamReturnType. - static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const T&, TWorkerHolder<IPullStreamWorker>) { - Y_UNREACHABLE(); - } - - /// For pull list mode, should take an output spec and a worker and build a list which will be returned - /// to the user. The return type of this function must match the one specified in the PullListReturnType. - static TPullListReturnType ConvertPullListWorkerToOutputType(const T&, TWorkerHolder<IPullListWorker>) { - Y_UNREACHABLE(); - } - - /// For push stream mode, should take an output spec, a worker and whatever arguments the user passed - /// to the program's Apply function, create a consumer for unboxed values and pass it to the worker's - /// SetConsumer function. - template <typename ...A> - static void SetConsumerToWorker(const T&, IPushStreamWorker*, A&&...) { - Y_UNREACHABLE(); - } - }; - - //////////////////////////////////////////////////////////////////////////////////////////////////// + return AllVirtualColumns_; + } + + virtual bool ProvidesBlocks() const { + return false; + } +}; + +/** + * Output specifications describe format for program output. Like input specifications, they cary knowledge + * about program output type and how to convert unboxed values into that type. + */ +class TOutputSpecBase { +private: + TMaybe<THashSet<TString>> OutputColumnsFilter_; + +public: + virtual ~TOutputSpecBase() = default; + +public: + /** + * Get output data schema in YQL format (NB: not a YT format). The returned value must describe a structure + * or a variant made of structures for fulti-table outputs (note: not all specs support multi-table output). + * + * See docs for the input spec's GetSchemas(). + * + * Also TNode entity could be returned (NYT::TNode::CreateEntity()), + * in which case output schema would be inferred from query and could be + * obtained by Program::GetOutputSchema() call. + */ + virtual const NYT::TNode& GetSchema() const = 0; + + /** + * Get an output columns filter. + * + * Output columns filter is a set of column names that should be left in the output. All columns that are + * not in this set will not be calculated. Depending on the output schema, they will be either removed + * completely (for optional columns) or filled with defaults (for required columns). + */ + const TMaybe<THashSet<TString>>& GetOutputColumnsFilter() const { + return OutputColumnsFilter_; + } + + /** + * Set new output columns filter. + */ + void SetOutputColumnsFilter(const TMaybe<THashSet<TString>>& outputColumnsFilter) { + OutputColumnsFilter_ = outputColumnsFilter; + } + + virtual bool AcceptsBlocks() const { + return false; + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * Input spec traits provide information on how to process program input. + * + * Each input spec should create a template specialization for this class, in which it should provide several + * static variables and functions. + * + * For example, a hypothetical example of implementing a JSON input spec would look like this: + * + * @code + * class TJsonInputSpec: public TInputSpecBase { + * // whatever magic you require for this spec + * }; + * + * template <> + * class TInputSpecTraits<TJsonInputSpec> { + * // write here four constants, one typedef and three static functions described below + * }; + * @endcode + * + * @tparam T input spec type. + */ +template <typename T> +struct TInputSpecTraits { + /// Safety flag which should be set to false in all template specializations of this class. Attempt to + /// build a program using a spec with `IsPartial=true` will result in compilation error. + static const constexpr bool IsPartial = true; + + /// Indicates whether this spec supports pull stream mode. + static const constexpr bool SupportPullStreamMode = false; + /// Indicates whether this spec supports pull list mode. + static const constexpr bool SupportPullListMode = false; + /// Indicates whether this spec supports push stream mode. + static const constexpr bool SupportPushStreamMode = false; + + /// For push mode, indicates the return type of the builder's Process function. + using TConsumerType = void; + + /// For pull stream mode, should take an input spec, a pull stream worker and whatever the user passed + /// to the program's Apply function, create an unboxed values with a custom stream implementations + /// and pass it to the worker's SetInput function for each input. + template <typename... A> + static void PreparePullStreamWorker(const T&, IPullStreamWorker*, A&&...) { + Y_UNREACHABLE(); + } + + /// For pull list mode, should take an input spec, a pull list worker and whatever the user passed + /// to the program's Apply function, create an unboxed values with a custom list implementations + /// and pass it to the worker's SetInput function for each input. + template <typename... A> + static void PreparePullListWorker(const T&, IPullListWorker*, A&&...) { + Y_UNREACHABLE(); + } + + /// For push stream mode, should take an input spec and a worker and create a consumer which will + /// be returned to the user. The consumer should keep the worker alive until its own destruction. + /// The return type of this function should exactly match the one defined in ConsumerType typedef. + static TConsumerType MakeConsumer(const T&, TWorkerHolder<IPushStreamWorker>) { + Y_UNREACHABLE(); + } +}; + +/** + * Output spec traits provide information on how to process program output. Like with input specs, each output + * spec requires an appropriate template specialization of this class. + * + * @tparam T output spec type. + */ +template <typename T> +struct TOutputSpecTraits { + /// Safety flag which should be set to false in all template specializations of this class. Attempt to + /// build a program using a spec with `IsPartial=false` will result in compilation error. + static const constexpr bool IsPartial = true; + + /// Indicates whether this spec supports pull stream mode. + static const constexpr bool SupportPullStreamMode = false; + /// Indicates whether this spec supports pull list mode. + static const constexpr bool SupportPullListMode = false; + /// Indicates whether this spec supports push stream mode. + static const constexpr bool SupportPushStreamMode = false; + + /// For pull stream mode, indicates the return type of the program's Apply function. + using TPullStreamReturnType = void; + + /// For pull list mode, indicates the return type of the program's Apply function. + using TPullListReturnType = void; + + /// For pull stream mode, should take an output spec and a worker and build a stream which will be returned + /// to the user. The return type of this function must match the one specified in the PullStreamReturnType. + static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const T&, TWorkerHolder<IPullStreamWorker>) { + Y_UNREACHABLE(); + } + + /// For pull list mode, should take an output spec and a worker and build a list which will be returned + /// to the user. The return type of this function must match the one specified in the PullListReturnType. + static TPullListReturnType ConvertPullListWorkerToOutputType(const T&, TWorkerHolder<IPullListWorker>) { + Y_UNREACHABLE(); + } + + /// For push stream mode, should take an output spec, a worker and whatever arguments the user passed + /// to the program's Apply function, create a consumer for unboxed values and pass it to the worker's + /// SetConsumer function. + template <typename... A> + static void SetConsumerToWorker(const T&, IPushStreamWorker*, A&&...) { + Y_UNREACHABLE(); + } +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// #define NOT_SPEC_MSG(spec_type) "passed class should be derived from " spec_type " spec base" #define PARTIAL_SPEC_MSG(spec_type) "this " spec_type " spec does not define its traits. Make sure you've passed " \ @@ -996,204 +996,203 @@ namespace NYql { "a spec traits template specialization" #define UNSUPPORTED_MODE_MSG(spec_type, mode) "this " spec_type " spec does not support " mode " mode" - class IProgram { - public: - virtual ~IProgram() = default; - - public: - virtual const TInputSpecBase& GetInputSpecBase() const = 0; - virtual const TOutputSpecBase& GetOutputSpecBase() const = 0; - virtual const THashSet<TString>& GetUsedColumns(ui32) const = 0; - virtual const THashSet<TString>& GetUsedColumns() const = 0; - virtual NYT::TNode MakeInputSchema(ui32) const = 0; - virtual NYT::TNode MakeInputSchema() const = 0; - virtual NYT::TNode MakeOutputSchema() const = 0; - virtual NYT::TNode MakeOutputSchema(ui32) const = 0; - virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0; - virtual NYT::TNode MakeFullOutputSchema() const = 0; - virtual TIssues GetIssues() const = 0; - virtual TString GetCompiledProgram() = 0; - - inline void MergeUsedColumns(THashSet<TString>& columns, ui32 inputIndex) { - const auto& usedColumns = GetUsedColumns(inputIndex); - columns.insert(usedColumns.begin(), usedColumns.end()); - } - - inline void MergeUsedColumns(THashSet<TString>& columns) { - const auto& usedColumns = GetUsedColumns(); - columns.insert(usedColumns.begin(), usedColumns.end()); - } - }; - - template <typename TInputSpec, typename TOutputSpec, typename WorkerFactory> - class TProgramCommon: public IProgram { - static_assert(std::is_base_of<TInputSpecBase, TInputSpec>::value, NOT_SPEC_MSG("input")); - static_assert(std::is_base_of<TOutputSpecBase, TOutputSpec>::value, NOT_SPEC_MSG("output")); - - protected: - TInputSpec InputSpec_; - TOutputSpec OutputSpec_; - std::shared_ptr<WorkerFactory> WorkerFactory_; - - public: - explicit TProgramCommon( - TInputSpec inputSpec, - TOutputSpec outputSpec, - std::shared_ptr<WorkerFactory> workerFactory - ) - : InputSpec_(inputSpec) - , OutputSpec_(outputSpec) - , WorkerFactory_(std::move(workerFactory)) - { - } - - public: - const TInputSpec& GetInputSpec() const { - return InputSpec_; - } - - const TOutputSpec& GetOutputSpec() const { - return OutputSpec_; - } - - const TInputSpecBase& GetInputSpecBase() const override { - return InputSpec_; - } - - const TOutputSpecBase& GetOutputSpecBase() const override { - return OutputSpec_; - } - - const THashSet<TString>& GetUsedColumns(ui32 inputIndex) const override { - return WorkerFactory_->GetUsedColumns(inputIndex); - } - - const THashSet<TString>& GetUsedColumns() const override { - return WorkerFactory_->GetUsedColumns(); - } - - NYT::TNode MakeInputSchema(ui32 inputIndex) const override { - return WorkerFactory_->MakeInputSchema(inputIndex); - } - - NYT::TNode MakeInputSchema() const override { - return WorkerFactory_->MakeInputSchema(); - } - - NYT::TNode MakeOutputSchema() const override { - return WorkerFactory_->MakeOutputSchema(); - } - - NYT::TNode MakeOutputSchema(ui32 outputIndex) const override { - return WorkerFactory_->MakeOutputSchema(outputIndex); - } - - NYT::TNode MakeOutputSchema(TStringBuf outputName) const override { - return WorkerFactory_->MakeOutputSchema(outputName); - } - - NYT::TNode MakeFullOutputSchema() const override { - return WorkerFactory_->MakeFullOutputSchema(); - } - - TIssues GetIssues() const override { - return WorkerFactory_->GetIssues(); - } - - TString GetCompiledProgram() override { - return WorkerFactory_->GetCompiledProgram(); - } - }; - - template <typename TInputSpec, typename TOutputSpec> - class TPullStreamProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory> { - using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::WorkerFactory_; - using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::InputSpec_; - using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::OutputSpec_; - - public: - using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::TProgramCommon; - - public: - template <typename ...T> - typename TOutputSpecTraits<TOutputSpec>::TPullStreamReturnType Apply(T&& ... t) { - static_assert(!TInputSpecTraits<TInputSpec>::IsPartial, PARTIAL_SPEC_MSG("input")); - static_assert(!TOutputSpecTraits<TOutputSpec>::IsPartial, PARTIAL_SPEC_MSG("output")); - static_assert(TInputSpecTraits<TInputSpec>::SupportPullStreamMode, UNSUPPORTED_MODE_MSG("input", "pull stream")); - static_assert(TOutputSpecTraits<TOutputSpec>::SupportPullStreamMode, UNSUPPORTED_MODE_MSG("output", "pull stream")); - - auto worker = WorkerFactory_->MakeWorker(); - TInputSpecTraits<TInputSpec>::PreparePullStreamWorker(InputSpec_, worker.Get(), std::forward<T>(t)...); - return TOutputSpecTraits<TOutputSpec>::ConvertPullStreamWorkerToOutputType(OutputSpec_, std::move(worker)); - } - }; - - template <typename TInputSpec, typename TOutputSpec> - class TPullListProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory> { - using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::WorkerFactory_; - using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::InputSpec_; - using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::OutputSpec_; - - public: - using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::TProgramCommon; - - public: - template <typename ...T> - typename TOutputSpecTraits<TOutputSpec>::TPullListReturnType Apply(T&& ... t) { - static_assert(!TInputSpecTraits<TInputSpec>::IsPartial, PARTIAL_SPEC_MSG("input")); - static_assert(!TOutputSpecTraits<TOutputSpec>::IsPartial, PARTIAL_SPEC_MSG("output")); - static_assert(TInputSpecTraits<TInputSpec>::SupportPullListMode, UNSUPPORTED_MODE_MSG("input", "pull list")); - static_assert(TOutputSpecTraits<TOutputSpec>::SupportPullListMode, UNSUPPORTED_MODE_MSG("output", "pull list")); - - auto worker = WorkerFactory_->MakeWorker(); - TInputSpecTraits<TInputSpec>::PreparePullListWorker(InputSpec_, worker.Get(), std::forward<T>(t)...); - return TOutputSpecTraits<TOutputSpec>::ConvertPullListWorkerToOutputType(OutputSpec_, std::move(worker)); - } - }; - - template <typename TInputSpec, typename TOutputSpec> - class TPushStreamProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory> { - using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::WorkerFactory_; - using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::InputSpec_; - using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::OutputSpec_; - - public: - using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::TProgramCommon; - - public: - template <typename ...T> - typename TInputSpecTraits<TInputSpec>::TConsumerType Apply(T&& ... t) { - static_assert(!TInputSpecTraits<TInputSpec>::IsPartial, PARTIAL_SPEC_MSG("input")); - static_assert(!TOutputSpecTraits<TOutputSpec>::IsPartial, PARTIAL_SPEC_MSG("output")); - static_assert(TInputSpecTraits<TInputSpec>::SupportPushStreamMode, UNSUPPORTED_MODE_MSG("input", "push stream")); - static_assert(TOutputSpecTraits<TOutputSpec>::SupportPushStreamMode, UNSUPPORTED_MODE_MSG("output", "push stream")); - - auto worker = WorkerFactory_->MakeWorker(); - TOutputSpecTraits<TOutputSpec>::SetConsumerToWorker(OutputSpec_, worker.Get(), std::forward<T>(t)...); - return TInputSpecTraits<TInputSpec>::MakeConsumer(InputSpec_, std::move(worker)); - } - }; +class IProgram { +public: + virtual ~IProgram() = default; + +public: + virtual const TInputSpecBase& GetInputSpecBase() const = 0; + virtual const TOutputSpecBase& GetOutputSpecBase() const = 0; + virtual const THashSet<TString>& GetUsedColumns(ui32) const = 0; + virtual const THashSet<TString>& GetUsedColumns() const = 0; + virtual NYT::TNode MakeInputSchema(ui32) const = 0; + virtual NYT::TNode MakeInputSchema() const = 0; + virtual NYT::TNode MakeOutputSchema() const = 0; + virtual NYT::TNode MakeOutputSchema(ui32) const = 0; + virtual NYT::TNode MakeOutputSchema(TStringBuf) const = 0; + virtual NYT::TNode MakeFullOutputSchema() const = 0; + virtual TIssues GetIssues() const = 0; + virtual TString GetCompiledProgram() = 0; + + inline void MergeUsedColumns(THashSet<TString>& columns, ui32 inputIndex) { + const auto& usedColumns = GetUsedColumns(inputIndex); + columns.insert(usedColumns.begin(), usedColumns.end()); + } + + inline void MergeUsedColumns(THashSet<TString>& columns) { + const auto& usedColumns = GetUsedColumns(); + columns.insert(usedColumns.begin(), usedColumns.end()); + } +}; + +template <typename TInputSpec, typename TOutputSpec, typename WorkerFactory> +class TProgramCommon: public IProgram { + static_assert(std::is_base_of<TInputSpecBase, TInputSpec>::value, NOT_SPEC_MSG("input")); + static_assert(std::is_base_of<TOutputSpecBase, TOutputSpec>::value, NOT_SPEC_MSG("output")); + +protected: + TInputSpec InputSpec_; + TOutputSpec OutputSpec_; + std::shared_ptr<WorkerFactory> WorkerFactory_; + +public: + explicit TProgramCommon( + TInputSpec inputSpec, + TOutputSpec outputSpec, + std::shared_ptr<WorkerFactory> workerFactory) + : InputSpec_(inputSpec) + , OutputSpec_(outputSpec) + , WorkerFactory_(std::move(workerFactory)) + { + } + +public: + const TInputSpec& GetInputSpec() const { + return InputSpec_; + } + + const TOutputSpec& GetOutputSpec() const { + return OutputSpec_; + } + + const TInputSpecBase& GetInputSpecBase() const override { + return InputSpec_; + } + + const TOutputSpecBase& GetOutputSpecBase() const override { + return OutputSpec_; + } + + const THashSet<TString>& GetUsedColumns(ui32 inputIndex) const override { + return WorkerFactory_->GetUsedColumns(inputIndex); + } + + const THashSet<TString>& GetUsedColumns() const override { + return WorkerFactory_->GetUsedColumns(); + } + + NYT::TNode MakeInputSchema(ui32 inputIndex) const override { + return WorkerFactory_->MakeInputSchema(inputIndex); + } + + NYT::TNode MakeInputSchema() const override { + return WorkerFactory_->MakeInputSchema(); + } + + NYT::TNode MakeOutputSchema() const override { + return WorkerFactory_->MakeOutputSchema(); + } + + NYT::TNode MakeOutputSchema(ui32 outputIndex) const override { + return WorkerFactory_->MakeOutputSchema(outputIndex); + } + + NYT::TNode MakeOutputSchema(TStringBuf outputName) const override { + return WorkerFactory_->MakeOutputSchema(outputName); + } + + NYT::TNode MakeFullOutputSchema() const override { + return WorkerFactory_->MakeFullOutputSchema(); + } + + TIssues GetIssues() const override { + return WorkerFactory_->GetIssues(); + } + + TString GetCompiledProgram() override { + return WorkerFactory_->GetCompiledProgram(); + } +}; + +template <typename TInputSpec, typename TOutputSpec> +class TPullStreamProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory> { + using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::WorkerFactory_; + using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::InputSpec_; + using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::OutputSpec_; + +public: + using TProgramCommon<TInputSpec, TOutputSpec, IPullStreamWorkerFactory>::TProgramCommon; + +public: + template <typename... T> + typename TOutputSpecTraits<TOutputSpec>::TPullStreamReturnType Apply(T&&... t) { + static_assert(!TInputSpecTraits<TInputSpec>::IsPartial, PARTIAL_SPEC_MSG("input")); + static_assert(!TOutputSpecTraits<TOutputSpec>::IsPartial, PARTIAL_SPEC_MSG("output")); + static_assert(TInputSpecTraits<TInputSpec>::SupportPullStreamMode, UNSUPPORTED_MODE_MSG("input", "pull stream")); + static_assert(TOutputSpecTraits<TOutputSpec>::SupportPullStreamMode, UNSUPPORTED_MODE_MSG("output", "pull stream")); + + auto worker = WorkerFactory_->MakeWorker(); + TInputSpecTraits<TInputSpec>::PreparePullStreamWorker(InputSpec_, worker.Get(), std::forward<T>(t)...); + return TOutputSpecTraits<TOutputSpec>::ConvertPullStreamWorkerToOutputType(OutputSpec_, std::move(worker)); + } +}; + +template <typename TInputSpec, typename TOutputSpec> +class TPullListProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory> { + using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::WorkerFactory_; + using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::InputSpec_; + using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::OutputSpec_; + +public: + using TProgramCommon<TInputSpec, TOutputSpec, IPullListWorkerFactory>::TProgramCommon; + +public: + template <typename... T> + typename TOutputSpecTraits<TOutputSpec>::TPullListReturnType Apply(T&&... t) { + static_assert(!TInputSpecTraits<TInputSpec>::IsPartial, PARTIAL_SPEC_MSG("input")); + static_assert(!TOutputSpecTraits<TOutputSpec>::IsPartial, PARTIAL_SPEC_MSG("output")); + static_assert(TInputSpecTraits<TInputSpec>::SupportPullListMode, UNSUPPORTED_MODE_MSG("input", "pull list")); + static_assert(TOutputSpecTraits<TOutputSpec>::SupportPullListMode, UNSUPPORTED_MODE_MSG("output", "pull list")); + + auto worker = WorkerFactory_->MakeWorker(); + TInputSpecTraits<TInputSpec>::PreparePullListWorker(InputSpec_, worker.Get(), std::forward<T>(t)...); + return TOutputSpecTraits<TOutputSpec>::ConvertPullListWorkerToOutputType(OutputSpec_, std::move(worker)); + } +}; + +template <typename TInputSpec, typename TOutputSpec> +class TPushStreamProgram final: public TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory> { + using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::WorkerFactory_; + using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::InputSpec_; + using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::OutputSpec_; + +public: + using TProgramCommon<TInputSpec, TOutputSpec, IPushStreamWorkerFactory>::TProgramCommon; + +public: + template <typename... T> + typename TInputSpecTraits<TInputSpec>::TConsumerType Apply(T&&... t) { + static_assert(!TInputSpecTraits<TInputSpec>::IsPartial, PARTIAL_SPEC_MSG("input")); + static_assert(!TOutputSpecTraits<TOutputSpec>::IsPartial, PARTIAL_SPEC_MSG("output")); + static_assert(TInputSpecTraits<TInputSpec>::SupportPushStreamMode, UNSUPPORTED_MODE_MSG("input", "push stream")); + static_assert(TOutputSpecTraits<TOutputSpec>::SupportPushStreamMode, UNSUPPORTED_MODE_MSG("output", "push stream")); + + auto worker = WorkerFactory_->MakeWorker(); + TOutputSpecTraits<TOutputSpec>::SetConsumerToWorker(OutputSpec_, worker.Get(), std::forward<T>(t)...); + return TInputSpecTraits<TInputSpec>::MakeConsumer(InputSpec_, std::move(worker)); + } +}; #undef NOT_SPEC_MSG #undef PARTIAL_SPEC_MSG #undef UNSUPPORTED_MODE_MSG - //////////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////////// - /** - * Configure global logging facilities. Affects all YQL modules. - */ - void ConfigureLogging(const TLoggingOptions& = {}); +/** + * Configure global logging facilities. Affects all YQL modules. + */ +void ConfigureLogging(const TLoggingOptions& = {}); - /** - * Create a new program factory. - * Custom logging initialization could be preformed by a call to the ConfigureLogging method beforehand. - * If the ConfigureLogging method has not been called the default logging initialization will be performed. - */ - IProgramFactoryPtr MakeProgramFactory(const TProgramFactoryOptions& = {}); - } -} +/** + * Create a new program factory. + * Custom logging initialization could be preformed by a call to the ConfigureLogging method beforehand. + * If the ConfigureLogging method has not been called the default logging initialization will be performed. + */ +IProgramFactoryPtr MakeProgramFactory(const TProgramFactoryOptions& = {}); +} // namespace NPureCalc +} // namespace NYql Y_DECLARE_OUT_SPEC(inline, NYql::NPureCalc::TCompileError, stream, value) { - stream << value.AsStrBuf() << Endl << "Issues:" << Endl << value.GetIssues() << Endl << Endl << "Yql:" << Endl <<value.GetYql(); + stream << value.AsStrBuf() << Endl << "Issues:" << Endl << value.GetIssues() << Endl << Endl << "Yql:" << Endl << value.GetYql(); } diff --git a/yql/essentials/public/purecalc/common/logger_init.cpp b/yql/essentials/public/purecalc/common/logger_init.cpp index 0c1e53d0338..ec3f66140bb 100644 --- a/yql/essentials/public/purecalc/common/logger_init.cpp +++ b/yql/essentials/public/purecalc/common/logger_init.cpp @@ -8,25 +8,25 @@ namespace NYql { namespace NPureCalc { namespace { - std::atomic_bool Initialized; -} +std::atomic_bool Initialized; +} // namespace - void InitLogging(const TLoggingOptions& options) { - NLog::InitLogger(options.LogDestination); - auto& logger = NLog::YqlLogger(); - logger.SetDefaultPriority(options.LogLevel); - for (int i = 0; i < NLog::EComponentHelpers::ToInt(NLog::EComponent::MaxValue); ++i) { - logger.SetComponentLevel((NLog::EComponent) i, (NLog::ELevel) options.LogLevel); - } - Initialized = true; +void InitLogging(const TLoggingOptions& options) { + NLog::InitLogger(options.LogDestination); + auto& logger = NLog::YqlLogger(); + logger.SetDefaultPriority(options.LogLevel); + for (int i = 0; i < NLog::EComponentHelpers::ToInt(NLog::EComponent::MaxValue); ++i) { + logger.SetComponentLevel((NLog::EComponent)i, (NLog::ELevel)options.LogLevel); } + Initialized = true; +} - void EnsureLoggingInitialized() { - if (Initialized.load()) { - return; - } - InitLogging(TLoggingOptions()); +void EnsureLoggingInitialized() { + if (Initialized.load()) { + return; } - -} + InitLogging(TLoggingOptions()); } + +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/logger_init.h b/yql/essentials/public/purecalc/common/logger_init.h index 039cbd44118..349d74b0025 100644 --- a/yql/essentials/public/purecalc/common/logger_init.h +++ b/yql/essentials/public/purecalc/common/logger_init.h @@ -3,8 +3,8 @@ #include "interface.h" namespace NYql { - namespace NPureCalc { - void InitLogging(const TLoggingOptions& options); - void EnsureLoggingInitialized(); - } -} +namespace NPureCalc { +void InitLogging(const TLoggingOptions& options); +void EnsureLoggingInitialized(); +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/names.cpp b/yql/essentials/public/purecalc/common/names.cpp index 5e8412a7b22..b678969daf6 100644 --- a/yql/essentials/public/purecalc/common/names.cpp +++ b/yql/essentials/public/purecalc/common/names.cpp @@ -3,17 +3,17 @@ #include <util/generic/strbuf.h> namespace NYql::NPureCalc { - const TStringBuf PurecalcSysColumnsPrefix = "_yql_sys_"; - const TStringBuf PurecalcSysColumnTablePath = "_yql_sys_tablepath"; - const TStringBuf PurecalcBlockColumnLength = "_yql_block_length"; +const TStringBuf PurecalcSysColumnsPrefix = "_yql_sys_"; +const TStringBuf PurecalcSysColumnTablePath = "_yql_sys_tablepath"; +const TStringBuf PurecalcBlockColumnLength = "_yql_block_length"; - const TStringBuf PurecalcDefaultCluster = "view"; - const TStringBuf PurecalcDefaultService = "data"; +const TStringBuf PurecalcDefaultCluster = "view"; +const TStringBuf PurecalcDefaultService = "data"; - const TStringBuf PurecalcInputCallableName = "Self"; - const TStringBuf PurecalcInputTablePrefix = "Input"; +const TStringBuf PurecalcInputCallableName = "Self"; +const TStringBuf PurecalcInputTablePrefix = "Input"; - const TStringBuf PurecalcBlockInputCallableName = "BlockSelf"; +const TStringBuf PurecalcBlockInputCallableName = "BlockSelf"; - const TStringBuf PurecalcUdfModulePrefix = "<purecalc>::"; -} +const TStringBuf PurecalcUdfModulePrefix = "<purecalc>::"; +} // namespace NYql::NPureCalc diff --git a/yql/essentials/public/purecalc/common/names.h b/yql/essentials/public/purecalc/common/names.h index b19c15ca4fe..fccfe7caad8 100644 --- a/yql/essentials/public/purecalc/common/names.h +++ b/yql/essentials/public/purecalc/common/names.h @@ -3,17 +3,17 @@ #include <util/generic/fwd.h> namespace NYql::NPureCalc { - extern const TStringBuf PurecalcSysColumnsPrefix; - extern const TStringBuf PurecalcSysColumnTablePath; - extern const TStringBuf PurecalcBlockColumnLength; +extern const TStringBuf PurecalcSysColumnsPrefix; +extern const TStringBuf PurecalcSysColumnTablePath; +extern const TStringBuf PurecalcBlockColumnLength; - extern const TStringBuf PurecalcDefaultCluster; - extern const TStringBuf PurecalcDefaultService; +extern const TStringBuf PurecalcDefaultCluster; +extern const TStringBuf PurecalcDefaultService; - extern const TStringBuf PurecalcInputCallableName; - extern const TStringBuf PurecalcInputTablePrefix; +extern const TStringBuf PurecalcInputCallableName; +extern const TStringBuf PurecalcInputTablePrefix; - extern const TStringBuf PurecalcBlockInputCallableName; +extern const TStringBuf PurecalcBlockInputCallableName; - extern const TStringBuf PurecalcUdfModulePrefix; -} +extern const TStringBuf PurecalcUdfModulePrefix; +} // namespace NYql::NPureCalc diff --git a/yql/essentials/public/purecalc/common/processor_mode.h b/yql/essentials/public/purecalc/common/processor_mode.h index 9bec87cadc9..49368c81c11 100644 --- a/yql/essentials/public/purecalc/common/processor_mode.h +++ b/yql/essentials/public/purecalc/common/processor_mode.h @@ -1,11 +1,11 @@ #pragma once namespace NYql { - namespace NPureCalc { - enum class EProcessorMode { - PullList, - PullStream, - PushStream - }; - } -} +namespace NPureCalc { +enum class EProcessorMode { + PullList, + PullStream, + PushStream +}; +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/program_factory.cpp b/yql/essentials/public/purecalc/common/program_factory.cpp index 320d1b6aa8a..1096f6acc4f 100644 --- a/yql/essentials/public/purecalc/common/program_factory.cpp +++ b/yql/essentials/public/purecalc/common/program_factory.cpp @@ -33,11 +33,10 @@ TProgramFactory::TProgramFactory(const TProgramFactoryOptions& options) } TVector<TString> UDFsPaths; - for (const auto& item: Options_.UserData) { + for (const auto& item : Options_.UserData) { if ( item.Type == NUserData::EType::UDF && - item.Disposition == NUserData::EDisposition::FILESYSTEM - ) { + item.Disposition == NUserData::EDisposition::FILESYSTEM) { UDFsPaths.push_back(item.Content); } } @@ -47,7 +46,8 @@ TProgramFactory::TProgramFactory(const TProgramFactoryOptions& options) } FuncRegistry_ = NKikimr::NMiniKQL::CreateFunctionRegistry( - &NYql::NBacktrace::KikimrBackTrace, NKikimr::NMiniKQL::CreateBuiltinRegistry(), false, UDFsPaths)->Clone(); + &NYql::NBacktrace::KikimrBackTrace, NKikimr::NMiniKQL::CreateBuiltinRegistry(), false, UDFsPaths) + ->Clone(); NKikimr::NMiniKQL::FillStaticModules(*FuncRegistry_); } @@ -57,11 +57,9 @@ TProgramFactory::~TProgramFactory() { void TProgramFactory::AddUdfModule( const TStringBuf& moduleName, - NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&& module -) { + NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&& module) { FuncRegistry_->AddModule( - TString::Join(PurecalcUdfModulePrefix, moduleName), moduleName, std::move(module) - ); + TString::Join(PurecalcUdfModulePrefix, moduleName), moduleName, std::move(module)); } void TProgramFactory::SetCountersProvider(NKikimr::NUdf::ICountersProvider* provider) { @@ -73,8 +71,7 @@ IPullStreamWorkerFactoryPtr TProgramFactory::MakePullStreamWorkerFactory( const TOutputSpecBase& outputSpec, TString query, ETranslationMode mode, - ui16 syntaxVersion -) { + ui16 syntaxVersion) { return std::make_shared<TPullStreamWorkerFactory>(TWorkerFactoryOptions( TIntrusivePtr<TProgramFactory>(this), inputSpec, @@ -95,8 +92,7 @@ IPullStreamWorkerFactoryPtr TProgramFactory::MakePullStreamWorkerFactory( Options_.DeterministicTimeProviderSeed, Options_.UseSystemColumns, Options_.UseWorkerPool, - Options_.UseAntlr4 - )); + Options_.UseAntlr4)); } IPullListWorkerFactoryPtr TProgramFactory::MakePullListWorkerFactory( @@ -104,8 +100,7 @@ IPullListWorkerFactoryPtr TProgramFactory::MakePullListWorkerFactory( const TOutputSpecBase& outputSpec, TString query, ETranslationMode mode, - ui16 syntaxVersion -) { + ui16 syntaxVersion) { return std::make_shared<TPullListWorkerFactory>(TWorkerFactoryOptions( TIntrusivePtr<TProgramFactory>(this), inputSpec, @@ -126,8 +121,7 @@ IPullListWorkerFactoryPtr TProgramFactory::MakePullListWorkerFactory( Options_.DeterministicTimeProviderSeed, Options_.UseSystemColumns, Options_.UseWorkerPool, - Options_.UseAntlr4 - )); + Options_.UseAntlr4)); } IPushStreamWorkerFactoryPtr TProgramFactory::MakePushStreamWorkerFactory( @@ -135,8 +129,7 @@ IPushStreamWorkerFactoryPtr TProgramFactory::MakePushStreamWorkerFactory( const TOutputSpecBase& outputSpec, TString query, ETranslationMode mode, - ui16 syntaxVersion -) { + ui16 syntaxVersion) { if (inputSpec.GetSchemas().size() > 1) { ythrow yexception() << "push stream mode doesn't support several inputs"; } @@ -161,6 +154,5 @@ IPushStreamWorkerFactoryPtr TProgramFactory::MakePushStreamWorkerFactory( Options_.DeterministicTimeProviderSeed, Options_.UseSystemColumns, Options_.UseWorkerPool, - Options_.UseAntlr4 - )); + Options_.UseAntlr4)); } diff --git a/yql/essentials/public/purecalc/common/program_factory.h b/yql/essentials/public/purecalc/common/program_factory.h index 278d3e05a6a..e46de24d9ee 100644 --- a/yql/essentials/public/purecalc/common/program_factory.h +++ b/yql/essentials/public/purecalc/common/program_factory.h @@ -15,34 +15,33 @@ #include <util/generic/strbuf.h> namespace NYql { - namespace NPureCalc { - class TProgramFactory: public IProgramFactory { - private: - TProgramFactoryOptions Options_; - TExprContext ExprContext_; - TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry_; - IModuleResolver::TPtr ModuleResolver_; - TUserDataTable UserData_; - EBlockEngineMode BlockEngineMode_; - IOutputStream* ExprOutputStream_; - THashMap<TString, TString> Modules_; - NKikimr::NUdf::ICountersProvider* CountersProvider_; - - public: - explicit TProgramFactory(const TProgramFactoryOptions&); - ~TProgramFactory() override; - - public: - void AddUdfModule( - const TStringBuf& moduleName, - NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&& module - ) override; - - void SetCountersProvider(NKikimr::NUdf::ICountersProvider* provider) override; - - IPullStreamWorkerFactoryPtr MakePullStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override; - IPullListWorkerFactoryPtr MakePullListWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override; - IPushStreamWorkerFactoryPtr MakePushStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override; - }; - } -} +namespace NPureCalc { +class TProgramFactory: public IProgramFactory { +private: + TProgramFactoryOptions Options_; + TExprContext ExprContext_; + TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry_; + IModuleResolver::TPtr ModuleResolver_; + TUserDataTable UserData_; + EBlockEngineMode BlockEngineMode_; + IOutputStream* ExprOutputStream_; + THashMap<TString, TString> Modules_; + NKikimr::NUdf::ICountersProvider* CountersProvider_; + +public: + explicit TProgramFactory(const TProgramFactoryOptions&); + ~TProgramFactory() override; + +public: + void AddUdfModule( + const TStringBuf& moduleName, + NKikimr::NUdf::TUniquePtr<NKikimr::NUdf::IUdfModule>&& module) override; + + void SetCountersProvider(NKikimr::NUdf::ICountersProvider* provider) override; + + IPullStreamWorkerFactoryPtr MakePullStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override; + IPullListWorkerFactoryPtr MakePullListWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override; + IPushStreamWorkerFactoryPtr MakePushStreamWorkerFactory(const TInputSpecBase&, const TOutputSpecBase&, TString, ETranslationMode, ui16) override; +}; +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/transformations/align_output_schema.cpp b/yql/essentials/public/purecalc/common/transformations/align_output_schema.cpp index f11195bf756..47806d6846e 100644 --- a/yql/essentials/public/purecalc/common/transformations/align_output_schema.cpp +++ b/yql/essentials/public/purecalc/common/transformations/align_output_schema.cpp @@ -10,117 +10,115 @@ using namespace NYql; using namespace NYql::NPureCalc; namespace { - class TOutputAligner : public TSyncTransformerBase { - private: - const TTypeAnnotationNode* OutputStruct_; - bool AcceptsBlocks_; - EProcessorMode ProcessorMode_; - const TTypeAnnotationContext& TypeCtx_; - - public: - explicit TOutputAligner( - const TTypeAnnotationNode* outputStruct, - bool acceptsBlocks, - EProcessorMode processorMode, - const TTypeAnnotationContext& typeCtx - ) - : OutputStruct_(outputStruct) - , AcceptsBlocks_(acceptsBlocks) - , ProcessorMode_(processorMode) - , TypeCtx_(typeCtx) - { - } - - public: - TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { - output = input; - - const auto* expectedType = MakeExpectedType(ctx); - const auto* expectedItemType = MakeExpectedItemType(); - const auto* actualType = MakeActualType(input); - const auto* actualItemType = MakeActualItemType(input); - - // XXX: Tweak the obtained expression type, is the spec supports blocks: - // 1. Remove "_yql_block_length" attribute, since it's for internal usage. - // 2. Strip block container from the type to store its internal type. - if (AcceptsBlocks_) { - Y_ENSURE(actualItemType->GetKind() == ETypeAnnotationKind::Struct); - actualItemType = UnwrapBlockStruct(actualItemType->Cast<TStructExprType>(), ctx); - if (ProcessorMode_ == EProcessorMode::PullList) { - actualType = ctx.MakeType<TListExprType>(actualItemType); - } else { - actualType = ctx.MakeType<TStreamExprType>(actualItemType); - } - } - - if (!ValidateOutputType(actualItemType, expectedItemType, ctx, TypeCtx_)) { - return TStatus::Error; - } - - if (!expectedType) { - return TStatus::Ok; - } - - auto status = TryConvertTo(output, *actualType, *expectedType, ctx, TypeCtx_); - - if (status.Level == IGraphTransformer::TStatus::Repeat) { - status = IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true); +class TOutputAligner: public TSyncTransformerBase { +private: + const TTypeAnnotationNode* OutputStruct_; + bool AcceptsBlocks_; + EProcessorMode ProcessorMode_; + const TTypeAnnotationContext& TypeCtx_; + +public: + explicit TOutputAligner( + const TTypeAnnotationNode* outputStruct, + bool acceptsBlocks, + EProcessorMode processorMode, + const TTypeAnnotationContext& typeCtx) + : OutputStruct_(outputStruct) + , AcceptsBlocks_(acceptsBlocks) + , ProcessorMode_(processorMode) + , TypeCtx_(typeCtx) + { + } + +public: + TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { + output = input; + + const auto* expectedType = MakeExpectedType(ctx); + const auto* expectedItemType = MakeExpectedItemType(); + const auto* actualType = MakeActualType(input); + const auto* actualItemType = MakeActualItemType(input); + + // XXX: Tweak the obtained expression type, is the spec supports blocks: + // 1. Remove "_yql_block_length" attribute, since it's for internal usage. + // 2. Strip block container from the type to store its internal type. + if (AcceptsBlocks_) { + Y_ENSURE(actualItemType->GetKind() == ETypeAnnotationKind::Struct); + actualItemType = UnwrapBlockStruct(actualItemType->Cast<TStructExprType>(), ctx); + if (ProcessorMode_ == EProcessorMode::PullList) { + actualType = ctx.MakeType<TListExprType>(actualItemType); + } else { + actualType = ctx.MakeType<TStreamExprType>(actualItemType); } - - return status; } - void Rewind() final { + if (!ValidateOutputType(actualItemType, expectedItemType, ctx, TypeCtx_)) { + return TStatus::Error; } - private: - const TTypeAnnotationNode* MakeExpectedType(TExprContext& ctx) { - if (!OutputStruct_) { - return nullptr; - } + if (!expectedType) { + return TStatus::Ok; + } - switch (ProcessorMode_) { - case EProcessorMode::PullList: - return ctx.MakeType<TListExprType>(OutputStruct_); - case EProcessorMode::PullStream: - case EProcessorMode::PushStream: - return ctx.MakeType<TStreamExprType>(OutputStruct_); - } + auto status = TryConvertTo(output, *actualType, *expectedType, ctx, TypeCtx_); - Y_ABORT("Unexpected"); + if (status.Level == IGraphTransformer::TStatus::Repeat) { + status = IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true); } - const TTypeAnnotationNode* MakeExpectedItemType() { - return OutputStruct_; + return status; + } + + void Rewind() final { + } + +private: + const TTypeAnnotationNode* MakeExpectedType(TExprContext& ctx) { + if (!OutputStruct_) { + return nullptr; } - const TTypeAnnotationNode* MakeActualType(TExprNode::TPtr& input) { - return input->GetTypeAnn(); + switch (ProcessorMode_) { + case EProcessorMode::PullList: + return ctx.MakeType<TListExprType>(OutputStruct_); + case EProcessorMode::PullStream: + case EProcessorMode::PushStream: + return ctx.MakeType<TStreamExprType>(OutputStruct_); } - const TTypeAnnotationNode* MakeActualItemType(TExprNode::TPtr& input) { - auto actualType = MakeActualType(input); - switch (actualType->GetKind()) { - case ETypeAnnotationKind::Stream: - Y_ENSURE(ProcessorMode_ != EProcessorMode::PullList, - "processor mode mismatches the actual container type"); - return actualType->Cast<TStreamExprType>()->GetItemType(); - case ETypeAnnotationKind::List: - Y_ENSURE(ProcessorMode_ == EProcessorMode::PullList, - "processor mode mismatches the actual container type"); - return actualType->Cast<TListExprType>()->GetItemType(); - default: - Y_ABORT("unexpected return type"); - } + Y_ABORT("Unexpected"); + } + + const TTypeAnnotationNode* MakeExpectedItemType() { + return OutputStruct_; + } + + const TTypeAnnotationNode* MakeActualType(TExprNode::TPtr& input) { + return input->GetTypeAnn(); + } + + const TTypeAnnotationNode* MakeActualItemType(TExprNode::TPtr& input) { + auto actualType = MakeActualType(input); + switch (actualType->GetKind()) { + case ETypeAnnotationKind::Stream: + Y_ENSURE(ProcessorMode_ != EProcessorMode::PullList, + "processor mode mismatches the actual container type"); + return actualType->Cast<TStreamExprType>()->GetItemType(); + case ETypeAnnotationKind::List: + Y_ENSURE(ProcessorMode_ == EProcessorMode::PullList, + "processor mode mismatches the actual container type"); + return actualType->Cast<TListExprType>()->GetItemType(); + default: + Y_ABORT("unexpected return type"); } - }; -} + } +}; +} // namespace TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeOutputAligner( const TTypeAnnotationNode* outputStruct, bool acceptsBlocks, EProcessorMode processorMode, - const TTypeAnnotationContext& typeCtx -) { + const TTypeAnnotationContext& typeCtx) { return new TOutputAligner(outputStruct, acceptsBlocks, processorMode, typeCtx); } diff --git a/yql/essentials/public/purecalc/common/transformations/align_output_schema.h b/yql/essentials/public/purecalc/common/transformations/align_output_schema.h index a1bf1717ce1..4cbcdcd81fa 100644 --- a/yql/essentials/public/purecalc/common/transformations/align_output_schema.h +++ b/yql/essentials/public/purecalc/common/transformations/align_output_schema.h @@ -6,23 +6,22 @@ #include <yql/essentials/core/yql_type_annotation.h> namespace NYql { - struct TTypeAnnotationContext; +struct TTypeAnnotationContext; - namespace NPureCalc { - /** - * A transformer which converts an output type of the expression to the given type or reports an error. - * - * @param outputStruct destination output struct type. - * @param acceptsBlocks indicates, whether the output type need to be - * preprocessed. - * @param processorMode specifies the top-most container of the result. - * @return a graph transformer for type alignment. - */ - TAutoPtr<IGraphTransformer> MakeOutputAligner( - const TTypeAnnotationNode* outputStruct, - bool acceptsBlocks, - EProcessorMode processorMode, - const TTypeAnnotationContext& typeCtx - ); - } -} +namespace NPureCalc { +/** + * A transformer which converts an output type of the expression to the given type or reports an error. + * + * @param outputStruct destination output struct type. + * @param acceptsBlocks indicates, whether the output type need to be + * preprocessed. + * @param processorMode specifies the top-most container of the result. + * @return a graph transformer for type alignment. + */ +TAutoPtr<IGraphTransformer> MakeOutputAligner( + const TTypeAnnotationNode* outputStruct, + bool acceptsBlocks, + EProcessorMode processorMode, + const TTypeAnnotationContext& typeCtx); +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/transformations/extract_used_columns.cpp b/yql/essentials/public/purecalc/common/transformations/extract_used_columns.cpp index 9ff7a0df638..4190ce1fad9 100644 --- a/yql/essentials/public/purecalc/common/transformations/extract_used_columns.cpp +++ b/yql/essentials/public/purecalc/common/transformations/extract_used_columns.cpp @@ -9,88 +9,86 @@ using namespace NYql; using namespace NYql::NPureCalc; namespace { - class TUsedColumnsExtractor : public TSyncTransformerBase { - private: - TVector<THashSet<TString>>* const Destination_; - const TVector<THashSet<TString>>& AllColumns_; - TString NodeName_; - - bool CalculatedUsedFields_ = false; - - public: - TUsedColumnsExtractor( - TVector<THashSet<TString>>* destination, - const TVector<THashSet<TString>>& allColumns, - TString nodeName - ) - : Destination_(destination) - , AllColumns_(allColumns) - , NodeName_(std::move(nodeName)) - { +class TUsedColumnsExtractor: public TSyncTransformerBase { +private: + TVector<THashSet<TString>>* const Destination_; + const TVector<THashSet<TString>>& AllColumns_; + TString NodeName_; + + bool CalculatedUsedFields_ = false; + +public: + TUsedColumnsExtractor( + TVector<THashSet<TString>>* destination, + const TVector<THashSet<TString>>& allColumns, + TString nodeName) + : Destination_(destination) + , AllColumns_(allColumns) + , NodeName_(std::move(nodeName)) + { + } + + TUsedColumnsExtractor(TVector<THashSet<TString>>*, TVector<THashSet<TString>>&&, TString) = delete; + +public: + TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { + output = input; + + if (CalculatedUsedFields_) { + return IGraphTransformer::TStatus::Ok; } - TUsedColumnsExtractor(TVector<THashSet<TString>>*, TVector<THashSet<TString>>&&, TString) = delete; - - public: - TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { - output = input; + bool hasError = false; - if (CalculatedUsedFields_) { - return IGraphTransformer::TStatus::Ok; - } + *Destination_ = AllColumns_; - bool hasError = false; - - *Destination_ = AllColumns_; - - VisitExpr(input, [&](const TExprNode::TPtr& inputExpr) { - NNodes::TExprBase node(inputExpr); - if (auto maybeExtract = node.Maybe<NNodes::TCoExtractMembers>()) { - auto extract = maybeExtract.Cast(); - const auto& arg = extract.Input().Ref(); - if (arg.IsCallable(NodeName_)) { - ui32 inputIndex; - if (!TryFetchInputIndexFromSelf(arg, ctx, AllColumns_.size(), inputIndex)) { - hasError = true; - return false; - } - - YQL_ENSURE(inputIndex < AllColumns_.size()); - - auto& destinationColumnsSet = (*Destination_)[inputIndex]; - const auto& allColumnsSet = AllColumns_[inputIndex]; - - destinationColumnsSet.clear(); - for (const auto& columnAtom : extract.Members()) { - TString name = TString(columnAtom.Value()); - YQL_ENSURE(allColumnsSet.contains(name), "unexpected column in the input struct"); - destinationColumnsSet.insert(name); - } + VisitExpr(input, [&](const TExprNode::TPtr& inputExpr) { + NNodes::TExprBase node(inputExpr); + if (auto maybeExtract = node.Maybe<NNodes::TCoExtractMembers>()) { + auto extract = maybeExtract.Cast(); + const auto& arg = extract.Input().Ref(); + if (arg.IsCallable(NodeName_)) { + ui32 inputIndex; + if (!TryFetchInputIndexFromSelf(arg, ctx, AllColumns_.size(), inputIndex)) { + hasError = true; + return false; } - } - return true; - }); + YQL_ENSURE(inputIndex < AllColumns_.size()); + + auto& destinationColumnsSet = (*Destination_)[inputIndex]; + const auto& allColumnsSet = AllColumns_[inputIndex]; - if (hasError) { - return IGraphTransformer::TStatus::Error; + destinationColumnsSet.clear(); + for (const auto& columnAtom : extract.Members()) { + TString name = TString(columnAtom.Value()); + YQL_ENSURE(allColumnsSet.contains(name), "unexpected column in the input struct"); + destinationColumnsSet.insert(name); + } + } } - CalculatedUsedFields_ = true; + return true; + }); - return IGraphTransformer::TStatus::Ok; + if (hasError) { + return IGraphTransformer::TStatus::Error; } - void Rewind() final { - CalculatedUsedFields_ = false; - } - }; -} + CalculatedUsedFields_ = true; + + return IGraphTransformer::TStatus::Ok; + } + + void Rewind() final { + CalculatedUsedFields_ = false; + } +}; +} // namespace TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeUsedColumnsExtractor( TVector<THashSet<TString>>* destination, const TVector<THashSet<TString>>& allColumns, - const TString& nodeName -) { + const TString& nodeName) { return new TUsedColumnsExtractor(destination, allColumns, nodeName); } diff --git a/yql/essentials/public/purecalc/common/transformations/extract_used_columns.h b/yql/essentials/public/purecalc/common/transformations/extract_used_columns.h index d0850e28b59..09f82d04e6d 100644 --- a/yql/essentials/public/purecalc/common/transformations/extract_used_columns.h +++ b/yql/essentials/public/purecalc/common/transformations/extract_used_columns.h @@ -9,21 +9,20 @@ #include <util/generic/string.h> namespace NYql { - namespace NPureCalc { - /** - * Make transformation which builds sets of input columns from the given expression. - * - * @param destination a vector of string sets which will be populated with column names sets when - * transformation pipeline is launched. This pointer should contain a valid - * TVector<THashSet> instance. The transformation will overwrite its contents. - * @param allColumns vector of sets with all available columns for each input. - * @param nodeName name of the callable used to get input data, e.g. `Self`. - * @return an extractor which scans an input structs contents and populates destination. - */ - TAutoPtr<IGraphTransformer> MakeUsedColumnsExtractor( - TVector<THashSet<TString>>* destination, - const TVector<THashSet<TString>>& allColumns, - const TString& nodeName = TString{PurecalcInputCallableName} - ); - } -} +namespace NPureCalc { +/** + * Make transformation which builds sets of input columns from the given expression. + * + * @param destination a vector of string sets which will be populated with column names sets when + * transformation pipeline is launched. This pointer should contain a valid + * TVector<THashSet> instance. The transformation will overwrite its contents. + * @param allColumns vector of sets with all available columns for each input. + * @param nodeName name of the callable used to get input data, e.g. `Self`. + * @return an extractor which scans an input structs contents and populates destination. + */ +TAutoPtr<IGraphTransformer> MakeUsedColumnsExtractor( + TVector<THashSet<TString>>* destination, + const TVector<THashSet<TString>>& allColumns, + const TString& nodeName = TString{PurecalcInputCallableName}); +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/transformations/output_columns_filter.cpp b/yql/essentials/public/purecalc/common/transformations/output_columns_filter.cpp index 04181db7c83..38dd2f074ba 100644 --- a/yql/essentials/public/purecalc/common/transformations/output_columns_filter.cpp +++ b/yql/essentials/public/purecalc/common/transformations/output_columns_filter.cpp @@ -6,94 +6,102 @@ using namespace NYql; using namespace NYql::NPureCalc; namespace { - class TOutputColumnsFilter: public TSyncTransformerBase { - private: - TMaybe<THashSet<TString>> Filter_; - bool Fired_; - - public: - explicit TOutputColumnsFilter(TMaybe<THashSet<TString>> filter) - : Filter_(std::move(filter)) - , Fired_(false) - { +class TOutputColumnsFilter: public TSyncTransformerBase { +private: + TMaybe<THashSet<TString>> Filter_; + bool Fired_; + +public: + explicit TOutputColumnsFilter(TMaybe<THashSet<TString>> filter) + : Filter_(std::move(filter)) + , Fired_(false) + { + } + +public: + void Rewind() override { + Fired_ = false; + } + + TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { + output = input; + + if (Fired_ || Filter_.Empty()) { + return IGraphTransformer::TStatus::Ok; } - public: - void Rewind() override { - Fired_ = false; + const TTypeAnnotationNode* returnType = output->GetTypeAnn(); + const TTypeAnnotationNode* returnItemType = nullptr; + switch (returnType->GetKind()) { + case ETypeAnnotationKind::Stream: + returnItemType = returnType->Cast<TStreamExprType>()->GetItemType(); + break; + case ETypeAnnotationKind::List: + returnItemType = returnType->Cast<TListExprType>()->GetItemType(); + break; + default: + Y_ABORT("unexpected return type"); } - TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { - output = input; - - if (Fired_ || Filter_.Empty()) { - return IGraphTransformer::TStatus::Ok; - } - - const TTypeAnnotationNode* returnType = output->GetTypeAnn(); - const TTypeAnnotationNode* returnItemType = nullptr; - switch (returnType->GetKind()) { - case ETypeAnnotationKind::Stream: - returnItemType = returnType->Cast<TStreamExprType>()->GetItemType(); - break; - case ETypeAnnotationKind::List: - returnItemType = returnType->Cast<TListExprType>()->GetItemType(); - break; - default: - Y_ABORT("unexpected return type"); - } - - if (returnItemType->GetKind() != ETypeAnnotationKind::Struct) { - ctx.AddError(TIssue(ctx.GetPosition(output->Pos()), "columns filter only supported for single-output programs")); - } + if (returnItemType->GetKind() != ETypeAnnotationKind::Struct) { + ctx.AddError(TIssue(ctx.GetPosition(output->Pos()), "columns filter only supported for single-output programs")); + } - const auto* returnItemStruct = returnItemType->Cast<TStructExprType>(); - - auto arg = ctx.NewArgument(TPositionHandle(), "row"); - TExprNode::TListType asStructItems; - for (const auto& x : returnItemStruct->GetItems()) { - TExprNode::TPtr value; - if (Filter_->contains(x->GetName())) { - value = ctx.Builder({}) - .Callable("Member") - .Add(0, arg) - .Atom(1, x->GetName()) - .Seal() - .Build(); - } else { - auto type = x->GetItemType(); - value = ctx.Builder({}) - .Callable(type->GetKind() == ETypeAnnotationKind::Optional ? "Nothing" : "Default") - .Add(0, ExpandType({}, *type, ctx)) - .Seal() - .Build(); - } - - auto item = ctx.Builder({}) - .List() - .Atom(0, x->GetName()) - .Add(1, value) - .Seal() - .Build(); - - asStructItems.push_back(item); + const auto* returnItemStruct = returnItemType->Cast<TStructExprType>(); + + auto arg = ctx.NewArgument(TPositionHandle(), "row"); + TExprNode::TListType asStructItems; + for (const auto& x : returnItemStruct->GetItems()) { + TExprNode::TPtr value; + if (Filter_->contains(x->GetName())) { + // clang-format off + value = ctx.Builder({}) + .Callable("Member") + .Add(0, arg) + .Atom(1, x->GetName()) + .Seal() + .Build(); + // clang-format on + } else { + auto type = x->GetItemType(); + // clang-format off + value = ctx.Builder({}) + .Callable(type->GetKind() == ETypeAnnotationKind::Optional ? "Nothing" : "Default") + .Add(0, ExpandType({}, *type, ctx)) + .Seal() + .Build(); + // clang-format on } - auto body = ctx.NewCallable(TPositionHandle(), "AsStruct", std::move(asStructItems)); - auto lambda = ctx.NewLambda(TPositionHandle(), ctx.NewArguments(TPositionHandle(), {arg}), std::move(body)); - output = ctx.Builder(TPositionHandle()) - .Callable("Map") - .Add(0, output) - .Add(1, lambda) - .Seal() - .Build(); - - Fired_ = true; + // clang-format off + auto item = ctx.Builder({}) + .List() + .Atom(0, x->GetName()) + .Add(1, value) + .Seal() + .Build(); + // clang-format on - return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true); + asStructItems.push_back(item); } - }; -} + + auto body = ctx.NewCallable(TPositionHandle(), "AsStruct", std::move(asStructItems)); + auto lambda = ctx.NewLambda(TPositionHandle(), ctx.NewArguments(TPositionHandle(), {arg}), std::move(body)); + // clang-format off + output = ctx.Builder(TPositionHandle()) + .Callable("Map") + .Add(0, output) + .Add(1, lambda) + .Seal() + .Build(); + // clang-format on + + Fired_ = true; + + return IGraphTransformer::TStatus(IGraphTransformer::TStatus::Repeat, true); + } +}; +} // namespace TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeOutputColumnsFilter(const TMaybe<THashSet<TString>>& columns) { return new TOutputColumnsFilter(columns); diff --git a/yql/essentials/public/purecalc/common/transformations/output_columns_filter.h b/yql/essentials/public/purecalc/common/transformations/output_columns_filter.h index 85302d82feb..235f02a8379 100644 --- a/yql/essentials/public/purecalc/common/transformations/output_columns_filter.h +++ b/yql/essentials/public/purecalc/common/transformations/output_columns_filter.h @@ -6,13 +6,13 @@ #include <yql/essentials/core/yql_type_annotation.h> namespace NYql { - namespace NPureCalc { - /** - * A transformer which removes unwanted columns from output. - * - * @param columns remove all columns that are not in this set. - * @return a graph transformer for filtering output. - */ - TAutoPtr<IGraphTransformer> MakeOutputColumnsFilter(const TMaybe<THashSet<TString>>& columns); - } -} +namespace NPureCalc { +/** + * A transformer which removes unwanted columns from output. + * + * @param columns remove all columns that are not in this set. + * @return a graph transformer for filtering output. + */ +TAutoPtr<IGraphTransformer> MakeOutputColumnsFilter(const TMaybe<THashSet<TString>>& columns); +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/transformations/replace_table_reads.cpp b/yql/essentials/public/purecalc/common/transformations/replace_table_reads.cpp index 141e92baf28..ae160ee3e91 100644 --- a/yql/essentials/public/purecalc/common/transformations/replace_table_reads.cpp +++ b/yql/essentials/public/purecalc/common/transformations/replace_table_reads.cpp @@ -10,238 +10,242 @@ using namespace NYql; using namespace NYql::NPureCalc; namespace { - class TTableReadsReplacer: public TSyncTransformerBase { - private: - const TVector<const TStructExprType*>& InputStructs_; - bool UseSystemColumns_; - EProcessorMode ProcessorMode_; - TString CallableName_; - TString TablePrefix_; - bool Complete_ = false; - - public: - explicit TTableReadsReplacer( - const TVector<const TStructExprType*>& inputStructs, - bool useSystemColumns, - EProcessorMode processorMode, - TString inputNodeName, - TString tablePrefix - ) - : InputStructs_(inputStructs) - , UseSystemColumns_(useSystemColumns) - , ProcessorMode_(processorMode) - , CallableName_(std::move(inputNodeName)) - , TablePrefix_(std::move(tablePrefix)) - { +class TTableReadsReplacer: public TSyncTransformerBase { +private: + const TVector<const TStructExprType*>& InputStructs_; + bool UseSystemColumns_; + EProcessorMode ProcessorMode_; + TString CallableName_; + TString TablePrefix_; + bool Complete_ = false; + +public: + explicit TTableReadsReplacer( + const TVector<const TStructExprType*>& inputStructs, + bool useSystemColumns, + EProcessorMode processorMode, + TString inputNodeName, + TString tablePrefix) + : InputStructs_(inputStructs) + , UseSystemColumns_(useSystemColumns) + , ProcessorMode_(processorMode) + , CallableName_(std::move(inputNodeName)) + , TablePrefix_(std::move(tablePrefix)) + { + } + + TTableReadsReplacer(TVector<const TStructExprType*>&&, TString, TString) = delete; + +public: + TStatus DoTransform(const TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { + output = input; + if (Complete_) { + return TStatus::Ok; } - TTableReadsReplacer(TVector<const TStructExprType*>&&, TString, TString) = delete; - - public: - TStatus DoTransform(const TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { - output = input; - if (Complete_) { - return TStatus::Ok; - } - - TOptimizeExprSettings settings(nullptr); + TOptimizeExprSettings settings(nullptr); - auto status = OptimizeExpr(input, output, [&](const TExprNode::TPtr& node, TExprContext& ctx) -> TExprNode::TPtr { - if (node->IsCallable(NNodes::TCoRight::CallableName())) { - TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { - return new TIssue(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); - }); + auto status = OptimizeExpr(input, output, [&](const TExprNode::TPtr& node, TExprContext& ctx) -> TExprNode::TPtr { + if (node->IsCallable(NNodes::TCoRight::CallableName())) { + TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { + return new TIssue(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); + }); - if (!EnsureMinArgsCount(*node, 1, ctx)) { - return nullptr; - } - - if (node->Child(0)->IsCallable(NNodes::TCoCons::CallableName())) { - return node; - } - - if (!node->Child(0)->IsCallable(NNodes::TCoRead::CallableName())) { - ctx.AddError(TIssue(ctx.GetPosition(node->Child(0)->Pos()), TStringBuilder() << "Expected Read!")); - return nullptr; - } + if (!EnsureMinArgsCount(*node, 1, ctx)) { + return nullptr; + } - return BuildInputFromRead(node->Pos(), node->ChildPtr(0), ctx); - } else if (node->IsCallable(NNodes::TCoLeft::CallableName())) { - TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { - return new TIssue(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); - }); + if (node->Child(0)->IsCallable(NNodes::TCoCons::CallableName())) { + return node; + } - if (!EnsureMinArgsCount(*node, 1, ctx)) { - return nullptr; - } + if (!node->Child(0)->IsCallable(NNodes::TCoRead::CallableName())) { + ctx.AddError(TIssue(ctx.GetPosition(node->Child(0)->Pos()), TStringBuilder() << "Expected Read!")); + return nullptr; + } - if (!node->Child(0)->IsCallable(NNodes::TCoRead::CallableName())) { - ctx.AddError(TIssue(ctx.GetPosition(node->Child(0)->Pos()), TStringBuilder() << "Expected Read!")); - return nullptr; - } + return BuildInputFromRead(node->Pos(), node->ChildPtr(0), ctx); + } else if (node->IsCallable(NNodes::TCoLeft::CallableName())) { + TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { + return new TIssue(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); + }); - return node->Child(0)->HeadPtr(); + if (!EnsureMinArgsCount(*node, 1, ctx)) { + return nullptr; } - return node; - }, ctx, settings); + if (!node->Child(0)->IsCallable(NNodes::TCoRead::CallableName())) { + ctx.AddError(TIssue(ctx.GetPosition(node->Child(0)->Pos()), TStringBuilder() << "Expected Read!")); + return nullptr; + } - if (status.Level == TStatus::Ok) { - Complete_ = true; + return node->Child(0)->HeadPtr(); } - return status; - } - - void Rewind() override { - Complete_ = false; - } - private: - TExprNode::TPtr BuildInputFromRead(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) { - TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { - return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); - }); + return node; + }, ctx, settings); - if (!EnsureMinArgsCount(*node, 3, ctx)) { - return nullptr; - } + if (status.Level == TStatus::Ok) { + Complete_ = true; + } + return status; + } - const auto source = node->ChildPtr(2); - if (source->IsCallable(NNodes::TCoKey::CallableName())) { - return BuildInputFromKey(replacePos, source, ctx); - } - if (source->IsCallable("DataTables")) { - return BuildInputFromDataTables(replacePos, source, ctx); - } + void Rewind() override { + Complete_ = false; + } - ctx.AddError(TIssue(ctx.GetPosition(source->Pos()), TStringBuilder() << "Unsupported read source: " << source->Content())); +private: + TExprNode::TPtr BuildInputFromRead(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) { + TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { + return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); + }); + if (!EnsureMinArgsCount(*node, 3, ctx)) { return nullptr; } - TExprNode::TPtr BuildInputFromKey(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) { - TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { - return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); - }); + const auto source = node->ChildPtr(2); + if (source->IsCallable(NNodes::TCoKey::CallableName())) { + return BuildInputFromKey(replacePos, source, ctx); + } + if (source->IsCallable("DataTables")) { + return BuildInputFromDataTables(replacePos, source, ctx); + } - ui32 inputIndex; - TExprNode::TPtr inputTableName; + ctx.AddError(TIssue(ctx.GetPosition(source->Pos()), TStringBuilder() << "Unsupported read source: " << source->Content())); - if (!TryFetchInputIndexFromKey(node, ctx, inputIndex, inputTableName)) { - return nullptr; - } + return nullptr; + } - YQL_ENSURE(inputTableName->IsCallable(NNodes::TCoString::CallableName())); + TExprNode::TPtr BuildInputFromKey(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) { + TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { + return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); + }); - auto inputNode = ctx.Builder(replacePos) - .Callable(CallableName_) - .Atom(0, ToString(inputIndex)) - .Seal() - .Build(); + ui32 inputIndex; + TExprNode::TPtr inputTableName; - if (inputNode->IsCallable(PurecalcBlockInputCallableName)) { - const auto inputStruct = InputStructs_[inputIndex]->Cast<TStructExprType>(); - const auto blocksLambda = NodeFromBlocks(replacePos, inputStruct, ctx); - bool wrapLMap = ProcessorMode_ == EProcessorMode::PullList; - inputNode = ApplyToIterable(replacePos, inputNode, blocksLambda, wrapLMap, ctx); - } + if (!TryFetchInputIndexFromKey(node, ctx, inputIndex, inputTableName)) { + return nullptr; + } - if (UseSystemColumns_) { - auto mapLambda = ctx.Builder(replacePos) - .Lambda() - .Param("row") - .Callable(0, NNodes::TCoAddMember::CallableName()) - .Arg(0, "row") - .Atom(1, PurecalcSysColumnTablePath) - .Add(2, inputTableName) - .Seal() - .Seal() - .Build(); + YQL_ENSURE(inputTableName->IsCallable(NNodes::TCoString::CallableName())); + + // clang-format off + auto inputNode = ctx.Builder(replacePos) + .Callable(CallableName_) + .Atom(0, ToString(inputIndex)) + .Seal() + .Build(); + // clang-format on + + if (inputNode->IsCallable(PurecalcBlockInputCallableName)) { + const auto inputStruct = InputStructs_[inputIndex]->Cast<TStructExprType>(); + const auto blocksLambda = NodeFromBlocks(replacePos, inputStruct, ctx); + bool wrapLMap = ProcessorMode_ == EProcessorMode::PullList; + inputNode = ApplyToIterable(replacePos, inputNode, blocksLambda, wrapLMap, ctx); + } - return ctx.Builder(replacePos) - .Callable(NNodes::TCoMap::CallableName()) - .Add(0, std::move(inputNode)) - .Add(1, std::move(mapLambda)) + if (UseSystemColumns_) { + // clang-format off + auto mapLambda = ctx.Builder(replacePos) + .Lambda() + .Param("row") + .Callable(0, NNodes::TCoAddMember::CallableName()) + .Arg(0, "row") + .Atom(1, PurecalcSysColumnTablePath) + .Add(2, inputTableName) .Seal() - .Build(); - } + .Seal() + .Build(); + // clang-format on - return inputNode; + // clang-format off + return ctx.Builder(replacePos) + .Callable(NNodes::TCoMap::CallableName()) + .Add(0, std::move(inputNode)) + .Add(1, std::move(mapLambda)) + .Seal() + .Build(); + // clang-format on } - TExprNode::TPtr BuildInputFromDataTables(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) { - TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { - return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); - }); + return inputNode; + } - if (InputStructs_.empty()) { - ctx.AddError(TIssue(ctx.GetPosition(node->Pos()), "No inputs provided by input spec")); - return nullptr; - } + TExprNode::TPtr BuildInputFromDataTables(TPositionHandle replacePos, const TExprNode::TPtr& node, TExprContext& ctx) { + TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { + return MakeIntrusive<TIssue>(ctx.GetPosition(node->Pos()), TStringBuilder() << "At function: " << node->Content()); + }); - if (!EnsureArgsCount(*node, 0, ctx)) { - return nullptr; - } + if (InputStructs_.empty()) { + ctx.AddError(TIssue(ctx.GetPosition(node->Pos()), "No inputs provided by input spec")); + return nullptr; + } - auto builder = ctx.Builder(replacePos); + if (!EnsureArgsCount(*node, 0, ctx)) { + return nullptr; + } - if (InputStructs_.size() > 1) { - auto listBuilder = builder.List(); + auto builder = ctx.Builder(replacePos); - for (ui32 i = 0; i < InputStructs_.size(); ++i) { - listBuilder.Callable(i, CallableName_).Atom(0, ToString(i)).Seal(); - } + if (InputStructs_.size() > 1) { + auto listBuilder = builder.List(); - return listBuilder.Seal().Build(); + for (ui32 i = 0; i < InputStructs_.size(); ++i) { + listBuilder.Callable(i, CallableName_).Atom(0, ToString(i)).Seal(); } - return builder.Callable(CallableName_).Atom(0, "0").Seal().Build(); + return listBuilder.Seal().Build(); } - bool TryFetchInputIndexFromKey(const TExprNode::TPtr& node, TExprContext& ctx, ui32& resultIndex, TExprNode::TPtr& resultTableName) { - if (!EnsureArgsCount(*node, 1, ctx)) { - return false; - } + return builder.Callable(CallableName_).Atom(0, "0").Seal().Build(); + } - const auto* keyArg = node->Child(0); - if (!keyArg->IsList() || keyArg->ChildrenSize() != 2 || !keyArg->Child(0)->IsAtom("table") || - !keyArg->Child(1)->IsCallable(NNodes::TCoString::CallableName())) - { - ctx.AddError(TIssue(ctx.GetPosition(keyArg->Pos()), "Expected single table name")); - return false; - } + bool TryFetchInputIndexFromKey(const TExprNode::TPtr& node, TExprContext& ctx, ui32& resultIndex, TExprNode::TPtr& resultTableName) { + if (!EnsureArgsCount(*node, 1, ctx)) { + return false; + } - resultTableName = keyArg->ChildPtr(1); + const auto* keyArg = node->Child(0); + if (!keyArg->IsList() || keyArg->ChildrenSize() != 2 || !keyArg->Child(0)->IsAtom("table") || + !keyArg->Child(1)->IsCallable(NNodes::TCoString::CallableName())) + { + ctx.AddError(TIssue(ctx.GetPosition(keyArg->Pos()), "Expected single table name")); + return false; + } - auto tableName = resultTableName->Child(0)->Content(); + resultTableName = keyArg->ChildPtr(1); - if (!tableName.StartsWith(TablePrefix_)) { - ctx.AddError(TIssue(ctx.GetPosition(resultTableName->Child(0)->Pos()), - TStringBuilder() << "Invalid table name " << TString{tableName}.Quote() << ": prefix must be " << TablePrefix_.Quote())); - return false; - } + auto tableName = resultTableName->Child(0)->Content(); - tableName.SkipPrefix(TablePrefix_); + if (!tableName.StartsWith(TablePrefix_)) { + ctx.AddError(TIssue(ctx.GetPosition(resultTableName->Child(0)->Pos()), + TStringBuilder() << "Invalid table name " << TString{tableName}.Quote() << ": prefix must be " << TablePrefix_.Quote())); + return false; + } - if (!tableName) { - resultIndex = 0; - } else if (!TryFromString(tableName, resultIndex)) { - ctx.AddError(TIssue(ctx.GetPosition(resultTableName->Child(0)->Pos()), - TStringBuilder() << "Invalid table name " << TString{tableName}.Quote() << ": suffix must be UI32 number")); - return false; - } + tableName.SkipPrefix(TablePrefix_); - return true; + if (!tableName) { + resultIndex = 0; + } else if (!TryFromString(tableName, resultIndex)) { + ctx.AddError(TIssue(ctx.GetPosition(resultTableName->Child(0)->Pos()), + TStringBuilder() << "Invalid table name " << TString{tableName}.Quote() << ": suffix must be UI32 number")); + return false; } - }; -} + + return true; + } +}; +} // namespace TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeTableReadsReplacer( const TVector<const TStructExprType*>& inputStructs, bool useSystemColumns, EProcessorMode processorMode, TString callableName, - TString tablePrefix -) { + TString tablePrefix) { return new TTableReadsReplacer(inputStructs, useSystemColumns, processorMode, std::move(callableName), std::move(tablePrefix)); } diff --git a/yql/essentials/public/purecalc/common/transformations/replace_table_reads.h b/yql/essentials/public/purecalc/common/transformations/replace_table_reads.h index 33bc7174ac4..750ed8b37fb 100644 --- a/yql/essentials/public/purecalc/common/transformations/replace_table_reads.h +++ b/yql/essentials/public/purecalc/common/transformations/replace_table_reads.h @@ -6,25 +6,24 @@ #include <yql/essentials/core/yql_graph_transformer.h> namespace NYql::NPureCalc { - /** - * SQL translation would generate a standard Read! call to read each input table. It will than generate - * a Right! call to get the table data from a tuple returned by Read!. This transformation replaces any Right! - * call with a call to special function used to get input data. - * - * Each table name must starts with the specified prefix and ends with an index of program input (e.g. `Input0`). - * Name without numeric suffix is an alias for the first input. - * - * @param inputStructs types of each input. - * @param useSystemColumns whether to allow special system columns in input structs. - * @param callableName name of the special callable used to get input data (e.g. `Self`). - * @param tablePrefix required prefix for all table names (e.g. `Input`). - * @param return a graph transformer for replacing table reads. - */ - TAutoPtr<IGraphTransformer> MakeTableReadsReplacer( - const TVector<const TStructExprType*>& inputStructs, - bool useSystemColumns, - EProcessorMode processorMode, - TString callableName = TString{PurecalcInputCallableName}, - TString tablePrefix = TString{PurecalcInputTablePrefix} - ); -} +/** + * SQL translation would generate a standard Read! call to read each input table. It will than generate + * a Right! call to get the table data from a tuple returned by Read!. This transformation replaces any Right! + * call with a call to special function used to get input data. + * + * Each table name must starts with the specified prefix and ends with an index of program input (e.g. `Input0`). + * Name without numeric suffix is an alias for the first input. + * + * @param inputStructs types of each input. + * @param useSystemColumns whether to allow special system columns in input structs. + * @param callableName name of the special callable used to get input data (e.g. `Self`). + * @param tablePrefix required prefix for all table names (e.g. `Input`). + * @param return a graph transformer for replacing table reads. + */ +TAutoPtr<IGraphTransformer> MakeTableReadsReplacer( + const TVector<const TStructExprType*>& inputStructs, + bool useSystemColumns, + EProcessorMode processorMode, + TString callableName = TString{PurecalcInputCallableName}, + TString tablePrefix = TString{PurecalcInputTablePrefix}); +} // namespace NYql::NPureCalc diff --git a/yql/essentials/public/purecalc/common/transformations/root_to_blocks.cpp b/yql/essentials/public/purecalc/common/transformations/root_to_blocks.cpp index 07c959d1077..67f6f59a1ef 100644 --- a/yql/essentials/public/purecalc/common/transformations/root_to_blocks.cpp +++ b/yql/essentials/public/purecalc/common/transformations/root_to_blocks.cpp @@ -59,7 +59,6 @@ public: TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeRootToBlocks( bool acceptsBlocks, - EProcessorMode processorMode -) { + EProcessorMode processorMode) { return new TRootToBlocks(acceptsBlocks, processorMode); } diff --git a/yql/essentials/public/purecalc/common/transformations/root_to_blocks.h b/yql/essentials/public/purecalc/common/transformations/root_to_blocks.h index 13a7a9dfc11..46ff25d2051 100644 --- a/yql/essentials/public/purecalc/common/transformations/root_to_blocks.h +++ b/yql/essentials/public/purecalc/common/transformations/root_to_blocks.h @@ -5,18 +5,17 @@ #include <yql/essentials/core/yql_graph_transformer.h> namespace NYql { - namespace NPureCalc { - /** - * A transformer which rewrite the root to respect block types. - * - * @param acceptsBlock allows using this transformer in pipeline and - * skip this phase if no block output is required. - * @param processorMode specifies the top-most container of the result. - * @return a graph transformer for rewriting the root node. - */ - TAutoPtr<IGraphTransformer> MakeRootToBlocks( - bool acceptsBlocks, - EProcessorMode processorMode - ); - } -} +namespace NPureCalc { +/** + * A transformer which rewrite the root to respect block types. + * + * @param acceptsBlock allows using this transformer in pipeline and + * skip this phase if no block output is required. + * @param processorMode specifies the top-most container of the result. + * @return a graph transformer for rewriting the root node. + */ +TAutoPtr<IGraphTransformer> MakeRootToBlocks( + bool acceptsBlocks, + EProcessorMode processorMode); +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/transformations/type_annotation.cpp b/yql/essentials/public/purecalc/common/transformations/type_annotation.cpp index 63105cb8878..61f14fcc65a 100644 --- a/yql/essentials/public/purecalc/common/transformations/type_annotation.cpp +++ b/yql/essentials/public/purecalc/common/transformations/type_annotation.cpp @@ -14,171 +14,144 @@ using namespace NYql; using namespace NYql::NPureCalc; namespace { - class TTypeAnnotatorBase: public TSyncTransformerBase { - public: - using THandler = std::function<TStatus(const TExprNode::TPtr&, TExprNode::TPtr&, TExprContext&)>; +class TTypeAnnotatorBase: public TSyncTransformerBase { +public: + using THandler = std::function<TStatus(const TExprNode::TPtr&, TExprNode::TPtr&, TExprContext&)>; - TTypeAnnotatorBase(TTypeAnnotationContextPtr typeAnnotationContext) - { - OriginalTransformer_.reset(CreateExtCallableTypeAnnotationTransformer(*typeAnnotationContext).Release()); - } + TTypeAnnotatorBase(TTypeAnnotationContextPtr typeAnnotationContext) + { + OriginalTransformer_.reset(CreateExtCallableTypeAnnotationTransformer(*typeAnnotationContext).Release()); + } - TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { - if (input->Type() == TExprNode::Callable) { - if (auto handler = Handlers_.FindPtr(input->Content())) { - return (*handler)(input, output, ctx); - } + TStatus DoTransform(TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) final { + if (input->Type() == TExprNode::Callable) { + if (auto handler = Handlers_.FindPtr(input->Content())) { + return (*handler)(input, output, ctx); } - - auto status = OriginalTransformer_->Transform(input, output, ctx); - - YQL_ENSURE(status.Level != IGraphTransformer::TStatus::Async, "Async type check is not supported"); - - return status; } - void Rewind() final { - OriginalTransformer_->Rewind(); - } + auto status = OriginalTransformer_->Transform(input, output, ctx); - protected: - void AddHandler(std::initializer_list<TStringBuf> names, THandler handler) { - for (auto name: names) { - YQL_ENSURE(Handlers_.emplace(name, handler).second, "Duplicate handler for " << name); - } - } + YQL_ENSURE(status.Level != IGraphTransformer::TStatus::Async, "Async type check is not supported"); - template <class TDerived> - THandler Hndl(TStatus(TDerived::* handler)(const TExprNode::TPtr&, TExprNode::TPtr&, TExprContext&)) { - return [this, handler] (TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) { - return (static_cast<TDerived*>(this)->*handler)(input, output, ctx); - }; - } - - template <class TDerived> - THandler Hndl(TStatus(TDerived::* handler)(const TExprNode::TPtr&, TExprContext&)) { - return [this, handler] (TExprNode::TPtr input, TExprNode::TPtr& /*output*/, TExprContext& ctx) { - return (static_cast<TDerived*>(this)->*handler)(input, ctx); - }; - } - - private: - std::shared_ptr<IGraphTransformer> OriginalTransformer_; - THashMap<TStringBuf, THandler> Handlers_; - }; - - class TTypeAnnotator : public TTypeAnnotatorBase { - private: - TTypeAnnotationContextPtr TypeAnnotationContext_; - const TVector<const TStructExprType*>& InputStructs_; - TVector<const TStructExprType*>& RawInputTypes_; - EProcessorMode ProcessorMode_; - TString InputNodeName_; - - public: - TTypeAnnotator( - TTypeAnnotationContextPtr typeAnnotationContext, - const TVector<const TStructExprType*>& inputStructs, - TVector<const TStructExprType*>& rawInputTypes, - EProcessorMode processorMode, - TString nodeName - ) - : TTypeAnnotatorBase(typeAnnotationContext) - , TypeAnnotationContext_(typeAnnotationContext) - , InputStructs_(inputStructs) - , RawInputTypes_(rawInputTypes) - , ProcessorMode_(processorMode) - , InputNodeName_(std::move(nodeName)) - { - AddHandler({InputNodeName_}, Hndl(&TTypeAnnotator::HandleInputNode)); - AddHandler({NNodes::TCoTableName::CallableName()}, Hndl(&TTypeAnnotator::HandleTableName)); - AddHandler({NNodes::TCoTablePath::CallableName()}, Hndl(&TTypeAnnotator::HandleTablePath)); - AddHandler({NNodes::TCoHoppingTraits::CallableName()}, Hndl(&TTypeAnnotator::HandleHoppingTraits)); - } - - TTypeAnnotator(TTypeAnnotationContextPtr, TVector<const TStructExprType*>&&, EProcessorMode, TString) = delete; - - private: - TStatus HandleInputNode(const TExprNode::TPtr& input, TExprContext& ctx) { - ui32 inputIndex; - if (!TryFetchInputIndexFromSelf(*input, ctx, InputStructs_.size(), inputIndex)) { - return IGraphTransformer::TStatus::Error; - } + return status; + } - YQL_ENSURE(inputIndex < InputStructs_.size()); + void Rewind() final { + OriginalTransformer_->Rewind(); + } - auto itemType = InputStructs_[inputIndex]; +protected: + void AddHandler(std::initializer_list<TStringBuf> names, THandler handler) { + for (auto name : names) { + YQL_ENSURE(Handlers_.emplace(name, handler).second, "Duplicate handler for " << name); + } + } + + template <class TDerived> + THandler Hndl(TStatus (TDerived::*handler)(const TExprNode::TPtr&, TExprNode::TPtr&, TExprContext&)) { + return [this, handler](TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) { + return (static_cast<TDerived*>(this)->*handler)(input, output, ctx); + }; + } + + template <class TDerived> + THandler Hndl(TStatus (TDerived::*handler)(const TExprNode::TPtr&, TExprContext&)) { + return [this, handler](TExprNode::TPtr input, TExprNode::TPtr& /*output*/, TExprContext& ctx) { + return (static_cast<TDerived*>(this)->*handler)(input, ctx); + }; + } + +private: + std::shared_ptr<IGraphTransformer> OriginalTransformer_; + THashMap<TStringBuf, THandler> Handlers_; +}; + +class TTypeAnnotator: public TTypeAnnotatorBase { +private: + TTypeAnnotationContextPtr TypeAnnotationContext_; + const TVector<const TStructExprType*>& InputStructs_; + TVector<const TStructExprType*>& RawInputTypes_; + EProcessorMode ProcessorMode_; + TString InputNodeName_; + +public: + TTypeAnnotator( + TTypeAnnotationContextPtr typeAnnotationContext, + const TVector<const TStructExprType*>& inputStructs, + TVector<const TStructExprType*>& rawInputTypes, + EProcessorMode processorMode, + TString nodeName) + : TTypeAnnotatorBase(typeAnnotationContext) + , TypeAnnotationContext_(typeAnnotationContext) + , InputStructs_(inputStructs) + , RawInputTypes_(rawInputTypes) + , ProcessorMode_(processorMode) + , InputNodeName_(std::move(nodeName)) + { + AddHandler({InputNodeName_}, Hndl(&TTypeAnnotator::HandleInputNode)); + AddHandler({NNodes::TCoTableName::CallableName()}, Hndl(&TTypeAnnotator::HandleTableName)); + AddHandler({NNodes::TCoTablePath::CallableName()}, Hndl(&TTypeAnnotator::HandleTablePath)); + AddHandler({NNodes::TCoHoppingTraits::CallableName()}, Hndl(&TTypeAnnotator::HandleHoppingTraits)); + } + + TTypeAnnotator(TTypeAnnotationContextPtr, TVector<const TStructExprType*>&&, EProcessorMode, TString) = delete; + +private: + TStatus HandleInputNode(const TExprNode::TPtr& input, TExprContext& ctx) { + ui32 inputIndex; + if (!TryFetchInputIndexFromSelf(*input, ctx, InputStructs_.size(), inputIndex)) { + return IGraphTransformer::TStatus::Error; + } - // XXX: Tweak the input expression type, if the spec supports blocks: - // 1. Add "_yql_block_length" attribute for internal usage. - // 2. Add block container to wrap the actual item type. - if (input->IsCallable(PurecalcBlockInputCallableName)) { - itemType = WrapBlockStruct(itemType, ctx); - } + YQL_ENSURE(inputIndex < InputStructs_.size()); - RawInputTypes_[inputIndex] = itemType; + auto itemType = InputStructs_[inputIndex]; - TColumnOrder columnOrder; - for (const auto& i : itemType->GetItems()) { - columnOrder.AddColumn(TString(i->GetName())); - } + // XXX: Tweak the input expression type, if the spec supports blocks: + // 1. Add "_yql_block_length" attribute for internal usage. + // 2. Add block container to wrap the actual item type. + if (input->IsCallable(PurecalcBlockInputCallableName)) { + itemType = WrapBlockStruct(itemType, ctx); + } - if (ProcessorMode_ != EProcessorMode::PullList) { - input->SetTypeAnn(ctx.MakeType<TStreamExprType>(itemType)); - } else { - input->SetTypeAnn(ctx.MakeType<TListExprType>(itemType)); - } + RawInputTypes_[inputIndex] = itemType; - TypeAnnotationContext_->SetColumnOrder(*input, columnOrder, ctx); - return TStatus::Ok; + TColumnOrder columnOrder; + for (const auto& i : itemType->GetItems()) { + columnOrder.AddColumn(TString(i->GetName())); } - TStatus HandleTableName(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { - if (!EnsureMinMaxArgsCount(*input, 1, 2, ctx)) { - return TStatus::Error; - } - - if (input->ChildrenSize() > 1) { - if (!EnsureAtom(input->Tail(), ctx)) { - return TStatus::Error; - } - - if (input->Tail().Content() != PurecalcDefaultService) { - ctx.AddError( - TIssue( - ctx.GetPosition(input->Tail().Pos()), - TStringBuilder() << "Unsupported system: " << input->Tail().Content())); - return TStatus::Error; - } - } + if (ProcessorMode_ != EProcessorMode::PullList) { + input->SetTypeAnn(ctx.MakeType<TStreamExprType>(itemType)); + } else { + input->SetTypeAnn(ctx.MakeType<TListExprType>(itemType)); + } - if (NNodes::TCoDependsOnBase::Match(&input->Head())) { - if (!EnsureArgsCount(input->Head(), 1, ctx)) { - return TStatus::Error; - } - - if (!TryBuildTableNameNode(input->Pos(), input->Head().HeadPtr(), output, ctx)) { - return TStatus::Error; - } - } else { - if (!EnsureSpecificDataType(input->Head(), EDataSlot::String, ctx)) { - return TStatus::Error; - } - output = input->HeadPtr(); - } + TypeAnnotationContext_->SetColumnOrder(*input, columnOrder, ctx); + return TStatus::Ok; + } - return TStatus::Repeat; + TStatus HandleTableName(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { + if (!EnsureMinMaxArgsCount(*input, 1, 2, ctx)) { + return TStatus::Error; } - TStatus HandleTablePath(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { - if (!EnsureArgsCount(*input, 1, ctx)) { + if (input->ChildrenSize() > 1) { + if (!EnsureAtom(input->Tail(), ctx)) { return TStatus::Error; } - if (!EnsureDependsOn(input->Head(), ctx)) { + if (input->Tail().Content() != PurecalcDefaultService) { + ctx.AddError( + TIssue( + ctx.GetPosition(input->Tail().Pos()), + TStringBuilder() << "Unsupported system: " << input->Tail().Content())); return TStatus::Error; } + } + if (NNodes::TCoDependsOnBase::Match(&input->Head())) { if (!EnsureArgsCount(input->Head(), 1, ctx)) { return TStatus::Error; } @@ -186,66 +159,97 @@ namespace { if (!TryBuildTableNameNode(input->Pos(), input->Head().HeadPtr(), output, ctx)) { return TStatus::Error; } + } else { + if (!EnsureSpecificDataType(input->Head(), EDataSlot::String, ctx)) { + return TStatus::Error; + } + output = input->HeadPtr(); + } + + return TStatus::Repeat; + } + + TStatus HandleTablePath(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { + if (!EnsureArgsCount(*input, 1, ctx)) { + return TStatus::Error; + } + + if (!EnsureDependsOn(input->Head(), ctx)) { + return TStatus::Error; + } + if (!EnsureArgsCount(input->Head(), 1, ctx)) { + return TStatus::Error; + } + + if (!TryBuildTableNameNode(input->Pos(), input->Head().HeadPtr(), output, ctx)) { + return TStatus::Error; + } + + return TStatus::Repeat; + } + + TStatus HandleHoppingTraits(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { + Y_UNUSED(output); + if (input->ChildrenSize() == 1) { + auto children = input->ChildrenList(); + // clang-format off + auto falseArg = ctx.Builder(input->Pos()) + .Atom("false") + .Seal() + .Build(); + // clang-format on + children.emplace_back(falseArg); + input->ChangeChildrenInplace(std::move(children)); return TStatus::Repeat; } - TStatus HandleHoppingTraits(const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { - Y_UNUSED(output); - if (input->ChildrenSize() == 1) { - auto children = input->ChildrenList(); - auto falseArg = ctx.Builder(input->Pos()) - .Atom("false") - .Seal() - .Build(); - children.emplace_back(falseArg); - input->ChangeChildrenInplace(std::move(children)); - return TStatus::Repeat; - } + return TStatus::Ok; + } - return TStatus::Ok; +private: + bool TryBuildTableNameNode( + TPositionHandle position, const TExprNode::TPtr& row, TExprNode::TPtr& result, TExprContext& ctx) + { + if (!EnsureStructType(*row, ctx)) { + return false; } - private: - bool TryBuildTableNameNode( - TPositionHandle position, const TExprNode::TPtr& row, TExprNode::TPtr& result, TExprContext& ctx) - { - if (!EnsureStructType(*row, ctx)) { - return false; - } + const auto* structType = row->GetTypeAnn()->Cast<TStructExprType>(); - const auto* structType = row->GetTypeAnn()->Cast<TStructExprType>(); - - if (auto pos = structType->FindItem(PurecalcSysColumnTablePath)) { - if (!EnsureSpecificDataType(row->Pos(), *structType->GetItems()[*pos]->GetItemType(), EDataSlot::String, ctx)) { - return false; - } - - result = ctx.Builder(position) - .Callable(NNodes::TCoMember::CallableName()) - .Add(0, row) - .Atom(1, PurecalcSysColumnTablePath) - .Seal() - .Build(); - } else { - result = ctx.Builder(position) - .Callable(NNodes::TCoString::CallableName()) - .Atom(0, "") - .Seal() - .Build(); + if (auto pos = structType->FindItem(PurecalcSysColumnTablePath)) { + if (!EnsureSpecificDataType(row->Pos(), *structType->GetItems()[*pos]->GetItemType(), EDataSlot::String, ctx)) { + return false; } - return true; + // clang-format off + result = ctx.Builder(position) + .Callable(NNodes::TCoMember::CallableName()) + .Add(0, row) + .Atom(1, PurecalcSysColumnTablePath) + .Seal() + .Build(); + // clang-format on + } else { + // clang-format off + result = ctx.Builder(position) + .Callable(NNodes::TCoString::CallableName()) + .Atom(0, "") + .Seal() + .Build(); + // clang-format on } - }; -} + + return true; + } +}; +} // namespace TAutoPtr<IGraphTransformer> NYql::NPureCalc::MakeTypeAnnotationTransformer( TTypeAnnotationContextPtr typeAnnotationContext, const TVector<const TStructExprType*>& inputStructs, TVector<const TStructExprType*>& rawInputTypes, EProcessorMode processorMode, - const TString& nodeName -) { + const TString& nodeName) { return new TTypeAnnotator(typeAnnotationContext, inputStructs, rawInputTypes, processorMode, nodeName); } diff --git a/yql/essentials/public/purecalc/common/transformations/type_annotation.h b/yql/essentials/public/purecalc/common/transformations/type_annotation.h index 87649fd231a..5ec0df2da5c 100644 --- a/yql/essentials/public/purecalc/common/transformations/type_annotation.h +++ b/yql/essentials/public/purecalc/common/transformations/type_annotation.h @@ -7,24 +7,23 @@ #include <yql/essentials/core/yql_type_annotation.h> namespace NYql { - namespace NPureCalc { - /** - * Build type annotation transformer that is aware of type of the input rows. - * - * @param typeAnnotationContext current context. - * @param inputStructs types of each input. - * @param rawInputStructs container to store the resulting input item type. - * @param processorMode current processor mode. This will affect generated input type, - * e.g. list node or struct node. - * @param nodeName name of the callable used to get input data, e.g. `Self`. - * @return a graph transformer for type annotation. - */ - TAutoPtr<IGraphTransformer> MakeTypeAnnotationTransformer( - TTypeAnnotationContextPtr typeAnnotationContext, - const TVector<const TStructExprType*>& inputStructs, - TVector<const TStructExprType*>& rawInputStructs, - EProcessorMode processorMode, - const TString& nodeName = TString{PurecalcInputCallableName} - ); - } -} +namespace NPureCalc { +/** + * Build type annotation transformer that is aware of type of the input rows. + * + * @param typeAnnotationContext current context. + * @param inputStructs types of each input. + * @param rawInputStructs container to store the resulting input item type. + * @param processorMode current processor mode. This will affect generated input type, + * e.g. list node or struct node. + * @param nodeName name of the callable used to get input data, e.g. `Self`. + * @return a graph transformer for type annotation. + */ +TAutoPtr<IGraphTransformer> MakeTypeAnnotationTransformer( + TTypeAnnotationContextPtr typeAnnotationContext, + const TVector<const TStructExprType*>& inputStructs, + TVector<const TStructExprType*>& rawInputStructs, + EProcessorMode processorMode, + const TString& nodeName = TString{PurecalcInputCallableName}); +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/transformations/utils.cpp b/yql/essentials/public/purecalc/common/transformations/utils.cpp index 54e0ef7caf8..82d84187a44 100644 --- a/yql/essentials/public/purecalc/common/transformations/utils.cpp +++ b/yql/essentials/public/purecalc/common/transformations/utils.cpp @@ -9,10 +9,10 @@ using namespace NYql::NPureCalc; TExprNode::TPtr NYql::NPureCalc::NodeFromBlocks( const TPositionHandle& pos, const TStructExprType* structType, - TExprContext& ctx -) { + TExprContext& ctx) { const auto items = structType->GetItems(); Y_ENSURE(items.size() > 0); + // clang-format off return ctx.Builder(pos) .Lambda() .Param("stream") @@ -50,30 +50,31 @@ TExprNode::TPtr NYql::NPureCalc::NodeFromBlocks( .Params("fields", items.size()) .Callable("AsStruct") .Do([&](TExprNodeBuilder& parent) -> TExprNodeBuilder& { - ui32 i = 0; - for (const auto& item : items) { - parent.List(i) - .Atom(0, item->GetName()) - .Arg(1, "fields", i++) - .Seal(); - } - return parent; - }) + ui32 i = 0; + for (const auto& item : items) { + parent.List(i) + .Atom(0, item->GetName()) + .Arg(1, "fields", i++) + .Seal(); + } + return parent; + }) .Seal() .Seal() .Seal() .Seal() .Seal() .Build(); + // clang-format on } TExprNode::TPtr NYql::NPureCalc::NodeToBlocks( const TPositionHandle& pos, const TStructExprType* structType, - TExprContext& ctx -) { + TExprContext& ctx) { const auto items = structType->GetItems(); Y_ENSURE(items.size() > 0); + // clang-format off return ctx.Builder(pos) .Lambda() .Param("stream") @@ -126,6 +127,7 @@ TExprNode::TPtr NYql::NPureCalc::NodeToBlocks( .Seal() .Seal() .Build(); + // clang-format on } TExprNode::TPtr NYql::NPureCalc::ApplyToIterable( @@ -133,9 +135,9 @@ TExprNode::TPtr NYql::NPureCalc::ApplyToIterable( const TExprNode::TPtr iterable, const TExprNode::TPtr lambda, bool wrapLMap, - TExprContext& ctx -) { + TExprContext& ctx) { if (wrapLMap) { + // clang-format off return ctx.Builder(pos) .Callable("LMap") .Add(0, iterable) @@ -147,19 +149,21 @@ TExprNode::TPtr NYql::NPureCalc::ApplyToIterable( .Seal() .Seal() .Build(); + // clang-format on } else { + // clang-format off return ctx.Builder(pos) .Apply(lambda) .With(0, iterable) .Seal() .Build(); + // clang-format on } } const TStructExprType* NYql::NPureCalc::WrapBlockStruct( const TStructExprType* structType, - TExprContext& ctx -) { + TExprContext& ctx) { TVector<const TItemExprType*> members; for (const auto& item : structType->GetItems()) { const auto blockItemType = ctx.MakeType<TBlockExprType>(item->GetItemType()); @@ -172,8 +176,7 @@ const TStructExprType* NYql::NPureCalc::WrapBlockStruct( const TStructExprType* NYql::NPureCalc::UnwrapBlockStruct( const TStructExprType* structType, - TExprContext& ctx -) { + TExprContext& ctx) { TVector<const TItemExprType*> members; for (const auto& item : structType->GetItems()) { if (item->GetName() == PurecalcBlockColumnLength) { diff --git a/yql/essentials/public/purecalc/common/transformations/utils.h b/yql/essentials/public/purecalc/common/transformations/utils.h index cc8849b7e3a..c2ddf299ce3 100644 --- a/yql/essentials/public/purecalc/common/transformations/utils.h +++ b/yql/essentials/public/purecalc/common/transformations/utils.h @@ -3,81 +3,76 @@ #include <yql/essentials/core/yql_graph_transformer.h> namespace NYql { - namespace NPureCalc { - /** - * A transformer which wraps the given input node with the pipeline - * converting the input type to the block one. - * - * @param pos the position of the given node to be rewritten. - * @param structType the item type of the container provided by the node. - * @param ctx the context to make ExprNode rewrites. - * @return the resulting ExprNode. - */ - TExprNode::TPtr NodeFromBlocks( - const TPositionHandle& pos, - const TStructExprType* structType, - TExprContext& ctx - ); +namespace NPureCalc { +/** + * A transformer which wraps the given input node with the pipeline + * converting the input type to the block one. + * + * @param pos the position of the given node to be rewritten. + * @param structType the item type of the container provided by the node. + * @param ctx the context to make ExprNode rewrites. + * @return the resulting ExprNode. + */ +TExprNode::TPtr NodeFromBlocks( + const TPositionHandle& pos, + const TStructExprType* structType, + TExprContext& ctx); - /** - * A transformer which wraps the given root node with the pipeline - * converting the output type to the block one. - * - * @param pos the position of the given node to be rewritten. - * @param structType the item type of the container provided by the node. - * @param ctx the context to make ExprNode rewrites. - * @return the resulting ExprNode. - */ - TExprNode::TPtr NodeToBlocks( - const TPositionHandle& pos, - const TStructExprType* structType, - TExprContext& ctx - ); +/** + * A transformer which wraps the given root node with the pipeline + * converting the output type to the block one. + * + * @param pos the position of the given node to be rewritten. + * @param structType the item type of the container provided by the node. + * @param ctx the context to make ExprNode rewrites. + * @return the resulting ExprNode. + */ +TExprNode::TPtr NodeToBlocks( + const TPositionHandle& pos, + const TStructExprType* structType, + TExprContext& ctx); - /** - * A transformer to apply the given lambda to the given iterable (either - * list or stream). If the iterable is list, the lambda should be passed - * to the <LMap> callable; if the iterable is stream, the lambda should - * be applied right to the iterable. - * - * @param pos the position of the given node to be rewritten. - * @param iterable the node, that provides the iterable to be processed. - * @param lambda the node, that provides lambda to be applied. - * @param wrapLMap indicator to wrap the result with LMap callable. - * @oaram ctx the context to make ExprNode rewrites. - */ - TExprNode::TPtr ApplyToIterable( - const TPositionHandle& pos, - const TExprNode::TPtr iterable, - const TExprNode::TPtr lambda, - bool wrapLMap, - TExprContext& ctx - ); +/** + * A transformer to apply the given lambda to the given iterable (either + * list or stream). If the iterable is list, the lambda should be passed + * to the <LMap> callable; if the iterable is stream, the lambda should + * be applied right to the iterable. + * + * @param pos the position of the given node to be rewritten. + * @param iterable the node, that provides the iterable to be processed. + * @param lambda the node, that provides lambda to be applied. + * @param wrapLMap indicator to wrap the result with LMap callable. + * @oaram ctx the context to make ExprNode rewrites. + */ +TExprNode::TPtr ApplyToIterable( + const TPositionHandle& pos, + const TExprNode::TPtr iterable, + const TExprNode::TPtr lambda, + bool wrapLMap, + TExprContext& ctx); - /** - * A helper which wraps the items of the given struct with the block - * type container and appends the new item for _yql_block_length column. - * - * @param structType original struct to be wrapped. - * @param ctx the context to make ExprType rewrite. - * @return the new struct with block items. - */ - const TStructExprType* WrapBlockStruct( - const TStructExprType* structType, - TExprContext& ctx - ); +/** + * A helper which wraps the items of the given struct with the block + * type container and appends the new item for _yql_block_length column. + * + * @param structType original struct to be wrapped. + * @param ctx the context to make ExprType rewrite. + * @return the new struct with block items. + */ +const TStructExprType* WrapBlockStruct( + const TStructExprType* structType, + TExprContext& ctx); - /** - * A helper which unwraps the block container from the items of the - * given struct and removes the item for _yql_block_length column. - * - * @param structType original struct to be unwrapped. - * @param ctx the context to make ExprType rewrite. - * @return the new struct without block items. - */ - const TStructExprType* UnwrapBlockStruct( - const TStructExprType* structType, - TExprContext& ctx - ); - } -} +/** + * A helper which unwraps the block container from the items of the + * given struct and removes the item for _yql_block_length column. + * + * @param structType original struct to be unwrapped. + * @param ctx the context to make ExprType rewrite. + * @return the new struct without block items. + */ +const TStructExprType* UnwrapBlockStruct( + const TStructExprType* structType, + TExprContext& ctx); +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/type_from_schema.cpp b/yql/essentials/public/purecalc/common/type_from_schema.cpp index 09a3ff50a77..184419bbc97 100644 --- a/yql/essentials/public/purecalc/common/type_from_schema.cpp +++ b/yql/essentials/public/purecalc/common/type_from_schema.cpp @@ -6,255 +6,251 @@ #include <yql/essentials/providers/common/schema/expr/yql_expr_schema.h> namespace { - using namespace NYql; +using namespace NYql; #define REPORT(...) ctx.AddError(TIssue(TString(TStringBuilder() << __VA_ARGS__))) - bool CheckStruct(const TStructExprType* got, const TStructExprType* expected, TExprContext& ctx, const TTypeAnnotationContext& typeCtx) { - auto status = true; - - if (expected) { - for (const auto* gotNamedItem : got->GetItems()) { - auto expectedIndex = expected->FindItem(gotNamedItem->GetName()); - if (expectedIndex) { - const auto* gotItem = gotNamedItem->GetItemType(); - const auto* expectedItem = expected->GetItems()[*expectedIndex]->GetItemType(); - - auto arg = ctx.NewArgument(TPositionHandle(), "arg"); - auto fieldConversionStatus = TryConvertTo(arg, *gotItem, *expectedItem, ctx, typeCtx); - if (fieldConversionStatus.Level == IGraphTransformer::TStatus::Error) { - status = false; - } - } else { - REPORT("Got unexpected item " << TString{gotNamedItem->GetName()}.Quote()); - status = false; - } - } +bool CheckStruct(const TStructExprType* got, const TStructExprType* expected, TExprContext& ctx, const TTypeAnnotationContext& typeCtx) { + auto status = true; - for (const auto* expectedNamedItem : expected->GetItems()) { - if (expectedNamedItem->GetItemType()->GetKind() == ETypeAnnotationKind::Optional) { - continue; - } - if (!got->FindItem(expectedNamedItem->GetName())) { - REPORT("Expected item " << TString{expectedNamedItem->GetName()}.Quote()); + if (expected) { + for (const auto* gotNamedItem : got->GetItems()) { + auto expectedIndex = expected->FindItem(gotNamedItem->GetName()); + if (expectedIndex) { + const auto* gotItem = gotNamedItem->GetItemType(); + const auto* expectedItem = expected->GetItems()[*expectedIndex]->GetItemType(); + + auto arg = ctx.NewArgument(TPositionHandle(), "arg"); + auto fieldConversionStatus = TryConvertTo(arg, *gotItem, *expectedItem, ctx, typeCtx); + if (fieldConversionStatus.Level == IGraphTransformer::TStatus::Error) { status = false; } + } else { + REPORT("Got unexpected item " << TString{gotNamedItem->GetName()}.Quote()); + status = false; } } - return status; - } - - bool CheckVariantContent(const TStructExprType* got, const TStructExprType* expected, TExprContext& ctx, - const TTypeAnnotationContext& typeCtx) { - auto status = true; - - if (expected) { - for (const auto* gotNamedItem : got->GetItems()) { - if (!expected->FindItem(gotNamedItem->GetName())) { - REPORT("Got unexpected alternative " << TString{gotNamedItem->GetName()}.Quote()); - status = false; - } + for (const auto* expectedNamedItem : expected->GetItems()) { + if (expectedNamedItem->GetItemType()->GetKind() == ETypeAnnotationKind::Optional) { + continue; } - - for (const auto* expectedNamedItem : expected->GetItems()) { - if (!got->FindItem(expectedNamedItem->GetName())) { - REPORT("Expected alternative " << TString{expectedNamedItem->GetName()}.Quote()); - status = false; - } + if (!got->FindItem(expectedNamedItem->GetName())) { + REPORT("Expected item " << TString{expectedNamedItem->GetName()}.Quote()); + status = false; } } + } - for (const auto* gotNamedItem : got->GetItems()) { - const auto* gotItem = gotNamedItem->GetItemType(); - auto expectedIndex = expected ? expected->FindItem(gotNamedItem->GetName()) : Nothing(); - const auto* expectedItem = expected && expectedIndex ? expected->GetItems()[*expectedIndex]->GetItemType() : nullptr; + return status; +} - TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { - return new TIssue(TPosition(), TStringBuilder() << "Alternative " << TString{gotNamedItem->GetName()}.Quote()); - }); +bool CheckVariantContent(const TStructExprType* got, const TStructExprType* expected, TExprContext& ctx, + const TTypeAnnotationContext& typeCtx) { + auto status = true; - if (expectedItem && expectedItem->GetKind() != gotItem->GetKind()) { - REPORT("Expected to be " << expectedItem->GetKind() << ", but got " << gotItem->GetKind()); + if (expected) { + for (const auto* gotNamedItem : got->GetItems()) { + if (!expected->FindItem(gotNamedItem->GetName())) { + REPORT("Got unexpected alternative " << TString{gotNamedItem->GetName()}.Quote()); status = false; } + } - if (gotItem->GetKind() != ETypeAnnotationKind::Struct) { - REPORT("Expected to be Struct, but got " << gotItem->GetKind()); + for (const auto* expectedNamedItem : expected->GetItems()) { + if (!got->FindItem(expectedNamedItem->GetName())) { + REPORT("Expected alternative " << TString{expectedNamedItem->GetName()}.Quote()); status = false; } + } + } - const auto* gotStruct = gotItem->Cast<TStructExprType>(); - const auto* expectedStruct = expectedItem ? expectedItem->Cast<TStructExprType>() : nullptr; + for (const auto* gotNamedItem : got->GetItems()) { + const auto* gotItem = gotNamedItem->GetItemType(); + auto expectedIndex = expected ? expected->FindItem(gotNamedItem->GetName()) : Nothing(); + const auto* expectedItem = expected && expectedIndex ? expected->GetItems()[*expectedIndex]->GetItemType() : nullptr; - if (!CheckStruct(gotStruct, expectedStruct, ctx, typeCtx)) { - status = false; - } + TIssueScopeGuard issueScope(ctx.IssueManager, [&]() { + return new TIssue(TPosition(), TStringBuilder() << "Alternative " << TString{gotNamedItem->GetName()}.Quote()); + }); + + if (expectedItem && expectedItem->GetKind() != gotItem->GetKind()) { + REPORT("Expected to be " << expectedItem->GetKind() << ", but got " << gotItem->GetKind()); + status = false; } - return status; - } + if (gotItem->GetKind() != ETypeAnnotationKind::Struct) { + REPORT("Expected to be Struct, but got " << gotItem->GetKind()); + status = false; + } - bool CheckVariantContent(const TTupleExprType* got, const TTupleExprType* expected, TExprContext& ctx, - const TTypeAnnotationContext& typeCtx) { - if (expected && expected->GetSize() != got->GetSize()) { - REPORT("Expected to have " << expected->GetSize() << " alternatives, but got " << got->GetSize()); - return false; + const auto* gotStruct = gotItem->Cast<TStructExprType>(); + const auto* expectedStruct = expectedItem ? expectedItem->Cast<TStructExprType>() : nullptr; + + if (!CheckStruct(gotStruct, expectedStruct, ctx, typeCtx)) { + status = false; } + } - auto status = true; + return status; +} - for (size_t i = 0; i < got->GetSize(); i++) { - const auto* gotItem = got->GetItems()[i]; - const auto* expectedItem = expected ? expected->GetItems()[i] : nullptr; +bool CheckVariantContent(const TTupleExprType* got, const TTupleExprType* expected, TExprContext& ctx, + const TTypeAnnotationContext& typeCtx) { + if (expected && expected->GetSize() != got->GetSize()) { + REPORT("Expected to have " << expected->GetSize() << " alternatives, but got " << got->GetSize()); + return false; + } - TIssueScopeGuard issueScope(ctx.IssueManager, [i]() { - return new TIssue(TPosition(), TStringBuilder() << "Alternative #" << i); - }); + auto status = true; - if (expectedItem && expectedItem->GetKind() != gotItem->GetKind()) { - REPORT("Expected " << expectedItem->GetKind() << ", but got " << gotItem->GetKind()); - status = false; - } + for (size_t i = 0; i < got->GetSize(); i++) { + const auto* gotItem = got->GetItems()[i]; + const auto* expectedItem = expected ? expected->GetItems()[i] : nullptr; - if (gotItem->GetKind() != ETypeAnnotationKind::Struct) { - REPORT("Expected Struct, but got " << gotItem->GetKind()); - status = false; - } + TIssueScopeGuard issueScope(ctx.IssueManager, [i]() { + return new TIssue(TPosition(), TStringBuilder() << "Alternative #" << i); + }); - const auto* gotStruct = gotItem->Cast<TStructExprType>(); - const auto* expectedStruct = expectedItem ? expectedItem->Cast<TStructExprType>() : nullptr; + if (expectedItem && expectedItem->GetKind() != gotItem->GetKind()) { + REPORT("Expected " << expectedItem->GetKind() << ", but got " << gotItem->GetKind()); + status = false; + } - if (!CheckStruct(gotStruct, expectedStruct, ctx, typeCtx)) { - status = false; - } + if (gotItem->GetKind() != ETypeAnnotationKind::Struct) { + REPORT("Expected Struct, but got " << gotItem->GetKind()); + status = false; } - return status; - } + const auto* gotStruct = gotItem->Cast<TStructExprType>(); + const auto* expectedStruct = expectedItem ? expectedItem->Cast<TStructExprType>() : nullptr; - bool CheckVariant(const TVariantExprType* got, const TVariantExprType* expected, TExprContext& ctx, - const TTypeAnnotationContext& typeCtx) { - if (expected && expected->GetUnderlyingType()->GetKind() != got->GetUnderlyingType()->GetKind()) { - REPORT("Expected Variant over " << expected->GetUnderlyingType()->GetKind() << - ", but got Variant over " << got->GetUnderlyingType()->GetKind()); - return false; + if (!CheckStruct(gotStruct, expectedStruct, ctx, typeCtx)) { + status = false; } + } - switch (got->GetUnderlyingType()->GetKind()) { - case ETypeAnnotationKind::Struct: - { - const auto* gotStruct = got->GetUnderlyingType()->Cast<TStructExprType>(); - const auto* expectedStruct = expected ? expected->GetUnderlyingType()->Cast<TStructExprType>() : nullptr; - return CheckVariantContent(gotStruct, expectedStruct, ctx, typeCtx); - } - case ETypeAnnotationKind::Tuple: - { - const auto* gotTuple = got->GetUnderlyingType()->Cast<TTupleExprType>(); - const auto* expectedTuple = expected ? expected->GetUnderlyingType()->Cast<TTupleExprType>() : nullptr; - return CheckVariantContent(gotTuple, expectedTuple, ctx, typeCtx); - } - default: - Y_UNREACHABLE(); - } + return status; +} +bool CheckVariant(const TVariantExprType* got, const TVariantExprType* expected, TExprContext& ctx, + const TTypeAnnotationContext& typeCtx) { + if (expected && expected->GetUnderlyingType()->GetKind() != got->GetUnderlyingType()->GetKind()) { + REPORT("Expected Variant over " << expected->GetUnderlyingType()->GetKind() << ", but got Variant over " << got->GetUnderlyingType()->GetKind()); return false; } - bool CheckSchema(const TTypeAnnotationNode* got, const TTypeAnnotationNode* expected, TExprContext& ctx, const TTypeAnnotationContext& typeCtx, bool allowVariant) { - if (expected && expected->GetKind() != got->GetKind()) { - REPORT("Expected " << expected->GetKind() << ", but got " << got->GetKind()); - return false; + switch (got->GetUnderlyingType()->GetKind()) { + case ETypeAnnotationKind::Struct: { + const auto* gotStruct = got->GetUnderlyingType()->Cast<TStructExprType>(); + const auto* expectedStruct = expected ? expected->GetUnderlyingType()->Cast<TStructExprType>() : nullptr; + return CheckVariantContent(gotStruct, expectedStruct, ctx, typeCtx); } + case ETypeAnnotationKind::Tuple: { + const auto* gotTuple = got->GetUnderlyingType()->Cast<TTupleExprType>(); + const auto* expectedTuple = expected ? expected->GetUnderlyingType()->Cast<TTupleExprType>() : nullptr; + return CheckVariantContent(gotTuple, expectedTuple, ctx, typeCtx); + } + default: + Y_UNREACHABLE(); + } - switch (got->GetKind()) { - case ETypeAnnotationKind::Struct: - { - TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Toplevel struct"); }); + return false; +} - const auto* gotStruct = got->Cast<TStructExprType>(); - const auto* expectedStruct = expected ? expected->Cast<TStructExprType>() : nullptr; +bool CheckSchema(const TTypeAnnotationNode* got, const TTypeAnnotationNode* expected, TExprContext& ctx, const TTypeAnnotationContext& typeCtx, bool allowVariant) { + if (expected && expected->GetKind() != got->GetKind()) { + REPORT("Expected " << expected->GetKind() << ", but got " << got->GetKind()); + return false; + } - if (!gotStruct->Validate(TPositionHandle(), ctx)) { - return false; - } + switch (got->GetKind()) { + case ETypeAnnotationKind::Struct: { + TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Toplevel struct"); }); + + const auto* gotStruct = got->Cast<TStructExprType>(); + const auto* expectedStruct = expected ? expected->Cast<TStructExprType>() : nullptr; - return CheckStruct(gotStruct, expectedStruct, ctx, typeCtx); + if (!gotStruct->Validate(TPositionHandle(), ctx)) { + return false; } - case ETypeAnnotationKind::Variant: - if (allowVariant) { - TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Toplevel variant"); }); - const auto* gotVariant = got->Cast<TVariantExprType>(); - const auto* expectedVariant = expected ? expected->Cast<TVariantExprType>() : nullptr; + return CheckStruct(gotStruct, expectedStruct, ctx, typeCtx); + } + case ETypeAnnotationKind::Variant: + if (allowVariant) { + TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Toplevel variant"); }); - if (!gotVariant->Validate(TPositionHandle(), ctx)) { - return false; - } + const auto* gotVariant = got->Cast<TVariantExprType>(); + const auto* expectedVariant = expected ? expected->Cast<TVariantExprType>() : nullptr; - return CheckVariant(gotVariant, expectedVariant, ctx, typeCtx); - } - [[fallthrough]]; - default: - if (allowVariant) { - REPORT("Expected Struct or Variant, but got " << got->GetKind()); - } else { - REPORT("Expected Struct, but got " << got->GetKind()); + if (!gotVariant->Validate(TPositionHandle(), ctx)) { + return false; } - return false; - } + + return CheckVariant(gotVariant, expectedVariant, ctx, typeCtx); + } + [[fallthrough]]; + default: + if (allowVariant) { + REPORT("Expected Struct or Variant, but got " << got->GetKind()); + } else { + REPORT("Expected Struct, but got " << got->GetKind()); + } + return false; } } +} // namespace namespace NYql::NPureCalc { - const TTypeAnnotationNode* MakeTypeFromSchema(const NYT::TNode& yson, TExprContext& ctx) { - const auto* type = NCommon::ParseTypeFromYson(yson, ctx); - - if (!type) { - auto issues = ctx.IssueManager.GetIssues(); - CheckFatalIssues(issues); - ythrow TCompileError("", issues.ToString()) - << "Incorrect schema: " << NYT::NodeToYsonString(yson, NYson::EYsonFormat::Text); - } - - return type; +const TTypeAnnotationNode* MakeTypeFromSchema(const NYT::TNode& yson, TExprContext& ctx) { + const auto* type = NCommon::ParseTypeFromYson(yson, ctx); + + if (!type) { + auto issues = ctx.IssueManager.GetIssues(); + CheckFatalIssues(issues); + ythrow TCompileError("", issues.ToString()) + << "Incorrect schema: " << NYT::NodeToYsonString(yson, NYson::EYsonFormat::Text); } - const TStructExprType* ExtendStructType( - const TStructExprType* type, const THashMap<TString, NYT::TNode>& extraColumns, TExprContext& ctx) - { - if (extraColumns.empty()) { - return type; - } + return type; +} - auto items = type->GetItems(); - for (const auto& pair : extraColumns) { - items.push_back(ctx.MakeType<TItemExprType>(pair.first, MakeTypeFromSchema(pair.second, ctx))); - } +const TStructExprType* ExtendStructType( + const TStructExprType* type, const THashMap<TString, NYT::TNode>& extraColumns, TExprContext& ctx) +{ + if (extraColumns.empty()) { + return type; + } - auto result = ctx.MakeType<TStructExprType>(items); + auto items = type->GetItems(); + for (const auto& pair : extraColumns) { + items.push_back(ctx.MakeType<TItemExprType>(pair.first, MakeTypeFromSchema(pair.second, ctx))); + } - if (!result->Validate(TPosition(), ctx)) { - auto issues = ctx.IssueManager.GetIssues(); - CheckFatalIssues(issues); - ythrow TCompileError("", issues.ToString()) << "Incorrect extended struct type"; - } + auto result = ctx.MakeType<TStructExprType>(items); - return result; + if (!result->Validate(TPosition(), ctx)) { + auto issues = ctx.IssueManager.GetIssues(); + CheckFatalIssues(issues); + ythrow TCompileError("", issues.ToString()) << "Incorrect extended struct type"; } - bool ValidateInputSchema(const TTypeAnnotationNode* type, TExprContext& ctx, const TTypeAnnotationContext& typeCtx) { - TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Input schema"); }); - return CheckSchema(type, nullptr, ctx, typeCtx, false); - } + return result; +} - bool ValidateOutputSchema(const TTypeAnnotationNode* type, TExprContext& ctx, const TTypeAnnotationContext& typeCtx) { - TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Output schema"); }); - return CheckSchema(type, nullptr, ctx, typeCtx, true); - } +bool ValidateInputSchema(const TTypeAnnotationNode* type, TExprContext& ctx, const TTypeAnnotationContext& typeCtx) { + TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Input schema"); }); + return CheckSchema(type, nullptr, ctx, typeCtx, false); +} - bool ValidateOutputType(const TTypeAnnotationNode* type, const TTypeAnnotationNode* expected, TExprContext& ctx, const TTypeAnnotationContext& typeCtx) { - TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Program return type"); }); - return CheckSchema(type, expected, ctx, typeCtx, true); - } +bool ValidateOutputSchema(const TTypeAnnotationNode* type, TExprContext& ctx, const TTypeAnnotationContext& typeCtx) { + TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Output schema"); }); + return CheckSchema(type, nullptr, ctx, typeCtx, true); +} + +bool ValidateOutputType(const TTypeAnnotationNode* type, const TTypeAnnotationNode* expected, TExprContext& ctx, const TTypeAnnotationContext& typeCtx) { + TIssueScopeGuard issueScope(ctx.IssueManager, []() { return new TIssue(TPosition(), "Program return type"); }); + return CheckSchema(type, expected, ctx, typeCtx, true); } +} // namespace NYql::NPureCalc diff --git a/yql/essentials/public/purecalc/common/type_from_schema.h b/yql/essentials/public/purecalc/common/type_from_schema.h index 18e6e8373f4..a34210b1bed 100644 --- a/yql/essentials/public/purecalc/common/type_from_schema.h +++ b/yql/essentials/public/purecalc/common/type_from_schema.h @@ -7,32 +7,32 @@ #include <library/cpp/yson/node/node.h> namespace NYql { - struct TTypeAnnotationContext; - - namespace NPureCalc { - /** - * Load struct type from yson. Use methods below to check returned type for correctness. - */ - const TTypeAnnotationNode* MakeTypeFromSchema(const NYT::TNode&, TExprContext&); - - /** - * Extend struct type with additional columns. Type of each extra column is loaded from yson. - */ - const TStructExprType* ExtendStructType(const TStructExprType*, const THashMap<TString, NYT::TNode>&, TExprContext&); - - /** - * Check if the given type can be used as an input schema, i.e. it is a struct. - */ - bool ValidateInputSchema(const TTypeAnnotationNode* type, TExprContext& ctx, const TTypeAnnotationContext& typeCtx); - - /** - * Check if the given type can be used as an output schema, i.e. it is a struct or a variant of structs. - */ - bool ValidateOutputSchema(const TTypeAnnotationNode* type, TExprContext& ctx, const TTypeAnnotationContext& typeCtx); - - /** - * Check if output type can be silently converted to the expected type. - */ - bool ValidateOutputType(const TTypeAnnotationNode* type, const TTypeAnnotationNode* expected, TExprContext& ctx, const TTypeAnnotationContext& typeCtx); - } -} +struct TTypeAnnotationContext; + +namespace NPureCalc { +/** + * Load struct type from yson. Use methods below to check returned type for correctness. + */ +const TTypeAnnotationNode* MakeTypeFromSchema(const NYT::TNode&, TExprContext&); + +/** + * Extend struct type with additional columns. Type of each extra column is loaded from yson. + */ +const TStructExprType* ExtendStructType(const TStructExprType*, const THashMap<TString, NYT::TNode>&, TExprContext&); + +/** + * Check if the given type can be used as an input schema, i.e. it is a struct. + */ +bool ValidateInputSchema(const TTypeAnnotationNode* type, TExprContext& ctx, const TTypeAnnotationContext& typeCtx); + +/** + * Check if the given type can be used as an output schema, i.e. it is a struct or a variant of structs. + */ +bool ValidateOutputSchema(const TTypeAnnotationNode* type, TExprContext& ctx, const TTypeAnnotationContext& typeCtx); + +/** + * Check if output type can be silently converted to the expected type. + */ +bool ValidateOutputType(const TTypeAnnotationNode* type, const TTypeAnnotationNode* expected, TExprContext& ctx, const TTypeAnnotationContext& typeCtx); +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/worker.cpp b/yql/essentials/public/purecalc/common/worker.cpp index d5561fc4f73..e6a32ff4118 100644 --- a/yql/essentials/public/purecalc/common/worker.cpp +++ b/yql/essentials/public/purecalc/common/worker.cpp @@ -46,15 +46,12 @@ TWorkerGraph::TWorkerGraph( ui64 nativeYtTypeFlags, TMaybe<ui64> deterministicTimeProviderSeed, TLangVersion langver, - bool insideEvaluation -) + bool insideEvaluation) : ScopedAlloc(__LOCATION__, NKikimr::TAlignedPagePoolCounters(), funcRegistry.SupportsSizedAllocators()) , Env(ScopedAlloc) , FuncRegistry(funcRegistry) , RandomProvider(CreateDefaultRandomProvider()) - , TimeProvider(deterministicTimeProviderSeed ? - CreateDeterministicTimeProvider(*deterministicTimeProviderSeed) : - CreateDefaultTimeProvider()) + , TimeProvider(deterministicTimeProviderSeed ? CreateDeterministicTimeProvider(*deterministicTimeProviderSeed) : CreateDefaultTimeProvider()) , LLVMSettings(LLVMSettings) , NativeYtTypeFlags(nativeYtTypeFlags) { @@ -124,19 +121,16 @@ TWorkerGraph::TWorkerGraph( const THashSet<NKikimr::NMiniKQL::TInternName> selfCallableNames = { Env.InternName(PurecalcInputCallableName), - Env.InternName(PurecalcBlockInputCallableName) - }; + Env.InternName(PurecalcBlockInputCallableName)}; NKikimr::NMiniKQL::TExploringNodeVisitor explorer; explorer.Walk(rootNode.GetNode(), Env.GetNodeStack()); auto compositeNodeFactory = NKikimr::NMiniKQL::GetCompositeWithBuiltinFactory( - {NKikimr::NMiniKQL::GetYqlFactory(), NYql::GetPgFactory()} - ); + {NKikimr::NMiniKQL::GetYqlFactory(), NYql::GetPgFactory()}); auto nodeFactory = [&]( - NKikimr::NMiniKQL::TCallable& callable, const NKikimr::NMiniKQL::TComputationNodeFactoryContext& ctx - ) -> NKikimr::NMiniKQL::IComputationNode* { + NKikimr::NMiniKQL::TCallable& callable, const NKikimr::NMiniKQL::TComputationNodeFactoryContext& ctx) -> NKikimr::NMiniKQL::IComputationNode* { if (selfCallableNames.contains(callable.GetType()->GetNameStr())) { if (insideEvaluation) { throw TErrorException(0) << "Inputs aren't available during evaluation"; @@ -147,8 +141,7 @@ TWorkerGraph::TWorkerGraph( YQL_ENSURE(inputIndex < inputsCount, "Self index is out of range"); YQL_ENSURE(!SelfNodes[inputIndex], "Self can be called at most once with each index"); return SelfNodes[inputIndex] = new NKikimr::NMiniKQL::TExternalComputationNode(ctx.Mutables); - } - else { + } else { return compositeNodeFactory(callable, ctx); } }; @@ -171,7 +164,7 @@ TWorkerGraph::TWorkerGraph( ComputationPattern = NKikimr::NMiniKQL::MakeComputationPattern( explorer, rootNode, - { rootNode.GetNode() }, + {rootNode.GetNode()}, computationPatternOpts); ComputationGraph = ComputationPattern->Clone( @@ -206,12 +199,11 @@ TWorker<TBase>::TWorker( NKikimr::NUdf::ICountersProvider* countersProvider, ui64 nativeYtTypeFlags, TMaybe<ui64> deterministicTimeProviderSeed, - TLangVersion langver -) + TLangVersion langver) : WorkerFactory_(std::move(factory)) , Graph_(exprRoot, exprCtx, serializedProgram, funcRegistry, userData, - inputTypes, originalInputTypes, rawInputTypes, outputType, rawOutputType, - LLVMSettings, countersProvider, nativeYtTypeFlags, deterministicTimeProviderSeed, langver, false) + inputTypes, originalInputTypes, rawInputTypes, outputType, rawOutputType, + LLVMSettings, countersProvider, nativeYtTypeFlags, deterministicTimeProviderSeed, langver, false) { } @@ -404,9 +396,9 @@ NKikimr::NUdf::TUnboxedValue& TPullStreamWorker::GetOutput() { } void TPullStreamWorker::Release() { - with_lock(GetScopedAlloc()) { + with_lock (GetScopedAlloc()) { Output_ = NKikimr::NUdf::TUnboxedValue::Invalid(); - for (auto selfNode: Graph_.SelfNodes) { + for (auto selfNode : Graph_.SelfNodes) { if (selfNode) { selfNode->SetValue(Graph_.ComputationGraph->GetContext(), NKikimr::NUdf::TUnboxedValue::Invalid()); } @@ -477,11 +469,11 @@ void TPullListWorker::ResetOutputIterator() { } void TPullListWorker::Release() { - with_lock(GetScopedAlloc()) { + with_lock (GetScopedAlloc()) { Output_ = NKikimr::NUdf::TUnboxedValue::Invalid(); OutputIterator_ = NKikimr::NUdf::TUnboxedValue::Invalid(); - for (auto selfNode: Graph_.SelfNodes) { + for (auto selfNode : Graph_.SelfNodes) { if (selfNode) { selfNode->SetValue(Graph_.ComputationGraph->GetContext(), NKikimr::NUdf::TUnboxedValue::Invalid()); } @@ -492,45 +484,45 @@ void TPullListWorker::Release() { } namespace { - class TPushStream final: public NKikimr::NMiniKQL::TCustomListValue { - private: - mutable bool HasIterator_ = false; - bool HasValue_ = false; - bool IsFinished_ = false; - NKikimr::NUdf::TUnboxedValue Value_ = NKikimr::NUdf::TUnboxedValue::Invalid(); - - public: - using TCustomListValue::TCustomListValue; - - public: - void SetValue(NKikimr::NUdf::TUnboxedValue&& value) { - Value_ = std::move(value); - HasValue_ = true; - } +class TPushStream final: public NKikimr::NMiniKQL::TCustomListValue { +private: + mutable bool HasIterator_ = false; + bool HasValue_ = false; + bool IsFinished_ = false; + NKikimr::NUdf::TUnboxedValue Value_ = NKikimr::NUdf::TUnboxedValue::Invalid(); + +public: + using TCustomListValue::TCustomListValue; + +public: + void SetValue(NKikimr::NUdf::TUnboxedValue&& value) { + Value_ = std::move(value); + HasValue_ = true; + } - void SetFinished() { - IsFinished_ = true; - } + void SetFinished() { + IsFinished_ = true; + } - NKikimr::NUdf::TUnboxedValue GetListIterator() const override { - YQL_ENSURE(!HasIterator_, "only one pass over input is supported"); - HasIterator_ = true; - return NKikimr::NUdf::TUnboxedValuePod(const_cast<TPushStream*>(this)); - } + NKikimr::NUdf::TUnboxedValue GetListIterator() const override { + YQL_ENSURE(!HasIterator_, "only one pass over input is supported"); + HasIterator_ = true; + return NKikimr::NUdf::TUnboxedValuePod(const_cast<TPushStream*>(this)); + } - NKikimr::NUdf::EFetchStatus Fetch(NKikimr::NUdf::TUnboxedValue& result) override { - if (IsFinished_) { - return NKikimr::NUdf::EFetchStatus::Finish; - } else if (!HasValue_) { - return NKikimr::NUdf::EFetchStatus::Yield; - } else { - result = std::move(Value_); - HasValue_ = false; - return NKikimr::NUdf::EFetchStatus::Ok; - } + NKikimr::NUdf::EFetchStatus Fetch(NKikimr::NUdf::TUnboxedValue& result) override { + if (IsFinished_) { + return NKikimr::NUdf::EFetchStatus::Finish; + } else if (!HasValue_) { + return NKikimr::NUdf::EFetchStatus::Yield; + } else { + result = std::move(Value_); + HasValue_ = false; + return NKikimr::NUdf::EFetchStatus::Ok; } - }; -} + } +}; +} // namespace void TPushStreamWorker::FeedToConsumer() { auto value = Graph_.ComputationGraph->GetValue(); @@ -608,7 +600,7 @@ void TPushStreamWorker::OnFinish() { } void TPushStreamWorker::Release() { - with_lock(GetScopedAlloc()) { + with_lock (GetScopedAlloc()) { Consumer_.Destroy(); if (SelfNode_) { SelfNode_->SetValue(Graph_.ComputationGraph->GetContext(), NKikimr::NUdf::TUnboxedValue::Invalid()); @@ -619,16 +611,12 @@ void TPushStreamWorker::Release() { TWorker<IPushStreamWorker>::Release(); } - namespace NYql { - namespace NPureCalc { - template - class TWorker<IPullStreamWorker>; +namespace NPureCalc { +template class TWorker<IPullStreamWorker>; - template - class TWorker<IPullListWorker>; +template class TWorker<IPullListWorker>; - template - class TWorker<IPushStreamWorker>; - } -} +template class TWorker<IPushStreamWorker>; +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/worker.h b/yql/essentials/public/purecalc/common/worker.h index b032b15af15..415e57fcf5e 100644 --- a/yql/essentials/public/purecalc/common/worker.h +++ b/yql/essentials/public/purecalc/common/worker.h @@ -14,169 +14,169 @@ #include <memory> namespace NYql { - namespace NPureCalc { - struct TWorkerGraph { - TWorkerGraph( - const TExprNode::TPtr& exprRoot, - TExprContext& exprCtx, - const TString& serializedProgram, - const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, - const TUserDataTable& userData, - const TVector<const TStructExprType*>& inputTypes, - const TVector<const TStructExprType*>& originalInputTypes, - const TVector<const TStructExprType*>& rawInputTypes, - const TTypeAnnotationNode* outputType, - const TTypeAnnotationNode* rawOutputType, - const TString& LLVMSettings, - NKikimr::NUdf::ICountersProvider* countersProvider, - ui64 nativeYtTypeFlags, - TMaybe<ui64> deterministicTimeProviderSeed, - TLangVersion langver, - bool insideEvaluation - ); - - ~TWorkerGraph(); - - NKikimr::NMiniKQL::TScopedAlloc ScopedAlloc; - NKikimr::NMiniKQL::TTypeEnvironment Env; - const NKikimr::NMiniKQL::IFunctionRegistry& FuncRegistry; - TIntrusivePtr<IRandomProvider> RandomProvider; - TIntrusivePtr<ITimeProvider> TimeProvider; - NKikimr::NMiniKQL::IComputationPattern::TPtr ComputationPattern; - THolder<NKikimr::NMiniKQL::IComputationGraph> ComputationGraph; - TString LLVMSettings; - ui64 NativeYtTypeFlags; - TMaybe<TString> TimestampColumn; - const NKikimr::NMiniKQL::TType* OutputType; - const NKikimr::NMiniKQL::TType* RawOutputType; - TVector<NKikimr::NMiniKQL::IComputationExternalNode*> SelfNodes; - TVector<const NKikimr::NMiniKQL::TStructType*> InputTypes; - TVector<const NKikimr::NMiniKQL::TStructType*> OriginalInputTypes; - TVector<const NKikimr::NMiniKQL::TStructType*> RawInputTypes; - }; - - template <typename TBase> - class TWorker: public TBase { - public: - using TWorkerFactoryPtr = std::weak_ptr<IWorkerFactory>; - private: - // Worker factory implementation should stay alive for this worker to operate correctly. - TWorkerFactoryPtr WorkerFactory_; - - protected: - TWorkerGraph Graph_; - - public: - TWorker( - TWorkerFactoryPtr factory, - const TExprNode::TPtr& exprRoot, - TExprContext& exprCtx, - const TString& serializedProgram, - const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, - const TUserDataTable& userData, - const TVector<const TStructExprType*>& inputTypes, - const TVector<const TStructExprType*>& originalInputTypes, - const TVector<const TStructExprType*>& rawInputTypes, - const TTypeAnnotationNode* outputType, - const TTypeAnnotationNode* rawOutputType, - const TString& LLVMSettings, - NKikimr::NUdf::ICountersProvider* countersProvider, - ui64 nativeYtTypeFlags, - TMaybe<ui64> deterministicTimeProviderSeed, - TLangVersion langver - ); - - public: - ui32 GetInputsCount() const override; - const NKikimr::NMiniKQL::TStructType* GetInputType(ui32, bool) const override; - const NKikimr::NMiniKQL::TStructType* GetInputType(bool) const override; - const NKikimr::NMiniKQL::TStructType* GetRawInputType(ui32) const override; - const NKikimr::NMiniKQL::TStructType* GetRawInputType() const override; - const NKikimr::NMiniKQL::TType* GetOutputType() const override; - const NKikimr::NMiniKQL::TType* GetRawOutputType() const override; - NYT::TNode MakeInputSchema() const override; - NYT::TNode MakeInputSchema(ui32) const override; - NYT::TNode MakeOutputSchema() const override; - NYT::TNode MakeOutputSchema(ui32) const override; - NYT::TNode MakeOutputSchema(TStringBuf) const override; - NYT::TNode MakeFullOutputSchema() const override; - NKikimr::NMiniKQL::TScopedAlloc& GetScopedAlloc() override; - NKikimr::NMiniKQL::IComputationGraph& GetGraph() override; - const NKikimr::NMiniKQL::IFunctionRegistry& GetFunctionRegistry() const override; - NKikimr::NMiniKQL::TTypeEnvironment& GetTypeEnvironment() override; - const TString& GetLLVMSettings() const override; - ui64 GetNativeYtTypeFlags() const override; - ITimeProvider* GetTimeProvider() const override; - void Invalidate() override; - protected: - void Release() override; - }; - - class TPullStreamWorker final: public TWorker<IPullStreamWorker> { - private: - NKikimr::NUdf::TUnboxedValue Output_ = NKikimr::NUdf::TUnboxedValue::Invalid(); - TVector<bool> HasInput_; - - inline bool CheckAllInputsSet() { - return AllOf(HasInput_, [](bool x) { return x; }); - } - - public: - using TWorker::TWorker; - ~TPullStreamWorker(); - - public: - void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) override; - NKikimr::NUdf::TUnboxedValue& GetOutput() override; - - protected: - void Release() override; - }; - - class TPullListWorker final: public TWorker<IPullListWorker> { - private: - NKikimr::NUdf::TUnboxedValue Output_ = NKikimr::NUdf::TUnboxedValue::Invalid(); - NKikimr::NUdf::TUnboxedValue OutputIterator_ = NKikimr::NUdf::TUnboxedValue::Invalid(); - TVector<bool> HasInput_; - - inline bool CheckAllInputsSet() { - return AllOf(HasInput_, [](bool x) { return x; }); - } - - public: - using TWorker::TWorker; - ~TPullListWorker(); - - public: - void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) override; - NKikimr::NUdf::TUnboxedValue& GetOutput() override; - NKikimr::NUdf::TUnboxedValue& GetOutputIterator() override; - void ResetOutputIterator() override; - - protected: - void Release() override; - }; - - class TPushStreamWorker final: public TWorker<IPushStreamWorker> { - private: - THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>> Consumer_{}; - bool Finished_ = false; - NKikimr::NMiniKQL::IComputationExternalNode* SelfNode_ = nullptr; - - public: - using TWorker::TWorker; - - private: - void FeedToConsumer(); - NYql::NUdf::IBoxedValue* GetPushStream() const; - - public: - void SetConsumer(THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>>) override; - void Push(NKikimr::NUdf::TUnboxedValue&&) override; - void OnFinish() override; - - protected: - void Release() override; - }; +namespace NPureCalc { +struct TWorkerGraph { + TWorkerGraph( + const TExprNode::TPtr& exprRoot, + TExprContext& exprCtx, + const TString& serializedProgram, + const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, + const TUserDataTable& userData, + const TVector<const TStructExprType*>& inputTypes, + const TVector<const TStructExprType*>& originalInputTypes, + const TVector<const TStructExprType*>& rawInputTypes, + const TTypeAnnotationNode* outputType, + const TTypeAnnotationNode* rawOutputType, + const TString& LLVMSettings, + NKikimr::NUdf::ICountersProvider* countersProvider, + ui64 nativeYtTypeFlags, + TMaybe<ui64> deterministicTimeProviderSeed, + TLangVersion langver, + bool insideEvaluation); + + ~TWorkerGraph(); + + NKikimr::NMiniKQL::TScopedAlloc ScopedAlloc; + NKikimr::NMiniKQL::TTypeEnvironment Env; + const NKikimr::NMiniKQL::IFunctionRegistry& FuncRegistry; + TIntrusivePtr<IRandomProvider> RandomProvider; + TIntrusivePtr<ITimeProvider> TimeProvider; + NKikimr::NMiniKQL::IComputationPattern::TPtr ComputationPattern; + THolder<NKikimr::NMiniKQL::IComputationGraph> ComputationGraph; + TString LLVMSettings; + ui64 NativeYtTypeFlags; + TMaybe<TString> TimestampColumn; + const NKikimr::NMiniKQL::TType* OutputType; + const NKikimr::NMiniKQL::TType* RawOutputType; + TVector<NKikimr::NMiniKQL::IComputationExternalNode*> SelfNodes; + TVector<const NKikimr::NMiniKQL::TStructType*> InputTypes; + TVector<const NKikimr::NMiniKQL::TStructType*> OriginalInputTypes; + TVector<const NKikimr::NMiniKQL::TStructType*> RawInputTypes; +}; + +template <typename TBase> +class TWorker: public TBase { +public: + using TWorkerFactoryPtr = std::weak_ptr<IWorkerFactory>; + +private: + // Worker factory implementation should stay alive for this worker to operate correctly. + TWorkerFactoryPtr WorkerFactory_; + +protected: + TWorkerGraph Graph_; + +public: + TWorker( + TWorkerFactoryPtr factory, + const TExprNode::TPtr& exprRoot, + TExprContext& exprCtx, + const TString& serializedProgram, + const NKikimr::NMiniKQL::IFunctionRegistry& funcRegistry, + const TUserDataTable& userData, + const TVector<const TStructExprType*>& inputTypes, + const TVector<const TStructExprType*>& originalInputTypes, + const TVector<const TStructExprType*>& rawInputTypes, + const TTypeAnnotationNode* outputType, + const TTypeAnnotationNode* rawOutputType, + const TString& LLVMSettings, + NKikimr::NUdf::ICountersProvider* countersProvider, + ui64 nativeYtTypeFlags, + TMaybe<ui64> deterministicTimeProviderSeed, + TLangVersion langver); + +public: + ui32 GetInputsCount() const override; + const NKikimr::NMiniKQL::TStructType* GetInputType(ui32, bool) const override; + const NKikimr::NMiniKQL::TStructType* GetInputType(bool) const override; + const NKikimr::NMiniKQL::TStructType* GetRawInputType(ui32) const override; + const NKikimr::NMiniKQL::TStructType* GetRawInputType() const override; + const NKikimr::NMiniKQL::TType* GetOutputType() const override; + const NKikimr::NMiniKQL::TType* GetRawOutputType() const override; + NYT::TNode MakeInputSchema() const override; + NYT::TNode MakeInputSchema(ui32) const override; + NYT::TNode MakeOutputSchema() const override; + NYT::TNode MakeOutputSchema(ui32) const override; + NYT::TNode MakeOutputSchema(TStringBuf) const override; + NYT::TNode MakeFullOutputSchema() const override; + NKikimr::NMiniKQL::TScopedAlloc& GetScopedAlloc() override; + NKikimr::NMiniKQL::IComputationGraph& GetGraph() override; + const NKikimr::NMiniKQL::IFunctionRegistry& GetFunctionRegistry() const override; + NKikimr::NMiniKQL::TTypeEnvironment& GetTypeEnvironment() override; + const TString& GetLLVMSettings() const override; + ui64 GetNativeYtTypeFlags() const override; + ITimeProvider* GetTimeProvider() const override; + void Invalidate() override; + +protected: + void Release() override; +}; + +class TPullStreamWorker final: public TWorker<IPullStreamWorker> { +private: + NKikimr::NUdf::TUnboxedValue Output_ = NKikimr::NUdf::TUnboxedValue::Invalid(); + TVector<bool> HasInput_; + + inline bool CheckAllInputsSet() { + return AllOf(HasInput_, [](bool x) { return x; }); } -} + +public: + using TWorker::TWorker; + ~TPullStreamWorker(); + +public: + void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) override; + NKikimr::NUdf::TUnboxedValue& GetOutput() override; + +protected: + void Release() override; +}; + +class TPullListWorker final: public TWorker<IPullListWorker> { +private: + NKikimr::NUdf::TUnboxedValue Output_ = NKikimr::NUdf::TUnboxedValue::Invalid(); + NKikimr::NUdf::TUnboxedValue OutputIterator_ = NKikimr::NUdf::TUnboxedValue::Invalid(); + TVector<bool> HasInput_; + + inline bool CheckAllInputsSet() { + return AllOf(HasInput_, [](bool x) { return x; }); + } + +public: + using TWorker::TWorker; + ~TPullListWorker(); + +public: + void SetInput(NKikimr::NUdf::TUnboxedValue&&, ui32) override; + NKikimr::NUdf::TUnboxedValue& GetOutput() override; + NKikimr::NUdf::TUnboxedValue& GetOutputIterator() override; + void ResetOutputIterator() override; + +protected: + void Release() override; +}; + +class TPushStreamWorker final: public TWorker<IPushStreamWorker> { +private: + THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>> Consumer_{}; + bool Finished_ = false; + NKikimr::NMiniKQL::IComputationExternalNode* SelfNode_ = nullptr; + +public: + using TWorker::TWorker; + +private: + void FeedToConsumer(); + NYql::NUdf::IBoxedValue* GetPushStream() const; + +public: + void SetConsumer(THolder<IConsumer<const NKikimr::NUdf::TUnboxedValue*>>) override; + void Push(NKikimr::NUdf::TUnboxedValue&&) override; + void OnFinish() override; + +protected: + void Release() override; +}; +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/worker_factory.cpp b/yql/essentials/public/purecalc/common/worker_factory.cpp index 8261528b237..347fbbe585a 100644 --- a/yql/essentials/public/purecalc/common/worker_factory.cpp +++ b/yql/essentials/public/purecalc/common/worker_factory.cpp @@ -113,8 +113,8 @@ TWorkerFactory<TBase>::TWorkerFactory(TWorkerFactoryOptions options, EProcessorM SerializedProgram_ = TString{options.Query}; } else { ExprRoot_ = Compile(options.Query, options.TranslationMode, - options.SyntaxVersion, options.Modules, - options.InputSpec, options.OutputSpec, options.UseAntlr4, processorMode, typeCtx.Get()); + options.SyntaxVersion, options.Modules, + options.InputSpec, options.OutputSpec, options.UseAntlr4, processorMode, typeCtx.Get()); RawOutputType_ = GetSequenceItemType(ExprRoot_->Pos(), ExprRoot_->GetTypeAnn(), true, ExprContext_); @@ -138,8 +138,7 @@ TWorkerFactory<TBase>::TWorkerFactory(TWorkerFactoryOptions options, EProcessorM template <typename TBase> TIntrusivePtr<TTypeAnnotationContext> TWorkerFactory<TBase>::PrepareTypeContext( - IModuleResolver::TPtr factoryModuleResolver -) { + IModuleResolver::TPtr factoryModuleResolver) { // Prepare type annotation context IModuleResolver::TPtr moduleResolver = factoryModuleResolver ? factoryModuleResolver->CreateMutableChild() : nullptr; @@ -147,9 +146,7 @@ TIntrusivePtr<TTypeAnnotationContext> TWorkerFactory<TBase>::PrepareTypeContext( typeContext->LangVer = LangVer_; typeContext->UseTypeDiffForConvertToError = true; typeContext->RandomProvider = CreateDefaultRandomProvider(); - typeContext->TimeProvider = DeterministicTimeProviderSeed_ ? - CreateDeterministicTimeProvider(*DeterministicTimeProviderSeed_) : - CreateDefaultTimeProvider(); + typeContext->TimeProvider = DeterministicTimeProviderSeed_ ? CreateDeterministicTimeProvider(*DeterministicTimeProviderSeed_) : CreateDefaultTimeProvider(); typeContext->UdfResolver = NCommon::CreateSimpleUdfResolver(FuncRegistry_.Get()); typeContext->ArrowResolver = MakeSimpleArrowResolver(*FuncRegistry_.Get()); typeContext->UserDataStorage = MakeIntrusive<TUserDataStorage>(nullptr, UserData_, nullptr, nullptr); @@ -176,8 +173,7 @@ TExprNode::TPtr TWorkerFactory<TBase>::Compile( const TOutputSpecBase& outputSpec, bool useAntlr4, EProcessorMode processorMode, - TTypeAnnotationContext* typeContext -) { + TTypeAnnotationContext* typeContext) { Y_ENSURE(useAntlr4, "Antlr3 support is dropped"); if (mode == ETranslationMode::PG && processorMode != EProcessorMode::PullList) { ythrow TCompileError("", "") << "only PullList mode is compatible to PostgreSQL syntax"; @@ -219,8 +215,7 @@ TExprNode::TPtr TWorkerFactory<TBase>::Compile( "DisableAnsiOptionalAs", "DisableCoalesceJoinKeysOnQualifiedAll", "DisableUnorderedSubqueries", - "FlexibleTypes" - }; + "FlexibleTypes"}; if (BlockEngineMode_ != EBlockEngineMode::Disable) { settings.Flags.insert("EmitAggApply"); } @@ -242,8 +237,7 @@ TExprNode::TPtr TWorkerFactory<TBase>::Compile( NSQLTranslation::TTranslators translators( nullptr, NSQLTranslationV1::MakeTranslator(lexers, parsers), - NSQLTranslationPG::MakeTranslator() - ); + NSQLTranslationPG::MakeTranslator()); astRes = SqlToYql(translators, TString(query), settings); } else { @@ -273,11 +267,9 @@ TExprNode::TPtr TWorkerFactory<TBase>::Compile( ythrow TCompileError(astStr.Str(), GetIssues().ToString()) << "failed to compile"; } - // Prepare transformation pipeline THolder<IGraphTransformer> calcTransformer = CreateFunctorTransformer([&](TExprNode::TPtr input, TExprNode::TPtr& output, TExprContext& ctx) - -> IGraphTransformer::TStatus - { + -> IGraphTransformer::TStatus { output = input; auto valueNode = input->HeadPtr(); @@ -309,8 +301,7 @@ TExprNode::TPtr TWorkerFactory<TBase>::Compile( NativeYtTypeFlags_, DeterministicTimeProviderSeed_, LangVer_, - true - ); + true); with_lock (graph.ScopedAlloc) { const auto value = graph.ComputationGraph->GetValue(); @@ -324,8 +315,8 @@ TExprNode::TPtr TWorkerFactory<TBase>::Compile( }); const TString& selfName = TString(inputSpec.ProvidesBlocks() - ? PurecalcBlockInputCallableName - : PurecalcInputCallableName); + ? PurecalcBlockInputCallableName + : PurecalcInputCallableName); TTypeAnnCallableFactory typeAnnCallableFactory = [&]() { return MakeTypeAnnotationTransformer(typeContext, InputTypes_, RawInputTypes_, processorMode, selfName); @@ -343,28 +334,26 @@ TExprNode::TPtr TWorkerFactory<TBase>::Compile( pipeline.AddTypeAnnotationTransformer(); pipeline.AddPostTypeAnnotation(); pipeline.Add(CreateFunctorTransformer( - [&](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { - return OptimizeExpr(input, output, [](const TExprNode::TPtr& node, TExprContext&) -> TExprNode::TPtr { - if (node->IsCallable("Unordered") && node->Child(0)->IsCallable({ - PurecalcInputCallableName, PurecalcBlockInputCallableName - })) { - return node->ChildPtr(0); - } - return node; - }, ctx, TOptimizeExprSettings(nullptr)); - }), "Unordered", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR, - "Unordered optimizations"); + [&](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { + return OptimizeExpr(input, output, [](const TExprNode::TPtr& node, TExprContext&) -> TExprNode::TPtr { + if (node->IsCallable("Unordered") && node->Child(0)->IsCallable({PurecalcInputCallableName, PurecalcBlockInputCallableName})) { + return node->ChildPtr(0); + } + return node; + }, ctx, TOptimizeExprSettings(nullptr)); + }), "Unordered", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR, + "Unordered optimizations"); pipeline.Add(CreateFunctorTransformer( - [&](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { - return OptimizeExpr(input, output, [](const TExprNode::TPtr& node, TExprContext&) -> TExprNode::TPtr { - if (node->IsCallable("Right!") && node->Head().IsCallable("Cons!")) { - return node->Head().ChildPtr(1); - } - - return node; - }, ctx, TOptimizeExprSettings(nullptr)); - }), "Cons", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR, - "Cons optimizations"); + [&](const TExprNode::TPtr& input, TExprNode::TPtr& output, TExprContext& ctx) { + return OptimizeExpr(input, output, [](const TExprNode::TPtr& node, TExprContext&) -> TExprNode::TPtr { + if (node->IsCallable("Right!") && node->Head().IsCallable("Cons!")) { + return node->Head().ChildPtr(1); + } + + return node; + }, ctx, TOptimizeExprSettings(nullptr)); + }), "Cons", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR, + "Cons optimizations"); pipeline.Add(MakeOutputColumnsFilter(outputSpec.GetOutputColumnsFilter()), "Filter", EYqlIssueCode::TIssuesIds_EIssueCode_DEFAULT_ERROR, "Filter output columns"); @@ -406,10 +395,7 @@ TExprNode::TPtr TWorkerFactory<TBase>::Compile( if (exprOut) { *exprOut << "After optimization:" << Endl; - ConvertToAst(*exprRoot, ExprContext_, 0, true).Root - ->PrettyPrintTo(*exprOut, TAstPrintFlags::PerLine - | TAstPrintFlags::ShortQuote - | TAstPrintFlags::AdaptArbitraryContent); + ConvertToAst(*exprRoot, ExprContext_, 0, true).Root->PrettyPrintTo(*exprOut, TAstPrintFlags::PerLine | TAstPrintFlags::ShortQuote | TAstPrintFlags::AdaptArbitraryContent); } return exprRoot; } @@ -523,7 +509,7 @@ template <typename TBase> TString TWorkerFactory<TBase>::GetCompiledProgram() { if (ExprRoot_) { NKikimr::NMiniKQL::TScopedAlloc alloc(__LOCATION__, NKikimr::TAlignedPagePoolCounters(), - FuncRegistry_->SupportsSizedAllocators()); + FuncRegistry_->SupportsSizedAllocators()); NKikimr::NMiniKQL::TTypeEnvironment env(alloc); auto rootNode = CompileMkql(ExprRoot_, ExprContext_, *FuncRegistry_, env, UserData_); @@ -541,32 +527,30 @@ void TWorkerFactory<TBase>::ReturnWorker(IWorker* worker) { } } - -#define DEFINE_WORKER_MAKER(MODE) \ - TWorkerHolder<I##MODE##Worker> T##MODE##WorkerFactory::MakeWorker() { \ - if (!WorkerPool_.empty()) { \ - auto res = std::move(WorkerPool_.back()); \ - WorkerPool_.pop_back(); \ - return TWorkerHolder<I##MODE##Worker>((I##MODE##Worker *)res.Release()); \ - } \ - return TWorkerHolder<I##MODE##Worker>(new T##MODE##Worker( \ - weak_from_this(), \ - ExprRoot_, \ - ExprContext_, \ - SerializedProgram_, \ - *FuncRegistry_, \ - UserData_, \ - InputTypes_, \ - OriginalInputTypes_, \ - RawInputTypes_, \ - OutputType_, \ - RawOutputType_, \ - LLVMSettings_, \ - CountersProvider_, \ - NativeYtTypeFlags_, \ - DeterministicTimeProviderSeed_, \ - LangVer_ \ - )); \ +#define DEFINE_WORKER_MAKER(MODE) \ + TWorkerHolder<I##MODE##Worker> T##MODE##WorkerFactory::MakeWorker() { \ + if (!WorkerPool_.empty()) { \ + auto res = std::move(WorkerPool_.back()); \ + WorkerPool_.pop_back(); \ + return TWorkerHolder<I##MODE##Worker>((I##MODE##Worker*)res.Release()); \ + } \ + return TWorkerHolder<I##MODE##Worker>(new T##MODE##Worker( \ + weak_from_this(), \ + ExprRoot_, \ + ExprContext_, \ + SerializedProgram_, \ + *FuncRegistry_, \ + UserData_, \ + InputTypes_, \ + OriginalInputTypes_, \ + RawInputTypes_, \ + OutputType_, \ + RawOutputType_, \ + LLVMSettings_, \ + CountersProvider_, \ + NativeYtTypeFlags_, \ + DeterministicTimeProviderSeed_, \ + LangVer_)); \ } DEFINE_WORKER_MAKER(PullStream) @@ -574,14 +558,11 @@ DEFINE_WORKER_MAKER(PullList) DEFINE_WORKER_MAKER(PushStream) namespace NYql { - namespace NPureCalc { - template - class TWorkerFactory<IPullStreamWorkerFactory>; +namespace NPureCalc { +template class TWorkerFactory<IPullStreamWorkerFactory>; - template - class TWorkerFactory<IPullListWorkerFactory>; +template class TWorkerFactory<IPullListWorkerFactory>; - template - class TWorkerFactory<IPushStreamWorkerFactory>; - } -} +template class TWorkerFactory<IPushStreamWorkerFactory>; +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/worker_factory.h b/yql/essentials/public/purecalc/common/worker_factory.h index 719a625f0b6..e50a1236c9f 100644 --- a/yql/essentials/public/purecalc/common/worker_factory.h +++ b/yql/essentials/public/purecalc/common/worker_factory.h @@ -12,169 +12,167 @@ #include <utility> namespace NYql { - namespace NPureCalc { - struct TWorkerFactoryOptions { - IProgramFactoryPtr Factory; - const TInputSpecBase& InputSpec; - const TOutputSpecBase& OutputSpec; - TStringBuf Query; - TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry; - IModuleResolver::TPtr ModuleResolver; - const TUserDataTable& UserData; - const THashMap<TString, TString>& Modules; - TString LLVMSettings; - EBlockEngineMode BlockEngineMode; - IOutputStream* ExprOutputStream; - NKikimr::NUdf::ICountersProvider* CountersProvider; - ETranslationMode TranslationMode; - ui16 SyntaxVersion; - TLangVersion LangVer; - ui64 NativeYtTypeFlags; - TMaybe<ui64> DeterministicTimeProviderSeed; - bool UseSystemColumns; - bool UseWorkerPool; - bool UseAntlr4; - - TWorkerFactoryOptions( - IProgramFactoryPtr Factory, - const TInputSpecBase& InputSpec, - const TOutputSpecBase& OutputSpec, - TStringBuf Query, - TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry, - IModuleResolver::TPtr ModuleResolver, - const TUserDataTable& UserData, - const THashMap<TString, TString>& Modules, - TString LLVMSettings, - EBlockEngineMode BlockEngineMode, - IOutputStream* ExprOutputStream, - NKikimr::NUdf::ICountersProvider* CountersProvider, - ETranslationMode translationMode, - ui16 syntaxVersion, - TLangVersion langver, - ui64 nativeYtTypeFlags, - TMaybe<ui64> deterministicTimeProviderSeed, - bool useSystemColumns, - bool useWorkerPool, - bool useAntlr4 - ) - : Factory(std::move(Factory)) - , InputSpec(InputSpec) - , OutputSpec(OutputSpec) - , Query(Query) - , FuncRegistry(std::move(FuncRegistry)) - , ModuleResolver(std::move(ModuleResolver)) - , UserData(UserData) - , Modules(Modules) - , LLVMSettings(std::move(LLVMSettings)) - , BlockEngineMode(BlockEngineMode) - , ExprOutputStream(ExprOutputStream) - , CountersProvider(CountersProvider) - , TranslationMode(translationMode) - , SyntaxVersion(syntaxVersion) - , LangVer(langver) - , NativeYtTypeFlags(nativeYtTypeFlags) - , DeterministicTimeProviderSeed(deterministicTimeProviderSeed) - , UseSystemColumns(useSystemColumns) - , UseWorkerPool(useWorkerPool) - , UseAntlr4(useAntlr4) - { - } - }; - - template <typename TBase> - class TWorkerFactory: public TBase { - private: - IProgramFactoryPtr Factory_; - - protected: - TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry_; - const TUserDataTable& UserData_; - TExprContext ExprContext_; - TExprNode::TPtr ExprRoot_; - TString SerializedProgram_; - TVector<const TStructExprType*> InputTypes_; - TVector<const TStructExprType*> OriginalInputTypes_; - TVector<const TStructExprType*> RawInputTypes_; - const TTypeAnnotationNode* OutputType_; - const TTypeAnnotationNode* RawOutputType_; - TVector<THashSet<TString>> AllColumns_; - TVector<THashSet<TString>> UsedColumns_; - TString LLVMSettings_; - EBlockEngineMode BlockEngineMode_; - IOutputStream* ExprOutputStream_; - NKikimr::NUdf::ICountersProvider* CountersProvider_; - ui64 NativeYtTypeFlags_; - TMaybe<ui64> DeterministicTimeProviderSeed_; - bool UseSystemColumns_; - bool UseWorkerPool_; - TLangVersion LangVer_; - TVector<THolder<IWorker>> WorkerPool_; - - public: - TWorkerFactory(TWorkerFactoryOptions, EProcessorMode); - - public: - NYT::TNode MakeInputSchema(ui32) const override; - NYT::TNode MakeInputSchema() const override; - NYT::TNode MakeOutputSchema() const override; - NYT::TNode MakeOutputSchema(ui32) const override; - NYT::TNode MakeOutputSchema(TStringBuf) const override; - NYT::TNode MakeFullOutputSchema() const override; - const THashSet<TString>& GetUsedColumns(ui32 inputIndex) const override; - const THashSet<TString>& GetUsedColumns() const override; - TIssues GetIssues() const override; - TString GetCompiledProgram() override; - - protected: - void ReturnWorker(IWorker* worker) override; - - private: - TIntrusivePtr<TTypeAnnotationContext> PrepareTypeContext( - IModuleResolver::TPtr factoryModuleResolver - ); - - TExprNode::TPtr Compile(TStringBuf query, - ETranslationMode mode, - ui16 syntaxVersion, - const THashMap<TString, TString>& modules, - const TInputSpecBase& inputSpec, - const TOutputSpecBase& outputSpec, - bool useAntlr4, - EProcessorMode processorMode, - TTypeAnnotationContext* typeContext); - }; - - class TPullStreamWorkerFactory final: public TWorkerFactory<IPullStreamWorkerFactory> { - public: - explicit TPullStreamWorkerFactory(TWorkerFactoryOptions options) - : TWorkerFactory(std::move(options), EProcessorMode::PullStream) - { - } - - public: - TWorkerHolder<IPullStreamWorker> MakeWorker() override; - }; - - class TPullListWorkerFactory final: public TWorkerFactory<IPullListWorkerFactory> { - public: - explicit TPullListWorkerFactory(TWorkerFactoryOptions options) - : TWorkerFactory(std::move(options), EProcessorMode::PullList) - { - } - - public: - TWorkerHolder<IPullListWorker> MakeWorker() override; - }; - - class TPushStreamWorkerFactory final: public TWorkerFactory<IPushStreamWorkerFactory> { - public: - explicit TPushStreamWorkerFactory(TWorkerFactoryOptions options) - : TWorkerFactory(std::move(options), EProcessorMode::PushStream) - { - } - - public: - TWorkerHolder<IPushStreamWorker> MakeWorker() override; - }; +namespace NPureCalc { +struct TWorkerFactoryOptions { + IProgramFactoryPtr Factory; + const TInputSpecBase& InputSpec; + const TOutputSpecBase& OutputSpec; + TStringBuf Query; + TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry; + IModuleResolver::TPtr ModuleResolver; + const TUserDataTable& UserData; + const THashMap<TString, TString>& Modules; + TString LLVMSettings; + EBlockEngineMode BlockEngineMode; + IOutputStream* ExprOutputStream; + NKikimr::NUdf::ICountersProvider* CountersProvider; + ETranslationMode TranslationMode; + ui16 SyntaxVersion; + TLangVersion LangVer; + ui64 NativeYtTypeFlags; + TMaybe<ui64> DeterministicTimeProviderSeed; + bool UseSystemColumns; + bool UseWorkerPool; + bool UseAntlr4; + + TWorkerFactoryOptions( + IProgramFactoryPtr Factory, + const TInputSpecBase& InputSpec, + const TOutputSpecBase& OutputSpec, + TStringBuf Query, + TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry, + IModuleResolver::TPtr ModuleResolver, + const TUserDataTable& UserData, + const THashMap<TString, TString>& Modules, + TString LLVMSettings, + EBlockEngineMode BlockEngineMode, + IOutputStream* ExprOutputStream, + NKikimr::NUdf::ICountersProvider* CountersProvider, + ETranslationMode translationMode, + ui16 syntaxVersion, + TLangVersion langver, + ui64 nativeYtTypeFlags, + TMaybe<ui64> deterministicTimeProviderSeed, + bool useSystemColumns, + bool useWorkerPool, + bool useAntlr4) + : Factory(std::move(Factory)) + , InputSpec(InputSpec) + , OutputSpec(OutputSpec) + , Query(Query) + , FuncRegistry(std::move(FuncRegistry)) + , ModuleResolver(std::move(ModuleResolver)) + , UserData(UserData) + , Modules(Modules) + , LLVMSettings(std::move(LLVMSettings)) + , BlockEngineMode(BlockEngineMode) + , ExprOutputStream(ExprOutputStream) + , CountersProvider(CountersProvider) + , TranslationMode(translationMode) + , SyntaxVersion(syntaxVersion) + , LangVer(langver) + , NativeYtTypeFlags(nativeYtTypeFlags) + , DeterministicTimeProviderSeed(deterministicTimeProviderSeed) + , UseSystemColumns(useSystemColumns) + , UseWorkerPool(useWorkerPool) + , UseAntlr4(useAntlr4) + { } -} +}; + +template <typename TBase> +class TWorkerFactory: public TBase { +private: + IProgramFactoryPtr Factory_; + +protected: + TIntrusivePtr<NKikimr::NMiniKQL::IMutableFunctionRegistry> FuncRegistry_; + const TUserDataTable& UserData_; + TExprContext ExprContext_; + TExprNode::TPtr ExprRoot_; + TString SerializedProgram_; + TVector<const TStructExprType*> InputTypes_; + TVector<const TStructExprType*> OriginalInputTypes_; + TVector<const TStructExprType*> RawInputTypes_; + const TTypeAnnotationNode* OutputType_; + const TTypeAnnotationNode* RawOutputType_; + TVector<THashSet<TString>> AllColumns_; + TVector<THashSet<TString>> UsedColumns_; + TString LLVMSettings_; + EBlockEngineMode BlockEngineMode_; + IOutputStream* ExprOutputStream_; + NKikimr::NUdf::ICountersProvider* CountersProvider_; + ui64 NativeYtTypeFlags_; + TMaybe<ui64> DeterministicTimeProviderSeed_; + bool UseSystemColumns_; + bool UseWorkerPool_; + TLangVersion LangVer_; + TVector<THolder<IWorker>> WorkerPool_; + +public: + TWorkerFactory(TWorkerFactoryOptions, EProcessorMode); + +public: + NYT::TNode MakeInputSchema(ui32) const override; + NYT::TNode MakeInputSchema() const override; + NYT::TNode MakeOutputSchema() const override; + NYT::TNode MakeOutputSchema(ui32) const override; + NYT::TNode MakeOutputSchema(TStringBuf) const override; + NYT::TNode MakeFullOutputSchema() const override; + const THashSet<TString>& GetUsedColumns(ui32 inputIndex) const override; + const THashSet<TString>& GetUsedColumns() const override; + TIssues GetIssues() const override; + TString GetCompiledProgram() override; + +protected: + void ReturnWorker(IWorker* worker) override; + +private: + TIntrusivePtr<TTypeAnnotationContext> PrepareTypeContext( + IModuleResolver::TPtr factoryModuleResolver); + + TExprNode::TPtr Compile(TStringBuf query, + ETranslationMode mode, + ui16 syntaxVersion, + const THashMap<TString, TString>& modules, + const TInputSpecBase& inputSpec, + const TOutputSpecBase& outputSpec, + bool useAntlr4, + EProcessorMode processorMode, + TTypeAnnotationContext* typeContext); +}; + +class TPullStreamWorkerFactory final: public TWorkerFactory<IPullStreamWorkerFactory> { +public: + explicit TPullStreamWorkerFactory(TWorkerFactoryOptions options) + : TWorkerFactory(std::move(options), EProcessorMode::PullStream) + { + } + +public: + TWorkerHolder<IPullStreamWorker> MakeWorker() override; +}; + +class TPullListWorkerFactory final: public TWorkerFactory<IPullListWorkerFactory> { +public: + explicit TPullListWorkerFactory(TWorkerFactoryOptions options) + : TWorkerFactory(std::move(options), EProcessorMode::PullList) + { + } + +public: + TWorkerHolder<IPullListWorker> MakeWorker() override; +}; + +class TPushStreamWorkerFactory final: public TWorkerFactory<IPushStreamWorkerFactory> { +public: + explicit TPushStreamWorkerFactory(TWorkerFactoryOptions options) + : TWorkerFactory(std::move(options), EProcessorMode::PushStream) + { + } + +public: + TWorkerHolder<IPushStreamWorker> MakeWorker() override; +}; +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/common/wrappers.h b/yql/essentials/public/purecalc/common/wrappers.h index 35edeb7398a..57be6315a43 100644 --- a/yql/essentials/public/purecalc/common/wrappers.h +++ b/yql/essentials/public/purecalc/common/wrappers.h @@ -5,66 +5,66 @@ #include <util/generic/ptr.h> namespace NYql::NPureCalc::NPrivate { - template <typename TNew, typename TOld, typename TFunctor> - class TMappingStream final: public IStream<TNew> { - private: - THolder<IStream<TOld>> Old_; - TFunctor Functor_; +template <typename TNew, typename TOld, typename TFunctor> +class TMappingStream final: public IStream<TNew> { +private: + THolder<IStream<TOld>> Old_; + TFunctor Functor_; - public: - TMappingStream(THolder<IStream<TOld>> old, TFunctor functor) - : Old_(std::move(old)) - , Functor_(std::move(functor)) - { - } +public: + TMappingStream(THolder<IStream<TOld>> old, TFunctor functor) + : Old_(std::move(old)) + , Functor_(std::move(functor)) + { + } - public: - TNew Fetch() override { - return Functor_(Old_->Fetch()); - } - }; +public: + TNew Fetch() override { + return Functor_(Old_->Fetch()); + } +}; - template <typename TNew, typename TOld, typename TFunctor> - class TMappingConsumer final: public IConsumer<TNew> { - private: - THolder<IConsumer<TOld>> Old_; - TFunctor Functor_; +template <typename TNew, typename TOld, typename TFunctor> +class TMappingConsumer final: public IConsumer<TNew> { +private: + THolder<IConsumer<TOld>> Old_; + TFunctor Functor_; - public: - TMappingConsumer(THolder<IConsumer<TOld>> old, TFunctor functor) - : Old_(std::move(old)) - , Functor_(std::move(functor)) +public: + TMappingConsumer(THolder<IConsumer<TOld>> old, TFunctor functor) + : Old_(std::move(old)) + , Functor_(std::move(functor)) { } - public: - void OnObject(TNew object) override { - Old_->OnObject(Functor_(object)); - } +public: + void OnObject(TNew object) override { + Old_->OnObject(Functor_(object)); + } - void OnFinish() override { - Old_->OnFinish(); - } - }; + void OnFinish() override { + Old_->OnFinish(); + } +}; - template <typename T, typename C> - class TNonOwningConsumer final: public IConsumer<T> { - private: - C Consumer_; +template <typename T, typename C> +class TNonOwningConsumer final: public IConsumer<T> { +private: + C Consumer_; - public: - explicit TNonOwningConsumer(const C& consumer) - : Consumer_(consumer) - { - } +public: + explicit TNonOwningConsumer(const C& consumer) + : Consumer_(consumer) + { + } - public: - void OnObject(T t) override { - Consumer_->OnObject(t); - } +public: + void OnObject(T t) override { + Consumer_->OnObject(t); + } - void OnFinish() override { - Consumer_->OnFinish(); - } - }; -} + void OnFinish() override { + Consumer_->OnFinish(); + } +}; +} // namespace NYql::NPureCalc::NPrivate diff --git a/yql/essentials/public/purecalc/common/ya.make.inc b/yql/essentials/public/purecalc/common/ya.make.inc index 8aaf8e7c3f7..d5c4292d9b7 100644 --- a/yql/essentials/public/purecalc/common/ya.make.inc +++ b/yql/essentials/public/purecalc/common/ya.make.inc @@ -1,3 +1,5 @@ +ENABLE(YQL_STYLE_CPP) + SRCDIR( yql/essentials/public/purecalc/common ) diff --git a/yql/essentials/public/purecalc/examples/protobuf/main.cpp b/yql/essentials/public/purecalc/examples/protobuf/main.cpp index 2cf9ff47360..4bcf19be0ae 100644 --- a/yql/essentials/public/purecalc/examples/protobuf/main.cpp +++ b/yql/essentials/public/purecalc/examples/protobuf/main.cpp @@ -97,10 +97,10 @@ void PrecompileExample(IProgramFactoryPtr factory) { } auto program = factory->MakePullStreamProgram( - TProtobufInputSpec<TInput>(), - TProtobufOutputSpec<TOutput>(), - prg, - ETranslationMode::Mkql); + TProtobufInputSpec<TInput>(), + TProtobufOutputSpec<TOutput>(), + prg, + ETranslationMode::Mkql); auto result = program->Apply(MakeInput()); diff --git a/yql/essentials/public/purecalc/examples/protobuf/ya.make b/yql/essentials/public/purecalc/examples/protobuf/ya.make index c50a3c4af25..9589ce28ccf 100644 --- a/yql/essentials/public/purecalc/examples/protobuf/ya.make +++ b/yql/essentials/public/purecalc/examples/protobuf/ya.make @@ -1,5 +1,7 @@ PROGRAM() +ENABLE(YQL_STYLE_CPP) + SRCS( main.proto main.cpp diff --git a/yql/essentials/public/purecalc/examples/protobuf_pull_list/main.cpp b/yql/essentials/public/purecalc/examples/protobuf_pull_list/main.cpp index b3e27cec10f..793738a4336 100644 --- a/yql/essentials/public/purecalc/examples/protobuf_pull_list/main.cpp +++ b/yql/essentials/public/purecalc/examples/protobuf_pull_list/main.cpp @@ -29,8 +29,7 @@ int main() { TProtobufInputSpec<TInput>(), TProtobufOutputSpec<TOutput>(), Query, - ETranslationMode::SQL - ); + ETranslationMode::SQL); auto result = program->Apply(MakeInput()); diff --git a/yql/essentials/public/purecalc/examples/protobuf_pull_list/ya.make b/yql/essentials/public/purecalc/examples/protobuf_pull_list/ya.make index a102f5fb2ca..9eceb2893a9 100644 --- a/yql/essentials/public/purecalc/examples/protobuf_pull_list/ya.make +++ b/yql/essentials/public/purecalc/examples/protobuf_pull_list/ya.make @@ -1,5 +1,7 @@ PROGRAM() +ENABLE(YQL_STYLE_CPP) + SRCS( main.proto main.cpp diff --git a/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.cpp b/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.cpp index e194e4f22c9..0d6c309f6cc 100644 --- a/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.cpp +++ b/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.cpp @@ -10,199 +10,193 @@ namespace pb = google::protobuf; namespace NYql { - namespace NPureCalc { +namespace NPureCalc { - TProtoSchemaOptions::TProtoSchemaOptions() - : EnumPolicy(EEnumPolicy::Int32) - , ListIsOptional(false) - , EnableRecursiveRenaming(false) - { - } +TProtoSchemaOptions::TProtoSchemaOptions() + : EnumPolicy(EEnumPolicy::Int32) + , ListIsOptional(false) + , EnableRecursiveRenaming(false) +{ +} - TProtoSchemaOptions& TProtoSchemaOptions::SetEnumPolicy(EEnumPolicy policy) { - EnumPolicy = policy; - return *this; - } +TProtoSchemaOptions& TProtoSchemaOptions::SetEnumPolicy(EEnumPolicy policy) { + EnumPolicy = policy; + return *this; +} - TProtoSchemaOptions& TProtoSchemaOptions::SetListIsOptional(bool value) { - ListIsOptional = value; - return *this; - } +TProtoSchemaOptions& TProtoSchemaOptions::SetListIsOptional(bool value) { + ListIsOptional = value; + return *this; +} - TProtoSchemaOptions& TProtoSchemaOptions::SetEnableRecursiveRenaming(bool value) { - EnableRecursiveRenaming = value; - return *this; - } +TProtoSchemaOptions& TProtoSchemaOptions::SetEnableRecursiveRenaming(bool value) { + EnableRecursiveRenaming = value; + return *this; +} - TProtoSchemaOptions& TProtoSchemaOptions::SetFieldRenames( - THashMap<TString, TString> fieldRenames - ) { - FieldRenames = std::move(fieldRenames); - return *this; - } +TProtoSchemaOptions& TProtoSchemaOptions::SetFieldRenames( + THashMap<TString, TString> fieldRenames) { + FieldRenames = std::move(fieldRenames); + return *this; +} - namespace { - EEnumFormatType EnumFormatTypeWithYTFlag(const pb::FieldDescriptor& enumField, EEnumFormatType defaultEnumFormatType) { - auto flags = enumField.options().GetRepeatedExtension(NYT::flags); - for (auto flag : flags) { - if (flag == NYT::EWrapperFieldFlag::ENUM_INT) { - return EEnumFormatType::Int32; - } else if (flag == NYT::EWrapperFieldFlag::ENUM_STRING) { - return EEnumFormatType::String; - } - } - return defaultEnumFormatType; - } +namespace { +EEnumFormatType EnumFormatTypeWithYTFlag(const pb::FieldDescriptor& enumField, EEnumFormatType defaultEnumFormatType) { + auto flags = enumField.options().GetRepeatedExtension(NYT::flags); + for (auto flag : flags) { + if (flag == NYT::EWrapperFieldFlag::ENUM_INT) { + return EEnumFormatType::Int32; + } else if (flag == NYT::EWrapperFieldFlag::ENUM_STRING) { + return EEnumFormatType::String; } + } + return defaultEnumFormatType; +} +} // namespace + +EEnumFormatType EnumFormatType(const pb::FieldDescriptor& enumField, EEnumPolicy enumPolicy) { + switch (enumPolicy) { + case EEnumPolicy::Int32: + return EEnumFormatType::Int32; + case EEnumPolicy::String: + return EEnumFormatType::String; + case EEnumPolicy::YTFlagDefaultInt32: + return EnumFormatTypeWithYTFlag(enumField, EEnumFormatType::Int32); + case EEnumPolicy::YTFlagDefaultString: + return EnumFormatTypeWithYTFlag(enumField, EEnumFormatType::String); + } +} - EEnumFormatType EnumFormatType(const pb::FieldDescriptor& enumField, EEnumPolicy enumPolicy) { - switch (enumPolicy) { - case EEnumPolicy::Int32: - return EEnumFormatType::Int32; - case EEnumPolicy::String: - return EEnumFormatType::String; - case EEnumPolicy::YTFlagDefaultInt32: - return EnumFormatTypeWithYTFlag(enumField, EEnumFormatType::Int32); - case EEnumPolicy::YTFlagDefaultString: - return EnumFormatTypeWithYTFlag(enumField, EEnumFormatType::String); +namespace { +const char* FormatTypeName(const pb::FieldDescriptor* field, EEnumPolicy enumPolicy) { + switch (field->type()) { + case pb::FieldDescriptor::TYPE_DOUBLE: + return "Double"; + case pb::FieldDescriptor::TYPE_FLOAT: + return "Float"; + case pb::FieldDescriptor::TYPE_INT64: + case pb::FieldDescriptor::TYPE_SFIXED64: + case pb::FieldDescriptor::TYPE_SINT64: + return "Int64"; + case pb::FieldDescriptor::TYPE_UINT64: + case pb::FieldDescriptor::TYPE_FIXED64: + return "Uint64"; + case pb::FieldDescriptor::TYPE_INT32: + case pb::FieldDescriptor::TYPE_SFIXED32: + case pb::FieldDescriptor::TYPE_SINT32: + return "Int32"; + case pb::FieldDescriptor::TYPE_UINT32: + case pb::FieldDescriptor::TYPE_FIXED32: + return "Uint32"; + case pb::FieldDescriptor::TYPE_BOOL: + return "Bool"; + case pb::FieldDescriptor::TYPE_STRING: + return "Utf8"; + case pb::FieldDescriptor::TYPE_BYTES: + return "String"; + case pb::FieldDescriptor::TYPE_ENUM: + switch (EnumFormatType(*field, enumPolicy)) { + case EEnumFormatType::Int32: + return "Int32"; + case EEnumFormatType::String: + return "String"; } - } + default: + ythrow yexception() << "Unsupported protobuf type: " << field->type_name() + << ", field: " << field->name() << ", " << int(field->type()); + } +} +} // namespace - namespace { - const char* FormatTypeName(const pb::FieldDescriptor* field, EEnumPolicy enumPolicy) { - switch (field->type()) { - case pb::FieldDescriptor::TYPE_DOUBLE: - return "Double"; - case pb::FieldDescriptor::TYPE_FLOAT: - return "Float"; - case pb::FieldDescriptor::TYPE_INT64: - case pb::FieldDescriptor::TYPE_SFIXED64: - case pb::FieldDescriptor::TYPE_SINT64: - return "Int64"; - case pb::FieldDescriptor::TYPE_UINT64: - case pb::FieldDescriptor::TYPE_FIXED64: - return "Uint64"; - case pb::FieldDescriptor::TYPE_INT32: - case pb::FieldDescriptor::TYPE_SFIXED32: - case pb::FieldDescriptor::TYPE_SINT32: - return "Int32"; - case pb::FieldDescriptor::TYPE_UINT32: - case pb::FieldDescriptor::TYPE_FIXED32: - return "Uint32"; - case pb::FieldDescriptor::TYPE_BOOL: - return "Bool"; - case pb::FieldDescriptor::TYPE_STRING: - return "Utf8"; - case pb::FieldDescriptor::TYPE_BYTES: - return "String"; - case pb::FieldDescriptor::TYPE_ENUM: - switch (EnumFormatType(*field, enumPolicy)) { - case EEnumFormatType::Int32: - return "Int32"; - case EEnumFormatType::String: - return "String"; - } - default: - ythrow yexception() << "Unsupported protobuf type: " << field->type_name() - << ", field: " << field->name() << ", " << int(field->type()); - } - } +NYT::TNode MakeSchemaFromProto(const pb::Descriptor& descriptor, TVector<const pb::Descriptor*>& nested, const TProtoSchemaOptions& options) { + if (Find(nested, &descriptor) != nested.end()) { + TVector<TString> nestedNames; + for (const auto* d : nested) { + nestedNames.push_back(d->full_name()); } + nestedNames.push_back(descriptor.full_name()); + ythrow yexception() << Sprintf("recursive messages are not supported (%s)", + JoinStrings(nestedNames, "->").c_str()); + } + nested.push_back(&descriptor); - NYT::TNode MakeSchemaFromProto(const pb::Descriptor& descriptor, TVector<const pb::Descriptor*>& nested, const TProtoSchemaOptions& options) { - if (Find(nested, &descriptor) != nested.end()) { - TVector<TString> nestedNames; - for (const auto* d : nested) { - nestedNames.push_back(d->full_name()); - } - nestedNames.push_back(descriptor.full_name()); - ythrow yexception() << Sprintf("recursive messages are not supported (%s)", - JoinStrings(nestedNames, "->").c_str()); - } - nested.push_back(&descriptor); - - auto items = NYT::TNode::CreateList(); - for (int fieldNo = 0; fieldNo < descriptor.field_count(); ++fieldNo) { - const auto& fieldDescriptor = *descriptor.field(fieldNo); - - auto name = fieldDescriptor.name(); - if ( - auto renamePtr = options.FieldRenames.FindPtr(name); - (options.EnableRecursiveRenaming || nested.size() == 1) && renamePtr - ) { - name = *renamePtr; - } + auto items = NYT::TNode::CreateList(); + for (int fieldNo = 0; fieldNo < descriptor.field_count(); ++fieldNo) { + const auto& fieldDescriptor = *descriptor.field(fieldNo); - NYT::TNode itemType; - if (fieldDescriptor.type() == pb::FieldDescriptor::TYPE_MESSAGE) { - itemType = MakeSchemaFromProto(*fieldDescriptor.message_type(), nested, options); - } else { - itemType = NYT::TNode::CreateList(); - itemType.Add("DataType"); - itemType.Add(FormatTypeName(&fieldDescriptor, options.EnumPolicy)); - } - switch (fieldDescriptor.label()) { - case pb::FieldDescriptor::LABEL_OPTIONAL: - { - auto optionalType = NYT::TNode::CreateList(); - optionalType.Add("OptionalType"); - optionalType.Add(std::move(itemType)); - itemType = std::move(optionalType); - } - break; - case pb::FieldDescriptor::LABEL_REQUIRED: - break; - case pb::FieldDescriptor::LABEL_REPEATED: - { - auto listType = NYT::TNode::CreateList(); - listType.Add("ListType"); - listType.Add(std::move(itemType)); - itemType = std::move(listType); - if (options.ListIsOptional) { - itemType = NYT::TNode::CreateList().Add("OptionalType").Add(std::move(itemType)); - } - } - break; - default: - ythrow yexception() << "Unknown protobuf label: " << (ui32)fieldDescriptor.label() << ", field: " << name; + auto name = fieldDescriptor.name(); + if ( + auto renamePtr = options.FieldRenames.FindPtr(name); + (options.EnableRecursiveRenaming || nested.size() == 1) && renamePtr) { + name = *renamePtr; + } + + NYT::TNode itemType; + if (fieldDescriptor.type() == pb::FieldDescriptor::TYPE_MESSAGE) { + itemType = MakeSchemaFromProto(*fieldDescriptor.message_type(), nested, options); + } else { + itemType = NYT::TNode::CreateList(); + itemType.Add("DataType"); + itemType.Add(FormatTypeName(&fieldDescriptor, options.EnumPolicy)); + } + switch (fieldDescriptor.label()) { + case pb::FieldDescriptor::LABEL_OPTIONAL: { + auto optionalType = NYT::TNode::CreateList(); + optionalType.Add("OptionalType"); + optionalType.Add(std::move(itemType)); + itemType = std::move(optionalType); + } break; + case pb::FieldDescriptor::LABEL_REQUIRED: + break; + case pb::FieldDescriptor::LABEL_REPEATED: { + auto listType = NYT::TNode::CreateList(); + listType.Add("ListType"); + listType.Add(std::move(itemType)); + itemType = std::move(listType); + if (options.ListIsOptional) { + itemType = NYT::TNode::CreateList().Add("OptionalType").Add(std::move(itemType)); } + } break; + default: + ythrow yexception() << "Unknown protobuf label: " << (ui32)fieldDescriptor.label() << ", field: " << name; + } - auto itemNode = NYT::TNode::CreateList(); - itemNode.Add(name); - itemNode.Add(std::move(itemType)); + auto itemNode = NYT::TNode::CreateList(); + itemNode.Add(name); + itemNode.Add(std::move(itemType)); - items.Add(std::move(itemNode)); - } - auto root = NYT::TNode::CreateList(); - root.Add("StructType"); - root.Add(std::move(items)); + items.Add(std::move(itemNode)); + } + auto root = NYT::TNode::CreateList(); + root.Add("StructType"); + root.Add(std::move(items)); - nested.pop_back(); - return root; - } + nested.pop_back(); + return root; +} - NYT::TNode MakeSchemaFromProto(const pb::Descriptor& descriptor, const TProtoSchemaOptions& options) { - TVector<const pb::Descriptor*> nested; - return MakeSchemaFromProto(descriptor, nested, options); - } +NYT::TNode MakeSchemaFromProto(const pb::Descriptor& descriptor, const TProtoSchemaOptions& options) { + TVector<const pb::Descriptor*> nested; + return MakeSchemaFromProto(descriptor, nested, options); +} - NYT::TNode MakeVariantSchemaFromProtos(const TVector<const pb::Descriptor*>& descriptors, const TProtoSchemaOptions& options) { - Y_ENSURE(options.FieldRenames.empty(), "Renames are not supported in variant mode"); +NYT::TNode MakeVariantSchemaFromProtos(const TVector<const pb::Descriptor*>& descriptors, const TProtoSchemaOptions& options) { + Y_ENSURE(options.FieldRenames.empty(), "Renames are not supported in variant mode"); - auto tupleItems = NYT::TNode::CreateList(); - for (auto descriptor : descriptors) { - tupleItems.Add(MakeSchemaFromProto(*descriptor, options)); - } + auto tupleItems = NYT::TNode::CreateList(); + for (auto descriptor : descriptors) { + tupleItems.Add(MakeSchemaFromProto(*descriptor, options)); + } - auto tupleType = NYT::TNode::CreateList(); - tupleType.Add("TupleType"); - tupleType.Add(std::move(tupleItems)); + auto tupleType = NYT::TNode::CreateList(); + tupleType.Add("TupleType"); + tupleType.Add(std::move(tupleItems)); - auto variantType = NYT::TNode::CreateList(); - variantType.Add("VariantType"); - variantType.Add(std::move(tupleType)); + auto variantType = NYT::TNode::CreateList(); + variantType.Add("VariantType"); + variantType.Add(std::move(tupleType)); - return variantType; - } - } + return variantType; } +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.h b/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.h index bf85fc5e33e..f1511d78b7a 100644 --- a/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.h +++ b/yql/essentials/public/purecalc/helpers/protobuf/schema_from_proto.h @@ -7,55 +7,54 @@ #include <google/protobuf/descriptor.h> - namespace NYql { - namespace NPureCalc { - enum class EEnumPolicy { - Int32, - String, - YTFlagDefaultInt32, - YTFlagDefaultString - }; - - enum class EEnumFormatType { - Int32, - String - }; - - /** - * Options that customize building of struct type from protobuf descriptor. - */ - struct TProtoSchemaOptions { - public: - EEnumPolicy EnumPolicy; - bool ListIsOptional; - bool EnableRecursiveRenaming; - THashMap<TString, TString> FieldRenames; - - public: - TProtoSchemaOptions(); - - public: - TProtoSchemaOptions& SetEnumPolicy(EEnumPolicy); - - TProtoSchemaOptions& SetListIsOptional(bool); - - TProtoSchemaOptions& SetEnableRecursiveRenaming(bool); - - TProtoSchemaOptions& SetFieldRenames(THashMap<TString, TString>); - }; - - EEnumFormatType EnumFormatType(const google::protobuf::FieldDescriptor& enumField, EEnumPolicy enumPolicy); - - /** - * Build struct type from a protobuf descriptor. The returned yson can be loaded into a struct annotation node - * using the ParseTypeFromYson function. - */ - NYT::TNode MakeSchemaFromProto(const google::protobuf::Descriptor&, const TProtoSchemaOptions& = {}); - - /** - * Build variant over tuple type from protobuf descriptors. - */ - NYT::TNode MakeVariantSchemaFromProtos(const TVector<const google::protobuf::Descriptor*>&, const TProtoSchemaOptions& = {}); - } -} +namespace NPureCalc { +enum class EEnumPolicy { + Int32, + String, + YTFlagDefaultInt32, + YTFlagDefaultString +}; + +enum class EEnumFormatType { + Int32, + String +}; + +/** + * Options that customize building of struct type from protobuf descriptor. + */ +struct TProtoSchemaOptions { +public: + EEnumPolicy EnumPolicy; + bool ListIsOptional; + bool EnableRecursiveRenaming; + THashMap<TString, TString> FieldRenames; + +public: + TProtoSchemaOptions(); + +public: + TProtoSchemaOptions& SetEnumPolicy(EEnumPolicy); + + TProtoSchemaOptions& SetListIsOptional(bool); + + TProtoSchemaOptions& SetEnableRecursiveRenaming(bool); + + TProtoSchemaOptions& SetFieldRenames(THashMap<TString, TString>); +}; + +EEnumFormatType EnumFormatType(const google::protobuf::FieldDescriptor& enumField, EEnumPolicy enumPolicy); + +/** + * Build struct type from a protobuf descriptor. The returned yson can be loaded into a struct annotation node + * using the ParseTypeFromYson function. + */ +NYT::TNode MakeSchemaFromProto(const google::protobuf::Descriptor&, const TProtoSchemaOptions& = {}); + +/** + * Build variant over tuple type from protobuf descriptors. + */ +NYT::TNode MakeVariantSchemaFromProtos(const TVector<const google::protobuf::Descriptor*>&, const TProtoSchemaOptions& = {}); +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/helpers/protobuf/ya.make b/yql/essentials/public/purecalc/helpers/protobuf/ya.make index 11300baba84..5e1b006bf92 100644 --- a/yql/essentials/public/purecalc/helpers/protobuf/ya.make +++ b/yql/essentials/public/purecalc/helpers/protobuf/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( schema_from_proto.cpp ) diff --git a/yql/essentials/public/purecalc/helpers/stream/stream_from_vector.h b/yql/essentials/public/purecalc/helpers/stream/stream_from_vector.h index a2a50558003..69bcbb1f566 100644 --- a/yql/essentials/public/purecalc/helpers/stream/stream_from_vector.h +++ b/yql/essentials/public/purecalc/helpers/stream/stream_from_vector.h @@ -3,38 +3,38 @@ #include <yql/essentials/public/purecalc/common/interface.h> namespace NYql { - namespace NPureCalc { - namespace NPrivate { - template <typename T> - class TVectorStream final: public IStream<T*> { - private: - size_t I_; - TVector<T> Data_; +namespace NPureCalc { +namespace NPrivate { +template <typename T> +class TVectorStream final: public IStream<T*> { +private: + size_t I_; + TVector<T> Data_; - public: - explicit TVectorStream(TVector<T> data) - : I_(0) - , Data_(std::move(data)) - { - } - - public: - T* Fetch() override { - if (I_ >= Data_.size()) { - return nullptr; - } else { - return &Data_[I_++]; - } - } - }; - } +public: + explicit TVectorStream(TVector<T> data) + : I_(0) + , Data_(std::move(data)) + { + } - /** - * Convert vector into a purecalc stream. - */ - template <typename T> - THolder<IStream<T*>> StreamFromVector(TVector<T> data) { - return MakeHolder<NPrivate::TVectorStream<T>>(std::move(data)); +public: + T* Fetch() override { + if (I_ >= Data_.size()) { + return nullptr; + } else { + return &Data_[I_++]; } } +}; +} // namespace NPrivate + +/** + * Convert vector into a purecalc stream. + */ +template <typename T> +THolder<IStream<T*>> StreamFromVector(TVector<T> data) { + return MakeHolder<NPrivate::TVectorStream<T>>(std::move(data)); } +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/helpers/stream/ya.make b/yql/essentials/public/purecalc/helpers/stream/ya.make index f40bb9af559..c6aa839b803 100644 --- a/yql/essentials/public/purecalc/helpers/stream/ya.make +++ b/yql/essentials/public/purecalc/helpers/stream/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( stream_from_vector.cpp ) diff --git a/yql/essentials/public/purecalc/io_specs/arrow/spec.cpp b/yql/essentials/public/purecalc/io_specs/arrow/spec.cpp index 84562ee82ce..3b717813794 100644 --- a/yql/essentials/public/purecalc/io_specs/arrow/spec.cpp +++ b/yql/essentials/public/purecalc/io_specs/arrow/spec.cpp @@ -28,8 +28,7 @@ inline TVector<THolder<T>> VectorFromHolder(THolder<T> holder) { return result; } - -class TArrowIStreamImpl : public IArrowIStream { +class TArrowIStreamImpl: public IArrowIStream { private: IArrowIStream* Underlying_; // If we own Underlying_, than Owned_ == Underlying_; @@ -59,7 +58,6 @@ public: } }; - /** * Converts input Datums to unboxed values. */ @@ -73,16 +71,15 @@ public: explicit TArrowInputConverter( const TArrowInputSpec& inputSpec, ui32 index, - IWorker* worker - ) + IWorker* worker) : Factory_(worker->GetGraph().GetHolderFactory()) { const NYT::TNode& inputSchema = inputSpec.GetSchema(index); // Deduce the schema from the input MKQL type, if no is // provided by <inputSpec>. const NYT::TNode& schema = inputSchema.IsEntity() - ? worker->MakeInputSchema(index) - : inputSchema; + ? worker->MakeInputSchema(index) + : inputSchema; const auto* type = worker->GetRawInputType(index); @@ -118,7 +115,6 @@ public: } }; - /** * Converts unboxed values to output Datums (single-output program case). */ @@ -132,8 +128,7 @@ protected: public: explicit TArrowOutputConverter( const TArrowOutputSpec& outputSpec, - IWorker* worker - ) + IWorker* worker) : Factory_(worker->GetGraph().GetHolderFactory()) { Batch_.Reset(new arrow::compute::ExecBatch); @@ -142,8 +137,8 @@ public: // Deduce the schema from the output MKQL type, if no is // provided by <outputSpec>. const NYT::TNode& schema = outputSchema.IsEntity() - ? worker->MakeOutputSchema() - : outputSchema; + ? worker->MakeOutputSchema() + : outputSchema; const auto* type = worker->GetRawOutputType(); @@ -194,7 +189,6 @@ public: } }; - /** * List (or, better, stream) of unboxed values. * Used as an input value in pull workers. @@ -213,13 +207,12 @@ public: const TArrowInputSpec& inputSpec, ui32 index, THolder<IArrowIStream> underlying, - IWorker* worker - ) - : TCustomListValue(memInfo) - , Underlying_(std::move(underlying)) - , Worker_(worker) - , Converter_(inputSpec, index, Worker_) - , ScopedAlloc_(Worker_->GetScopedAlloc()) + IWorker* worker) + : TCustomListValue(memInfo) + , Underlying_(std::move(underlying)) + , Worker_(worker) + , Converter_(inputSpec, index, Worker_) + , ScopedAlloc_(Worker_->GetScopedAlloc()) { } @@ -273,7 +266,6 @@ public: } }; - /** * Arrow input stream for unboxed value lists. */ @@ -285,8 +277,7 @@ protected: public: explicit TArrowListImpl( const TArrowOutputSpec& outputSpec, - TWorkerHolder<IPullListWorker> worker - ) + TWorkerHolder<IPullListWorker> worker) : WorkerHolder_(std::move(worker)) , Converter_(outputSpec, WorkerHolder_.Get()) { @@ -295,7 +286,7 @@ public: OutputItemType Fetch() override { TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); - with_lock(WorkerHolder_->GetScopedAlloc()) { + with_lock (WorkerHolder_->GetScopedAlloc()) { TUnboxedValue value; if (!WorkerHolder_->GetOutputIterator().Next(value)) { @@ -307,7 +298,6 @@ public: } }; - /** * Arrow input stream for unboxed value streams. */ @@ -326,7 +316,7 @@ public: OutputItemType Fetch() override { TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); - with_lock(WorkerHolder_->GetScopedAlloc()) { + with_lock (WorkerHolder_->GetScopedAlloc()) { TUnboxedValue value; auto status = WorkerHolder_->GetOutput().Fetch(value); @@ -341,7 +331,6 @@ public: } }; - /** * Consumer which converts Datums to unboxed values and relays them to the * worker. Used as a return value of the push processor's Process function. @@ -354,8 +343,7 @@ private: public: explicit TArrowConsumerImpl( const TArrowInputSpec& inputSpec, - TWorkerHolder<IPushStreamWorker> worker - ) + TWorkerHolder<IPushStreamWorker> worker) : TArrowConsumerImpl(inputSpec, 0, std::move(worker)) { } @@ -363,8 +351,7 @@ public: explicit TArrowConsumerImpl( const TArrowInputSpec& inputSpec, ui32 index, - TWorkerHolder<IPushStreamWorker> worker - ) + TWorkerHolder<IPushStreamWorker> worker) : WorkerHolder_(std::move(worker)) , Converter_(inputSpec, index, WorkerHolder_.Get()) { @@ -373,7 +360,7 @@ public: void OnObject(arrow::compute::ExecBatch* batch) override { TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); - with_lock(WorkerHolder_->GetScopedAlloc()) { + with_lock (WorkerHolder_->GetScopedAlloc()) { TUnboxedValue result; Converter_.DoConvert(batch, result); WorkerHolder_->Push(std::move(result)); @@ -383,13 +370,12 @@ public: void OnFinish() override { TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); - with_lock(WorkerHolder_->GetScopedAlloc()) { + with_lock (WorkerHolder_->GetScopedAlloc()) { WorkerHolder_->OnFinish(); } } }; - /** * Push relay used to convert generated unboxed value to a Datum and push it to * the user's consumer. @@ -404,8 +390,7 @@ public: TArrowPushRelayImpl( const TArrowOutputSpec& outputSpec, IPushStreamWorker* worker, - THolder<IConsumer<OutputItemType>> underlying - ) + THolder<IConsumer<OutputItemType>> underlying) : Underlying_(std::move(underlying)) , Worker_(worker) , Converter_(outputSpec, Worker_) @@ -429,15 +414,13 @@ public: } }; - template <typename TWorker> void PrepareWorkerImpl(const TArrowInputSpec& inputSpec, TWorker* worker, - TVector<THolder<TArrowIStreamImpl>>&& streams -) { + TVector<THolder<TArrowIStreamImpl>>&& streams) { YQL_ENSURE(worker->GetInputsCount() == streams.size(), - "number of input streams should match number of inputs provided by spec"); + "number of input streams should match number of inputs provided by spec"); - with_lock(worker->GetScopedAlloc()) { + with_lock (worker->GetScopedAlloc()) { auto& holderFactory = worker->GetGraph().GetHolderFactory(); for (ui32 i = 0; i < streams.size(); i++) { auto input = holderFactory.template Create<TArrowListValue>( @@ -449,7 +432,6 @@ void PrepareWorkerImpl(const TArrowInputSpec& inputSpec, TWorker* worker, } // namespace - TArrowInputSpec::TArrowInputSpec(const TVector<NYT::TNode>& schemas) : Schemas_(schemas) { @@ -465,16 +447,14 @@ const NYT::TNode& TArrowInputSpec::GetSchema(ui32 index) const { void TInputSpecTraits<TArrowInputSpec>::PreparePullListWorker( const TArrowInputSpec& inputSpec, IPullListWorker* worker, - IArrowIStream* stream -) { + IArrowIStream* stream) { TInputSpecTraits<TArrowInputSpec>::PreparePullListWorker( inputSpec, worker, TVector<IArrowIStream*>({stream})); } void TInputSpecTraits<TArrowInputSpec>::PreparePullListWorker( const TArrowInputSpec& inputSpec, IPullListWorker* worker, - const TVector<IArrowIStream*>& streams -) { + const TVector<IArrowIStream*>& streams) { TVector<THolder<TArrowIStreamImpl>> wrappers; for (ui32 i = 0; i < streams.size(); i++) { wrappers.push_back(MakeHolder<TArrowIStreamImpl>(streams[i])); @@ -484,16 +464,14 @@ void TInputSpecTraits<TArrowInputSpec>::PreparePullListWorker( void TInputSpecTraits<TArrowInputSpec>::PreparePullListWorker( const TArrowInputSpec& inputSpec, IPullListWorker* worker, - THolder<IArrowIStream> stream -) { + THolder<IArrowIStream> stream) { TInputSpecTraits<TArrowInputSpec>::PreparePullListWorker(inputSpec, worker, - VectorFromHolder<IArrowIStream>(std::move(stream))); + VectorFromHolder<IArrowIStream>(std::move(stream))); } void TInputSpecTraits<TArrowInputSpec>::PreparePullListWorker( const TArrowInputSpec& inputSpec, IPullListWorker* worker, - TVector<THolder<IArrowIStream>>&& streams -) { + TVector<THolder<IArrowIStream>>&& streams) { TVector<THolder<TArrowIStreamImpl>> wrappers; for (ui32 i = 0; i < streams.size(); i++) { wrappers.push_back(MakeHolder<TArrowIStreamImpl>(std::move(streams[i]))); @@ -501,19 +479,16 @@ void TInputSpecTraits<TArrowInputSpec>::PreparePullListWorker( PrepareWorkerImpl(inputSpec, worker, std::move(wrappers)); } - void TInputSpecTraits<TArrowInputSpec>::PreparePullStreamWorker( const TArrowInputSpec& inputSpec, IPullStreamWorker* worker, - IArrowIStream* stream -) { + IArrowIStream* stream) { TInputSpecTraits<TArrowInputSpec>::PreparePullStreamWorker( inputSpec, worker, TVector<IArrowIStream*>({stream})); } void TInputSpecTraits<TArrowInputSpec>::PreparePullStreamWorker( const TArrowInputSpec& inputSpec, IPullStreamWorker* worker, - const TVector<IArrowIStream*>& streams -) { + const TVector<IArrowIStream*>& streams) { TVector<THolder<TArrowIStreamImpl>> wrappers; for (ui32 i = 0; i < streams.size(); i++) { wrappers.push_back(MakeHolder<TArrowIStreamImpl>(streams[i])); @@ -523,16 +498,14 @@ void TInputSpecTraits<TArrowInputSpec>::PreparePullStreamWorker( void TInputSpecTraits<TArrowInputSpec>::PreparePullStreamWorker( const TArrowInputSpec& inputSpec, IPullStreamWorker* worker, - THolder<IArrowIStream> stream -) { + THolder<IArrowIStream> stream) { TInputSpecTraits<TArrowInputSpec>::PreparePullStreamWorker( inputSpec, worker, VectorFromHolder<IArrowIStream>(std::move(stream))); } void TInputSpecTraits<TArrowInputSpec>::PreparePullStreamWorker( const TArrowInputSpec& inputSpec, IPullStreamWorker* worker, - TVector<THolder<IArrowIStream>>&& streams -) { + TVector<THolder<IArrowIStream>>&& streams) { TVector<THolder<TArrowIStreamImpl>> wrappers; for (ui32 i = 0; i < streams.size(); i++) { wrappers.push_back(MakeHolder<TArrowIStreamImpl>(std::move(streams[i]))); @@ -540,14 +513,11 @@ void TInputSpecTraits<TArrowInputSpec>::PreparePullStreamWorker( PrepareWorkerImpl(inputSpec, worker, std::move(wrappers)); } - ConsumerType TInputSpecTraits<TArrowInputSpec>::MakeConsumer( - const TArrowInputSpec& inputSpec, TWorkerHolder<IPushStreamWorker> worker -) { + const TArrowInputSpec& inputSpec, TWorkerHolder<IPushStreamWorker> worker) { return MakeHolder<TArrowConsumerImpl>(inputSpec, std::move(worker)); } - TArrowOutputSpec::TArrowOutputSpec(const NYT::TNode& schema) : Schema_(schema) { @@ -557,22 +527,18 @@ const NYT::TNode& TArrowOutputSpec::GetSchema() const { return Schema_; } - PullListReturnType TOutputSpecTraits<TArrowOutputSpec>::ConvertPullListWorkerToOutputType( - const TArrowOutputSpec& outputSpec, TWorkerHolder<IPullListWorker> worker -) { + const TArrowOutputSpec& outputSpec, TWorkerHolder<IPullListWorker> worker) { return MakeHolder<TArrowListImpl>(outputSpec, std::move(worker)); } PullStreamReturnType TOutputSpecTraits<TArrowOutputSpec>::ConvertPullStreamWorkerToOutputType( - const TArrowOutputSpec& outputSpec, TWorkerHolder<IPullStreamWorker> worker -) { + const TArrowOutputSpec& outputSpec, TWorkerHolder<IPullStreamWorker> worker) { return MakeHolder<TArrowStreamImpl>(outputSpec, std::move(worker)); } void TOutputSpecTraits<TArrowOutputSpec>::SetConsumerToWorker( const TArrowOutputSpec& outputSpec, IPushStreamWorker* worker, - THolder<IConsumer<TOutputItemType>> consumer -) { + THolder<IConsumer<TOutputItemType>> consumer) { worker->SetConsumer(MakeHolder<TArrowPushRelayImpl>(outputSpec, worker, std::move(consumer))); } diff --git a/yql/essentials/public/purecalc/io_specs/arrow/spec.h b/yql/essentials/public/purecalc/io_specs/arrow/spec.h index 42780b1a376..0b48321539d 100644 --- a/yql/essentials/public/purecalc/io_specs/arrow/spec.h +++ b/yql/essentials/public/purecalc/io_specs/arrow/spec.h @@ -37,7 +37,9 @@ public: explicit TArrowInputSpec(const TVector<NYT::TNode>& schemas); const TVector<NYT::TNode>& GetSchemas() const override; const NYT::TNode& GetSchema(ui32 index) const; - bool ProvidesBlocks() const override { return true; } + bool ProvidesBlocks() const override { + return true; + } }; /** @@ -70,7 +72,9 @@ private: public: explicit TArrowOutputSpec(const NYT::TNode& schema); const NYT::TNode& GetSchema() const override; - bool AcceptsBlocks() const override { return true; } + bool AcceptsBlocks() const override { + return true; + } }; template <> @@ -86,22 +90,22 @@ struct TInputSpecTraits<TArrowInputSpec> { using TConsumerType = THolder<IConsumer<TInputItemType>>; static void PreparePullListWorker(const TArrowInputSpec&, IPullListWorker*, - IInputStream*); + IInputStream*); static void PreparePullListWorker(const TArrowInputSpec&, IPullListWorker*, - THolder<IInputStream>); + THolder<IInputStream>); static void PreparePullListWorker(const TArrowInputSpec&, IPullListWorker*, - const TVector<IInputStream*>&); + const TVector<IInputStream*>&); static void PreparePullListWorker(const TArrowInputSpec&, IPullListWorker*, - TVector<THolder<IInputStream>>&&); + TVector<THolder<IInputStream>>&&); static void PreparePullStreamWorker(const TArrowInputSpec&, IPullStreamWorker*, - IInputStream*); + IInputStream*); static void PreparePullStreamWorker(const TArrowInputSpec&, IPullStreamWorker*, - THolder<IInputStream>); + THolder<IInputStream>); static void PreparePullStreamWorker(const TArrowInputSpec&, IPullStreamWorker*, - const TVector<IInputStream*>&); + const TVector<IInputStream*>&); static void PreparePullStreamWorker(const TArrowInputSpec&, IPullStreamWorker*, - TVector<THolder<IInputStream>>&&); + TVector<THolder<IInputStream>>&&); static TConsumerType MakeConsumer(const TArrowInputSpec&, TWorkerHolder<IPushStreamWorker>); }; diff --git a/yql/essentials/public/purecalc/io_specs/arrow/ut/test_spec.cpp b/yql/essentials/public/purecalc/io_specs/arrow/ut/test_spec.cpp index e9e0cb7072b..2d0d42ca21a 100644 --- a/yql/essentials/public/purecalc/io_specs/arrow/ut/test_spec.cpp +++ b/yql/essentials/public/purecalc/io_specs/arrow/ut/test_spec.cpp @@ -12,20 +12,20 @@ namespace { #define Y_UNIT_TEST_ADD_BLOCK_TEST(N, MODE) \ TCurrentTest::AddTest(#N ":BlockEngineMode=" #MODE, \ - static_cast<void (*)(NUnitTest::TTestContext&)>(&N<NYql::EBlockEngineMode::MODE>), false); - -#define Y_UNIT_TEST_BLOCKS(N) \ - template<NYql::EBlockEngineMode BlockEngineMode> \ - void N(NUnitTest::TTestContext&); \ - struct TTestRegistration##N { \ - TTestRegistration##N() { \ - Y_UNIT_TEST_ADD_BLOCK_TEST(N, Disable) \ - Y_UNIT_TEST_ADD_BLOCK_TEST(N, Auto) \ - Y_UNIT_TEST_ADD_BLOCK_TEST(N, Force) \ - } \ - }; \ - static TTestRegistration##N testRegistration##N; \ - template<NYql::EBlockEngineMode BlockEngineMode> \ + static_cast<void (*)(NUnitTest::TTestContext&)>(&N<NYql::EBlockEngineMode::MODE>), false); + +#define Y_UNIT_TEST_BLOCKS(N) \ + template <NYql::EBlockEngineMode BlockEngineMode> \ + void N(NUnitTest::TTestContext&); \ + struct TTestRegistration##N { \ + TTestRegistration##N() { \ + Y_UNIT_TEST_ADD_BLOCK_TEST(N, Disable) \ + Y_UNIT_TEST_ADD_BLOCK_TEST(N, Auto) \ + Y_UNIT_TEST_ADD_BLOCK_TEST(N, Force) \ + } \ + }; \ + static TTestRegistration##N testRegistration##N; \ + template <NYql::EBlockEngineMode BlockEngineMode> \ void N(NUnitTest::TTestContext&) NYql::NPureCalc::TProgramFactoryOptions TestOptions(NYql::EBlockEngineMode mode) { @@ -39,7 +39,6 @@ NYql::NPureCalc::TProgramFactoryOptions TestOptions(NYql::EBlockEngineMode mode) return options; } - template <typename T> struct TVectorStream: public NYql::NPureCalc::IStream<T*> { TVector<T> Data; @@ -56,8 +55,7 @@ public: } }; - -template<typename T> +template <typename T> struct TVectorConsumer: public NYql::NPureCalc::IConsumer<T*> { TVector<T>& Data; size_t Index = 0; @@ -78,15 +76,13 @@ public: } }; - using ExecBatchStreamImpl = TVectorStream<arrow::compute::ExecBatch>; using ExecBatchConsumerImpl = TVectorConsumer<arrow::compute::ExecBatch>; template <typename TBuilder> arrow::Datum MakeArrayDatumFromVector( const TVector<typename TBuilder::value_type>& data, - const TVector<bool>& valid -) { + const TVector<bool>& valid) { TBuilder builder; ARROW_OK(builder.Reserve(data.size())); ARROW_OK(builder.AppendValues(data, valid)); @@ -96,17 +92,16 @@ arrow::Datum MakeArrayDatumFromVector( template <typename TValue> TVector<TValue> MakeVectorFromArrayDatum( const arrow::Datum& datum, - const int64_t dsize -) { + const int64_t dsize) { Y_ENSURE(datum.is_array(), "ExecBatch layout doesn't respect the schema"); const auto& array = *datum.array(); Y_ENSURE(array.length == dsize, - "Array Datum size differs from the given ExecBatch size"); + "Array Datum size differs from the given ExecBatch size"); Y_ENSURE(array.GetNullCount() == 0, - "Null values conversion is not supported"); + "Null values conversion is not supported"); Y_ENSURE(array.buffers.size() == 2, - "Array Datum layout doesn't respect the schema"); + "Array Datum layout doesn't respect the schema"); const TValue* adata1 = array.GetValuesSafe<TValue>(1); return TVector<TValue>(adata1, adata1 + dsize); @@ -122,8 +117,7 @@ arrow::compute::ExecBatch MakeBatch(ui64 bsize, i64 value, ui64 init = 1) { TVector<arrow::Datum> batchArgs = { MakeArrayDatumFromVector<arrow::UInt64Builder>(data1, valid), - MakeArrayDatumFromVector<arrow::Int64Builder>(data2, valid) - }; + MakeArrayDatumFromVector<arrow::Int64Builder>(data2, valid)}; return arrow::compute::ExecBatch(std::move(batchArgs), bsize); } @@ -146,274 +140,261 @@ TVector<std::tuple<ui64, i64>> CanonBatches(const TVector<arrow::compute::ExecBa } // namespace - Y_UNIT_TEST_SUITE(TestSimplePullListArrowIO) { - Y_UNIT_TEST_BLOCKS(TestSingleInput) { - using namespace NYql::NPureCalc; - - TVector<TString> fields = {"uint64", "int64"}; - auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields); - - auto factory = MakeProgramFactory(TestOptions(BlockEngineMode)); - - try { - auto program = factory->MakePullListProgram( - TArrowInputSpec({schema}), - TArrowOutputSpec(schema), - "SELECT * FROM Input", - ETranslationMode::SQL - ); - - const TVector<arrow::compute::ExecBatch> input({MakeBatch(9, 19)}); - const auto canonInput = CanonBatches(input); - ExecBatchStreamImpl items(input); - - auto stream = program->Apply(&items); - - TVector<arrow::compute::ExecBatch> output; - while (arrow::compute::ExecBatch* batch = stream->Fetch()) { - output.push_back(*batch); - } - const auto canonOutput = CanonBatches(output); - UNIT_ASSERT_EQUAL(canonInput, canonOutput); - } catch (const TCompileError& error) { - UNIT_FAIL(error.GetIssues()); +Y_UNIT_TEST_BLOCKS(TestSingleInput) { + using namespace NYql::NPureCalc; + + TVector<TString> fields = {"uint64", "int64"}; + auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields); + + auto factory = MakeProgramFactory(TestOptions(BlockEngineMode)); + + try { + auto program = factory->MakePullListProgram( + TArrowInputSpec({schema}), + TArrowOutputSpec(schema), + "SELECT * FROM Input", + ETranslationMode::SQL); + + const TVector<arrow::compute::ExecBatch> input({MakeBatch(9, 19)}); + const auto canonInput = CanonBatches(input); + ExecBatchStreamImpl items(input); + + auto stream = program->Apply(&items); + + TVector<arrow::compute::ExecBatch> output; + while (arrow::compute::ExecBatch* batch = stream->Fetch()) { + output.push_back(*batch); } + const auto canonOutput = CanonBatches(output); + UNIT_ASSERT_EQUAL(canonInput, canonOutput); + } catch (const TCompileError& error) { + UNIT_FAIL(error.GetIssues()); } +} - Y_UNIT_TEST_BLOCKS(TestMultiInput) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST_BLOCKS(TestMultiInput) { + using namespace NYql::NPureCalc; - TVector<TString> fields = {"uint64", "int64"}; - auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields); + TVector<TString> fields = {"uint64", "int64"}; + auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields); - auto factory = MakeProgramFactory(TestOptions(BlockEngineMode)); + auto factory = MakeProgramFactory(TestOptions(BlockEngineMode)); - try { - auto program = factory->MakePullListProgram( - TArrowInputSpec({schema, schema}), - TArrowOutputSpec(schema), - R"( + try { + auto program = factory->MakePullListProgram( + TArrowInputSpec({schema, schema}), + TArrowOutputSpec(schema), + R"( SELECT * FROM Input0 UNION ALL SELECT * FROM Input1 )", - ETranslationMode::SQL - ); - - TVector<arrow::compute::ExecBatch> inputs = { - MakeBatch(9, 19), - MakeBatch(7, 17) - }; - const auto canonInputs = CanonBatches(inputs); - - ExecBatchStreamImpl items0({inputs[0]}); - ExecBatchStreamImpl items1({inputs[1]}); - - const TVector<IStream<arrow::compute::ExecBatch*>*> items({&items0, &items1}); - - auto stream = program->Apply(items); - - TVector<arrow::compute::ExecBatch> output; - while (arrow::compute::ExecBatch* batch = stream->Fetch()) { - output.push_back(*batch); - } - const auto canonOutput = CanonBatches(output); - UNIT_ASSERT_EQUAL(canonInputs, canonOutput); - } catch (const TCompileError& error) { - UNIT_FAIL(error.GetIssues()); + ETranslationMode::SQL); + + TVector<arrow::compute::ExecBatch> inputs = { + MakeBatch(9, 19), + MakeBatch(7, 17)}; + const auto canonInputs = CanonBatches(inputs); + + ExecBatchStreamImpl items0({inputs[0]}); + ExecBatchStreamImpl items1({inputs[1]}); + + const TVector<IStream<arrow::compute::ExecBatch*>*> items({&items0, &items1}); + + auto stream = program->Apply(items); + + TVector<arrow::compute::ExecBatch> output; + while (arrow::compute::ExecBatch* batch = stream->Fetch()) { + output.push_back(*batch); } + const auto canonOutput = CanonBatches(output); + UNIT_ASSERT_EQUAL(canonInputs, canonOutput); + } catch (const TCompileError& error) { + UNIT_FAIL(error.GetIssues()); } } - +} // Y_UNIT_TEST_SUITE(TestSimplePullListArrowIO) Y_UNIT_TEST_SUITE(TestMorePullListArrowIO) { - Y_UNIT_TEST_BLOCKS(TestInc) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST_BLOCKS(TestInc) { + using namespace NYql::NPureCalc; - TVector<TString> fields = {"uint64", "int64"}; - auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields); + TVector<TString> fields = {"uint64", "int64"}; + auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields); - auto factory = MakeProgramFactory(TestOptions(BlockEngineMode)); + auto factory = MakeProgramFactory(TestOptions(BlockEngineMode)); - try { - auto program = factory->MakePullListProgram( - TArrowInputSpec({schema}), - TArrowOutputSpec(schema), - R"(SELECT + try { + auto program = factory->MakePullListProgram( + TArrowInputSpec({schema}), + TArrowOutputSpec(schema), + R"(SELECT uint64 + 1 as uint64, int64 - 2 as int64, FROM Input)", - ETranslationMode::SQL - ); - - const TVector<arrow::compute::ExecBatch> input({MakeBatch(9, 19)}); - const auto canonInput = CanonBatches(input); - ExecBatchStreamImpl items(input); - - auto stream = program->Apply(&items); - - TVector<arrow::compute::ExecBatch> output; - while (arrow::compute::ExecBatch* batch = stream->Fetch()) { - output.push_back(*batch); - } - const auto canonOutput = CanonBatches(output); - const TVector<arrow::compute::ExecBatch> check({MakeBatch(9, 17, 2)}); - const auto canonCheck = CanonBatches(check); - UNIT_ASSERT_EQUAL(canonCheck, canonOutput); - } catch (const TCompileError& error) { - UNIT_FAIL(error.GetIssues()); + ETranslationMode::SQL); + + const TVector<arrow::compute::ExecBatch> input({MakeBatch(9, 19)}); + const auto canonInput = CanonBatches(input); + ExecBatchStreamImpl items(input); + + auto stream = program->Apply(&items); + + TVector<arrow::compute::ExecBatch> output; + while (arrow::compute::ExecBatch* batch = stream->Fetch()) { + output.push_back(*batch); } + const auto canonOutput = CanonBatches(output); + const TVector<arrow::compute::ExecBatch> check({MakeBatch(9, 17, 2)}); + const auto canonCheck = CanonBatches(check); + UNIT_ASSERT_EQUAL(canonCheck, canonOutput); + } catch (const TCompileError& error) { + UNIT_FAIL(error.GetIssues()); } } - +} // Y_UNIT_TEST_SUITE(TestMorePullListArrowIO) Y_UNIT_TEST_SUITE(TestSimplePullStreamArrowIO) { - Y_UNIT_TEST_BLOCKS(TestSingleInput) { - using namespace NYql::NPureCalc; - - TVector<TString> fields = {"uint64", "int64"}; - auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields); - - auto factory = MakeProgramFactory(TestOptions(BlockEngineMode)); - - try { - auto program = factory->MakePullStreamProgram( - TArrowInputSpec({schema}), - TArrowOutputSpec(schema), - "SELECT * FROM Input", - ETranslationMode::SQL - ); - - const TVector<arrow::compute::ExecBatch> input({MakeBatch(9, 19)}); - const auto canonInput = CanonBatches(input); - ExecBatchStreamImpl items(input); - - auto stream = program->Apply(&items); - - TVector<arrow::compute::ExecBatch> output; - while (arrow::compute::ExecBatch* batch = stream->Fetch()) { - output.push_back(*batch); - } - const auto canonOutput = CanonBatches(output); - UNIT_ASSERT_EQUAL(canonInput, canonOutput); - } catch (const TCompileError& error) { - UNIT_FAIL(error.GetIssues()); +Y_UNIT_TEST_BLOCKS(TestSingleInput) { + using namespace NYql::NPureCalc; + + TVector<TString> fields = {"uint64", "int64"}; + auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields); + + auto factory = MakeProgramFactory(TestOptions(BlockEngineMode)); + + try { + auto program = factory->MakePullStreamProgram( + TArrowInputSpec({schema}), + TArrowOutputSpec(schema), + "SELECT * FROM Input", + ETranslationMode::SQL); + + const TVector<arrow::compute::ExecBatch> input({MakeBatch(9, 19)}); + const auto canonInput = CanonBatches(input); + ExecBatchStreamImpl items(input); + + auto stream = program->Apply(&items); + + TVector<arrow::compute::ExecBatch> output; + while (arrow::compute::ExecBatch* batch = stream->Fetch()) { + output.push_back(*batch); } + const auto canonOutput = CanonBatches(output); + UNIT_ASSERT_EQUAL(canonInput, canonOutput); + } catch (const TCompileError& error) { + UNIT_FAIL(error.GetIssues()); } } - +} // Y_UNIT_TEST_SUITE(TestSimplePullStreamArrowIO) Y_UNIT_TEST_SUITE(TestMorePullStreamArrowIO) { - Y_UNIT_TEST_BLOCKS(TestInc) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST_BLOCKS(TestInc) { + using namespace NYql::NPureCalc; - TVector<TString> fields = {"uint64", "int64"}; - auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields); + TVector<TString> fields = {"uint64", "int64"}; + auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields); - auto factory = MakeProgramFactory(TestOptions(BlockEngineMode)); + auto factory = MakeProgramFactory(TestOptions(BlockEngineMode)); - try { - auto program = factory->MakePullStreamProgram( - TArrowInputSpec({schema}), - TArrowOutputSpec(schema), - R"(SELECT + try { + auto program = factory->MakePullStreamProgram( + TArrowInputSpec({schema}), + TArrowOutputSpec(schema), + R"(SELECT uint64 + 1 as uint64, int64 - 2 as int64, FROM Input)", - ETranslationMode::SQL - ); - - const TVector<arrow::compute::ExecBatch> input({MakeBatch(9, 19)}); - const auto canonInput = CanonBatches(input); - ExecBatchStreamImpl items(input); - - auto stream = program->Apply(&items); - - TVector<arrow::compute::ExecBatch> output; - while (arrow::compute::ExecBatch* batch = stream->Fetch()) { - output.push_back(*batch); - } - const auto canonOutput = CanonBatches(output); - const TVector<arrow::compute::ExecBatch> check({MakeBatch(9, 17, 2)}); - const auto canonCheck = CanonBatches(check); - UNIT_ASSERT_EQUAL(canonCheck, canonOutput); - } catch (const TCompileError& error) { - UNIT_FAIL(error.GetIssues()); + ETranslationMode::SQL); + + const TVector<arrow::compute::ExecBatch> input({MakeBatch(9, 19)}); + const auto canonInput = CanonBatches(input); + ExecBatchStreamImpl items(input); + + auto stream = program->Apply(&items); + + TVector<arrow::compute::ExecBatch> output; + while (arrow::compute::ExecBatch* batch = stream->Fetch()) { + output.push_back(*batch); } + const auto canonOutput = CanonBatches(output); + const TVector<arrow::compute::ExecBatch> check({MakeBatch(9, 17, 2)}); + const auto canonCheck = CanonBatches(check); + UNIT_ASSERT_EQUAL(canonCheck, canonOutput); + } catch (const TCompileError& error) { + UNIT_FAIL(error.GetIssues()); } } - +} // Y_UNIT_TEST_SUITE(TestMorePullStreamArrowIO) Y_UNIT_TEST_SUITE(TestPushStreamArrowIO) { - Y_UNIT_TEST_BLOCKS(TestAllColumns) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST_BLOCKS(TestAllColumns) { + using namespace NYql::NPureCalc; - TVector<TString> fields = {"uint64", "int64"}; - auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields); + TVector<TString> fields = {"uint64", "int64"}; + auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields); - auto factory = MakeProgramFactory(TestOptions(BlockEngineMode)); + auto factory = MakeProgramFactory(TestOptions(BlockEngineMode)); - try { - auto program = factory->MakePushStreamProgram( - TArrowInputSpec({schema}), - TArrowOutputSpec(schema), - "SELECT * FROM Input", - ETranslationMode::SQL - ); + try { + auto program = factory->MakePushStreamProgram( + TArrowInputSpec({schema}), + TArrowOutputSpec(schema), + "SELECT * FROM Input", + ETranslationMode::SQL); - arrow::compute::ExecBatch input = MakeBatch(9, 19); - const auto canonInput = CanonBatches({input}); - TVector<arrow::compute::ExecBatch> output; + arrow::compute::ExecBatch input = MakeBatch(9, 19); + const auto canonInput = CanonBatches({input}); + TVector<arrow::compute::ExecBatch> output; - auto consumer = program->Apply(MakeHolder<ExecBatchConsumerImpl>(output)); + auto consumer = program->Apply(MakeHolder<ExecBatchConsumerImpl>(output)); - UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnObject(&input); }()); - UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnFinish(); }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { consumer->OnObject(&input); }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { consumer->OnFinish(); }()); - const auto canonOutput = CanonBatches(output); - UNIT_ASSERT_EQUAL(canonInput, canonOutput); - } catch (const TCompileError& error) { - UNIT_FAIL(error.GetIssues()); - } + const auto canonOutput = CanonBatches(output); + UNIT_ASSERT_EQUAL(canonInput, canonOutput); + } catch (const TCompileError& error) { + UNIT_FAIL(error.GetIssues()); } } +} // Y_UNIT_TEST_SUITE(TestPushStreamArrowIO) Y_UNIT_TEST_SUITE(TestMorePushStreamArrowIO) { - Y_UNIT_TEST_BLOCKS(TestInc) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST_BLOCKS(TestInc) { + using namespace NYql::NPureCalc; - TVector<TString> fields = {"uint64", "int64"}; - auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields); + TVector<TString> fields = {"uint64", "int64"}; + auto schema = NYql::NPureCalc::NPrivate::GetSchema(fields); - auto factory = MakeProgramFactory(TestOptions(BlockEngineMode)); + auto factory = MakeProgramFactory(TestOptions(BlockEngineMode)); - try { - auto program = factory->MakePushStreamProgram( - TArrowInputSpec({schema}), - TArrowOutputSpec(schema), - R"(SELECT + try { + auto program = factory->MakePushStreamProgram( + TArrowInputSpec({schema}), + TArrowOutputSpec(schema), + R"(SELECT uint64 + 1 as uint64, int64 - 2 as int64, FROM Input)", - ETranslationMode::SQL - ); + ETranslationMode::SQL); - arrow::compute::ExecBatch input = MakeBatch(9, 19); - const auto canonInput = CanonBatches({input}); - TVector<arrow::compute::ExecBatch> output; + arrow::compute::ExecBatch input = MakeBatch(9, 19); + const auto canonInput = CanonBatches({input}); + TVector<arrow::compute::ExecBatch> output; - auto consumer = program->Apply(MakeHolder<ExecBatchConsumerImpl>(output)); + auto consumer = program->Apply(MakeHolder<ExecBatchConsumerImpl>(output)); - UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnObject(&input); }()); - UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnFinish(); }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { consumer->OnObject(&input); }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { consumer->OnFinish(); }()); - const auto canonOutput = CanonBatches(output); - const TVector<arrow::compute::ExecBatch> check({MakeBatch(9, 17, 2)}); - const auto canonCheck = CanonBatches(check); - UNIT_ASSERT_EQUAL(canonCheck, canonOutput); - } catch (const TCompileError& error) { - UNIT_FAIL(error.GetIssues()); - } + const auto canonOutput = CanonBatches(output); + const TVector<arrow::compute::ExecBatch> check({MakeBatch(9, 17, 2)}); + const auto canonCheck = CanonBatches(check); + UNIT_ASSERT_EQUAL(canonCheck, canonOutput); + } catch (const TCompileError& error) { + UNIT_FAIL(error.GetIssues()); } } +} // Y_UNIT_TEST_SUITE(TestMorePushStreamArrowIO) diff --git a/yql/essentials/public/purecalc/io_specs/arrow/ut/ya.make b/yql/essentials/public/purecalc/io_specs/arrow/ut/ya.make index ad7eb5881f5..6e50cdd16d1 100644 --- a/yql/essentials/public/purecalc/io_specs/arrow/ut/ya.make +++ b/yql/essentials/public/purecalc/io_specs/arrow/ut/ya.make @@ -1,5 +1,7 @@ UNITTEST() +ENABLE(YQL_STYLE_CPP) + SIZE(MEDIUM) TIMEOUT(300) diff --git a/yql/essentials/public/purecalc/io_specs/arrow/ya.make.inc b/yql/essentials/public/purecalc/io_specs/arrow/ya.make.inc index 37ff3be849e..ef958d3a24a 100644 --- a/yql/essentials/public/purecalc/io_specs/arrow/ya.make.inc +++ b/yql/essentials/public/purecalc/io_specs/arrow/ya.make.inc @@ -1,3 +1,5 @@ +ENABLE(YQL_STYLE_CPP) + SRCDIR( yql/essentials/public/purecalc/io_specs/arrow ) diff --git a/yql/essentials/public/purecalc/io_specs/protobuf/proto_variant.h b/yql/essentials/public/purecalc/io_specs/protobuf/proto_variant.h index 0692440ca1c..6f1703f01b6 100644 --- a/yql/essentials/public/purecalc/io_specs/protobuf/proto_variant.h +++ b/yql/essentials/public/purecalc/io_specs/protobuf/proto_variant.h @@ -5,76 +5,76 @@ #include <array> namespace NYql::NPureCalc::NPrivate { - using TProtoRawMultiOutput = std::pair<ui32, google::protobuf::Message*>; +using TProtoRawMultiOutput = std::pair<ui32, google::protobuf::Message*>; - template <typename... T> - using TProtoMultiOutput = std::variant<T*...>; +template <typename... T> +using TProtoMultiOutput = std::variant<T*...>; - template <size_t I, typename... T> - using TProtoOutput = std::add_pointer_t<typename TTypeList<T...>::template TGet<I>>; +template <size_t I, typename... T> +using TProtoOutput = std::add_pointer_t<typename TTypeList<T...>::template TGet<I>>; - template <size_t I, typename... T> - TProtoMultiOutput<T...> InitProtobufsVariant(google::protobuf::Message* ptr) { - static_assert(std::conjunction_v<std::is_base_of<google::protobuf::Message, T>...>); - return TProtoMultiOutput<T...>(std::in_place_index<I>, static_cast<TProtoOutput<I, T...>>(ptr)); +template <size_t I, typename... T> +TProtoMultiOutput<T...> InitProtobufsVariant(google::protobuf::Message* ptr) { + static_assert(std::conjunction_v<std::is_base_of<google::protobuf::Message, T>...>); + return TProtoMultiOutput<T...>(std::in_place_index<I>, static_cast<TProtoOutput<I, T...>>(ptr)); +} + +template <typename... T> +class TProtobufsMappingBase { +public: + TProtobufsMappingBase() + : InitFuncs_(BuildInitFuncs(std::make_index_sequence<sizeof...(T)>())) + { } - template <typename... T> - class TProtobufsMappingBase { - public: - TProtobufsMappingBase() - : InitFuncs_(BuildInitFuncs(std::make_index_sequence<sizeof...(T)>())) - { - } - - private: - typedef TProtoMultiOutput<T...> (*initfunc)(google::protobuf::Message*); - - template <size_t... I> - inline std::array<initfunc, sizeof...(T)> BuildInitFuncs(std::index_sequence<I...>) { - return {&InitProtobufsVariant<I, T...>...}; - } - - protected: - const std::array<initfunc, sizeof...(T)> InitFuncs_; - }; - - template <typename... T> - class TProtobufsMappingStream: public IStream<TProtoMultiOutput<T...>>, public TProtobufsMappingBase<T...> { - public: - TProtobufsMappingStream(THolder<IStream<TProtoRawMultiOutput>> oldStream) - : OldStream_(std::move(oldStream)) - { - } - - public: - TProtoMultiOutput<T...> Fetch() override { - auto&& oldItem = OldStream_->Fetch(); - return this->InitFuncs_[oldItem.first](oldItem.second); - } - - private: - THolder<IStream<TProtoRawMultiOutput>> OldStream_; - }; - - template <typename... T> - class TProtobufsMappingConsumer: public IConsumer<TProtoRawMultiOutput>, public TProtobufsMappingBase<T...> { - public: - TProtobufsMappingConsumer(THolder<IConsumer<TProtoMultiOutput<T...>>> oldConsumer) - : OldConsumer_(std::move(oldConsumer)) - { - } - - public: - void OnObject(TProtoRawMultiOutput oldItem) override { - OldConsumer_->OnObject(this->InitFuncs_[oldItem.first](oldItem.second)); - } - - void OnFinish() override { - OldConsumer_->OnFinish(); - } - - private: - THolder<IConsumer<TProtoMultiOutput<T...>>> OldConsumer_; - }; -} +private: + typedef TProtoMultiOutput<T...> (*initfunc)(google::protobuf::Message*); + + template <size_t... I> + inline std::array<initfunc, sizeof...(T)> BuildInitFuncs(std::index_sequence<I...>) { + return {&InitProtobufsVariant<I, T...>...}; + } + +protected: + const std::array<initfunc, sizeof...(T)> InitFuncs_; +}; + +template <typename... T> +class TProtobufsMappingStream: public IStream<TProtoMultiOutput<T...>>, public TProtobufsMappingBase<T...> { +public: + TProtobufsMappingStream(THolder<IStream<TProtoRawMultiOutput>> oldStream) + : OldStream_(std::move(oldStream)) + { + } + +public: + TProtoMultiOutput<T...> Fetch() override { + auto&& oldItem = OldStream_->Fetch(); + return this->InitFuncs_[oldItem.first](oldItem.second); + } + +private: + THolder<IStream<TProtoRawMultiOutput>> OldStream_; +}; + +template <typename... T> +class TProtobufsMappingConsumer: public IConsumer<TProtoRawMultiOutput>, public TProtobufsMappingBase<T...> { +public: + TProtobufsMappingConsumer(THolder<IConsumer<TProtoMultiOutput<T...>>> oldConsumer) + : OldConsumer_(std::move(oldConsumer)) + { + } + +public: + void OnObject(TProtoRawMultiOutput oldItem) override { + OldConsumer_->OnObject(this->InitFuncs_[oldItem.first](oldItem.second)); + } + + void OnFinish() override { + OldConsumer_->OnFinish(); + } + +private: + THolder<IConsumer<TProtoMultiOutput<T...>>> OldConsumer_; +}; +} // namespace NYql::NPureCalc::NPrivate diff --git a/yql/essentials/public/purecalc/io_specs/protobuf/spec.h b/yql/essentials/public/purecalc/io_specs/protobuf/spec.h index 0e1a97f632a..f2e45fc8c74 100644 --- a/yql/essentials/public/purecalc/io_specs/protobuf/spec.h +++ b/yql/essentials/public/purecalc/io_specs/protobuf/spec.h @@ -5,143 +5,143 @@ #include <yql/essentials/public/purecalc/io_specs/protobuf_raw/spec.h> namespace NYql { - namespace NPureCalc { - /** - * Processing mode for working with non-raw protobuf messages. - * - * @tparam T message type. - */ - template <typename T> - class TProtobufInputSpec: public TProtobufRawInputSpec { - static_assert(std::is_base_of<google::protobuf::Message, T>::value, - "should be derived from google::protobuf::Message"); - public: - TProtobufInputSpec( - const TMaybe<TString>& timestampColumn = Nothing(), - const TProtoSchemaOptions& options = {} - ) - : TProtobufRawInputSpec(*T::descriptor(), timestampColumn, options) - { - } - }; - - /** - * Processing mode for working with non-raw protobuf messages. - * - * @tparam T message type. - */ - template <typename T> - class TProtobufOutputSpec: public TProtobufRawOutputSpec { - static_assert(std::is_base_of<google::protobuf::Message, T>::value, - "should be derived from google::protobuf::Message"); - public: - TProtobufOutputSpec( - const TProtoSchemaOptions& options = {}, - google::protobuf::Arena* arena = nullptr - ) - : TProtobufRawOutputSpec(*T::descriptor(), nullptr, options, arena) - { - } - }; - - /** - * Processing mode for working with non-raw protobuf messages and several outputs. - */ - template <typename... T> - class TProtobufMultiOutputSpec: public TProtobufRawMultiOutputSpec { - static_assert( - std::conjunction_v<std::is_base_of<google::protobuf::Message, T>...>, - "all types should be derived from google::protobuf::Message"); - public: - TProtobufMultiOutputSpec( - const TProtoSchemaOptions& options = {}, - TMaybe<TVector<google::protobuf::Arena*>> arenas = {} - ) - : TProtobufRawMultiOutputSpec({T::descriptor()...}, Nothing(), options, std::move(arenas)) - { - } - }; - - template <typename T> - struct TInputSpecTraits<TProtobufInputSpec<T>> { - static const constexpr bool IsPartial = false; - - static const constexpr bool SupportPullStreamMode = true; - static const constexpr bool SupportPullListMode = true; - static const constexpr bool SupportPushStreamMode = true; - - using TConsumerType = THolder<IConsumer<T*>>; - - static void PreparePullStreamWorker(const TProtobufInputSpec<T>& inputSpec, IPullStreamWorker* worker, THolder<IStream<T*>> stream) { - auto raw = ConvertStream<google::protobuf::Message*>(std::move(stream)); - TInputSpecTraits<TProtobufRawInputSpec>::PreparePullStreamWorker(inputSpec, worker, std::move(raw)); - } - - static void PreparePullListWorker(const TProtobufInputSpec<T>& inputSpec, IPullListWorker* worker, THolder<IStream<T*>> stream) { - auto raw = ConvertStream<google::protobuf::Message*>(std::move(stream)); - TInputSpecTraits<TProtobufRawInputSpec>::PreparePullListWorker(inputSpec, worker, std::move(raw)); - } - - static TConsumerType MakeConsumer(const TProtobufInputSpec<T>& inputSpec, TWorkerHolder<IPushStreamWorker> worker) { - auto raw = TInputSpecTraits<TProtobufRawInputSpec>::MakeConsumer(inputSpec, std::move(worker)); - return ConvertConsumer<T*>(std::move(raw)); - } - }; - - template <typename T> - struct TOutputSpecTraits<TProtobufOutputSpec<T>> { - static const constexpr bool IsPartial = false; - - static const constexpr bool SupportPullStreamMode = true; - static const constexpr bool SupportPullListMode = true; - static const constexpr bool SupportPushStreamMode = true; - - using TOutputItemType = T*; - using TPullStreamReturnType = THolder<IStream<TOutputItemType>>; - using TPullListReturnType = THolder<IStream<TOutputItemType>>; - - static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufOutputSpec<T>& outputSpec, TWorkerHolder<IPullStreamWorker> worker) { - auto raw = TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullStreamWorkerToOutputType(outputSpec, std::move(worker)); - return ConvertStreamUnsafe<TOutputItemType>(std::move(raw)); - } - - static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufOutputSpec<T>& outputSpec, TWorkerHolder<IPullListWorker> worker) { - auto raw = TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullListWorkerToOutputType(outputSpec, std::move(worker)); - return ConvertStreamUnsafe<TOutputItemType>(std::move(raw)); - } - - static void SetConsumerToWorker(const TProtobufOutputSpec<T>& outputSpec, IPushStreamWorker* worker, THolder<IConsumer<T*>> consumer) { - auto raw = ConvertConsumerUnsafe<google::protobuf::Message*>(std::move(consumer)); - TOutputSpecTraits<TProtobufRawOutputSpec>::SetConsumerToWorker(outputSpec, worker, std::move(raw)); - } - }; - - template <typename... T> - struct TOutputSpecTraits<TProtobufMultiOutputSpec<T...>> { - static const constexpr bool IsPartial = false; - - static const constexpr bool SupportPullStreamMode = true; - static const constexpr bool SupportPullListMode = true; - static const constexpr bool SupportPushStreamMode = true; - - using TOutputItemType = std::variant<T*...>; - using TPullStreamReturnType = THolder<IStream<TOutputItemType>>; - using TPullListReturnType = THolder<IStream<TOutputItemType>>; - - static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufMultiOutputSpec<T...>& outputSpec, TWorkerHolder<IPullStreamWorker> worker) { - auto raw = TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullStreamWorkerToOutputType(outputSpec, std::move(worker)); - return THolder(new NPrivate::TProtobufsMappingStream<T...>(std::move(raw))); - } - - static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufMultiOutputSpec<T...>& outputSpec, TWorkerHolder<IPullListWorker> worker) { - auto raw = TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullListWorkerToOutputType(outputSpec, std::move(worker)); - return THolder(new NPrivate::TProtobufsMappingStream<T...>(std::move(raw))); - } - - static void SetConsumerToWorker(const TProtobufMultiOutputSpec<T...>& outputSpec, IPushStreamWorker* worker, THolder<IConsumer<TOutputItemType>> consumer) { - auto wrapper = MakeHolder<NPrivate::TProtobufsMappingConsumer<T...>>(std::move(consumer)); - TOutputSpecTraits<TProtobufRawMultiOutputSpec>::SetConsumerToWorker(outputSpec, worker, std::move(wrapper)); - } - }; +namespace NPureCalc { +/** + * Processing mode for working with non-raw protobuf messages. + * + * @tparam T message type. + */ +template <typename T> +class TProtobufInputSpec: public TProtobufRawInputSpec { + static_assert(std::is_base_of<google::protobuf::Message, T>::value, + "should be derived from google::protobuf::Message"); + +public: + TProtobufInputSpec( + const TMaybe<TString>& timestampColumn = Nothing(), + const TProtoSchemaOptions& options = {}) + : TProtobufRawInputSpec(*T::descriptor(), timestampColumn, options) + { } -} +}; + +/** + * Processing mode for working with non-raw protobuf messages. + * + * @tparam T message type. + */ +template <typename T> +class TProtobufOutputSpec: public TProtobufRawOutputSpec { + static_assert(std::is_base_of<google::protobuf::Message, T>::value, + "should be derived from google::protobuf::Message"); + +public: + TProtobufOutputSpec( + const TProtoSchemaOptions& options = {}, + google::protobuf::Arena* arena = nullptr) + : TProtobufRawOutputSpec(*T::descriptor(), nullptr, options, arena) + { + } +}; + +/** + * Processing mode for working with non-raw protobuf messages and several outputs. + */ +template <typename... T> +class TProtobufMultiOutputSpec: public TProtobufRawMultiOutputSpec { + static_assert( + std::conjunction_v<std::is_base_of<google::protobuf::Message, T>...>, + "all types should be derived from google::protobuf::Message"); + +public: + TProtobufMultiOutputSpec( + const TProtoSchemaOptions& options = {}, + TMaybe<TVector<google::protobuf::Arena*>> arenas = {}) + : TProtobufRawMultiOutputSpec({T::descriptor()...}, Nothing(), options, std::move(arenas)) + { + } +}; + +template <typename T> +struct TInputSpecTraits<TProtobufInputSpec<T>> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = true; + static const constexpr bool SupportPullListMode = true; + static const constexpr bool SupportPushStreamMode = true; + + using TConsumerType = THolder<IConsumer<T*>>; + + static void PreparePullStreamWorker(const TProtobufInputSpec<T>& inputSpec, IPullStreamWorker* worker, THolder<IStream<T*>> stream) { + auto raw = ConvertStream<google::protobuf::Message*>(std::move(stream)); + TInputSpecTraits<TProtobufRawInputSpec>::PreparePullStreamWorker(inputSpec, worker, std::move(raw)); + } + + static void PreparePullListWorker(const TProtobufInputSpec<T>& inputSpec, IPullListWorker* worker, THolder<IStream<T*>> stream) { + auto raw = ConvertStream<google::protobuf::Message*>(std::move(stream)); + TInputSpecTraits<TProtobufRawInputSpec>::PreparePullListWorker(inputSpec, worker, std::move(raw)); + } + + static TConsumerType MakeConsumer(const TProtobufInputSpec<T>& inputSpec, TWorkerHolder<IPushStreamWorker> worker) { + auto raw = TInputSpecTraits<TProtobufRawInputSpec>::MakeConsumer(inputSpec, std::move(worker)); + return ConvertConsumer<T*>(std::move(raw)); + } +}; + +template <typename T> +struct TOutputSpecTraits<TProtobufOutputSpec<T>> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = true; + static const constexpr bool SupportPullListMode = true; + static const constexpr bool SupportPushStreamMode = true; + + using TOutputItemType = T*; + using TPullStreamReturnType = THolder<IStream<TOutputItemType>>; + using TPullListReturnType = THolder<IStream<TOutputItemType>>; + + static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufOutputSpec<T>& outputSpec, TWorkerHolder<IPullStreamWorker> worker) { + auto raw = TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullStreamWorkerToOutputType(outputSpec, std::move(worker)); + return ConvertStreamUnsafe<TOutputItemType>(std::move(raw)); + } + + static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufOutputSpec<T>& outputSpec, TWorkerHolder<IPullListWorker> worker) { + auto raw = TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullListWorkerToOutputType(outputSpec, std::move(worker)); + return ConvertStreamUnsafe<TOutputItemType>(std::move(raw)); + } + + static void SetConsumerToWorker(const TProtobufOutputSpec<T>& outputSpec, IPushStreamWorker* worker, THolder<IConsumer<T*>> consumer) { + auto raw = ConvertConsumerUnsafe<google::protobuf::Message*>(std::move(consumer)); + TOutputSpecTraits<TProtobufRawOutputSpec>::SetConsumerToWorker(outputSpec, worker, std::move(raw)); + } +}; + +template <typename... T> +struct TOutputSpecTraits<TProtobufMultiOutputSpec<T...>> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = true; + static const constexpr bool SupportPullListMode = true; + static const constexpr bool SupportPushStreamMode = true; + + using TOutputItemType = std::variant<T*...>; + using TPullStreamReturnType = THolder<IStream<TOutputItemType>>; + using TPullListReturnType = THolder<IStream<TOutputItemType>>; + + static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufMultiOutputSpec<T...>& outputSpec, TWorkerHolder<IPullStreamWorker> worker) { + auto raw = TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullStreamWorkerToOutputType(outputSpec, std::move(worker)); + return THolder(new NPrivate::TProtobufsMappingStream<T...>(std::move(raw))); + } + + static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufMultiOutputSpec<T...>& outputSpec, TWorkerHolder<IPullListWorker> worker) { + auto raw = TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullListWorkerToOutputType(outputSpec, std::move(worker)); + return THolder(new NPrivate::TProtobufsMappingStream<T...>(std::move(raw))); + } + + static void SetConsumerToWorker(const TProtobufMultiOutputSpec<T...>& outputSpec, IPushStreamWorker* worker, THolder<IConsumer<TOutputItemType>> consumer) { + auto wrapper = MakeHolder<NPrivate::TProtobufsMappingConsumer<T...>>(std::move(consumer)); + TOutputSpecTraits<TProtobufRawMultiOutputSpec>::SetConsumerToWorker(outputSpec, worker, std::move(wrapper)); + } +}; +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/io_specs/protobuf/ut/test_spec.cpp b/yql/essentials/public/purecalc/io_specs/protobuf/ut/test_spec.cpp index 5d479d2ef2c..981b8403754 100644 --- a/yql/essentials/public/purecalc/io_specs/protobuf/ut/test_spec.cpp +++ b/yql/essentials/public/purecalc/io_specs/protobuf/ut/test_spec.cpp @@ -7,71 +7,71 @@ #include <util/generic/xrange.h> namespace { - TMaybe<NPureCalcProto::TAllTypes> allTypesMessage; - - NPureCalcProto::TAllTypes& GetCanonicalMessage() { - if (!allTypesMessage) { - allTypesMessage = NPureCalcProto::TAllTypes(); - - allTypesMessage->SetFDouble(1); - allTypesMessage->SetFFloat(2); - allTypesMessage->SetFInt64(3); - allTypesMessage->SetFSfixed64(4); - allTypesMessage->SetFSint64(5); - allTypesMessage->SetFUint64(6); - allTypesMessage->SetFFixed64(7); - allTypesMessage->SetFInt32(8); - allTypesMessage->SetFSfixed32(9); - allTypesMessage->SetFSint32(10); - allTypesMessage->SetFUint32(11); - allTypesMessage->SetFFixed32(12); - allTypesMessage->SetFBool(true); - allTypesMessage->SetFString("asd"); - allTypesMessage->SetFBytes("dsa"); - } - - return allTypesMessage.GetRef(); +TMaybe<NPureCalcProto::TAllTypes> allTypesMessage; + +NPureCalcProto::TAllTypes& GetCanonicalMessage() { + if (!allTypesMessage) { + allTypesMessage = NPureCalcProto::TAllTypes(); + + allTypesMessage->SetFDouble(1); + allTypesMessage->SetFFloat(2); + allTypesMessage->SetFInt64(3); + allTypesMessage->SetFSfixed64(4); + allTypesMessage->SetFSint64(5); + allTypesMessage->SetFUint64(6); + allTypesMessage->SetFFixed64(7); + allTypesMessage->SetFInt32(8); + allTypesMessage->SetFSfixed32(9); + allTypesMessage->SetFSint32(10); + allTypesMessage->SetFUint32(11); + allTypesMessage->SetFFixed32(12); + allTypesMessage->SetFBool(true); + allTypesMessage->SetFString("asd"); + allTypesMessage->SetFBytes("dsa"); } - template <typename T1, typename T2> - void AssertEqualToCanonical(const T1& got, const T2& expected) { - UNIT_ASSERT_EQUAL(expected.GetFDouble(), got.GetFDouble()); - UNIT_ASSERT_EQUAL(expected.GetFFloat(), got.GetFFloat()); - UNIT_ASSERT_EQUAL(expected.GetFInt64(), got.GetFInt64()); - UNIT_ASSERT_EQUAL(expected.GetFSfixed64(), got.GetFSfixed64()); - UNIT_ASSERT_EQUAL(expected.GetFSint64(), got.GetFSint64()); - UNIT_ASSERT_EQUAL(expected.GetFUint64(), got.GetFUint64()); - UNIT_ASSERT_EQUAL(expected.GetFFixed64(), got.GetFFixed64()); - UNIT_ASSERT_EQUAL(expected.GetFInt32(), got.GetFInt32()); - UNIT_ASSERT_EQUAL(expected.GetFSfixed32(), got.GetFSfixed32()); - UNIT_ASSERT_EQUAL(expected.GetFSint32(), got.GetFSint32()); - UNIT_ASSERT_EQUAL(expected.GetFUint32(), got.GetFUint32()); - UNIT_ASSERT_EQUAL(expected.GetFFixed32(), got.GetFFixed32()); - UNIT_ASSERT_EQUAL(expected.GetFBool(), got.GetFBool()); - UNIT_ASSERT_EQUAL(expected.GetFString(), got.GetFString()); - UNIT_ASSERT_EQUAL(expected.GetFBytes(), got.GetFBytes()); - } + return allTypesMessage.GetRef(); +} - template <typename T> - void AssertEqualToCanonical(const T& got) { - AssertEqualToCanonical(got, GetCanonicalMessage()); - } +template <typename T1, typename T2> +void AssertEqualToCanonical(const T1& got, const T2& expected) { + UNIT_ASSERT_EQUAL(expected.GetFDouble(), got.GetFDouble()); + UNIT_ASSERT_EQUAL(expected.GetFFloat(), got.GetFFloat()); + UNIT_ASSERT_EQUAL(expected.GetFInt64(), got.GetFInt64()); + UNIT_ASSERT_EQUAL(expected.GetFSfixed64(), got.GetFSfixed64()); + UNIT_ASSERT_EQUAL(expected.GetFSint64(), got.GetFSint64()); + UNIT_ASSERT_EQUAL(expected.GetFUint64(), got.GetFUint64()); + UNIT_ASSERT_EQUAL(expected.GetFFixed64(), got.GetFFixed64()); + UNIT_ASSERT_EQUAL(expected.GetFInt32(), got.GetFInt32()); + UNIT_ASSERT_EQUAL(expected.GetFSfixed32(), got.GetFSfixed32()); + UNIT_ASSERT_EQUAL(expected.GetFSint32(), got.GetFSint32()); + UNIT_ASSERT_EQUAL(expected.GetFUint32(), got.GetFUint32()); + UNIT_ASSERT_EQUAL(expected.GetFFixed32(), got.GetFFixed32()); + UNIT_ASSERT_EQUAL(expected.GetFBool(), got.GetFBool()); + UNIT_ASSERT_EQUAL(expected.GetFString(), got.GetFString()); + UNIT_ASSERT_EQUAL(expected.GetFBytes(), got.GetFBytes()); +} - TString SerializeToTextFormatAsString(const google::protobuf::Message& message) { - TString result; - { - TStringOutput output(result); - SerializeToTextFormat(message, output); - } - return result; - } +template <typename T> +void AssertEqualToCanonical(const T& got) { + AssertEqualToCanonical(got, GetCanonicalMessage()); +} - template <typename T> - void AssertProtoEqual(const T& actual, const T& expected) { - UNIT_ASSERT_VALUES_EQUAL(SerializeToTextFormatAsString(actual), SerializeToTextFormatAsString(expected)); +TString SerializeToTextFormatAsString(const google::protobuf::Message& message) { + TString result; + { + TStringOutput output(result); + SerializeToTextFormat(message, output); } + return result; } +template <typename T> +void AssertProtoEqual(const T& actual, const T& expected) { + UNIT_ASSERT_VALUES_EQUAL(SerializeToTextFormatAsString(actual), SerializeToTextFormatAsString(expected)); +} +} // namespace + class TAllTypesStreamImpl: public NYql::NPureCalc::IStream<NPureCalcProto::TAllTypes*> { private: int I_ = 0; @@ -256,7 +256,7 @@ private: ui32 I_ = 0; }; -template<typename T> +template <typename T> struct TVectorConsumer: public NYql::NPureCalc::IConsumer<T*> { TVector<T> Data; @@ -280,494 +280,476 @@ public: }; Y_UNIT_TEST_SUITE(TestProtoIO) { - Y_UNIT_TEST(TestAllTypes) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST(TestAllTypes) { + using namespace NYql::NPureCalc; - auto factory = MakeProgramFactory(); + auto factory = MakeProgramFactory(); - { - auto program = factory->MakePullStreamProgram( - TProtobufInputSpec<NPureCalcProto::TAllTypes>(), - TProtobufOutputSpec<NPureCalcProto::TAllTypes>(), - "SELECT * FROM Input", - ETranslationMode::SQL - ); + { + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TAllTypes>(), + TProtobufOutputSpec<NPureCalcProto::TAllTypes>(), + "SELECT * FROM Input", + ETranslationMode::SQL); - auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); + auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); - NPureCalcProto::TAllTypes* message; + NPureCalcProto::TAllTypes* message; - UNIT_ASSERT(message = stream->Fetch()); - AssertEqualToCanonical(*message); - UNIT_ASSERT(!stream->Fetch()); - } + UNIT_ASSERT(message = stream->Fetch()); + AssertEqualToCanonical(*message); + UNIT_ASSERT(!stream->Fetch()); + } - { - auto program = factory->MakePullListProgram( - TProtobufInputSpec<NPureCalcProto::TAllTypes>(), - TProtobufOutputSpec<NPureCalcProto::TAllTypes>(), - "SELECT * FROM Input", - ETranslationMode::SQL - ); + { + auto program = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TAllTypes>(), + TProtobufOutputSpec<NPureCalcProto::TAllTypes>(), + "SELECT * FROM Input", + ETranslationMode::SQL); - auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); + auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); - NPureCalcProto::TAllTypes* message; + NPureCalcProto::TAllTypes* message; - UNIT_ASSERT(message = stream->Fetch()); - AssertEqualToCanonical(*message); - UNIT_ASSERT(!stream->Fetch()); - } + UNIT_ASSERT(message = stream->Fetch()); + AssertEqualToCanonical(*message); + UNIT_ASSERT(!stream->Fetch()); + } - { - auto program = factory->MakePushStreamProgram( - TProtobufInputSpec<NPureCalcProto::TAllTypes>(), - TProtobufOutputSpec<NPureCalcProto::TAllTypes>(), - "SELECT * FROM Input", - ETranslationMode::SQL - ); + { + auto program = factory->MakePushStreamProgram( + TProtobufInputSpec<NPureCalcProto::TAllTypes>(), + TProtobufOutputSpec<NPureCalcProto::TAllTypes>(), + "SELECT * FROM Input", + ETranslationMode::SQL); - auto consumer = program->Apply(MakeHolder<TAllTypesConsumerImpl>()); + auto consumer = program->Apply(MakeHolder<TAllTypesConsumerImpl>()); - UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnObject(&GetCanonicalMessage()); }()); - UNIT_ASSERT_NO_EXCEPTION([&](){ consumer->OnFinish(); }()); - } + UNIT_ASSERT_NO_EXCEPTION([&]() { consumer->OnObject(&GetCanonicalMessage()); }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { consumer->OnFinish(); }()); } +} - template <typename T> - void CheckPassThroughYql(T& testInput, google::protobuf::Arena* arena = nullptr) { - using namespace NYql::NPureCalc; +template <typename T> +void CheckPassThroughYql(T& testInput, google::protobuf::Arena* arena = nullptr) { + using namespace NYql::NPureCalc; - auto resetArena = [arena]() { - if (arena != nullptr) { - arena->Reset(); - } - }; + auto resetArena = [arena]() { + if (arena != nullptr) { + arena->Reset(); + } + }; - auto factory = MakeProgramFactory(); + auto factory = MakeProgramFactory(); - { - auto program = factory->MakePushStreamProgram( - TProtobufInputSpec<T>(), - TProtobufOutputSpec<T>({}, arena), - "SELECT * FROM Input", - ETranslationMode::SQL - ); - - auto resultConsumer = MakeHolder<TVectorConsumer<T>>(); - auto* resultConsumerPtr = resultConsumer.Get(); - auto sourceConsumer = program->Apply(std::move(resultConsumer)); - - sourceConsumer->OnObject(&testInput); - UNIT_ASSERT_VALUES_EQUAL(1, resultConsumerPtr->Data.size()); - AssertProtoEqual(resultConsumerPtr->Data[0], testInput); - - resultConsumerPtr->Data.clear(); - sourceConsumer->OnObject(&testInput); - UNIT_ASSERT_VALUES_EQUAL(1, resultConsumerPtr->Data.size()); - AssertProtoEqual(resultConsumerPtr->Data[0], testInput); - } - resetArena(); + { + auto program = factory->MakePushStreamProgram( + TProtobufInputSpec<T>(), + TProtobufOutputSpec<T>({}, arena), + "SELECT * FROM Input", + ETranslationMode::SQL); + + auto resultConsumer = MakeHolder<TVectorConsumer<T>>(); + auto* resultConsumerPtr = resultConsumer.Get(); + auto sourceConsumer = program->Apply(std::move(resultConsumer)); + + sourceConsumer->OnObject(&testInput); + UNIT_ASSERT_VALUES_EQUAL(1, resultConsumerPtr->Data.size()); + AssertProtoEqual(resultConsumerPtr->Data[0], testInput); + + resultConsumerPtr->Data.clear(); + sourceConsumer->OnObject(&testInput); + UNIT_ASSERT_VALUES_EQUAL(1, resultConsumerPtr->Data.size()); + AssertProtoEqual(resultConsumerPtr->Data[0], testInput); + } + resetArena(); - { - auto program = factory->MakePullStreamProgram( - TProtobufInputSpec<T>(), - TProtobufOutputSpec<T>({}, arena), - "SELECT * FROM Input", - ETranslationMode::SQL - ); - - auto sourceStream = MakeHolder<TVectorStream<T>>(); - auto* sourceStreamPtr = sourceStream.Get(); - auto resultStream = program->Apply(std::move(sourceStream)); - - sourceStreamPtr->Data.push_back(testInput); - T* resultMessage; - UNIT_ASSERT(resultMessage = resultStream->Fetch()); - AssertProtoEqual(*resultMessage, testInput); - UNIT_ASSERT(!resultStream->Fetch()); - - UNIT_ASSERT_VALUES_EQUAL(resultMessage->GetArena(), arena); - } - resetArena(); + { + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<T>(), + TProtobufOutputSpec<T>({}, arena), + "SELECT * FROM Input", + ETranslationMode::SQL); + + auto sourceStream = MakeHolder<TVectorStream<T>>(); + auto* sourceStreamPtr = sourceStream.Get(); + auto resultStream = program->Apply(std::move(sourceStream)); + + sourceStreamPtr->Data.push_back(testInput); + T* resultMessage; + UNIT_ASSERT(resultMessage = resultStream->Fetch()); + AssertProtoEqual(*resultMessage, testInput); + UNIT_ASSERT(!resultStream->Fetch()); + + UNIT_ASSERT_VALUES_EQUAL(resultMessage->GetArena(), arena); + } + resetArena(); - { - auto program = factory->MakePullListProgram( - TProtobufInputSpec<T>(), - TProtobufOutputSpec<T>({}, arena), - "SELECT * FROM Input", - ETranslationMode::SQL - ); - - auto sourceStream = MakeHolder<TVectorStream<T>>(); - auto* sourceStreamPtr = sourceStream.Get(); - auto resultStream = program->Apply(std::move(sourceStream)); - - sourceStreamPtr->Data.push_back(testInput); - T* resultMessage; - UNIT_ASSERT(resultMessage = resultStream->Fetch()); - AssertProtoEqual(*resultMessage, testInput); - UNIT_ASSERT(!resultStream->Fetch()); - - UNIT_ASSERT_VALUES_EQUAL(resultMessage->GetArena(), arena); - } - resetArena(); + { + auto program = factory->MakePullListProgram( + TProtobufInputSpec<T>(), + TProtobufOutputSpec<T>({}, arena), + "SELECT * FROM Input", + ETranslationMode::SQL); + + auto sourceStream = MakeHolder<TVectorStream<T>>(); + auto* sourceStreamPtr = sourceStream.Get(); + auto resultStream = program->Apply(std::move(sourceStream)); + + sourceStreamPtr->Data.push_back(testInput); + T* resultMessage; + UNIT_ASSERT(resultMessage = resultStream->Fetch()); + AssertProtoEqual(*resultMessage, testInput); + UNIT_ASSERT(!resultStream->Fetch()); + + UNIT_ASSERT_VALUES_EQUAL(resultMessage->GetArena(), arena); } + resetArena(); +} - template <typename T> - void CheckMessageIsInvalid(const TString& expectedExceptionMessage) { - using namespace NYql::NPureCalc; +template <typename T> +void CheckMessageIsInvalid(const TString& expectedExceptionMessage) { + using namespace NYql::NPureCalc; - auto factory = MakeProgramFactory(); + auto factory = MakeProgramFactory(); - UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { - factory->MakePushStreamProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL); - }(), yexception, expectedExceptionMessage); + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePushStreamProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL); + }(), yexception, expectedExceptionMessage); - UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { - factory->MakePullStreamProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL); - }(), yexception, expectedExceptionMessage); + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullStreamProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL); + }(), yexception, expectedExceptionMessage); - UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { - factory->MakePullListProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL); - }(), yexception, expectedExceptionMessage); - } + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullListProgram(TProtobufInputSpec<T>(), TProtobufOutputSpec<T>(), "SELECT * FROM Input", ETranslationMode::SQL); + }(), yexception, expectedExceptionMessage); +} - Y_UNIT_TEST(TestSimpleNested) { - NPureCalcProto::TSimpleNested input; - input.SetX(10); - { - auto* item = input.MutableY(); - *item = GetCanonicalMessage(); - item->SetFUint64(100); - } - CheckPassThroughYql(input); +Y_UNIT_TEST(TestSimpleNested) { + NPureCalcProto::TSimpleNested input; + input.SetX(10); + { + auto* item = input.MutableY(); + *item = GetCanonicalMessage(); + item->SetFUint64(100); } + CheckPassThroughYql(input); +} - Y_UNIT_TEST(TestOptionalNested) { - NPureCalcProto::TOptionalNested input; - { - auto* item = input.MutableX(); - *item = GetCanonicalMessage(); - item->SetFUint64(100); - } - CheckPassThroughYql(input); +Y_UNIT_TEST(TestOptionalNested) { + NPureCalcProto::TOptionalNested input; + { + auto* item = input.MutableX(); + *item = GetCanonicalMessage(); + item->SetFUint64(100); } + CheckPassThroughYql(input); +} - Y_UNIT_TEST(TestSimpleRepeated) { - NPureCalcProto::TSimpleRepeated input; - input.SetX(20); - input.AddY(100); - input.AddY(200); - input.AddY(300); - CheckPassThroughYql(input); - } +Y_UNIT_TEST(TestSimpleRepeated) { + NPureCalcProto::TSimpleRepeated input; + input.SetX(20); + input.AddY(100); + input.AddY(200); + input.AddY(300); + CheckPassThroughYql(input); +} - Y_UNIT_TEST(TestNestedRepeated) { - NPureCalcProto::TNestedRepeated input; - input.SetX(20); +Y_UNIT_TEST(TestNestedRepeated) { + NPureCalcProto::TNestedRepeated input; + input.SetX(20); + { + auto* item = input.MutableY()->Add(); + item->SetX(100); { - auto* item = input.MutableY()->Add(); - item->SetX(100); - { - auto* y = item->MutableY(); - *y = GetCanonicalMessage(); - y->SetFUint64(1000); - } + auto* y = item->MutableY(); + *y = GetCanonicalMessage(); + y->SetFUint64(1000); } + } + { + auto* item = input.MutableY()->Add(); + item->SetX(200); { - auto* item = input.MutableY()->Add(); - item->SetX(200); - { - auto* y = item->MutableY(); - *y = GetCanonicalMessage(); - y->SetFUint64(2000); - } + auto* y = item->MutableY(); + *y = GetCanonicalMessage(); + y->SetFUint64(2000); } - CheckPassThroughYql(input); } + CheckPassThroughYql(input); +} - Y_UNIT_TEST(TestMessageWithEnum) { - NPureCalcProto::TMessageWithEnum input; - input.AddEnumValue(NPureCalcProto::TMessageWithEnum::VALUE1); - input.AddEnumValue(NPureCalcProto::TMessageWithEnum::VALUE2); - CheckPassThroughYql(input); - } +Y_UNIT_TEST(TestMessageWithEnum) { + NPureCalcProto::TMessageWithEnum input; + input.AddEnumValue(NPureCalcProto::TMessageWithEnum::VALUE1); + input.AddEnumValue(NPureCalcProto::TMessageWithEnum::VALUE2); + CheckPassThroughYql(input); +} - Y_UNIT_TEST(TestRecursive) { - CheckMessageIsInvalid<NPureCalcProto::TRecursive>("NPureCalcProto.TRecursive->NPureCalcProto.TRecursive"); - } +Y_UNIT_TEST(TestRecursive) { + CheckMessageIsInvalid<NPureCalcProto::TRecursive>("NPureCalcProto.TRecursive->NPureCalcProto.TRecursive"); +} - Y_UNIT_TEST(TestRecursiveIndirectly) { - CheckMessageIsInvalid<NPureCalcProto::TRecursiveIndirectly>( - "NPureCalcProto.TRecursiveIndirectly->NPureCalcProto.TRecursiveIndirectly.TNested->NPureCalcProto.TRecursiveIndirectly"); - } +Y_UNIT_TEST(TestRecursiveIndirectly) { + CheckMessageIsInvalid<NPureCalcProto::TRecursiveIndirectly>( + "NPureCalcProto.TRecursiveIndirectly->NPureCalcProto.TRecursiveIndirectly.TNested->NPureCalcProto.TRecursiveIndirectly"); +} - Y_UNIT_TEST(TestColumnsFilter) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST(TestColumnsFilter) { + using namespace NYql::NPureCalc; - auto factory = MakeProgramFactory(); + auto factory = MakeProgramFactory(); - auto filter = THashSet<TString>({"FFixed64", "FBool", "FBytes"}); + auto filter = THashSet<TString>({"FFixed64", "FBool", "FBytes"}); - NPureCalcProto::TOptionalAllTypes canonicalMessage; - canonicalMessage.SetFFixed64(GetCanonicalMessage().GetFFixed64()); - canonicalMessage.SetFBool(GetCanonicalMessage().GetFBool()); - canonicalMessage.SetFBytes(GetCanonicalMessage().GetFBytes()); + NPureCalcProto::TOptionalAllTypes canonicalMessage; + canonicalMessage.SetFFixed64(GetCanonicalMessage().GetFFixed64()); + canonicalMessage.SetFBool(GetCanonicalMessage().GetFBool()); + canonicalMessage.SetFBytes(GetCanonicalMessage().GetFBytes()); - { - auto inputSpec = TProtobufInputSpec<NPureCalcProto::TAllTypes>(); - auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(); - outputSpec.SetOutputColumnsFilter(filter); + { + auto inputSpec = TProtobufInputSpec<NPureCalcProto::TAllTypes>(); + auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(); + outputSpec.SetOutputColumnsFilter(filter); - auto program = factory->MakePullStreamProgram( - inputSpec, - outputSpec, - "SELECT * FROM Input", - ETranslationMode::SQL - ); + auto program = factory->MakePullStreamProgram( + inputSpec, + outputSpec, + "SELECT * FROM Input", + ETranslationMode::SQL); - UNIT_ASSERT_EQUAL(program->GetUsedColumns(), filter); + UNIT_ASSERT_EQUAL(program->GetUsedColumns(), filter); - auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); + auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); - NPureCalcProto::TOptionalAllTypes* message; + NPureCalcProto::TOptionalAllTypes* message; - UNIT_ASSERT(message = stream->Fetch()); - AssertEqualToCanonical(*message, canonicalMessage); - UNIT_ASSERT(!stream->Fetch()); - } + UNIT_ASSERT(message = stream->Fetch()); + AssertEqualToCanonical(*message, canonicalMessage); + UNIT_ASSERT(!stream->Fetch()); } +} - Y_UNIT_TEST(TestColumnsFilterWithOptionalFields) { - using namespace NYql::NPureCalc; - - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestColumnsFilterWithOptionalFields) { + using namespace NYql::NPureCalc; - auto fields = THashSet<TString>({"FFixed64", "FBool", "FBytes"}); + auto factory = MakeProgramFactory(); - NPureCalcProto::TOptionalAllTypes canonicalMessage; - canonicalMessage.SetFFixed64(GetCanonicalMessage().GetFFixed64()); - canonicalMessage.SetFBool(GetCanonicalMessage().GetFBool()); - canonicalMessage.SetFBytes(GetCanonicalMessage().GetFBytes()); + auto fields = THashSet<TString>({"FFixed64", "FBool", "FBytes"}); - { - auto program = factory->MakePullStreamProgram( - TProtobufInputSpec<NPureCalcProto::TAllTypes>(), - TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(), - "SELECT FFixed64, FBool, FBytes FROM Input", - ETranslationMode::SQL - ); + NPureCalcProto::TOptionalAllTypes canonicalMessage; + canonicalMessage.SetFFixed64(GetCanonicalMessage().GetFFixed64()); + canonicalMessage.SetFBool(GetCanonicalMessage().GetFBool()); + canonicalMessage.SetFBytes(GetCanonicalMessage().GetFBytes()); - UNIT_ASSERT_EQUAL(program->GetUsedColumns(), fields); + { + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TAllTypes>(), + TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(), + "SELECT FFixed64, FBool, FBytes FROM Input", + ETranslationMode::SQL); - auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); + UNIT_ASSERT_EQUAL(program->GetUsedColumns(), fields); - NPureCalcProto::TOptionalAllTypes* message; + auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); - UNIT_ASSERT(message = stream->Fetch()); - AssertEqualToCanonical(*message, canonicalMessage); - UNIT_ASSERT(!stream->Fetch()); - } + NPureCalcProto::TOptionalAllTypes* message; - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePullStreamProgram( - TProtobufInputSpec<NPureCalcProto::TAllTypes>(), - TProtobufOutputSpec<NPureCalcProto::TAllTypes>(), - "SELECT FFixed64, FBool, FBytes FROM Input", - ETranslationMode::SQL - ); - }(), TCompileError, "Failed to optimize"); + UNIT_ASSERT(message = stream->Fetch()); + AssertEqualToCanonical(*message, canonicalMessage); + UNIT_ASSERT(!stream->Fetch()); } - Y_UNIT_TEST(TestUsedColumns) { - using namespace NYql::NPureCalc; + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TAllTypes>(), + TProtobufOutputSpec<NPureCalcProto::TAllTypes>(), + "SELECT FFixed64, FBool, FBytes FROM Input", + ETranslationMode::SQL); + }(), TCompileError, "Failed to optimize"); +} - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestUsedColumns) { + using namespace NYql::NPureCalc; - auto allFields = THashSet<TString>(); + auto factory = MakeProgramFactory(); - for (auto i: xrange(NPureCalcProto::TOptionalAllTypes::descriptor()->field_count())) { - allFields.emplace(NPureCalcProto::TOptionalAllTypes::descriptor()->field(i)->name()); - } + auto allFields = THashSet<TString>(); - { - auto program = factory->MakePullStreamProgram( - TProtobufInputSpec<NPureCalcProto::TAllTypes>(), - TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(), - "SELECT * FROM Input", - ETranslationMode::SQL - ); - - UNIT_ASSERT_EQUAL(program->GetUsedColumns(), allFields); - } + for (auto i : xrange(NPureCalcProto::TOptionalAllTypes::descriptor()->field_count())) { + allFields.emplace(NPureCalcProto::TOptionalAllTypes::descriptor()->field(i)->name()); } - Y_UNIT_TEST(TestChaining) { - using namespace NYql::NPureCalc; + { + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TAllTypes>(), + TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(), + "SELECT * FROM Input", + ETranslationMode::SQL); - auto factory = MakeProgramFactory(); + UNIT_ASSERT_EQUAL(program->GetUsedColumns(), allFields); + } +} - TString sql1 = "SELECT UNWRAP(X || CAST(\"HI\" AS Utf8)) AS X FROM Input"; - TString sql2 = "SELECT LENGTH(X) AS X FROM Input"; +Y_UNIT_TEST(TestChaining) { + using namespace NYql::NPureCalc; - { - auto program1 = factory->MakePullStreamProgram( - TProtobufInputSpec<NPureCalcProto::TStringMessage>(), - TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), - sql1, - ETranslationMode::SQL - ); - - auto program2 = factory->MakePullStreamProgram( - TProtobufInputSpec<NPureCalcProto::TStringMessage>(), - TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(), - sql2, - ETranslationMode::SQL - ); - - auto input = MakeHolder<TStringMessageStreamImpl>(); - auto intermediate = program1->Apply(std::move(input)); - auto output = program2->Apply(std::move(intermediate)); - - TVector<int> expected = {2, 3, 4}; - TVector<int> actual{}; - - while (auto *x = output->Fetch()) { - actual.push_back(x->GetX()); - } + auto factory = MakeProgramFactory(); + + TString sql1 = "SELECT UNWRAP(X || CAST(\"HI\" AS Utf8)) AS X FROM Input"; + TString sql2 = "SELECT LENGTH(X) AS X FROM Input"; - UNIT_ASSERT_EQUAL(expected, actual); + { + auto program1 = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + sql1, + ETranslationMode::SQL); + + auto program2 = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(), + sql2, + ETranslationMode::SQL); + + auto input = MakeHolder<TStringMessageStreamImpl>(); + auto intermediate = program1->Apply(std::move(input)); + auto output = program2->Apply(std::move(intermediate)); + + TVector<int> expected = {2, 3, 4}; + TVector<int> actual{}; + + while (auto* x = output->Fetch()) { + actual.push_back(x->GetX()); } - { - auto program1 = factory->MakePullListProgram( - TProtobufInputSpec<NPureCalcProto::TStringMessage>(), - TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), - sql1, - ETranslationMode::SQL - ); - - auto program2 = factory->MakePullListProgram( - TProtobufInputSpec<NPureCalcProto::TStringMessage>(), - TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(), - sql2, - ETranslationMode::SQL - ); - - auto input = MakeHolder<TStringMessageStreamImpl>(); - auto intermediate = program1->Apply(std::move(input)); - auto output = program2->Apply(std::move(intermediate)); - - TVector<int> expected = {2, 3, 4}; - TVector<int> actual{}; - - while (auto *x = output->Fetch()) { - actual.push_back(x->GetX()); - } + UNIT_ASSERT_EQUAL(expected, actual); + } - UNIT_ASSERT_EQUAL(expected, actual); + { + auto program1 = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + sql1, + ETranslationMode::SQL); + + auto program2 = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(), + sql2, + ETranslationMode::SQL); + + auto input = MakeHolder<TStringMessageStreamImpl>(); + auto intermediate = program1->Apply(std::move(input)); + auto output = program2->Apply(std::move(intermediate)); + + TVector<int> expected = {2, 3, 4}; + TVector<int> actual{}; + + while (auto* x = output->Fetch()) { + actual.push_back(x->GetX()); } - { - auto program1 = factory->MakePushStreamProgram( - TProtobufInputSpec<NPureCalcProto::TStringMessage>(), - TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), - sql1, - ETranslationMode::SQL - ); + UNIT_ASSERT_EQUAL(expected, actual); + } + + { + auto program1 = factory->MakePushStreamProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + sql1, + ETranslationMode::SQL); - auto program2 = factory->MakePushStreamProgram( - TProtobufInputSpec<NPureCalcProto::TStringMessage>(), - TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(), - sql2, - ETranslationMode::SQL - ); + auto program2 = factory->MakePushStreamProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>(), + sql2, + ETranslationMode::SQL); - TVector<int> expected = {2, 3, 4, -100}; - TVector<int> actual{}; + TVector<int> expected = {2, 3, 4, -100}; + TVector<int> actual{}; - auto consumer = MakeHolder<TSimpleMessageConsumerImpl>(&actual); - auto intermediate = program2->Apply(std::move(consumer)); - auto input = program1->Apply(std::move(intermediate)); + auto consumer = MakeHolder<TSimpleMessageConsumerImpl>(&actual); + auto intermediate = program2->Apply(std::move(consumer)); + auto input = program1->Apply(std::move(intermediate)); - NPureCalcProto::TStringMessage Message; + NPureCalcProto::TStringMessage Message; - Message.SetX(""); - input->OnObject(&Message); + Message.SetX(""); + input->OnObject(&Message); - Message.SetX("1"); - input->OnObject(&Message); + Message.SetX("1"); + input->OnObject(&Message); - Message.SetX("22"); - input->OnObject(&Message); + Message.SetX("22"); + input->OnObject(&Message); - input->OnFinish(); + input->OnFinish(); - UNIT_ASSERT_EQUAL(expected, actual); - } + UNIT_ASSERT_EQUAL(expected, actual); } +} - Y_UNIT_TEST(TestTimestampColumn) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST(TestTimestampColumn) { + using namespace NYql::NPureCalc; - auto factory = MakeProgramFactory(TProgramFactoryOptions() - .SetDeterministicTimeProviderSeed(1)); // seconds + auto factory = MakeProgramFactory(TProgramFactoryOptions() + .SetDeterministicTimeProviderSeed(1)); // seconds - NPureCalcProto::TOptionalAllTypes canonicalMessage; + NPureCalcProto::TOptionalAllTypes canonicalMessage; - { - auto inputSpec = TProtobufInputSpec<NPureCalcProto::TAllTypes>("MyTimestamp"); - auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(); + { + auto inputSpec = TProtobufInputSpec<NPureCalcProto::TAllTypes>("MyTimestamp"); + auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TOptionalAllTypes>(); - auto program = factory->MakePullStreamProgram( - inputSpec, - outputSpec, - "SELECT MyTimestamp AS FFixed64 FROM Input", - ETranslationMode::SQL - ); + auto program = factory->MakePullStreamProgram( + inputSpec, + outputSpec, + "SELECT MyTimestamp AS FFixed64 FROM Input", + ETranslationMode::SQL); - auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); + auto stream = program->Apply(MakeHolder<TAllTypesStreamImpl>()); - NPureCalcProto::TOptionalAllTypes* message; + NPureCalcProto::TOptionalAllTypes* message; - UNIT_ASSERT(message = stream->Fetch()); - UNIT_ASSERT_VALUES_EQUAL(message->GetFFixed64(), 1000000); // microseconds - UNIT_ASSERT(!stream->Fetch()); - } + UNIT_ASSERT(message = stream->Fetch()); + UNIT_ASSERT_VALUES_EQUAL(message->GetFFixed64(), 1000000); // microseconds + UNIT_ASSERT(!stream->Fetch()); } +} - Y_UNIT_TEST(TestTableNames) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST(TestTableNames) { + using namespace NYql::NPureCalc; - auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true)); + auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseSystemColumns(true)); - auto runTest = [&](TStringBuf tableName, i32 value) { - auto program = factory->MakePullStreamProgram( - TProtobufInputSpec<NPureCalcProto::TSimpleMessage>(), - TProtobufOutputSpec<NPureCalcProto::TNamedSimpleMessage>(), - TString::Join("SELECT TableName() AS Name, X FROM ", tableName), - ETranslationMode::SQL - ); + auto runTest = [&](TStringBuf tableName, i32 value) { + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TSimpleMessage>(), + TProtobufOutputSpec<NPureCalcProto::TNamedSimpleMessage>(), + TString::Join("SELECT TableName() AS Name, X FROM ", tableName), + ETranslationMode::SQL); - auto stream = program->Apply(MakeHolder<TSimpleMessageStreamImpl>(value)); - auto message = stream->Fetch(); + auto stream = program->Apply(MakeHolder<TSimpleMessageStreamImpl>(value)); + auto message = stream->Fetch(); - UNIT_ASSERT(message); - UNIT_ASSERT_VALUES_EQUAL(message->GetX(), value); - UNIT_ASSERT_VALUES_EQUAL(message->GetName(), tableName); - UNIT_ASSERT(!stream->Fetch()); - }; + UNIT_ASSERT(message); + UNIT_ASSERT_VALUES_EQUAL(message->GetX(), value); + UNIT_ASSERT_VALUES_EQUAL(message->GetName(), tableName); + UNIT_ASSERT(!stream->Fetch()); + }; - runTest("Input", 37); - runTest("Input0", -23); - } + runTest("Input", 37); + runTest("Input0", -23); +} - void CheckMultiOutputs(TMaybe<TVector<google::protobuf::Arena*>> arenas) { - using namespace NYql::NPureCalc; +void CheckMultiOutputs(TMaybe<TVector<google::protobuf::Arena*>> arenas) { + using namespace NYql::NPureCalc; - auto factory = MakeProgramFactory(); - TString sExpr = R"( + auto factory = MakeProgramFactory(); + TString sExpr = R"( ( (let $type (ParseType '"Variant<Struct<BInt:Int32,BString:Utf8>, Struct<CUint:Uint32,CString:Utf8>, Struct<X:Utf8>>")) (let $stream (Self '0)) @@ -781,293 +763,276 @@ Y_UNIT_TEST_SUITE(TestProtoIO) { ) )"; - { - auto program = factory->MakePushStreamProgram( - TProtobufInputSpec<NPureCalcProto::TUnsplitted>(), - TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>( - {}, arenas - ), - sExpr, - ETranslationMode::SExpr - ); + { + auto program = factory->MakePushStreamProgram( + TProtobufInputSpec<NPureCalcProto::TUnsplitted>(), + TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>( + {}, arenas), + sExpr, + ETranslationMode::SExpr); - TVariantConsumerImpl::TType0 queue0; - TVariantConsumerImpl::TType1 queue1; - TVariantConsumerImpl::TType2 queue2; - int finalValue = 0; + TVariantConsumerImpl::TType0 queue0; + TVariantConsumerImpl::TType1 queue1; + TVariantConsumerImpl::TType2 queue2; + int finalValue = 0; - auto consumer = MakeHolder<TVariantConsumerImpl>(&queue0, &queue1, &queue2, &finalValue); - auto input = program->Apply(std::move(consumer)); + auto consumer = MakeHolder<TVariantConsumerImpl>(&queue0, &queue1, &queue2, &finalValue); + auto input = program->Apply(std::move(consumer)); - NPureCalcProto::TUnsplitted message; - message.SetAInt(-13); - message.SetAUint(47); - message.SetAString("first message"); - message.SetABool(true); + NPureCalcProto::TUnsplitted message; + message.SetAInt(-13); + message.SetAUint(47); + message.SetAString("first message"); + message.SetABool(true); - input->OnObject(&message); - UNIT_ASSERT(queue0.size() == 1 && queue1.empty() && queue2.empty() && finalValue == 0); + input->OnObject(&message); + UNIT_ASSERT(queue0.size() == 1 && queue1.empty() && queue2.empty() && finalValue == 0); - message.SetABool(false); - message.SetAString("second message"); + message.SetABool(false); + message.SetAString("second message"); - input->OnObject(&message); - UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.empty() && finalValue == 0); + input->OnObject(&message); + UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.empty() && finalValue == 0); - message.ClearABool(); + message.ClearABool(); - input->OnObject(&message); - UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.size() == 1 && finalValue == 0); + input->OnObject(&message); + UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.size() == 1 && finalValue == 0); - input->OnFinish(); - UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.size() == 1 && finalValue == 42); + input->OnFinish(); + UNIT_ASSERT(queue0.size() == 1 && queue1.size() == 1 && queue2.size() == 1 && finalValue == 42); - TVariantConsumerImpl::TType0 expected0 = {{-13, "first message"}}; - UNIT_ASSERT_EQUAL(queue0, expected0); + TVariantConsumerImpl::TType0 expected0 = {{-13, "first message"}}; + UNIT_ASSERT_EQUAL(queue0, expected0); - TVariantConsumerImpl::TType1 expected1 = {{47, "second message"}}; - UNIT_ASSERT_EQUAL(queue1, expected1); + TVariantConsumerImpl::TType1 expected1 = {{47, "second message"}}; + UNIT_ASSERT_EQUAL(queue1, expected1); - TVariantConsumerImpl::TType2 expected2 = {{"Error"}}; - UNIT_ASSERT_EQUAL(queue2, expected2); - } + TVariantConsumerImpl::TType2 expected2 = {{"Error"}}; + UNIT_ASSERT_EQUAL(queue2, expected2); + } - { - auto program1 = factory->MakePullStreamProgram( - TProtobufInputSpec<NPureCalcProto::TUnsplitted>(), - TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>( - {}, arenas - ), - sExpr, - ETranslationMode::SExpr - ); - - auto program2 = factory->MakePullListProgram( - TProtobufInputSpec<NPureCalcProto::TUnsplitted>(), - TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>( - {}, arenas - ), - sExpr, - ETranslationMode::SExpr - ); - - auto input1 = MakeHolder<TUnsplittedStreamImpl>(); - auto output1 = program1->Apply(std::move(input1)); - - auto input2 = MakeHolder<TUnsplittedStreamImpl>(); - auto output2 = program2->Apply(std::move(input2)); - - decltype(output1->Fetch()) variant1; - decltype(output2->Fetch()) variant2; - -#define ASSERT_EQUAL_FIELDS(X1, X2, I, F, E) \ - UNIT_ASSERT_EQUAL(X1.index(), I); \ - UNIT_ASSERT_EQUAL(X2.index(), I); \ - UNIT_ASSERT_EQUAL(std::get<I>(X1)->Get##F(), E); \ + { + auto program1 = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TUnsplitted>(), + TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>( + {}, arenas), + sExpr, + ETranslationMode::SExpr); + + auto program2 = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TUnsplitted>(), + TProtobufMultiOutputSpec<NPureCalcProto::TSplitted1, NPureCalcProto::TSplitted2, NPureCalcProto::TStringMessage>( + {}, arenas), + sExpr, + ETranslationMode::SExpr); + + auto input1 = MakeHolder<TUnsplittedStreamImpl>(); + auto output1 = program1->Apply(std::move(input1)); + + auto input2 = MakeHolder<TUnsplittedStreamImpl>(); + auto output2 = program2->Apply(std::move(input2)); + + decltype(output1->Fetch()) variant1; + decltype(output2->Fetch()) variant2; + +#define ASSERT_EQUAL_FIELDS(X1, X2, I, F, E) \ + UNIT_ASSERT_EQUAL(X1.index(), I); \ + UNIT_ASSERT_EQUAL(X2.index(), I); \ + UNIT_ASSERT_EQUAL(std::get<I>(X1)->Get##F(), E); \ UNIT_ASSERT_EQUAL(std::get<I>(X2)->Get##F(), E) - variant1 = output1->Fetch(); - variant2 = output2->Fetch(); - ASSERT_EQUAL_FIELDS(variant1, variant2, 2, X, "Error"); - ASSERT_EQUAL_FIELDS(variant1, variant2, 2, Arena, (arenas.Defined() ? arenas->at(2) : nullptr)); - - variant1 = output1->Fetch(); - variant2 = output2->Fetch(); - ASSERT_EQUAL_FIELDS(variant1, variant2, 1, CUint, 111); - ASSERT_EQUAL_FIELDS(variant1, variant2, 1, CString, "Hello!"); - ASSERT_EQUAL_FIELDS(variant1, variant2, 1, Arena, (arenas.Defined() ? arenas->at(1) : nullptr)); - - variant1 = output1->Fetch(); - variant2 = output2->Fetch(); - ASSERT_EQUAL_FIELDS(variant1, variant2, 0, BInt, -23); - ASSERT_EQUAL_FIELDS(variant1, variant2, 0, BString, "Hello!"); - ASSERT_EQUAL_FIELDS(variant1, variant2, 0, Arena, (arenas.Defined() ? arenas->at(0) : nullptr)); - - variant1 = output1->Fetch(); - variant2 = output2->Fetch(); - UNIT_ASSERT_EQUAL(variant1.index(), 0); - UNIT_ASSERT_EQUAL(variant2.index(), 0); - UNIT_ASSERT_EQUAL(std::get<0>(variant1), nullptr); - UNIT_ASSERT_EQUAL(std::get<0>(variant1), nullptr); + variant1 = output1->Fetch(); + variant2 = output2->Fetch(); + ASSERT_EQUAL_FIELDS(variant1, variant2, 2, X, "Error"); + ASSERT_EQUAL_FIELDS(variant1, variant2, 2, Arena, (arenas.Defined() ? arenas->at(2) : nullptr)); + + variant1 = output1->Fetch(); + variant2 = output2->Fetch(); + ASSERT_EQUAL_FIELDS(variant1, variant2, 1, CUint, 111); + ASSERT_EQUAL_FIELDS(variant1, variant2, 1, CString, "Hello!"); + ASSERT_EQUAL_FIELDS(variant1, variant2, 1, Arena, (arenas.Defined() ? arenas->at(1) : nullptr)); + + variant1 = output1->Fetch(); + variant2 = output2->Fetch(); + ASSERT_EQUAL_FIELDS(variant1, variant2, 0, BInt, -23); + ASSERT_EQUAL_FIELDS(variant1, variant2, 0, BString, "Hello!"); + ASSERT_EQUAL_FIELDS(variant1, variant2, 0, Arena, (arenas.Defined() ? arenas->at(0) : nullptr)); + + variant1 = output1->Fetch(); + variant2 = output2->Fetch(); + UNIT_ASSERT_EQUAL(variant1.index(), 0); + UNIT_ASSERT_EQUAL(variant2.index(), 0); + UNIT_ASSERT_EQUAL(std::get<0>(variant1), nullptr); + UNIT_ASSERT_EQUAL(std::get<0>(variant1), nullptr); #undef ASSERT_EQUAL_FIELDS - } - } - - Y_UNIT_TEST(TestMultiOutputs) { - CheckMultiOutputs(Nothing()); } +} - Y_UNIT_TEST(TestSupportedTypes) { +Y_UNIT_TEST(TestMultiOutputs) { + CheckMultiOutputs(Nothing()); +} - } +Y_UNIT_TEST(TestSupportedTypes) { +} - Y_UNIT_TEST(TestProtobufArena) { +Y_UNIT_TEST(TestProtobufArena) { + { + NPureCalcProto::TNestedRepeated input; + input.SetX(20); { - NPureCalcProto::TNestedRepeated input; - input.SetX(20); + auto* item = input.MutableY()->Add(); + item->SetX(100); { - auto* item = input.MutableY()->Add(); - item->SetX(100); - { - auto* y = item->MutableY(); - *y = GetCanonicalMessage(); - y->SetFUint64(1000); - } + auto* y = item->MutableY(); + *y = GetCanonicalMessage(); + y->SetFUint64(1000); } + } + { + auto* item = input.MutableY()->Add(); + item->SetX(200); { - auto* item = input.MutableY()->Add(); - item->SetX(200); - { - auto* y = item->MutableY(); - *y = GetCanonicalMessage(); - y->SetFUint64(2000); - } + auto* y = item->MutableY(); + *y = GetCanonicalMessage(); + y->SetFUint64(2000); } - - google::protobuf::Arena arena; - CheckPassThroughYql(input, &arena); } - { - google::protobuf::Arena arena1; - google::protobuf::Arena arena2; - TVector<google::protobuf::Arena*> arenas{&arena1, &arena2, &arena1}; - CheckMultiOutputs(arenas); - } + google::protobuf::Arena arena; + CheckPassThroughYql(input, &arena); } - Y_UNIT_TEST(TestFieldRenames) { - using namespace NYql::NPureCalc; + { + google::protobuf::Arena arena1; + google::protobuf::Arena arena2; + TVector<google::protobuf::Arena*> arenas{&arena1, &arena2, &arena1}; + CheckMultiOutputs(arenas); + } +} - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestFieldRenames) { + using namespace NYql::NPureCalc; - TString query = "SELECT InputAlias AS OutputAlias FROM Input"; + auto factory = MakeProgramFactory(); - auto inputProtoOptions = TProtoSchemaOptions(); - inputProtoOptions.SetFieldRenames({{"X", "InputAlias"}}); + TString query = "SELECT InputAlias AS OutputAlias FROM Input"; - auto inputSpec = TProtobufInputSpec<NPureCalcProto::TSimpleMessage>( - Nothing(), std::move(inputProtoOptions) - ); + auto inputProtoOptions = TProtoSchemaOptions(); + inputProtoOptions.SetFieldRenames({{"X", "InputAlias"}}); - auto outputProtoOptions = TProtoSchemaOptions(); - outputProtoOptions.SetFieldRenames({{"X", "OutputAlias"}}); + auto inputSpec = TProtobufInputSpec<NPureCalcProto::TSimpleMessage>( + Nothing(), std::move(inputProtoOptions)); - auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>( - std::move(outputProtoOptions) - ); + auto outputProtoOptions = TProtoSchemaOptions(); + outputProtoOptions.SetFieldRenames({{"X", "OutputAlias"}}); - { - auto program = factory->MakePullStreamProgram( - inputSpec, outputSpec, query, ETranslationMode::SQL - ); + auto outputSpec = TProtobufOutputSpec<NPureCalcProto::TSimpleMessage>( + std::move(outputProtoOptions)); - auto input = MakeHolder<TSimpleMessageStreamImpl>(1); - auto output = program->Apply(std::move(input)); + { + auto program = factory->MakePullStreamProgram( + inputSpec, outputSpec, query, ETranslationMode::SQL); - TVector<int> expected = {1}; - TVector<int> actual; + auto input = MakeHolder<TSimpleMessageStreamImpl>(1); + auto output = program->Apply(std::move(input)); - while (auto* x = output->Fetch()) { - actual.push_back(x->GetX()); - } + TVector<int> expected = {1}; + TVector<int> actual; - UNIT_ASSERT_VALUES_EQUAL(expected, actual); + while (auto* x = output->Fetch()) { + actual.push_back(x->GetX()); } - { - auto program = factory->MakePullListProgram( - inputSpec, outputSpec, query, ETranslationMode::SQL - ); + UNIT_ASSERT_VALUES_EQUAL(expected, actual); + } - auto input = MakeHolder<TSimpleMessageStreamImpl>(1); - auto output = program->Apply(std::move(input)); + { + auto program = factory->MakePullListProgram( + inputSpec, outputSpec, query, ETranslationMode::SQL); - TVector<int> expected = {1}; - TVector<int> actual; + auto input = MakeHolder<TSimpleMessageStreamImpl>(1); + auto output = program->Apply(std::move(input)); - while (auto* x = output->Fetch()) { - actual.push_back(x->GetX()); - } + TVector<int> expected = {1}; + TVector<int> actual; - UNIT_ASSERT_VALUES_EQUAL(expected, actual); + while (auto* x = output->Fetch()) { + actual.push_back(x->GetX()); } - { - auto program = factory->MakePushStreamProgram( - inputSpec, outputSpec, query, ETranslationMode::SQL - ); + UNIT_ASSERT_VALUES_EQUAL(expected, actual); + } - TVector<int> expected = {1, -100}; - TVector<int> actual; + { + auto program = factory->MakePushStreamProgram( + inputSpec, outputSpec, query, ETranslationMode::SQL); - auto consumer = MakeHolder<TSimpleMessageConsumerImpl>(&actual); - auto input = program->Apply(std::move(consumer)); + TVector<int> expected = {1, -100}; + TVector<int> actual; - NPureCalcProto::TSimpleMessage Message; + auto consumer = MakeHolder<TSimpleMessageConsumerImpl>(&actual); + auto input = program->Apply(std::move(consumer)); - Message.SetX(1); - input->OnObject(&Message); + NPureCalcProto::TSimpleMessage Message; - input->OnFinish(); + Message.SetX(1); + input->OnObject(&Message); - UNIT_ASSERT_VALUES_EQUAL(expected, actual); - } - } + input->OnFinish(); - Y_UNIT_TEST(TestNestedFieldRenames) { - using namespace NYql::NPureCalc; + UNIT_ASSERT_VALUES_EQUAL(expected, actual); + } +} - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestNestedFieldRenames) { + using namespace NYql::NPureCalc; - TString query = "SELECT InputAlias AS OutputAlias, X FROM Input"; + auto factory = MakeProgramFactory(); - auto inputProtoOptions = TProtoSchemaOptions(); - inputProtoOptions.SetFieldRenames({{"Y", "InputAlias"}, {"FInt64", "NestedField"}}); - inputProtoOptions.SetEnableRecursiveRenaming(true); + TString query = "SELECT InputAlias AS OutputAlias, X FROM Input"; - auto inputSpec = TProtobufInputSpec<NPureCalcProto::TSimpleNested>( - Nothing(), std::move(inputProtoOptions) - ); + auto inputProtoOptions = TProtoSchemaOptions(); + inputProtoOptions.SetFieldRenames({{"Y", "InputAlias"}, {"FInt64", "NestedField"}}); + inputProtoOptions.SetEnableRecursiveRenaming(true); - auto outputProtoOptions = TProtoSchemaOptions(); - outputProtoOptions.SetEnableRecursiveRenaming(true); + auto inputSpec = TProtobufInputSpec<NPureCalcProto::TSimpleNested>( + Nothing(), std::move(inputProtoOptions)); - outputProtoOptions.SetFieldRenames({{"Y", "OutputAlias"}, {"FInt64", "NestedField"}}); - auto outputSpecWithNestedRename = TProtobufOutputSpec<NPureCalcProto::TSimpleNested>( - outputProtoOptions - ); + auto outputProtoOptions = TProtoSchemaOptions(); + outputProtoOptions.SetEnableRecursiveRenaming(true); - outputProtoOptions.SetFieldRenames({{"Y", "OutputAlias"}}); - auto outputSpecWithoutNestedRename = TProtobufOutputSpec<NPureCalcProto::TSimpleNested>( - std::move(outputProtoOptions) - ); + outputProtoOptions.SetFieldRenames({{"Y", "OutputAlias"}, {"FInt64", "NestedField"}}); + auto outputSpecWithNestedRename = TProtobufOutputSpec<NPureCalcProto::TSimpleNested>( + outputProtoOptions); - { - auto program = factory->MakePullStreamProgram( - inputSpec, outputSpecWithNestedRename, query, ETranslationMode::SQL - ); + outputProtoOptions.SetFieldRenames({{"Y", "OutputAlias"}}); + auto outputSpecWithoutNestedRename = TProtobufOutputSpec<NPureCalcProto::TSimpleNested>( + std::move(outputProtoOptions)); - auto input = MakeHolder<TTSimpleNestedStreamImpl>(); - auto output = program->Apply(std::move(input)); + { + auto program = factory->MakePullStreamProgram( + inputSpec, outputSpecWithNestedRename, query, ETranslationMode::SQL); - TVector<int> expected = {3}; - TVector<int> actual; + auto input = MakeHolder<TTSimpleNestedStreamImpl>(); + auto output = program->Apply(std::move(input)); - while (auto* x = output->Fetch()) { - actual.push_back(x->GetY().GetFInt64()); - } + TVector<int> expected = {3}; + TVector<int> actual; - UNIT_ASSERT_VALUES_EQUAL(expected, actual); + while (auto* x = output->Fetch()) { + actual.push_back(x->GetY().GetFInt64()); } - { - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePullStreamProgram( - inputSpec, outputSpecWithoutNestedRename, query, ETranslationMode::SQL - ); - }(), TCompileError, "Failed to optimize"); - } + UNIT_ASSERT_VALUES_EQUAL(expected, actual); + } + + { + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullStreamProgram( + inputSpec, outputSpecWithoutNestedRename, query, ETranslationMode::SQL); + }(), TCompileError, "Failed to optimize"); } } +} // Y_UNIT_TEST_SUITE(TestProtoIO) diff --git a/yql/essentials/public/purecalc/io_specs/protobuf/ut/ya.make b/yql/essentials/public/purecalc/io_specs/protobuf/ut/ya.make index 2519816d02e..69e36132340 100644 --- a/yql/essentials/public/purecalc/io_specs/protobuf/ut/ya.make +++ b/yql/essentials/public/purecalc/io_specs/protobuf/ut/ya.make @@ -2,6 +2,8 @@ IF (NOT SANITIZER_TYPE) UNITTEST() +ENABLE(YQL_STYLE_CPP) + PEERDIR( library/cpp/protobuf/util yql/essentials/public/udf/service/exception_policy diff --git a/yql/essentials/public/purecalc/io_specs/protobuf/ya.make b/yql/essentials/public/purecalc/io_specs/protobuf/ya.make index b9441ceecf4..600bf9df61d 100644 --- a/yql/essentials/public/purecalc/io_specs/protobuf/ya.make +++ b/yql/essentials/public/purecalc/io_specs/protobuf/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + PEERDIR( yql/essentials/public/purecalc/common yql/essentials/public/purecalc/io_specs/protobuf_raw diff --git a/yql/essentials/public/purecalc/io_specs/protobuf_raw/proto_holder.h b/yql/essentials/public/purecalc/io_specs/protobuf_raw/proto_holder.h index 7d4d843bfcf..6808a150945 100644 --- a/yql/essentials/public/purecalc/io_specs/protobuf_raw/proto_holder.h +++ b/yql/essentials/public/purecalc/io_specs/protobuf_raw/proto_holder.h @@ -7,25 +7,25 @@ #include <type_traits> namespace NYql::NPureCalc { - class TProtoDestroyer { - public: - template <typename T> - static inline void Destroy(T* t) noexcept { - if (t->GetArena() == nullptr) { - CheckedDelete(t); - } +class TProtoDestroyer { +public: + template <typename T> + static inline void Destroy(T* t) noexcept { + if (t->GetArena() == nullptr) { + CheckedDelete(t); } - }; + } +}; - template <typename TProto> - concept IsProtoMessage = std::is_base_of_v<NProtoBuf::Message, TProto>; +template <typename TProto> +concept IsProtoMessage = std::is_base_of_v<NProtoBuf::Message, TProto>; - template <IsProtoMessage TProto> - using TProtoHolder = THolder<TProto, TProtoDestroyer>; +template <IsProtoMessage TProto> +using TProtoHolder = THolder<TProto, TProtoDestroyer>; - template <IsProtoMessage TProto, typename... TArgs> - TProtoHolder<TProto> MakeProtoHolder(NProtoBuf::Arena* arena, TArgs&&... args) { - auto* ptr = NProtoBuf::Arena::CreateMessage<TProto>(arena, std::forward<TArgs>(args)...); - return TProtoHolder<TProto>(ptr); - } +template <IsProtoMessage TProto, typename... TArgs> +TProtoHolder<TProto> MakeProtoHolder(NProtoBuf::Arena* arena, TArgs&&... args) { + auto* ptr = NProtoBuf::Arena::CreateMessage<TProto>(arena, std::forward<TArgs>(args)...); + return TProtoHolder<TProto>(ptr); } +} // namespace NYql::NPureCalc diff --git a/yql/essentials/public/purecalc/io_specs/protobuf_raw/spec.cpp b/yql/essentials/public/purecalc/io_specs/protobuf_raw/spec.cpp index 150ebfa80a8..af4561a49b8 100644 --- a/yql/essentials/public/purecalc/io_specs/protobuf_raw/spec.cpp +++ b/yql/essentials/public/purecalc/io_specs/protobuf_raw/spec.cpp @@ -17,8 +17,7 @@ using namespace NKikimr::NMiniKQL; TProtobufRawInputSpec::TProtobufRawInputSpec( const Descriptor& descriptor, const TMaybe<TString>& timestampColumn, - const TProtoSchemaOptions& options -) + const TProtoSchemaOptions& options) : Descriptor_(descriptor) , TimestampColumn_(timestampColumn) , SchemaOptions_(options) @@ -58,8 +57,7 @@ TProtobufRawOutputSpec::TProtobufRawOutputSpec( const Descriptor& descriptor, MessageFactory* factory, const TProtoSchemaOptions& options, - Arena* arena -) + Arena* arena) : Descriptor_(descriptor) , Factory_(factory) , SchemaOptions_(options) @@ -104,8 +102,7 @@ TProtobufRawMultiOutputSpec::TProtobufRawMultiOutputSpec( TVector<const Descriptor*> descriptors, TMaybe<TVector<MessageFactory*>> factories, const TProtoSchemaOptions& options, - TMaybe<TVector<Arena*>> arenas -) + TMaybe<TVector<Arena*>> arenas) : Descriptors_(std::move(descriptors)) , SchemaOptions_(options) { @@ -171,408 +168,325 @@ const TProtoSchemaOptions& TProtobufRawMultiOutputSpec::GetSchemaOptions() const } namespace { - struct TFieldMapping { - TString Name; - const FieldDescriptor* Field; - TVector<TFieldMapping> NestedFields; - }; - - void FillFieldMappingsImpl( - const TStructType* fromType, - const Descriptor& toType, - TVector<TFieldMapping>& mappings, - const TMaybe<TString>& timestampColumn, - bool listIsOptional, - bool enableRecursiveRenaming, - const THashMap<TString, TString>& inverseFieldRenames - ) { - static const THashMap<TString, TString> emptyInverseFieldRenames; - mappings.resize(fromType->GetMembersCount()); - for (ui32 i = 0; i < fromType->GetMembersCount(); ++i) { - TString fieldName(fromType->GetMemberName(i)); - if (auto fieldRenamePtr = inverseFieldRenames.FindPtr(fieldName)) { - fieldName = *fieldRenamePtr; - } +struct TFieldMapping { + TString Name; + const FieldDescriptor* Field; + TVector<TFieldMapping> NestedFields; +}; + +void FillFieldMappingsImpl( + const TStructType* fromType, + const Descriptor& toType, + TVector<TFieldMapping>& mappings, + const TMaybe<TString>& timestampColumn, + bool listIsOptional, + bool enableRecursiveRenaming, + const THashMap<TString, TString>& inverseFieldRenames) { + static const THashMap<TString, TString> emptyInverseFieldRenames; + mappings.resize(fromType->GetMembersCount()); + for (ui32 i = 0; i < fromType->GetMembersCount(); ++i) { + TString fieldName(fromType->GetMemberName(i)); + if (auto fieldRenamePtr = inverseFieldRenames.FindPtr(fieldName)) { + fieldName = *fieldRenamePtr; + } - mappings[i].Name = fieldName; - mappings[i].Field = toType.FindFieldByName(fieldName); - YQL_ENSURE( - mappings[i].Field || timestampColumn && *timestampColumn == fieldName, - "Missing field: " << fieldName); - - const auto* fieldType = fromType->GetMemberType(i); - if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::List) { - const auto* listType = static_cast<const NKikimr::NMiniKQL::TListType*>(fieldType); - fieldType = listType->GetItemType(); - } else if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Optional) { - const auto* optionalType = static_cast<const NKikimr::NMiniKQL::TOptionalType*>(fieldType); - fieldType = optionalType->GetItemType(); - - if (listIsOptional) { - if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::List) { - const auto* listType = static_cast<const NKikimr::NMiniKQL::TListType*>(fieldType); - fieldType = listType->GetItemType(); - } + mappings[i].Name = fieldName; + mappings[i].Field = toType.FindFieldByName(fieldName); + YQL_ENSURE( + mappings[i].Field || timestampColumn && *timestampColumn == fieldName, + "Missing field: " << fieldName); + + const auto* fieldType = fromType->GetMemberType(i); + if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::List) { + const auto* listType = static_cast<const NKikimr::NMiniKQL::TListType*>(fieldType); + fieldType = listType->GetItemType(); + } else if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Optional) { + const auto* optionalType = static_cast<const NKikimr::NMiniKQL::TOptionalType*>(fieldType); + fieldType = optionalType->GetItemType(); + + if (listIsOptional) { + if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::List) { + const auto* listType = static_cast<const NKikimr::NMiniKQL::TListType*>(fieldType); + fieldType = listType->GetItemType(); } } - YQL_ENSURE(fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Struct || + } + YQL_ENSURE(fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Struct || fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Data, - "unsupported field kind [" << fieldType->GetKindAsStr() << "], field [" << fieldName << "]"); - if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Struct) { - FillFieldMappingsImpl( - static_cast<const NKikimr::NMiniKQL::TStructType*>(fieldType), - *mappings[i].Field->message_type(), - mappings[i].NestedFields, - Nothing(), - listIsOptional, - enableRecursiveRenaming, - enableRecursiveRenaming ? inverseFieldRenames : emptyInverseFieldRenames - ); - } + "unsupported field kind [" << fieldType->GetKindAsStr() << "], field [" << fieldName << "]"); + if (fieldType->GetKind() == NKikimr::NMiniKQL::TType::EKind::Struct) { + FillFieldMappingsImpl( + static_cast<const NKikimr::NMiniKQL::TStructType*>(fieldType), + *mappings[i].Field->message_type(), + mappings[i].NestedFields, + Nothing(), + listIsOptional, + enableRecursiveRenaming, + enableRecursiveRenaming ? inverseFieldRenames : emptyInverseFieldRenames); } } +} - /** - * Fills a tree of field mappings from the given yql struct type to protobuf message. - * - * @param fromType source yql type. - * @param toType target protobuf message type. - * @param mappings destination vector will be filled with field descriptors. Order of descriptors will match - * the order of field names. - */ - void FillFieldMappings( - const TStructType* fromType, - const Descriptor& toType, - TVector<TFieldMapping>& mappings, - const TMaybe<TString>& timestampColumn, - bool listIsOptional, - bool enableRecursiveRenaming, - const THashMap<TString, TString>& fieldRenames - ) { - THashMap<TString, TString> inverseFieldRenames; - for (const auto& [source, target]: fieldRenames) { - auto [iterator, emplaced] = inverseFieldRenames.emplace(target, source); - Y_ENSURE(emplaced, "Duplicate rename field found: " << source << " -> " << target); - } - - FillFieldMappingsImpl(fromType, toType, mappings, timestampColumn, listIsOptional, enableRecursiveRenaming, inverseFieldRenames); +/** + * Fills a tree of field mappings from the given yql struct type to protobuf message. + * + * @param fromType source yql type. + * @param toType target protobuf message type. + * @param mappings destination vector will be filled with field descriptors. Order of descriptors will match + * the order of field names. + */ +void FillFieldMappings( + const TStructType* fromType, + const Descriptor& toType, + TVector<TFieldMapping>& mappings, + const TMaybe<TString>& timestampColumn, + bool listIsOptional, + bool enableRecursiveRenaming, + const THashMap<TString, TString>& fieldRenames) { + THashMap<TString, TString> inverseFieldRenames; + for (const auto& [source, target] : fieldRenames) { + auto [iterator, emplaced] = inverseFieldRenames.emplace(target, source); + Y_ENSURE(emplaced, "Duplicate rename field found: " << source << " -> " << target); } - /** - * Extract field values from the given protobuf message into an array of unboxed values. - * - * @param factory to create nested unboxed values. - * @param source source protobuf message. - * @param destination destination array of unboxed values. Each element in the array corresponds to a field - * in the protobuf message. - * @param mappings vector of protobuf field descriptors which denotes relation between fields of the - * source message and elements of the destination array. - * @param scratch temporary string which will be used during conversion. - */ - void FillInputValue( - const THolderFactory& factory, - const Message* source, - TUnboxedValue* destination, - const TVector<TFieldMapping>& mappings, - const TMaybe<TString>& timestampColumn, - ITimeProvider* timeProvider, - EEnumPolicy enumPolicy - ) { - TString scratch; - auto reflection = source->GetReflection(); - for (ui32 i = 0; i < mappings.size(); ++i) { - auto mapping = mappings[i]; - if (!mapping.Field) { - YQL_ENSURE(timestampColumn && mapping.Name == *timestampColumn); - destination[i] = TUnboxedValuePod(timeProvider->Now().MicroSeconds()); - continue; - } - - const auto type = mapping.Field->type(); - if (mapping.Field->label() == FieldDescriptor::LABEL_REPEATED) { - const auto size = static_cast<ui32>(reflection->FieldSize(*source, mapping.Field)); - if (size == 0) { - destination[i] = factory.GetEmptyContainerLazy(); - } else { - TUnboxedValue* inplace = nullptr; - destination[i] = factory.CreateDirectArrayHolder(size, inplace); - for (ui32 j = 0; j < size; ++j) { - switch (type) { - case FieldDescriptor::TYPE_DOUBLE: - inplace[j] = TUnboxedValuePod(reflection->GetRepeatedDouble(*source, mapping.Field, j)); - break; - - case FieldDescriptor::TYPE_FLOAT: - inplace[j] = TUnboxedValuePod(reflection->GetRepeatedFloat(*source, mapping.Field, j)); - break; - - case FieldDescriptor::TYPE_INT64: - case FieldDescriptor::TYPE_SFIXED64: - case FieldDescriptor::TYPE_SINT64: - inplace[j] = TUnboxedValuePod(reflection->GetRepeatedInt64(*source, mapping.Field, j)); - break; - - case FieldDescriptor::TYPE_ENUM: - switch (EnumFormatType(*mapping.Field, enumPolicy)) { - case EEnumFormatType::Int32: - inplace[j] = TUnboxedValuePod(reflection->GetRepeatedEnumValue(*source, mapping.Field, j)); - break; - case EEnumFormatType::String: - inplace[j] = MakeString(reflection->GetRepeatedEnum(*source, mapping.Field, j)->name()); - break; - } - break; - - case FieldDescriptor::TYPE_UINT64: - case FieldDescriptor::TYPE_FIXED64: - inplace[j] = TUnboxedValuePod(reflection->GetRepeatedUInt64(*source, mapping.Field, j)); - break; - - case FieldDescriptor::TYPE_INT32: - case FieldDescriptor::TYPE_SFIXED32: - case FieldDescriptor::TYPE_SINT32: - inplace[j] = TUnboxedValuePod(reflection->GetRepeatedInt32(*source, mapping.Field, j)); - break; - - case FieldDescriptor::TYPE_UINT32: - case FieldDescriptor::TYPE_FIXED32: - inplace[j] = TUnboxedValuePod(reflection->GetRepeatedUInt32(*source, mapping.Field, j)); - break; - - case FieldDescriptor::TYPE_BOOL: - inplace[j] = TUnboxedValuePod(reflection->GetRepeatedBool(*source, mapping.Field, j)); - break; - - case FieldDescriptor::TYPE_STRING: - inplace[j] = MakeString(reflection->GetRepeatedStringReference(*source, mapping.Field, j, &scratch)); - break; - - case FieldDescriptor::TYPE_BYTES: - inplace[j] = MakeString(reflection->GetRepeatedStringReference(*source, mapping.Field, j, &scratch)); - break; - - case FieldDescriptor::TYPE_MESSAGE: - { - const Message& nestedMessage = reflection->GetRepeatedMessage(*source, mapping.Field, j); - TUnboxedValue* nestedValues = nullptr; - inplace[j] = factory.CreateDirectArrayHolder(static_cast<ui32>(mapping.NestedFields.size()), - nestedValues); - FillInputValue(factory, &nestedMessage, nestedValues, mapping.NestedFields, Nothing(), timeProvider, enumPolicy); - } - break; - - default: - ythrow yexception() << "Unsupported protobuf type: " << mapping.Field->type_name() << ", field: " << mapping.Field->name(); - } - } - } - } else { - if (!reflection->HasField(*source, mapping.Field)) { - continue; - } - - switch (type) { - case FieldDescriptor::TYPE_DOUBLE: - destination[i] = TUnboxedValuePod(reflection->GetDouble(*source, mapping.Field)); - break; - - case FieldDescriptor::TYPE_FLOAT: - destination[i] = TUnboxedValuePod(reflection->GetFloat(*source, mapping.Field)); - break; - - case FieldDescriptor::TYPE_INT64: - case FieldDescriptor::TYPE_SFIXED64: - case FieldDescriptor::TYPE_SINT64: - destination[i] = TUnboxedValuePod(reflection->GetInt64(*source, mapping.Field)); - break; - - case FieldDescriptor::TYPE_ENUM: - switch (EnumFormatType(*mapping.Field, enumPolicy)) { - case EEnumFormatType::Int32: - destination[i] = TUnboxedValuePod(reflection->GetEnumValue(*source, mapping.Field)); - break; - case EEnumFormatType::String: - destination[i] = MakeString(reflection->GetEnum(*source, mapping.Field)->name()); - break; - } - break; - - case FieldDescriptor::TYPE_UINT64: - case FieldDescriptor::TYPE_FIXED64: - destination[i] = TUnboxedValuePod(reflection->GetUInt64(*source, mapping.Field)); - break; - - case FieldDescriptor::TYPE_INT32: - case FieldDescriptor::TYPE_SFIXED32: - case FieldDescriptor::TYPE_SINT32: - destination[i] = TUnboxedValuePod(reflection->GetInt32(*source, mapping.Field)); - break; - - case FieldDescriptor::TYPE_UINT32: - case FieldDescriptor::TYPE_FIXED32: - destination[i] = TUnboxedValuePod(reflection->GetUInt32(*source, mapping.Field)); - break; - - case FieldDescriptor::TYPE_BOOL: - destination[i] = TUnboxedValuePod(reflection->GetBool(*source, mapping.Field)); - break; - - case FieldDescriptor::TYPE_STRING: - destination[i] = MakeString(reflection->GetStringReference(*source, mapping.Field, &scratch)); - break; - - case FieldDescriptor::TYPE_BYTES: - destination[i] = MakeString(reflection->GetStringReference(*source, mapping.Field, &scratch)); - break; - case FieldDescriptor::TYPE_MESSAGE: - { - const Message& nestedMessage = reflection->GetMessage(*source, mapping.Field); - TUnboxedValue* nestedValues = nullptr; - destination[i] = factory.CreateDirectArrayHolder(static_cast<ui32>(mapping.NestedFields.size()), - nestedValues); - FillInputValue(factory, &nestedMessage, nestedValues, mapping.NestedFields, Nothing(), timeProvider, enumPolicy); - } - break; + FillFieldMappingsImpl(fromType, toType, mappings, timestampColumn, listIsOptional, enableRecursiveRenaming, inverseFieldRenames); +} - default: - ythrow yexception() << "Unsupported protobuf type: " << mapping.Field->type_name() - << ", field: " << mapping.Field->name(); - } - } +/** + * Extract field values from the given protobuf message into an array of unboxed values. + * + * @param factory to create nested unboxed values. + * @param source source protobuf message. + * @param destination destination array of unboxed values. Each element in the array corresponds to a field + * in the protobuf message. + * @param mappings vector of protobuf field descriptors which denotes relation between fields of the + * source message and elements of the destination array. + * @param scratch temporary string which will be used during conversion. + */ +void FillInputValue( + const THolderFactory& factory, + const Message* source, + TUnboxedValue* destination, + const TVector<TFieldMapping>& mappings, + const TMaybe<TString>& timestampColumn, + ITimeProvider* timeProvider, + EEnumPolicy enumPolicy) { + TString scratch; + auto reflection = source->GetReflection(); + for (ui32 i = 0; i < mappings.size(); ++i) { + auto mapping = mappings[i]; + if (!mapping.Field) { + YQL_ENSURE(timestampColumn && mapping.Name == *timestampColumn); + destination[i] = TUnboxedValuePod(timeProvider->Now().MicroSeconds()); + continue; } - } - - /** - * Convert unboxed value to protobuf. - * - * @param source unboxed value to extract data from. Type of the value should be struct. It's UB to pass - * a non-struct value here. - * @param destination destination message. Data in this message will be overwritten - * by data from unboxed value. - * @param mappings vector of protobuf field descriptors which denotes relation between struct fields - * and message fields. For any i-th element of this vector, type of the i-th element of - * the unboxed structure must match type of the field pointed by descriptor. Size of this - * vector should match the number of fields in the struct. - */ - void FillOutputMessage( - const TUnboxedValue& source, - Message* destination, - const TVector<TFieldMapping>& mappings, - EEnumPolicy enumPolicy - ) { - auto reflection = destination->GetReflection(); - for (ui32 i = 0; i < mappings.size(); ++i) { - const auto& mapping = mappings[i]; - const auto& cell = source.GetElement(i); - if (!cell) { - reflection->ClearField(destination, mapping.Field); - continue; - } - const auto type = mapping.Field->type(); - if (mapping.Field->label() == FieldDescriptor::LABEL_REPEATED) { - const auto iter = cell.GetListIterator(); - reflection->ClearField(destination, mapping.Field); - for (TUnboxedValue item; iter.Next(item);) { - switch (mapping.Field->type()) { + const auto type = mapping.Field->type(); + if (mapping.Field->label() == FieldDescriptor::LABEL_REPEATED) { + const auto size = static_cast<ui32>(reflection->FieldSize(*source, mapping.Field)); + if (size == 0) { + destination[i] = factory.GetEmptyContainerLazy(); + } else { + TUnboxedValue* inplace = nullptr; + destination[i] = factory.CreateDirectArrayHolder(size, inplace); + for (ui32 j = 0; j < size; ++j) { + switch (type) { case FieldDescriptor::TYPE_DOUBLE: - reflection->AddDouble(destination, mapping.Field, item.Get<double>()); + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedDouble(*source, mapping.Field, j)); break; case FieldDescriptor::TYPE_FLOAT: - reflection->AddFloat(destination, mapping.Field, item.Get<float>()); + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedFloat(*source, mapping.Field, j)); break; case FieldDescriptor::TYPE_INT64: case FieldDescriptor::TYPE_SFIXED64: case FieldDescriptor::TYPE_SINT64: - reflection->AddInt64(destination, mapping.Field, item.Get<i64>()); + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedInt64(*source, mapping.Field, j)); break; - case FieldDescriptor::TYPE_ENUM: { + case FieldDescriptor::TYPE_ENUM: switch (EnumFormatType(*mapping.Field, enumPolicy)) { case EEnumFormatType::Int32: - reflection->AddEnumValue(destination, mapping.Field, item.Get<i32>()); + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedEnumValue(*source, mapping.Field, j)); break; - case EEnumFormatType::String: { - auto enumValueDescriptor = mapping.Field->enum_type()->FindValueByName(TString(item.AsStringRef())); - if (!enumValueDescriptor) { - enumValueDescriptor = mapping.Field->default_value_enum(); - } - reflection->AddEnum(destination, mapping.Field, enumValueDescriptor); + case EEnumFormatType::String: + inplace[j] = MakeString(reflection->GetRepeatedEnum(*source, mapping.Field, j)->name()); break; - } } break; - } case FieldDescriptor::TYPE_UINT64: case FieldDescriptor::TYPE_FIXED64: - reflection->AddUInt64(destination, mapping.Field, item.Get<ui64>()); + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedUInt64(*source, mapping.Field, j)); break; case FieldDescriptor::TYPE_INT32: case FieldDescriptor::TYPE_SFIXED32: case FieldDescriptor::TYPE_SINT32: - reflection->AddInt32(destination, mapping.Field, item.Get<i32>()); + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedInt32(*source, mapping.Field, j)); break; case FieldDescriptor::TYPE_UINT32: case FieldDescriptor::TYPE_FIXED32: - reflection->AddUInt32(destination, mapping.Field, item.Get<ui32>()); + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedUInt32(*source, mapping.Field, j)); break; case FieldDescriptor::TYPE_BOOL: - reflection->AddBool(destination, mapping.Field, item.Get<bool>()); + inplace[j] = TUnboxedValuePod(reflection->GetRepeatedBool(*source, mapping.Field, j)); break; case FieldDescriptor::TYPE_STRING: - reflection->AddString(destination, mapping.Field, TString(item.AsStringRef())); + inplace[j] = MakeString(reflection->GetRepeatedStringReference(*source, mapping.Field, j, &scratch)); break; case FieldDescriptor::TYPE_BYTES: - reflection->AddString(destination, mapping.Field, TString(item.AsStringRef())); + inplace[j] = MakeString(reflection->GetRepeatedStringReference(*source, mapping.Field, j, &scratch)); break; - case FieldDescriptor::TYPE_MESSAGE: - { - auto* nestedMessage = reflection->AddMessage(destination, mapping.Field); - FillOutputMessage(item, nestedMessage, mapping.NestedFields, enumPolicy); - } - break; + case FieldDescriptor::TYPE_MESSAGE: { + const Message& nestedMessage = reflection->GetRepeatedMessage(*source, mapping.Field, j); + TUnboxedValue* nestedValues = nullptr; + inplace[j] = factory.CreateDirectArrayHolder(static_cast<ui32>(mapping.NestedFields.size()), + nestedValues); + FillInputValue(factory, &nestedMessage, nestedValues, mapping.NestedFields, Nothing(), timeProvider, enumPolicy); + } break; default: - ythrow yexception() << "Unsupported protobuf type: " - << mapping.Field->type_name() << ", field: " << mapping.Field->name(); + ythrow yexception() << "Unsupported protobuf type: " << mapping.Field->type_name() << ", field: " << mapping.Field->name(); } } - } else { - switch (type) { + } + } else { + if (!reflection->HasField(*source, mapping.Field)) { + continue; + } + + switch (type) { + case FieldDescriptor::TYPE_DOUBLE: + destination[i] = TUnboxedValuePod(reflection->GetDouble(*source, mapping.Field)); + break; + + case FieldDescriptor::TYPE_FLOAT: + destination[i] = TUnboxedValuePod(reflection->GetFloat(*source, mapping.Field)); + break; + + case FieldDescriptor::TYPE_INT64: + case FieldDescriptor::TYPE_SFIXED64: + case FieldDescriptor::TYPE_SINT64: + destination[i] = TUnboxedValuePod(reflection->GetInt64(*source, mapping.Field)); + break; + + case FieldDescriptor::TYPE_ENUM: + switch (EnumFormatType(*mapping.Field, enumPolicy)) { + case EEnumFormatType::Int32: + destination[i] = TUnboxedValuePod(reflection->GetEnumValue(*source, mapping.Field)); + break; + case EEnumFormatType::String: + destination[i] = MakeString(reflection->GetEnum(*source, mapping.Field)->name()); + break; + } + break; + + case FieldDescriptor::TYPE_UINT64: + case FieldDescriptor::TYPE_FIXED64: + destination[i] = TUnboxedValuePod(reflection->GetUInt64(*source, mapping.Field)); + break; + + case FieldDescriptor::TYPE_INT32: + case FieldDescriptor::TYPE_SFIXED32: + case FieldDescriptor::TYPE_SINT32: + destination[i] = TUnboxedValuePod(reflection->GetInt32(*source, mapping.Field)); + break; + + case FieldDescriptor::TYPE_UINT32: + case FieldDescriptor::TYPE_FIXED32: + destination[i] = TUnboxedValuePod(reflection->GetUInt32(*source, mapping.Field)); + break; + + case FieldDescriptor::TYPE_BOOL: + destination[i] = TUnboxedValuePod(reflection->GetBool(*source, mapping.Field)); + break; + + case FieldDescriptor::TYPE_STRING: + destination[i] = MakeString(reflection->GetStringReference(*source, mapping.Field, &scratch)); + break; + + case FieldDescriptor::TYPE_BYTES: + destination[i] = MakeString(reflection->GetStringReference(*source, mapping.Field, &scratch)); + break; + case FieldDescriptor::TYPE_MESSAGE: { + const Message& nestedMessage = reflection->GetMessage(*source, mapping.Field); + TUnboxedValue* nestedValues = nullptr; + destination[i] = factory.CreateDirectArrayHolder(static_cast<ui32>(mapping.NestedFields.size()), + nestedValues); + FillInputValue(factory, &nestedMessage, nestedValues, mapping.NestedFields, Nothing(), timeProvider, enumPolicy); + } break; + + default: + ythrow yexception() << "Unsupported protobuf type: " << mapping.Field->type_name() + << ", field: " << mapping.Field->name(); + } + } + } +} + +/** + * Convert unboxed value to protobuf. + * + * @param source unboxed value to extract data from. Type of the value should be struct. It's UB to pass + * a non-struct value here. + * @param destination destination message. Data in this message will be overwritten + * by data from unboxed value. + * @param mappings vector of protobuf field descriptors which denotes relation between struct fields + * and message fields. For any i-th element of this vector, type of the i-th element of + * the unboxed structure must match type of the field pointed by descriptor. Size of this + * vector should match the number of fields in the struct. + */ +void FillOutputMessage( + const TUnboxedValue& source, + Message* destination, + const TVector<TFieldMapping>& mappings, + EEnumPolicy enumPolicy) { + auto reflection = destination->GetReflection(); + for (ui32 i = 0; i < mappings.size(); ++i) { + const auto& mapping = mappings[i]; + const auto& cell = source.GetElement(i); + if (!cell) { + reflection->ClearField(destination, mapping.Field); + continue; + } + const auto type = mapping.Field->type(); + if (mapping.Field->label() == FieldDescriptor::LABEL_REPEATED) { + const auto iter = cell.GetListIterator(); + reflection->ClearField(destination, mapping.Field); + for (TUnboxedValue item; iter.Next(item);) { + switch (mapping.Field->type()) { case FieldDescriptor::TYPE_DOUBLE: - reflection->SetDouble(destination, mapping.Field, cell.Get<double>()); + reflection->AddDouble(destination, mapping.Field, item.Get<double>()); break; case FieldDescriptor::TYPE_FLOAT: - reflection->SetFloat(destination, mapping.Field, cell.Get<float>()); + reflection->AddFloat(destination, mapping.Field, item.Get<float>()); break; case FieldDescriptor::TYPE_INT64: case FieldDescriptor::TYPE_SFIXED64: case FieldDescriptor::TYPE_SINT64: - reflection->SetInt64(destination, mapping.Field, cell.Get<i64>()); + reflection->AddInt64(destination, mapping.Field, item.Get<i64>()); break; case FieldDescriptor::TYPE_ENUM: { switch (EnumFormatType(*mapping.Field, enumPolicy)) { case EEnumFormatType::Int32: - reflection->SetEnumValue(destination, mapping.Field, cell.Get<i32>()); + reflection->AddEnumValue(destination, mapping.Field, item.Get<i32>()); break; case EEnumFormatType::String: { - auto enumValueDescriptor = mapping.Field->enum_type()->FindValueByName(TString(cell.AsStringRef())); + auto enumValueDescriptor = mapping.Field->enum_type()->FindValueByName(TString(item.AsStringRef())); if (!enumValueDescriptor) { enumValueDescriptor = mapping.Field->default_value_enum(); } - reflection->SetEnum(destination, mapping.Field, enumValueDescriptor); + reflection->AddEnum(destination, mapping.Field, enumValueDescriptor); break; } } @@ -581,438 +495,497 @@ namespace { case FieldDescriptor::TYPE_UINT64: case FieldDescriptor::TYPE_FIXED64: - reflection->SetUInt64(destination, mapping.Field, cell.Get<ui64>()); + reflection->AddUInt64(destination, mapping.Field, item.Get<ui64>()); break; case FieldDescriptor::TYPE_INT32: case FieldDescriptor::TYPE_SFIXED32: case FieldDescriptor::TYPE_SINT32: - reflection->SetInt32(destination, mapping.Field, cell.Get<i32>()); + reflection->AddInt32(destination, mapping.Field, item.Get<i32>()); break; case FieldDescriptor::TYPE_UINT32: case FieldDescriptor::TYPE_FIXED32: - reflection->SetUInt32(destination, mapping.Field, cell.Get<ui32>()); + reflection->AddUInt32(destination, mapping.Field, item.Get<ui32>()); break; case FieldDescriptor::TYPE_BOOL: - reflection->SetBool(destination, mapping.Field, cell.Get<bool>()); + reflection->AddBool(destination, mapping.Field, item.Get<bool>()); break; case FieldDescriptor::TYPE_STRING: - reflection->SetString(destination, mapping.Field, TString(cell.AsStringRef())); + reflection->AddString(destination, mapping.Field, TString(item.AsStringRef())); break; case FieldDescriptor::TYPE_BYTES: - reflection->SetString(destination, mapping.Field, TString(cell.AsStringRef())); + reflection->AddString(destination, mapping.Field, TString(item.AsStringRef())); break; - case FieldDescriptor::TYPE_MESSAGE: - { - auto* nestedMessage = reflection->MutableMessage(destination, mapping.Field); - FillOutputMessage(cell, nestedMessage, mapping.NestedFields, enumPolicy); - } - break; + case FieldDescriptor::TYPE_MESSAGE: { + auto* nestedMessage = reflection->AddMessage(destination, mapping.Field); + FillOutputMessage(item, nestedMessage, mapping.NestedFields, enumPolicy); + } break; default: ythrow yexception() << "Unsupported protobuf type: " << mapping.Field->type_name() << ", field: " << mapping.Field->name(); } } + } else { + switch (type) { + case FieldDescriptor::TYPE_DOUBLE: + reflection->SetDouble(destination, mapping.Field, cell.Get<double>()); + break; + + case FieldDescriptor::TYPE_FLOAT: + reflection->SetFloat(destination, mapping.Field, cell.Get<float>()); + break; + + case FieldDescriptor::TYPE_INT64: + case FieldDescriptor::TYPE_SFIXED64: + case FieldDescriptor::TYPE_SINT64: + reflection->SetInt64(destination, mapping.Field, cell.Get<i64>()); + break; + + case FieldDescriptor::TYPE_ENUM: { + switch (EnumFormatType(*mapping.Field, enumPolicy)) { + case EEnumFormatType::Int32: + reflection->SetEnumValue(destination, mapping.Field, cell.Get<i32>()); + break; + case EEnumFormatType::String: { + auto enumValueDescriptor = mapping.Field->enum_type()->FindValueByName(TString(cell.AsStringRef())); + if (!enumValueDescriptor) { + enumValueDescriptor = mapping.Field->default_value_enum(); + } + reflection->SetEnum(destination, mapping.Field, enumValueDescriptor); + break; + } + } + break; + } + + case FieldDescriptor::TYPE_UINT64: + case FieldDescriptor::TYPE_FIXED64: + reflection->SetUInt64(destination, mapping.Field, cell.Get<ui64>()); + break; + + case FieldDescriptor::TYPE_INT32: + case FieldDescriptor::TYPE_SFIXED32: + case FieldDescriptor::TYPE_SINT32: + reflection->SetInt32(destination, mapping.Field, cell.Get<i32>()); + break; + + case FieldDescriptor::TYPE_UINT32: + case FieldDescriptor::TYPE_FIXED32: + reflection->SetUInt32(destination, mapping.Field, cell.Get<ui32>()); + break; + + case FieldDescriptor::TYPE_BOOL: + reflection->SetBool(destination, mapping.Field, cell.Get<bool>()); + break; + + case FieldDescriptor::TYPE_STRING: + reflection->SetString(destination, mapping.Field, TString(cell.AsStringRef())); + break; + + case FieldDescriptor::TYPE_BYTES: + reflection->SetString(destination, mapping.Field, TString(cell.AsStringRef())); + break; + + case FieldDescriptor::TYPE_MESSAGE: { + auto* nestedMessage = reflection->MutableMessage(destination, mapping.Field); + FillOutputMessage(cell, nestedMessage, mapping.NestedFields, enumPolicy); + } break; + + default: + ythrow yexception() << "Unsupported protobuf type: " + << mapping.Field->type_name() << ", field: " << mapping.Field->name(); + } } } +} - /** - * Converts input messages to unboxed values. - */ - class TInputConverter { - protected: - IWorker* Worker_; - TVector<TFieldMapping> Mappings_; - TPlainContainerCache Cache_; - TMaybe<TString> TimestampColumn_; - EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32; - - public: - explicit TInputConverter(const TProtobufRawInputSpec& inputSpec, IWorker* worker) - : Worker_(worker) - , TimestampColumn_(inputSpec.GetTimestampColumn()) - , EnumPolicy_(inputSpec.GetSchemaOptions().EnumPolicy) - { - FillFieldMappings( - Worker_->GetInputType(), inputSpec.GetDescriptor(), - Mappings_, TimestampColumn_, - inputSpec.GetSchemaOptions().ListIsOptional, - inputSpec.GetSchemaOptions().EnableRecursiveRenaming, - inputSpec.GetSchemaOptions().FieldRenames - ); - } +/** + * Converts input messages to unboxed values. + */ +class TInputConverter { +protected: + IWorker* Worker_; + TVector<TFieldMapping> Mappings_; + TPlainContainerCache Cache_; + TMaybe<TString> TimestampColumn_; + EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32; + +public: + explicit TInputConverter(const TProtobufRawInputSpec& inputSpec, IWorker* worker) + : Worker_(worker) + , TimestampColumn_(inputSpec.GetTimestampColumn()) + , EnumPolicy_(inputSpec.GetSchemaOptions().EnumPolicy) + { + FillFieldMappings( + Worker_->GetInputType(), inputSpec.GetDescriptor(), + Mappings_, TimestampColumn_, + inputSpec.GetSchemaOptions().ListIsOptional, + inputSpec.GetSchemaOptions().EnableRecursiveRenaming, + inputSpec.GetSchemaOptions().FieldRenames); + } + +public: + void DoConvert(const Message* message, TUnboxedValue& result) { + auto& holderFactory = Worker_->GetGraph().GetHolderFactory(); + TUnboxedValue* items = nullptr; + result = Cache_.NewArray(holderFactory, static_cast<ui32>(Mappings_.size()), items); + FillInputValue(holderFactory, message, items, Mappings_, TimestampColumn_, Worker_->GetTimeProvider(), EnumPolicy_); + } - public: - void DoConvert(const Message* message, TUnboxedValue& result) { - auto& holderFactory = Worker_->GetGraph().GetHolderFactory(); - TUnboxedValue* items = nullptr; - result = Cache_.NewArray(holderFactory, static_cast<ui32>(Mappings_.size()), items); - FillInputValue(holderFactory, message, items, Mappings_, TimestampColumn_, Worker_->GetTimeProvider(), EnumPolicy_); + void ClearCache() { + Cache_.Clear(); + } +}; + +template <typename TOutputSpec> +using OutputItemType = typename TOutputSpecTraits<TOutputSpec>::TOutputItemType; + +template <typename TOutputSpec> +class TOutputConverter; + +/** + * Converts unboxed values to output messages (single-output program case). + */ +template <> +class TOutputConverter<TProtobufRawOutputSpec> { +protected: + IWorker* Worker_; + TVector<TFieldMapping> OutputColumns_; + TProtoHolder<Message> Message_; + EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32; + +public: + explicit TOutputConverter(const TProtobufRawOutputSpec& outputSpec, IWorker* worker) + : Worker_(worker) + , EnumPolicy_(outputSpec.GetSchemaOptions().EnumPolicy) + { + if (!Worker_->GetOutputType()->IsStruct()) { + ythrow yexception() << "protobuf output spec does not support multiple outputs"; } - void ClearCache() { - Cache_.Clear(); + FillFieldMappings( + static_cast<const NKikimr::NMiniKQL::TStructType*>(Worker_->GetOutputType()), + outputSpec.GetDescriptor(), + OutputColumns_, + Nothing(), + outputSpec.GetSchemaOptions().ListIsOptional, + outputSpec.GetSchemaOptions().EnableRecursiveRenaming, + outputSpec.GetSchemaOptions().FieldRenames); + + auto* factory = outputSpec.GetFactory(); + + if (!factory) { + factory = MessageFactory::generated_factory(); } - }; - - template <typename TOutputSpec> - using OutputItemType = typename TOutputSpecTraits<TOutputSpec>::TOutputItemType; - - template <typename TOutputSpec> - class TOutputConverter; - - /** - * Converts unboxed values to output messages (single-output program case). - */ - template <> - class TOutputConverter<TProtobufRawOutputSpec> { - protected: - IWorker* Worker_; - TVector<TFieldMapping> OutputColumns_; - TProtoHolder<Message> Message_; - EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32; - - public: - explicit TOutputConverter(const TProtobufRawOutputSpec& outputSpec, IWorker* worker) - : Worker_(worker) - , EnumPolicy_(outputSpec.GetSchemaOptions().EnumPolicy) - { - if (!Worker_->GetOutputType()->IsStruct()) { - ythrow yexception() << "protobuf output spec does not support multiple outputs"; - } + + Message_.Reset(factory->GetPrototype(&outputSpec.GetDescriptor())->New(outputSpec.GetArena())); + } + + OutputItemType<TProtobufRawOutputSpec> DoConvert(TUnboxedValue value) { + FillOutputMessage(value, Message_.Get(), OutputColumns_, EnumPolicy_); + return Message_.Get(); + } +}; + +/* + * Converts unboxed values to output type (multi-output programs case). + */ +template <> +class TOutputConverter<TProtobufRawMultiOutputSpec> { +protected: + IWorker* Worker_; + TVector<TVector<TFieldMapping>> OutputColumns_; + TVector<TProtoHolder<Message>> Messages_; + EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32; + +public: + explicit TOutputConverter(const TProtobufRawMultiOutputSpec& outputSpec, IWorker* worker) + : Worker_(worker) + , EnumPolicy_(outputSpec.GetSchemaOptions().EnumPolicy) + { + const auto* outputType = Worker_->GetOutputType(); + Y_ENSURE(outputType->IsVariant(), "protobuf multi-output spec requires multi-output program"); + const auto* variantType = static_cast<const NKikimr::NMiniKQL::TVariantType*>(outputType); + Y_ENSURE( + variantType->GetUnderlyingType()->IsTuple(), + "protobuf multi-output spec requires variant over tuple as program output type"); + Y_ENSURE( + outputSpec.GetOutputsNumber() == variantType->GetAlternativesCount(), + "number of outputs provided by spec does not match number of variant alternatives"); + + auto defaultFactory = MessageFactory::generated_factory(); + + for (ui32 i = 0; i < variantType->GetAlternativesCount(); ++i) { + const auto* type = variantType->GetAlternativeType(i); + Y_ASSERT(type->IsStruct()); + Y_ASSERT(OutputColumns_.size() == i && Messages_.size() == i); + + OutputColumns_.push_back({}); FillFieldMappings( - static_cast<const NKikimr::NMiniKQL::TStructType*>(Worker_->GetOutputType()), - outputSpec.GetDescriptor(), - OutputColumns_, + static_cast<const NKikimr::NMiniKQL::TStructType*>(type), + outputSpec.GetDescriptor(i), + OutputColumns_.back(), Nothing(), outputSpec.GetSchemaOptions().ListIsOptional, - outputSpec.GetSchemaOptions().EnableRecursiveRenaming, - outputSpec.GetSchemaOptions().FieldRenames - ); - - auto* factory = outputSpec.GetFactory(); + false, + {}); + auto factory = outputSpec.GetFactory(i); if (!factory) { - factory = MessageFactory::generated_factory(); + factory = defaultFactory; } - Message_.Reset(factory->GetPrototype(&outputSpec.GetDescriptor())->New(outputSpec.GetArena())); - } - - OutputItemType<TProtobufRawOutputSpec> DoConvert(TUnboxedValue value) { - FillOutputMessage(value, Message_.Get(), OutputColumns_, EnumPolicy_); - return Message_.Get(); + Messages_.push_back(TProtoHolder<Message>( + factory->GetPrototype(&outputSpec.GetDescriptor(i))->New(outputSpec.GetArena(i)))); } - }; - - /* - * Converts unboxed values to output type (multi-output programs case). - */ - template <> - class TOutputConverter<TProtobufRawMultiOutputSpec> { - protected: - IWorker* Worker_; - TVector<TVector<TFieldMapping>> OutputColumns_; - TVector<TProtoHolder<Message>> Messages_; - EEnumPolicy EnumPolicy_ = EEnumPolicy::Int32; - - public: - explicit TOutputConverter(const TProtobufRawMultiOutputSpec& outputSpec, IWorker* worker) - : Worker_(worker) - , EnumPolicy_(outputSpec.GetSchemaOptions().EnumPolicy) - { - const auto* outputType = Worker_->GetOutputType(); - Y_ENSURE(outputType->IsVariant(), "protobuf multi-output spec requires multi-output program"); - const auto* variantType = static_cast<const NKikimr::NMiniKQL::TVariantType*>(outputType); - Y_ENSURE( - variantType->GetUnderlyingType()->IsTuple(), - "protobuf multi-output spec requires variant over tuple as program output type" - ); - Y_ENSURE( - outputSpec.GetOutputsNumber() == variantType->GetAlternativesCount(), - "number of outputs provided by spec does not match number of variant alternatives" - ); - - auto defaultFactory = MessageFactory::generated_factory(); - - for (ui32 i = 0; i < variantType->GetAlternativesCount(); ++i) { - const auto* type = variantType->GetAlternativeType(i); - Y_ASSERT(type->IsStruct()); - Y_ASSERT(OutputColumns_.size() == i && Messages_.size() == i); - - OutputColumns_.push_back({}); - - FillFieldMappings( - static_cast<const NKikimr::NMiniKQL::TStructType*>(type), - outputSpec.GetDescriptor(i), - OutputColumns_.back(), - Nothing(), - outputSpec.GetSchemaOptions().ListIsOptional, - false, - {} - ); - - auto factory = outputSpec.GetFactory(i); - if (!factory) { - factory = defaultFactory; - } + } - Messages_.push_back(TProtoHolder<Message>( - factory->GetPrototype(&outputSpec.GetDescriptor(i))->New(outputSpec.GetArena(i)) - )); - } - } + OutputItemType<TProtobufRawMultiOutputSpec> DoConvert(TUnboxedValue value) { + auto index = value.GetVariantIndex(); + auto msgPtr = Messages_[index].Get(); + FillOutputMessage(value.GetVariantItem(), msgPtr, OutputColumns_[index], EnumPolicy_); + return {index, msgPtr}; + } +}; + +/** + * List (or, better, stream) of unboxed values. Used as an input value in pull workers. + */ +class TProtoListValue final: public TCustomListValue { +private: + mutable bool HasIterator_ = false; + THolder<IStream<Message*>> Underlying_; + TInputConverter Converter_; + IWorker* Worker_; + TScopedAlloc& ScopedAlloc_; + +public: + TProtoListValue( + TMemoryUsageInfo* memInfo, + const TProtobufRawInputSpec& inputSpec, + THolder<IStream<Message*>> underlying, + IWorker* worker) + : TCustomListValue(memInfo) + , Underlying_(std::move(underlying)) + , Converter_(inputSpec, worker) + , Worker_(worker) + , ScopedAlloc_(Worker_->GetScopedAlloc()) + { + } - OutputItemType<TProtobufRawMultiOutputSpec> DoConvert(TUnboxedValue value) { - auto index = value.GetVariantIndex(); - auto msgPtr = Messages_[index].Get(); - FillOutputMessage(value.GetVariantItem(), msgPtr, OutputColumns_[index], EnumPolicy_); - return {index, msgPtr}; - } - }; - - /** - * List (or, better, stream) of unboxed values. Used as an input value in pull workers. - */ - class TProtoListValue final: public TCustomListValue { - private: - mutable bool HasIterator_ = false; - THolder<IStream<Message*>> Underlying_; - TInputConverter Converter_; - IWorker* Worker_; - TScopedAlloc& ScopedAlloc_; - - public: - TProtoListValue( - TMemoryUsageInfo* memInfo, - const TProtobufRawInputSpec& inputSpec, - THolder<IStream<Message*>> underlying, - IWorker* worker - ) - : TCustomListValue(memInfo) - , Underlying_(std::move(underlying)) - , Converter_(inputSpec, worker) - , Worker_(worker) - , ScopedAlloc_(Worker_->GetScopedAlloc()) + ~TProtoListValue() override { { + // This list value stored in the worker's computation graph and destroyed upon the computation + // graph's destruction. This brings us to an interesting situation: scoped alloc is acquired, + // worker and computation graph are half-way destroyed, and now it's our turn to die. The problem is, + // the underlying stream may own another worker. This happens when chaining programs. Now, to destroy + // that worker correctly, we need to release our scoped alloc (because that worker has its own + // computation graph and scoped alloc). + // By the way, note that we shouldn't interact with the worker here because worker is in the middle of + // its own destruction. So we're using our own reference to the scoped alloc. That reference is alive + // because scoped alloc destroyed after computation graph. + auto unguard = Unguard(ScopedAlloc_); + Underlying_.Destroy(); } + } - ~TProtoListValue() override { - { - // This list value stored in the worker's computation graph and destroyed upon the computation - // graph's destruction. This brings us to an interesting situation: scoped alloc is acquired, - // worker and computation graph are half-way destroyed, and now it's our turn to die. The problem is, - // the underlying stream may own another worker. This happens when chaining programs. Now, to destroy - // that worker correctly, we need to release our scoped alloc (because that worker has its own - // computation graph and scoped alloc). - // By the way, note that we shouldn't interact with the worker here because worker is in the middle of - // its own destruction. So we're using our own reference to the scoped alloc. That reference is alive - // because scoped alloc destroyed after computation graph. - auto unguard = Unguard(ScopedAlloc_); - Underlying_.Destroy(); - } - } +public: + TUnboxedValue GetListIterator() const override { + YQL_ENSURE(!HasIterator_, "Only one pass over input is supported"); + HasIterator_ = true; + return TUnboxedValuePod(const_cast<TProtoListValue*>(this)); + } - public: - TUnboxedValue GetListIterator() const override { - YQL_ENSURE(!HasIterator_, "Only one pass over input is supported"); - HasIterator_ = true; - return TUnboxedValuePod(const_cast<TProtoListValue*>(this)); + bool Next(TUnboxedValue& result) override { + const Message* message; + { + auto unguard = Unguard(ScopedAlloc_); + message = Underlying_->Fetch(); } - bool Next(TUnboxedValue& result) override { - const Message* message; - { - auto unguard = Unguard(ScopedAlloc_); - message = Underlying_->Fetch(); - } - - if (!message) { - return false; - } + if (!message) { + return false; + } - Converter_.DoConvert(message, result); + Converter_.DoConvert(message, result); - return true; - } + return true; + } - EFetchStatus Fetch(TUnboxedValue& result) override { - if (Next(result)) { - return EFetchStatus::Ok; - } else { - return EFetchStatus::Finish; - } - } - }; - - /** - * Consumer which converts messages to unboxed values and relays them to the worker. Used as a return value - * of the push processor's Process function. - */ - class TProtoConsumerImpl final: public IConsumer<Message*> { - private: - TWorkerHolder<IPushStreamWorker> WorkerHolder_; - TInputConverter Converter_; - - public: - explicit TProtoConsumerImpl( - const TProtobufRawInputSpec& inputSpec, - TWorkerHolder<IPushStreamWorker> worker - ) - : WorkerHolder_(std::move(worker)) - , Converter_(inputSpec, WorkerHolder_.Get()) - { + EFetchStatus Fetch(TUnboxedValue& result) override { + if (Next(result)) { + return EFetchStatus::Ok; + } else { + return EFetchStatus::Finish; } + } +}; + +/** + * Consumer which converts messages to unboxed values and relays them to the worker. Used as a return value + * of the push processor's Process function. + */ +class TProtoConsumerImpl final: public IConsumer<Message*> { +private: + TWorkerHolder<IPushStreamWorker> WorkerHolder_; + TInputConverter Converter_; + +public: + explicit TProtoConsumerImpl( + const TProtobufRawInputSpec& inputSpec, + TWorkerHolder<IPushStreamWorker> worker) + : WorkerHolder_(std::move(worker)) + , Converter_(inputSpec, WorkerHolder_.Get()) + { + } - ~TProtoConsumerImpl() override { - with_lock(WorkerHolder_->GetScopedAlloc()) { - Converter_.ClearCache(); - } + ~TProtoConsumerImpl() override { + with_lock (WorkerHolder_->GetScopedAlloc()) { + Converter_.ClearCache(); } + } - public: - void OnObject(Message* message) override { - TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); +public: + void OnObject(Message* message) override { + TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); - with_lock(WorkerHolder_->GetScopedAlloc()) { - TUnboxedValue result; - Converter_.DoConvert(message, result); - WorkerHolder_->Push(std::move(result)); - } + with_lock (WorkerHolder_->GetScopedAlloc()) { + TUnboxedValue result; + Converter_.DoConvert(message, result); + WorkerHolder_->Push(std::move(result)); } + } - void OnFinish() override { - TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); + void OnFinish() override { + TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); - with_lock(WorkerHolder_->GetScopedAlloc()) { - WorkerHolder_->OnFinish(); - } - } - }; - - /** - * Protobuf input stream for unboxed value streams. - */ - template <typename TOutputSpec> - class TRawProtoStreamImpl final: public IStream<OutputItemType<TOutputSpec>> { - protected: - TWorkerHolder<IPullStreamWorker> WorkerHolder_; - TOutputConverter<TOutputSpec> Converter_; - - public: - explicit TRawProtoStreamImpl(const TOutputSpec& outputSpec, TWorkerHolder<IPullStreamWorker> worker) - : WorkerHolder_(std::move(worker)) - , Converter_(outputSpec, WorkerHolder_.Get()) - { + with_lock (WorkerHolder_->GetScopedAlloc()) { + WorkerHolder_->OnFinish(); } + } +}; - public: - OutputItemType<TOutputSpec> Fetch() override { - TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); +/** + * Protobuf input stream for unboxed value streams. + */ +template <typename TOutputSpec> +class TRawProtoStreamImpl final: public IStream<OutputItemType<TOutputSpec>> { +protected: + TWorkerHolder<IPullStreamWorker> WorkerHolder_; + TOutputConverter<TOutputSpec> Converter_; + +public: + explicit TRawProtoStreamImpl(const TOutputSpec& outputSpec, TWorkerHolder<IPullStreamWorker> worker) + : WorkerHolder_(std::move(worker)) + , Converter_(outputSpec, WorkerHolder_.Get()) + { + } - with_lock(WorkerHolder_->GetScopedAlloc()) { - TUnboxedValue value; +public: + OutputItemType<TOutputSpec> Fetch() override { + TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); - auto status = WorkerHolder_->GetOutput().Fetch(value); + with_lock (WorkerHolder_->GetScopedAlloc()) { + TUnboxedValue value; - YQL_ENSURE(status != EFetchStatus::Yield, "Yield is not supported in pull mode"); + auto status = WorkerHolder_->GetOutput().Fetch(value); - if (status == EFetchStatus::Finish) { - return TOutputSpecTraits<TOutputSpec>::StreamSentinel; - } + YQL_ENSURE(status != EFetchStatus::Yield, "Yield is not supported in pull mode"); - return Converter_.DoConvert(value); + if (status == EFetchStatus::Finish) { + return TOutputSpecTraits<TOutputSpec>::StreamSentinel; } + + return Converter_.DoConvert(value); } - }; - - /** - * Protobuf input stream for unboxed value lists. - */ - template <typename TOutputSpec> - class TRawProtoListImpl final: public IStream<OutputItemType<TOutputSpec>> { - protected: - TWorkerHolder<IPullListWorker> WorkerHolder_; - TOutputConverter<TOutputSpec> Converter_; - - public: - explicit TRawProtoListImpl(const TOutputSpec& outputSpec, TWorkerHolder<IPullListWorker> worker) - : WorkerHolder_(std::move(worker)) - , Converter_(outputSpec, WorkerHolder_.Get()) - { - } + } +}; - public: - OutputItemType<TOutputSpec> Fetch() override { - TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); +/** + * Protobuf input stream for unboxed value lists. + */ +template <typename TOutputSpec> +class TRawProtoListImpl final: public IStream<OutputItemType<TOutputSpec>> { +protected: + TWorkerHolder<IPullListWorker> WorkerHolder_; + TOutputConverter<TOutputSpec> Converter_; + +public: + explicit TRawProtoListImpl(const TOutputSpec& outputSpec, TWorkerHolder<IPullListWorker> worker) + : WorkerHolder_(std::move(worker)) + , Converter_(outputSpec, WorkerHolder_.Get()) + { + } - with_lock(WorkerHolder_->GetScopedAlloc()) { - TUnboxedValue value; +public: + OutputItemType<TOutputSpec> Fetch() override { + TBindTerminator bind(WorkerHolder_->GetGraph().GetTerminator()); - if (!WorkerHolder_->GetOutputIterator().Next(value)) { - return TOutputSpecTraits<TOutputSpec>::StreamSentinel; - } + with_lock (WorkerHolder_->GetScopedAlloc()) { + TUnboxedValue value; - return Converter_.DoConvert(value); + if (!WorkerHolder_->GetOutputIterator().Next(value)) { + return TOutputSpecTraits<TOutputSpec>::StreamSentinel; } + + return Converter_.DoConvert(value); } - }; - - /** - * Push relay used to convert generated unboxed value to a message and push it to the user's consumer. - */ - template <typename TOutputSpec> - class TPushRelayImpl: public IConsumer<const TUnboxedValue*> { - private: - THolder<IConsumer<OutputItemType<TOutputSpec>>> Underlying_; - TOutputConverter<TOutputSpec> Converter_; - IWorker* Worker_; - - public: - TPushRelayImpl( - const TOutputSpec& outputSpec, - IPushStreamWorker* worker, - THolder<IConsumer<OutputItemType<TOutputSpec>>> underlying - ) - : Underlying_(std::move(underlying)) - , Converter_(outputSpec, worker) - , Worker_(worker) - { - } + } +}; + +/** + * Push relay used to convert generated unboxed value to a message and push it to the user's consumer. + */ +template <typename TOutputSpec> +class TPushRelayImpl: public IConsumer<const TUnboxedValue*> { +private: + THolder<IConsumer<OutputItemType<TOutputSpec>>> Underlying_; + TOutputConverter<TOutputSpec> Converter_; + IWorker* Worker_; + +public: + TPushRelayImpl( + const TOutputSpec& outputSpec, + IPushStreamWorker* worker, + THolder<IConsumer<OutputItemType<TOutputSpec>>> underlying) + : Underlying_(std::move(underlying)) + , Converter_(outputSpec, worker) + , Worker_(worker) + { + } - // If you've read a comment in the TProtoListValue's destructor, you may be wondering why don't we do the - // same trick here. Well, that's because in push mode, consumer is destroyed before acquiring scoped alloc and - // destroying computation graph. + // If you've read a comment in the TProtoListValue's destructor, you may be wondering why don't we do the + // same trick here. Well, that's because in push mode, consumer is destroyed before acquiring scoped alloc and + // destroying computation graph. - public: - void OnObject(const TUnboxedValue* value) override { - OutputItemType<TOutputSpec> message = Converter_.DoConvert(*value); - auto unguard = Unguard(Worker_->GetScopedAlloc()); - Underlying_->OnObject(message); - } +public: + void OnObject(const TUnboxedValue* value) override { + OutputItemType<TOutputSpec> message = Converter_.DoConvert(*value); + auto unguard = Unguard(Worker_->GetScopedAlloc()); + Underlying_->OnObject(message); + } - void OnFinish() override { - auto unguard = Unguard(Worker_->GetScopedAlloc()); - Underlying_->OnFinish(); - } - }; -} + void OnFinish() override { + auto unguard = Unguard(Worker_->GetScopedAlloc()); + Underlying_->OnFinish(); + } +}; +} // namespace using ConsumerType = TInputSpecTraits<TProtobufRawInputSpec>::TConsumerType; void TInputSpecTraits<TProtobufRawInputSpec>::PreparePullStreamWorker( const TProtobufRawInputSpec& inputSpec, IPullStreamWorker* worker, - THolder<IStream<Message*>> stream -) { - with_lock(worker->GetScopedAlloc()) { + THolder<IStream<Message*>> stream) { + with_lock (worker->GetScopedAlloc()) { worker->SetInput( worker->GetGraph().GetHolderFactory().Create<TProtoListValue>(inputSpec, std::move(stream), worker), 0); } @@ -1021,9 +994,8 @@ void TInputSpecTraits<TProtobufRawInputSpec>::PreparePullStreamWorker( void TInputSpecTraits<TProtobufRawInputSpec>::PreparePullListWorker( const TProtobufRawInputSpec& inputSpec, IPullListWorker* worker, - THolder<IStream<Message*>> stream -) { - with_lock(worker->GetScopedAlloc()) { + THolder<IStream<Message*>> stream) { + with_lock (worker->GetScopedAlloc()) { worker->SetInput( worker->GetGraph().GetHolderFactory().Create<TProtoListValue>(inputSpec, std::move(stream), worker), 0); } @@ -1031,8 +1003,7 @@ void TInputSpecTraits<TProtobufRawInputSpec>::PreparePullListWorker( ConsumerType TInputSpecTraits<TProtobufRawInputSpec>::MakeConsumer( const TProtobufRawInputSpec& inputSpec, - TWorkerHolder<IPushStreamWorker> worker -) { + TWorkerHolder<IPushStreamWorker> worker) { return MakeHolder<TProtoConsumerImpl>(inputSpec, std::move(worker)); } @@ -1043,44 +1014,38 @@ using PullListReturnType = typename TOutputSpecTraits<TOutputSpec>::TPullListRet PullStreamReturnType<TProtobufRawOutputSpec> TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullStreamWorkerToOutputType( const TProtobufRawOutputSpec& outputSpec, - TWorkerHolder<IPullStreamWorker> worker -) { + TWorkerHolder<IPullStreamWorker> worker) { return MakeHolder<TRawProtoStreamImpl<TProtobufRawOutputSpec>>(outputSpec, std::move(worker)); } PullListReturnType<TProtobufRawOutputSpec> TOutputSpecTraits<TProtobufRawOutputSpec>::ConvertPullListWorkerToOutputType( const TProtobufRawOutputSpec& outputSpec, - TWorkerHolder<IPullListWorker> worker -) { + TWorkerHolder<IPullListWorker> worker) { return MakeHolder<TRawProtoListImpl<TProtobufRawOutputSpec>>(outputSpec, std::move(worker)); } void TOutputSpecTraits<TProtobufRawOutputSpec>::SetConsumerToWorker( const TProtobufRawOutputSpec& outputSpec, IPushStreamWorker* worker, - THolder<IConsumer<TOutputItemType>> consumer -) { + THolder<IConsumer<TOutputItemType>> consumer) { worker->SetConsumer(MakeHolder<TPushRelayImpl<TProtobufRawOutputSpec>>(outputSpec, worker, std::move(consumer))); } PullStreamReturnType<TProtobufRawMultiOutputSpec> TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullStreamWorkerToOutputType( const TProtobufRawMultiOutputSpec& outputSpec, - TWorkerHolder<IPullStreamWorker> worker -) { + TWorkerHolder<IPullStreamWorker> worker) { return MakeHolder<TRawProtoStreamImpl<TProtobufRawMultiOutputSpec>>(outputSpec, std::move(worker)); } PullListReturnType<TProtobufRawMultiOutputSpec> TOutputSpecTraits<TProtobufRawMultiOutputSpec>::ConvertPullListWorkerToOutputType( const TProtobufRawMultiOutputSpec& outputSpec, - TWorkerHolder<IPullListWorker> worker -) { + TWorkerHolder<IPullListWorker> worker) { return MakeHolder<TRawProtoListImpl<TProtobufRawMultiOutputSpec>>(outputSpec, std::move(worker)); } void TOutputSpecTraits<TProtobufRawMultiOutputSpec>::SetConsumerToWorker( const TProtobufRawMultiOutputSpec& outputSpec, IPushStreamWorker* worker, - THolder<IConsumer<TOutputItemType>> consumer -) { + THolder<IConsumer<TOutputItemType>> consumer) { worker->SetConsumer(MakeHolder<TPushRelayImpl<TProtobufRawMultiOutputSpec>>(outputSpec, worker, std::move(consumer))); } diff --git a/yql/essentials/public/purecalc/io_specs/protobuf_raw/spec.h b/yql/essentials/public/purecalc/io_specs/protobuf_raw/spec.h index 436b243bffd..a2ef2829a10 100644 --- a/yql/essentials/public/purecalc/io_specs/protobuf_raw/spec.h +++ b/yql/essentials/public/purecalc/io_specs/protobuf_raw/spec.h @@ -8,250 +8,247 @@ #include <util/generic/maybe.h> namespace NYql { - namespace NPureCalc { - /** - * Processing mode for working with raw protobuf message inputs. - * - * In this mode purecalc accept pointers to abstract protobuf messages and processes them using the reflection - * mechanism. All passed messages should have the same descriptor (the one you pass to the constructor - * of the input spec). - * - * All working modes are implemented. In pull stream and pull list modes a program would accept a single object - * stream of const protobuf messages. In push mode, a program will return a consumer of const protobuf messages. - * - * The program synopsis follows: - * - * @code - * ... TPullStreamProgram::Apply(IStream<google::protobuf::Message*>); - * ... TPullListProgram::Apply(IStream<google::protobuf::Message*>); - * TConsumer<google::protobuf::Message*> TPushStreamProgram::Apply(...); - * @endcode - */ - class TProtobufRawInputSpec: public TInputSpecBase { - private: - const google::protobuf::Descriptor& Descriptor_; - const TMaybe<TString> TimestampColumn_; - const TProtoSchemaOptions SchemaOptions_; - mutable TVector<NYT::TNode> SavedSchemas_; - - public: - /** - * Build input spec and associate the given message descriptor. - */ - explicit TProtobufRawInputSpec( - const google::protobuf::Descriptor& descriptor, - const TMaybe<TString>& timestampColumn = Nothing(), - const TProtoSchemaOptions& options = {} - ); - - public: - const TVector<NYT::TNode>& GetSchemas() const override; - - /** - * Get the descriptor associated with this spec. - */ - const google::protobuf::Descriptor& GetDescriptor() const; - - const TMaybe<TString>& GetTimestampColumn() const; - - /* - * Get options that customize input struct type building. - */ - const TProtoSchemaOptions& GetSchemaOptions() const; - }; - - /** - * Processing mode for working with raw protobuf message outputs. - * - * In this mode purecalc yields pointers to abstract protobuf messages. All generated messages share the same - * descriptor so they can be safely converted into an appropriate message type. - * - * Note that one should not expect that the returned pointer will be valid forever; in can (and will) become - * outdated once a new output is requested/pushed. - * - * All working modes are implemented. In pull stream and pull list modes a program will return an object - * stream of non-const protobuf messages. In push mode, it will accept a single consumer of non-const - * messages. - * - * The program synopsis follows: - * - * @code - * IStream<google::protobuf::Message*> TPullStreamProgram::Apply(...); - * IStream<google::protobuf::Message*> TPullListProgram::Apply(...); - * ... TPushStreamProgram::Apply(TConsumer<google::protobuf::Message*>); - * @endcode - */ - class TProtobufRawOutputSpec: public TOutputSpecBase { - private: - const google::protobuf::Descriptor& Descriptor_; - google::protobuf::MessageFactory* Factory_; - TProtoSchemaOptions SchemaOptions_; - google::protobuf::Arena* Arena_; - mutable TMaybe<NYT::TNode> SavedSchema_; - - public: - /** - * Build output spec and associate the given message descriptor and maybe the given message factory. - */ - explicit TProtobufRawOutputSpec( - const google::protobuf::Descriptor& descriptor, - google::protobuf::MessageFactory* = nullptr, - const TProtoSchemaOptions& options = {}, - google::protobuf::Arena* arena = nullptr - ); - - public: - const NYT::TNode& GetSchema() const override; - - /** - * Get the descriptor associated with this spec. - */ - const google::protobuf::Descriptor& GetDescriptor() const; - - /** - * Set a new message factory which will be used to generate messages. Pass a null pointer to use the - * default factory. - */ - void SetFactory(google::protobuf::MessageFactory*); - - /** - * Get the message factory which is currently associated with this spec. - */ - google::protobuf::MessageFactory* GetFactory() const; - - /** - * Set a new arena which will be used to generate messages. Pass a null pointer to create on the heap. - */ - void SetArena(google::protobuf::Arena*); - - /** - * Get the arena which is currently associated with this spec. - */ - google::protobuf::Arena* GetArena() const; - - /** - * Get options that customize output struct type building. - */ - const TProtoSchemaOptions& GetSchemaOptions() const; - }; - - /** - * Processing mode for working with raw protobuf messages and several outputs. - * - * The program synopsis follows: - * - * @code - * IStream<std::pair<ui32, google::protobuf::Message*>> TPullStreamProgram::Apply(...); - * IStream<std::pair<ui32, google::protobuf::Message*>> TPullListProgram::Apply(...); - * ... TPushStreamProgram::Apply(TConsumer<std::pair<ui32, google::protobuf::Message*>>); - * @endcode - */ - class TProtobufRawMultiOutputSpec: public TOutputSpecBase { - private: - TVector<const google::protobuf::Descriptor*> Descriptors_; - TVector<google::protobuf::MessageFactory*> Factories_; - const TProtoSchemaOptions SchemaOptions_; - TVector<google::protobuf::Arena*> Arenas_; - mutable NYT::TNode SavedSchema_; - - public: - TProtobufRawMultiOutputSpec( - TVector<const google::protobuf::Descriptor*>, - TMaybe<TVector<google::protobuf::MessageFactory*>> = {}, - const TProtoSchemaOptions& options = {}, - TMaybe<TVector<google::protobuf::Arena*>> arenas = {} - ); - - public: - const NYT::TNode& GetSchema() const override; - - /** - * Get the descriptor associated with given output. - */ - const google::protobuf::Descriptor& GetDescriptor(ui32) const; - - /** - * Set a new message factory for given output. It will be used to generate messages for this output. - */ - void SetFactory(ui32, google::protobuf::MessageFactory*); - - /** - * Get the message factory which is currently associated with given output. - */ - google::protobuf::MessageFactory* GetFactory(ui32) const; - - /** - * Set a new arena for given output. It will be used to generate messages for this output. - */ - void SetArena(ui32, google::protobuf::Arena*); - - /** - * Get the arena which is currently associated with given output. - */ - google::protobuf::Arena* GetArena(ui32) const; - - /** - * Get number of outputs for this spec. - */ - ui32 GetOutputsNumber() const; - - /** - * Get options that customize output struct type building. - */ - const TProtoSchemaOptions& GetSchemaOptions() const; - }; - - template <> - struct TInputSpecTraits<TProtobufRawInputSpec> { - static const constexpr bool IsPartial = false; - - static const constexpr bool SupportPullStreamMode = true; - static const constexpr bool SupportPullListMode = true; - static const constexpr bool SupportPushStreamMode = true; - - using TConsumerType = THolder<IConsumer<google::protobuf::Message*>>; - - static void PreparePullStreamWorker(const TProtobufRawInputSpec&, IPullStreamWorker*, THolder<IStream<google::protobuf::Message*>>); - static void PreparePullListWorker(const TProtobufRawInputSpec&, IPullListWorker*, THolder<IStream<google::protobuf::Message*>>); - static TConsumerType MakeConsumer(const TProtobufRawInputSpec&, TWorkerHolder<IPushStreamWorker>); - }; - - template <> - struct TOutputSpecTraits<TProtobufRawOutputSpec> { - static const constexpr bool IsPartial = false; - - static const constexpr bool SupportPullStreamMode = true; - static const constexpr bool SupportPullListMode = true; - static const constexpr bool SupportPushStreamMode = true; - - using TOutputItemType = google::protobuf::Message*; - using TPullStreamReturnType = THolder<IStream<TOutputItemType>>; - using TPullListReturnType = THolder<IStream<TOutputItemType>>; - - static const constexpr TOutputItemType StreamSentinel = nullptr; - - static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufRawOutputSpec&, TWorkerHolder<IPullStreamWorker>); - static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufRawOutputSpec&, TWorkerHolder<IPullListWorker>); - static void SetConsumerToWorker(const TProtobufRawOutputSpec&, IPushStreamWorker*, THolder<IConsumer<TOutputItemType>>); - }; - - template <> - struct TOutputSpecTraits<TProtobufRawMultiOutputSpec> { - static const constexpr bool IsPartial = false; - - static const constexpr bool SupportPullStreamMode = true; - static const constexpr bool SupportPullListMode = true; - static const constexpr bool SupportPushStreamMode = true; - - using TOutputItemType = std::pair<ui32, google::protobuf::Message*>; - using TPullStreamReturnType = THolder<IStream<TOutputItemType>>; - using TPullListReturnType = THolder<IStream<TOutputItemType>>; - - static const constexpr TOutputItemType StreamSentinel = {0, nullptr}; - - static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufRawMultiOutputSpec&, TWorkerHolder<IPullStreamWorker>); - static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufRawMultiOutputSpec&, TWorkerHolder<IPullListWorker>); - static void SetConsumerToWorker(const TProtobufRawMultiOutputSpec&, IPushStreamWorker*, THolder<IConsumer<TOutputItemType>>); - }; - } -} +namespace NPureCalc { +/** + * Processing mode for working with raw protobuf message inputs. + * + * In this mode purecalc accept pointers to abstract protobuf messages and processes them using the reflection + * mechanism. All passed messages should have the same descriptor (the one you pass to the constructor + * of the input spec). + * + * All working modes are implemented. In pull stream and pull list modes a program would accept a single object + * stream of const protobuf messages. In push mode, a program will return a consumer of const protobuf messages. + * + * The program synopsis follows: + * + * @code + * ... TPullStreamProgram::Apply(IStream<google::protobuf::Message*>); + * ... TPullListProgram::Apply(IStream<google::protobuf::Message*>); + * TConsumer<google::protobuf::Message*> TPushStreamProgram::Apply(...); + * @endcode + */ +class TProtobufRawInputSpec: public TInputSpecBase { +private: + const google::protobuf::Descriptor& Descriptor_; + const TMaybe<TString> TimestampColumn_; + const TProtoSchemaOptions SchemaOptions_; + mutable TVector<NYT::TNode> SavedSchemas_; + +public: + /** + * Build input spec and associate the given message descriptor. + */ + explicit TProtobufRawInputSpec( + const google::protobuf::Descriptor& descriptor, + const TMaybe<TString>& timestampColumn = Nothing(), + const TProtoSchemaOptions& options = {}); + +public: + const TVector<NYT::TNode>& GetSchemas() const override; + + /** + * Get the descriptor associated with this spec. + */ + const google::protobuf::Descriptor& GetDescriptor() const; + + const TMaybe<TString>& GetTimestampColumn() const; + + /* + * Get options that customize input struct type building. + */ + const TProtoSchemaOptions& GetSchemaOptions() const; +}; + +/** + * Processing mode for working with raw protobuf message outputs. + * + * In this mode purecalc yields pointers to abstract protobuf messages. All generated messages share the same + * descriptor so they can be safely converted into an appropriate message type. + * + * Note that one should not expect that the returned pointer will be valid forever; in can (and will) become + * outdated once a new output is requested/pushed. + * + * All working modes are implemented. In pull stream and pull list modes a program will return an object + * stream of non-const protobuf messages. In push mode, it will accept a single consumer of non-const + * messages. + * + * The program synopsis follows: + * + * @code + * IStream<google::protobuf::Message*> TPullStreamProgram::Apply(...); + * IStream<google::protobuf::Message*> TPullListProgram::Apply(...); + * ... TPushStreamProgram::Apply(TConsumer<google::protobuf::Message*>); + * @endcode + */ +class TProtobufRawOutputSpec: public TOutputSpecBase { +private: + const google::protobuf::Descriptor& Descriptor_; + google::protobuf::MessageFactory* Factory_; + TProtoSchemaOptions SchemaOptions_; + google::protobuf::Arena* Arena_; + mutable TMaybe<NYT::TNode> SavedSchema_; + +public: + /** + * Build output spec and associate the given message descriptor and maybe the given message factory. + */ + explicit TProtobufRawOutputSpec( + const google::protobuf::Descriptor& descriptor, + google::protobuf::MessageFactory* = nullptr, + const TProtoSchemaOptions& options = {}, + google::protobuf::Arena* arena = nullptr); + +public: + const NYT::TNode& GetSchema() const override; + + /** + * Get the descriptor associated with this spec. + */ + const google::protobuf::Descriptor& GetDescriptor() const; + + /** + * Set a new message factory which will be used to generate messages. Pass a null pointer to use the + * default factory. + */ + void SetFactory(google::protobuf::MessageFactory*); + + /** + * Get the message factory which is currently associated with this spec. + */ + google::protobuf::MessageFactory* GetFactory() const; + + /** + * Set a new arena which will be used to generate messages. Pass a null pointer to create on the heap. + */ + void SetArena(google::protobuf::Arena*); + + /** + * Get the arena which is currently associated with this spec. + */ + google::protobuf::Arena* GetArena() const; + + /** + * Get options that customize output struct type building. + */ + const TProtoSchemaOptions& GetSchemaOptions() const; +}; + +/** + * Processing mode for working with raw protobuf messages and several outputs. + * + * The program synopsis follows: + * + * @code + * IStream<std::pair<ui32, google::protobuf::Message*>> TPullStreamProgram::Apply(...); + * IStream<std::pair<ui32, google::protobuf::Message*>> TPullListProgram::Apply(...); + * ... TPushStreamProgram::Apply(TConsumer<std::pair<ui32, google::protobuf::Message*>>); + * @endcode + */ +class TProtobufRawMultiOutputSpec: public TOutputSpecBase { +private: + TVector<const google::protobuf::Descriptor*> Descriptors_; + TVector<google::protobuf::MessageFactory*> Factories_; + const TProtoSchemaOptions SchemaOptions_; + TVector<google::protobuf::Arena*> Arenas_; + mutable NYT::TNode SavedSchema_; + +public: + TProtobufRawMultiOutputSpec( + TVector<const google::protobuf::Descriptor*>, + TMaybe<TVector<google::protobuf::MessageFactory*>> = {}, + const TProtoSchemaOptions& options = {}, + TMaybe<TVector<google::protobuf::Arena*>> arenas = {}); + +public: + const NYT::TNode& GetSchema() const override; + + /** + * Get the descriptor associated with given output. + */ + const google::protobuf::Descriptor& GetDescriptor(ui32) const; + + /** + * Set a new message factory for given output. It will be used to generate messages for this output. + */ + void SetFactory(ui32, google::protobuf::MessageFactory*); + + /** + * Get the message factory which is currently associated with given output. + */ + google::protobuf::MessageFactory* GetFactory(ui32) const; + + /** + * Set a new arena for given output. It will be used to generate messages for this output. + */ + void SetArena(ui32, google::protobuf::Arena*); + + /** + * Get the arena which is currently associated with given output. + */ + google::protobuf::Arena* GetArena(ui32) const; + + /** + * Get number of outputs for this spec. + */ + ui32 GetOutputsNumber() const; + + /** + * Get options that customize output struct type building. + */ + const TProtoSchemaOptions& GetSchemaOptions() const; +}; + +template <> +struct TInputSpecTraits<TProtobufRawInputSpec> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = true; + static const constexpr bool SupportPullListMode = true; + static const constexpr bool SupportPushStreamMode = true; + + using TConsumerType = THolder<IConsumer<google::protobuf::Message*>>; + + static void PreparePullStreamWorker(const TProtobufRawInputSpec&, IPullStreamWorker*, THolder<IStream<google::protobuf::Message*>>); + static void PreparePullListWorker(const TProtobufRawInputSpec&, IPullListWorker*, THolder<IStream<google::protobuf::Message*>>); + static TConsumerType MakeConsumer(const TProtobufRawInputSpec&, TWorkerHolder<IPushStreamWorker>); +}; + +template <> +struct TOutputSpecTraits<TProtobufRawOutputSpec> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = true; + static const constexpr bool SupportPullListMode = true; + static const constexpr bool SupportPushStreamMode = true; + + using TOutputItemType = google::protobuf::Message*; + using TPullStreamReturnType = THolder<IStream<TOutputItemType>>; + using TPullListReturnType = THolder<IStream<TOutputItemType>>; + + static const constexpr TOutputItemType StreamSentinel = nullptr; + + static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufRawOutputSpec&, TWorkerHolder<IPullStreamWorker>); + static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufRawOutputSpec&, TWorkerHolder<IPullListWorker>); + static void SetConsumerToWorker(const TProtobufRawOutputSpec&, IPushStreamWorker*, THolder<IConsumer<TOutputItemType>>); +}; + +template <> +struct TOutputSpecTraits<TProtobufRawMultiOutputSpec> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = true; + static const constexpr bool SupportPullListMode = true; + static const constexpr bool SupportPushStreamMode = true; + + using TOutputItemType = std::pair<ui32, google::protobuf::Message*>; + using TPullStreamReturnType = THolder<IStream<TOutputItemType>>; + using TPullListReturnType = THolder<IStream<TOutputItemType>>; + + static const constexpr TOutputItemType StreamSentinel = {0, nullptr}; + + static TPullStreamReturnType ConvertPullStreamWorkerToOutputType(const TProtobufRawMultiOutputSpec&, TWorkerHolder<IPullStreamWorker>); + static TPullListReturnType ConvertPullListWorkerToOutputType(const TProtobufRawMultiOutputSpec&, TWorkerHolder<IPullListWorker>); + static void SetConsumerToWorker(const TProtobufRawMultiOutputSpec&, IPushStreamWorker*, THolder<IConsumer<TOutputItemType>>); +}; +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/io_specs/protobuf_raw/ya.make b/yql/essentials/public/purecalc/io_specs/protobuf_raw/ya.make index db3fab7e7a5..7b917148e2e 100644 --- a/yql/essentials/public/purecalc/io_specs/protobuf_raw/ya.make +++ b/yql/essentials/public/purecalc/io_specs/protobuf_raw/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + PEERDIR( yql/essentials/public/purecalc/common yql/essentials/public/purecalc/helpers/protobuf diff --git a/yql/essentials/public/purecalc/no_llvm/purecalc.h b/yql/essentials/public/purecalc/no_llvm/purecalc.h index 9b281a7caa7..83bd8a7b842 100644 --- a/yql/essentials/public/purecalc/no_llvm/purecalc.h +++ b/yql/essentials/public/purecalc/no_llvm/purecalc.h @@ -1,4 +1,3 @@ #pragma once #include "common/interface.h" - diff --git a/yql/essentials/public/purecalc/no_llvm/ya.make b/yql/essentials/public/purecalc/no_llvm/ya.make index 1cff51e09eb..84a73440c5c 100644 --- a/yql/essentials/public/purecalc/no_llvm/ya.make +++ b/yql/essentials/public/purecalc/no_llvm/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + ADDINCL( yql/essentials/public/purecalc ) diff --git a/yql/essentials/public/purecalc/ut/empty_stream.h b/yql/essentials/public/purecalc/ut/empty_stream.h index 8d10e647aee..64c3954f8e7 100644 --- a/yql/essentials/public/purecalc/ut/empty_stream.h +++ b/yql/essentials/public/purecalc/ut/empty_stream.h @@ -3,18 +3,18 @@ #include <yql/essentials/public/purecalc/purecalc.h> namespace NYql { - namespace NPureCalc { - template <typename T> - class TEmptyStreamImpl: public IStream<T> { - public: - T Fetch() override { - return nullptr; - } - }; - - template <typename T> - THolder<IStream<T>> EmptyStream() { - return MakeHolder<TEmptyStreamImpl<T>>(); - } +namespace NPureCalc { +template <typename T> +class TEmptyStreamImpl: public IStream<T> { +public: + T Fetch() override { + return nullptr; } +}; + +template <typename T> +THolder<IStream<T>> EmptyStream() { + return MakeHolder<TEmptyStreamImpl<T>>(); } +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/ut/fake_spec.cpp b/yql/essentials/public/purecalc/ut/fake_spec.cpp index 8acdbcfb347..12bbeb51340 100644 --- a/yql/essentials/public/purecalc/ut/fake_spec.cpp +++ b/yql/essentials/public/purecalc/ut/fake_spec.cpp @@ -1,82 +1,80 @@ #include "fake_spec.h" namespace NYql { - namespace NPureCalc { - NYT::TNode MakeFakeSchema(bool pg) { - auto itemType = NYT::TNode::CreateList(); - itemType.Add(pg ? "PgType" : "DataType"); - itemType.Add(pg ? "int4" : "Int32"); +namespace NPureCalc { +NYT::TNode MakeFakeSchema(bool pg) { + auto itemType = NYT::TNode::CreateList(); + itemType.Add(pg ? "PgType" : "DataType"); + itemType.Add(pg ? "int4" : "Int32"); - auto itemNode = NYT::TNode::CreateList(); - itemNode.Add("Name"); - itemNode.Add(std::move(itemType)); + auto itemNode = NYT::TNode::CreateList(); + itemNode.Add("Name"); + itemNode.Add(std::move(itemType)); - auto items = NYT::TNode::CreateList(); - items.Add(std::move(itemNode)); + auto items = NYT::TNode::CreateList(); + items.Add(std::move(itemNode)); - auto schema = NYT::TNode::CreateList(); - schema.Add("StructType"); - schema.Add(std::move(items)); + auto schema = NYT::TNode::CreateList(); + schema.Add("StructType"); + schema.Add(std::move(items)); - return schema; - } + return schema; +} - TFakeInputSpec FakeIS(ui32 inputsNumber, bool pg) { - auto spec = TFakeInputSpec(); - spec.Schemas = TVector<NYT::TNode>(inputsNumber, MakeFakeSchema(pg)); - return spec; - } +TFakeInputSpec FakeIS(ui32 inputsNumber, bool pg) { + auto spec = TFakeInputSpec(); + spec.Schemas = TVector<NYT::TNode>(inputsNumber, MakeFakeSchema(pg)); + return spec; +} - TFakeOutputSpec FakeOS(bool pg) { - auto spec = TFakeOutputSpec(); - spec.Schema = MakeFakeSchema(pg); - return spec; - } +TFakeOutputSpec FakeOS(bool pg) { + auto spec = TFakeOutputSpec(); + spec.Schema = MakeFakeSchema(pg); + return spec; +} - NYT::TNode CreateTypeNode(const TString& fieldType) { - return NYT::TNode::CreateList() - .Add("DataType") - .Add(fieldType); - } +NYT::TNode CreateTypeNode(const TString& fieldType) { + return NYT::TNode::CreateList() + .Add("DataType") + .Add(fieldType); +} - NYT::TNode CreateOptionalTypeNode(const TString& fieldType) { - return NYT::TNode::CreateList() - .Add("OptionalType") - .Add(CreateTypeNode(fieldType)); - } +NYT::TNode CreateOptionalTypeNode(const TString& fieldType) { + return NYT::TNode::CreateList() + .Add("OptionalType") + .Add(CreateTypeNode(fieldType)); +} - void AddField(NYT::TNode& node, const TString& fieldName, const TString& fieldType) { - node.Add( - NYT::TNode::CreateList() - .Add(fieldName) - .Add(CreateOptionalTypeNode(fieldType)) - ); - } +void AddField(NYT::TNode& node, const TString& fieldName, const TString& fieldType) { + node.Add( + NYT::TNode::CreateList() + .Add(fieldName) + .Add(CreateOptionalTypeNode(fieldType))); +} - NYT::TNode MakeFakeStructSchema() { - auto structMembers = NYT::TNode::CreateList(); - AddField(structMembers, "Id", "Uint32"); - AddField(structMembers, "Name", "Utf8"); - AddField(structMembers, "Body", "String"); +NYT::TNode MakeFakeStructSchema() { + auto structMembers = NYT::TNode::CreateList(); + AddField(structMembers, "Id", "Uint32"); + AddField(structMembers, "Name", "Utf8"); + AddField(structMembers, "Body", "String"); - auto rootMembers = NYT::TNode::CreateList(); - rootMembers.Add( - NYT::TNode::CreateList() - .Add("_r") - .Add(NYT::TNode::CreateList() - .Add("StructType") - .Add(std::move(structMembers))) - ); + auto rootMembers = NYT::TNode::CreateList(); + rootMembers.Add( + NYT::TNode::CreateList() + .Add("_r") + .Add(NYT::TNode::CreateList() + .Add("StructType") + .Add(std::move(structMembers)))); - return NYT::TNode::CreateList() - .Add("StructType") - .Add(std::move(rootMembers)); - } + return NYT::TNode::CreateList() + .Add("StructType") + .Add(std::move(rootMembers)); +} - TFakeOutputSpec FakeStructOS() { - auto spec = TFakeOutputSpec(); - spec.Schema = MakeFakeStructSchema(); - return spec; - } - } +TFakeOutputSpec FakeStructOS() { + auto spec = TFakeOutputSpec(); + spec.Schema = MakeFakeStructSchema(); + return spec; } +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/ut/fake_spec.h b/yql/essentials/public/purecalc/ut/fake_spec.h index 5f2ea310821..9e54c6a4139 100644 --- a/yql/essentials/public/purecalc/ut/fake_spec.h +++ b/yql/essentials/public/purecalc/ut/fake_spec.h @@ -3,53 +3,53 @@ #include <yql/essentials/public/purecalc/purecalc.h> namespace NYql { - namespace NPureCalc { - class TFakeInputSpec: public TInputSpecBase { - public: - TVector<NYT::TNode> Schemas = {NYT::TNode::CreateList()}; - - public: - const TVector<NYT::TNode>& GetSchemas() const override { - return Schemas; - } - }; - - class TFakeOutputSpec: public TOutputSpecBase { - public: - NYT::TNode Schema = NYT::TNode::CreateList(); - - public: - const NYT::TNode& GetSchema() const override { - return Schema; - } - }; - - template <> - struct TInputSpecTraits<TFakeInputSpec> { - static const constexpr bool IsPartial = false; - - static const constexpr bool SupportPullStreamMode = false; - static const constexpr bool SupportPullListMode = false; - static const constexpr bool SupportPushStreamMode = false; - - using TConsumerType = void; - }; - - template <> - struct TOutputSpecTraits<TFakeOutputSpec> { - static const constexpr bool IsPartial = false; - - static const constexpr bool SupportPullStreamMode = false; - static const constexpr bool SupportPullListMode = false; - static const constexpr bool SupportPushStreamMode = false; - - using TPullStreamReturnType = void; - using TPullListReturnType = void; - }; - - NYT::TNode MakeFakeSchema(bool pg = false); - TFakeInputSpec FakeIS(ui32 inputsNumber = 1, bool pg = false); - TFakeOutputSpec FakeOS(bool pg = false); - TFakeOutputSpec FakeStructOS(); +namespace NPureCalc { +class TFakeInputSpec: public TInputSpecBase { +public: + TVector<NYT::TNode> Schemas = {NYT::TNode::CreateList()}; + +public: + const TVector<NYT::TNode>& GetSchemas() const override { + return Schemas; } -} +}; + +class TFakeOutputSpec: public TOutputSpecBase { +public: + NYT::TNode Schema = NYT::TNode::CreateList(); + +public: + const NYT::TNode& GetSchema() const override { + return Schema; + } +}; + +template <> +struct TInputSpecTraits<TFakeInputSpec> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = false; + static const constexpr bool SupportPullListMode = false; + static const constexpr bool SupportPushStreamMode = false; + + using TConsumerType = void; +}; + +template <> +struct TOutputSpecTraits<TFakeOutputSpec> { + static const constexpr bool IsPartial = false; + + static const constexpr bool SupportPullStreamMode = false; + static const constexpr bool SupportPullListMode = false; + static const constexpr bool SupportPushStreamMode = false; + + using TPullStreamReturnType = void; + using TPullListReturnType = void; +}; + +NYT::TNode MakeFakeSchema(bool pg = false); +TFakeInputSpec FakeIS(ui32 inputsNumber = 1, bool pg = false); +TFakeOutputSpec FakeOS(bool pg = false); +TFakeOutputSpec FakeStructOS(); +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/ut/lib/helpers.cpp b/yql/essentials/public/purecalc/ut/lib/helpers.cpp index cef9a995235..59383454e67 100644 --- a/yql/essentials/public/purecalc/ut/lib/helpers.cpp +++ b/yql/essentials/public/purecalc/ut/lib/helpers.cpp @@ -7,49 +7,45 @@ #include <util/string/ascii.h> #include <util/generic/hash_set.h> - namespace NYql { - namespace NPureCalc { - namespace NPrivate { - NYT::TNode GetSchema( - const TVector<TString>& fields, - const TVector<TString>& optionalFields - ) { - THashSet<TString> optionalFilter { - optionalFields.begin(), optionalFields.end() - }; - - NYT::TNode members {NYT::TNode::CreateList()}; - - auto addField = [&] (const TString& name, const TString& type) { - auto typeNode = NYT::TNode::CreateList() - .Add("DataType") - .Add(type); - - if (optionalFilter.contains(name)) { - typeNode = NYT::TNode::CreateList() - .Add("OptionalType") - .Add(typeNode); - } - - members.Add(NYT::TNode::CreateList() - .Add(name) - .Add(typeNode) - ); - }; - - for (const auto& field: fields) { - TString type {field}; - type[0] = AsciiToUpper(type[0]); - addField(field, type); - } - - NYT::TNode schema = NYT::TNode::CreateList() - .Add("StructType") - .Add(members); - - return schema; - } +namespace NPureCalc { +namespace NPrivate { +NYT::TNode GetSchema( + const TVector<TString>& fields, + const TVector<TString>& optionalFields) { + THashSet<TString> optionalFilter{ + optionalFields.begin(), optionalFields.end()}; + + NYT::TNode members{NYT::TNode::CreateList()}; + + auto addField = [&](const TString& name, const TString& type) { + auto typeNode = NYT::TNode::CreateList() + .Add("DataType") + .Add(type); + + if (optionalFilter.contains(name)) { + typeNode = NYT::TNode::CreateList() + .Add("OptionalType") + .Add(typeNode); } + + members.Add(NYT::TNode::CreateList() + .Add(name) + .Add(typeNode)); + }; + + for (const auto& field : fields) { + TString type{field}; + type[0] = AsciiToUpper(type[0]); + addField(field, type); } + + NYT::TNode schema = NYT::TNode::CreateList() + .Add("StructType") + .Add(members); + + return schema; } +} // namespace NPrivate +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/ut/lib/helpers.h b/yql/essentials/public/purecalc/ut/lib/helpers.h index 53a22661ec3..b6fd110ae46 100644 --- a/yql/essentials/public/purecalc/ut/lib/helpers.h +++ b/yql/essentials/public/purecalc/ut/lib/helpers.h @@ -5,14 +5,12 @@ #include <util/generic/vector.h> #include <util/stream/str.h> - namespace NYql { - namespace NPureCalc { - namespace NPrivate { - NYT::TNode GetSchema( - const TVector<TString>& fields, - const TVector<TString>& optionalFields = {} - ); - } - } -} +namespace NPureCalc { +namespace NPrivate { +NYT::TNode GetSchema( + const TVector<TString>& fields, + const TVector<TString>& optionalFields = {}); +} // namespace NPrivate +} // namespace NPureCalc +} // namespace NYql diff --git a/yql/essentials/public/purecalc/ut/lib/ya.make b/yql/essentials/public/purecalc/ut/lib/ya.make index 36134a2940b..bff80e2cb97 100644 --- a/yql/essentials/public/purecalc/ut/lib/ya.make +++ b/yql/essentials/public/purecalc/ut/lib/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + PEERDIR( contrib/libs/apache/arrow library/cpp/yson diff --git a/yql/essentials/public/purecalc/ut/test_eval.cpp b/yql/essentials/public/purecalc/ut/test_eval.cpp index 9ca2579301b..0c2cdc10808 100644 --- a/yql/essentials/public/purecalc/ut/test_eval.cpp +++ b/yql/essentials/public/purecalc/ut/test_eval.cpp @@ -6,72 +6,69 @@ #include <library/cpp/testing/unittest/registar.h> Y_UNIT_TEST_SUITE(TestEval) { - Y_UNIT_TEST(TestEvalExpr) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST(TestEvalExpr) { + using namespace NYql::NPureCalc; - auto options = TProgramFactoryOptions(); - auto factory = MakeProgramFactory(options); + auto options = TProgramFactoryOptions(); + auto factory = MakeProgramFactory(options); + auto program = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + "SELECT Unwrap(cast(EvaluateExpr('foo' || 'bar') as Utf8)) AS X", + ETranslationMode::SQL); + + auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>()); + + NPureCalcProto::TStringMessage* message; + + UNIT_ASSERT(message = stream->Fetch()); + UNIT_ASSERT_EQUAL(message->GetX(), "foobar"); + UNIT_ASSERT(!stream->Fetch()); +} + +Y_UNIT_TEST(TestSelfType) { + using namespace NYql::NPureCalc; + + auto options = TProgramFactoryOptions(); + auto factory = MakeProgramFactory(options); + + try { auto program = factory->MakePullListProgram( TProtobufInputSpec<NPureCalcProto::TStringMessage>(), TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), - "SELECT Unwrap(cast(EvaluateExpr('foo' || 'bar') as Utf8)) AS X", - ETranslationMode::SQL - ); + "$input = PROCESS Input;select unwrap(cast(FormatType(EvaluateType(TypeHandle(TypeOf($input)))) AS Utf8)) AS X", + ETranslationMode::SQL); auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>()); NPureCalcProto::TStringMessage* message; UNIT_ASSERT(message = stream->Fetch()); - UNIT_ASSERT_EQUAL(message->GetX(), "foobar"); + UNIT_ASSERT_VALUES_EQUAL(message->GetX(), "List<Struct<'X':Utf8>>"); UNIT_ASSERT(!stream->Fetch()); + } catch (const TCompileError& e) { + UNIT_FAIL(e.GetIssues()); } +} - Y_UNIT_TEST(TestSelfType) { - using namespace NYql::NPureCalc; - - auto options = TProgramFactoryOptions(); - auto factory = MakeProgramFactory(options); - - try { - auto program = factory->MakePullListProgram( - TProtobufInputSpec<NPureCalcProto::TStringMessage>(), - TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), - "$input = PROCESS Input;select unwrap(cast(FormatType(EvaluateType(TypeHandle(TypeOf($input)))) AS Utf8)) AS X", - ETranslationMode::SQL - ); - - auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>()); +Y_UNIT_TEST(CantUseSelfInsideEvaluation) { + using namespace NYql::NPureCalc; - NPureCalcProto::TStringMessage* message; + auto options = TProgramFactoryOptions(); + auto factory = MakeProgramFactory(options); - UNIT_ASSERT(message = stream->Fetch()); - UNIT_ASSERT_VALUES_EQUAL(message->GetX(), "List<Struct<'X':Utf8>>"); - UNIT_ASSERT(!stream->Fetch()); - } catch (const TCompileError& e) { - UNIT_FAIL(e.GetIssues()); - } - } + try { + auto program = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + "$x = SELECT count(*) FROM Input;select unwrap(cast(EvaluateExpr($x) AS Utf8)) AS X", + ETranslationMode::SQL); - Y_UNIT_TEST(CantUseSelfInsideEvaluation) { - using namespace NYql::NPureCalc; - - auto options = TProgramFactoryOptions(); - auto factory = MakeProgramFactory(options); - - try { - auto program = factory->MakePullListProgram( - TProtobufInputSpec<NPureCalcProto::TStringMessage>(), - TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), - "$x = SELECT count(*) FROM Input;select unwrap(cast(EvaluateExpr($x) AS Utf8)) AS X", - ETranslationMode::SQL - ); - - program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>()); - UNIT_FAIL("Exception is expected"); - } catch (const TCompileError& e) { - UNIT_ASSERT_C(TString(e.GetIssues()).Contains("Inputs aren't available during evaluation"), e.GetIssues()); - } + program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>()); + UNIT_FAIL("Exception is expected"); + } catch (const TCompileError& e) { + UNIT_ASSERT_C(TString(e.GetIssues()).Contains("Inputs aren't available during evaluation"), e.GetIssues()); } } +} // Y_UNIT_TEST_SUITE(TestEval) diff --git a/yql/essentials/public/purecalc/ut/test_fatal_err.cpp b/yql/essentials/public/purecalc/ut/test_fatal_err.cpp index bb1452b16e3..54cac5b5874 100644 --- a/yql/essentials/public/purecalc/ut/test_fatal_err.cpp +++ b/yql/essentials/public/purecalc/ut/test_fatal_err.cpp @@ -6,22 +6,21 @@ #include <library/cpp/testing/unittest/registar.h> Y_UNIT_TEST_SUITE(TestFatalError) { - Y_UNIT_TEST(TestFailType) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST(TestFailType) { + using namespace NYql::NPureCalc; - auto options = TProgramFactoryOptions(); - auto factory = MakeProgramFactory(options); + auto options = TProgramFactoryOptions(); + auto factory = MakeProgramFactory(options); - try { - factory->MakePullListProgram( - TProtobufInputSpec<NPureCalcProto::TStringMessage>(), - TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), - "pragma warning(\"disable\",\"4510\");select unwrap(cast(Yql::FailMe(AsAtom('type')) as Utf8)) as X;", - ETranslationMode::SQL - ); - UNIT_FAIL("Exception is expected"); - } catch (const TCompileError& e) { - UNIT_ASSERT_C(e.GetIssues().Contains("abnormal"), e.GetIssues()); - } + try { + factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + "pragma warning(\"disable\",\"4510\");select unwrap(cast(Yql::FailMe(AsAtom('type')) as Utf8)) as X;", + ETranslationMode::SQL); + UNIT_FAIL("Exception is expected"); + } catch (const TCompileError& e) { + UNIT_ASSERT_C(e.GetIssues().Contains("abnormal"), e.GetIssues()); } } +} // Y_UNIT_TEST_SUITE(TestFatalError) diff --git a/yql/essentials/public/purecalc/ut/test_langver.cpp b/yql/essentials/public/purecalc/ut/test_langver.cpp index 1b3cfa2ff46..3fe55b862f6 100644 --- a/yql/essentials/public/purecalc/ut/test_langver.cpp +++ b/yql/essentials/public/purecalc/ut/test_langver.cpp @@ -8,22 +8,22 @@ #include "fake_spec.h" Y_UNIT_TEST_SUITE(TestLangVer) { - Y_UNIT_TEST(TooHighLangVer) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST(TooHighLangVer) { + using namespace NYql::NPureCalc; - auto options = TProgramFactoryOptions(); - options.SetLanguageVersion(NYql::GetMaxLangVersion()); - auto factory = MakeProgramFactory(options); + auto options = TProgramFactoryOptions(); + options.SetLanguageVersion(NYql::GetMaxLangVersion()); + auto factory = MakeProgramFactory(options); - try { - auto sql = TString(R"( + try { + auto sql = TString(R"( SELECT * FROM Input; )"); - factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - UNIT_FAIL("Exception is expected"); - } catch (const TCompileError& e) { - UNIT_ASSERT_C(e.GetIssues().Contains("version"), e.GetIssues()); - } + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + UNIT_FAIL("Exception is expected"); + } catch (const TCompileError& e) { + UNIT_ASSERT_C(e.GetIssues().Contains("version"), e.GetIssues()); } } +} // Y_UNIT_TEST_SUITE(TestLangVer) diff --git a/yql/essentials/public/purecalc/ut/test_mixed_allocators.cpp b/yql/essentials/public/purecalc/ut/test_mixed_allocators.cpp index 2932538f78f..3dad746a045 100644 --- a/yql/essentials/public/purecalc/ut/test_mixed_allocators.cpp +++ b/yql/essentials/public/purecalc/ut/test_mixed_allocators.cpp @@ -9,83 +9,81 @@ using namespace NYql::NPureCalc; namespace { - class TStatelessInputSpec : public TInputSpecBase { - public: - TStatelessInputSpec() - : Schemas_({NYT::TNode::CreateList() - .Add("StructType") - .Add(NYT::TNode::CreateList() - .Add(NYT::TNode::CreateList() - .Add("InputValue") +class TStatelessInputSpec: public TInputSpecBase { +public: + TStatelessInputSpec() + : Schemas_({NYT::TNode::CreateList() + .Add("StructType") .Add(NYT::TNode::CreateList() - .Add("DataType") - .Add("Utf8") - ) - ) - ) - }) - {}; - - const TVector<NYT::TNode>& GetSchemas() const override { - return Schemas_; - } + .Add(NYT::TNode::CreateList() + .Add("InputValue") + .Add(NYT::TNode::CreateList() + .Add("DataType") + .Add("Utf8"))))}) + {}; + + const TVector<NYT::TNode>& GetSchemas() const override { + return Schemas_; + } - private: - const TVector<NYT::TNode> Schemas_; - }; +private: + const TVector<NYT::TNode> Schemas_; +}; - class TStatelessInputConsumer : public IConsumer<const NYql::NUdf::TUnboxedValue&> { - public: - TStatelessInputConsumer(TWorkerHolder<IPushStreamWorker> worker) - : Worker_(std::move(worker)) - {} +class TStatelessInputConsumer: public IConsumer<const NYql::NUdf::TUnboxedValue&> { +public: + TStatelessInputConsumer(TWorkerHolder<IPushStreamWorker> worker) + : Worker_(std::move(worker)) + { + } - void OnObject(const NYql::NUdf::TUnboxedValue& value) override { - with_lock (Worker_->GetScopedAlloc()) { - NYql::NUdf::TUnboxedValue* items = nullptr; - NYql::NUdf::TUnboxedValue result = Worker_->GetGraph().GetHolderFactory().CreateDirectArrayHolder(1, items); + void OnObject(const NYql::NUdf::TUnboxedValue& value) override { + with_lock (Worker_->GetScopedAlloc()) { + NYql::NUdf::TUnboxedValue* items = nullptr; + NYql::NUdf::TUnboxedValue result = Worker_->GetGraph().GetHolderFactory().CreateDirectArrayHolder(1, items); - items[0] = value; + items[0] = value; - Worker_->Push(std::move(result)); + Worker_->Push(std::move(result)); - // Clear graph after each object because - // values allocated on another allocator and should be released - Worker_->Invalidate(); - } + // Clear graph after each object because + // values allocated on another allocator and should be released + Worker_->Invalidate(); } + } - void OnFinish() override { - with_lock(Worker_->GetScopedAlloc()) { - Worker_->OnFinish(); - } + void OnFinish() override { + with_lock (Worker_->GetScopedAlloc()) { + Worker_->OnFinish(); } + } - private: - TWorkerHolder<IPushStreamWorker> Worker_; - }; +private: + TWorkerHolder<IPushStreamWorker> Worker_; +}; - class TStatelessConsumer : public IConsumer<NPureCalcProto::TStringMessage*> { - const TString ExpectedData_; - const ui64 ExpectedRows_; - ui64 RowId_ = 0; +class TStatelessConsumer: public IConsumer<NPureCalcProto::TStringMessage*> { + const TString ExpectedData_; + const ui64 ExpectedRows_; + ui64 RowId_ = 0; - public: - TStatelessConsumer(const TString& expectedData, ui64 expectedRows) - : ExpectedData_(expectedData) - , ExpectedRows_(expectedRows) - {} +public: + TStatelessConsumer(const TString& expectedData, ui64 expectedRows) + : ExpectedData_(expectedData) + , ExpectedRows_(expectedRows) + { + } - void OnObject(NPureCalcProto::TStringMessage* message) override { - UNIT_ASSERT_VALUES_EQUAL_C(ExpectedData_, message->GetX(), RowId_); - RowId_++; - } + void OnObject(NPureCalcProto::TStringMessage* message) override { + UNIT_ASSERT_VALUES_EQUAL_C(ExpectedData_, message->GetX(), RowId_); + RowId_++; + } - void OnFinish() override { - UNIT_ASSERT_VALUES_EQUAL(ExpectedRows_, RowId_); - } - }; -} + void OnFinish() override { + UNIT_ASSERT_VALUES_EQUAL(ExpectedRows_, RowId_); + } +}; +} // namespace template <> struct TInputSpecTraits<TStatelessInputSpec> { @@ -100,40 +98,39 @@ struct TInputSpecTraits<TStatelessInputSpec> { }; Y_UNIT_TEST_SUITE(TestMixedAllocators) { - Y_UNIT_TEST(TestPushStream) { - const auto targetString = "large string >= 14 bytes"; - const auto factory = MakeProgramFactory(); - const auto sql = TStringBuilder() << "SELECT InputValue AS X FROM Input WHERE InputValue = \"" << targetString << "\";"; - - const auto program = factory->MakePushStreamProgram( - TStatelessInputSpec(), - TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), - sql - ); - - const ui64 numberRows = 5; - const auto inputConsumer = program->Apply(MakeHolder<TStatelessConsumer>(targetString, numberRows)); - NKikimr::NMiniKQL::TScopedAlloc alloc(__LOCATION__, NKikimr::TAlignedPagePoolCounters(), true, false); - - const auto pushString = [&](TString inputValue) { - NYql::NUdf::TUnboxedValue stringValue; - with_lock(alloc) { - stringValue = NKikimr::NMiniKQL::MakeString(inputValue); - alloc.Ref().LockObject(stringValue); - } - - inputConsumer->OnObject(stringValue); - - with_lock(alloc) { - alloc.Ref().UnlockObject(stringValue); - stringValue.Clear(); - } - }; - - for (ui64 i = 0; i < numberRows; ++i) { - pushString(targetString); - pushString("another large string >= 14 bytes"); +Y_UNIT_TEST(TestPushStream) { + const auto targetString = "large string >= 14 bytes"; + const auto factory = MakeProgramFactory(); + const auto sql = TStringBuilder() << "SELECT InputValue AS X FROM Input WHERE InputValue = \"" << targetString << "\";"; + + const auto program = factory->MakePushStreamProgram( + TStatelessInputSpec(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + sql); + + const ui64 numberRows = 5; + const auto inputConsumer = program->Apply(MakeHolder<TStatelessConsumer>(targetString, numberRows)); + NKikimr::NMiniKQL::TScopedAlloc alloc(__LOCATION__, NKikimr::TAlignedPagePoolCounters(), true, false); + + const auto pushString = [&](TString inputValue) { + NYql::NUdf::TUnboxedValue stringValue; + with_lock (alloc) { + stringValue = NKikimr::NMiniKQL::MakeString(inputValue); + alloc.Ref().LockObject(stringValue); } - inputConsumer->OnFinish(); + + inputConsumer->OnObject(stringValue); + + with_lock (alloc) { + alloc.Ref().UnlockObject(stringValue); + stringValue.Clear(); + } + }; + + for (ui64 i = 0; i < numberRows; ++i) { + pushString(targetString); + pushString("another large string >= 14 bytes"); } + inputConsumer->OnFinish(); } +} // Y_UNIT_TEST_SUITE(TestMixedAllocators) diff --git a/yql/essentials/public/purecalc/ut/test_pg.cpp b/yql/essentials/public/purecalc/ut/test_pg.cpp index 3d26cfbd1be..c45e2ed569d 100644 --- a/yql/essentials/public/purecalc/ut/test_pg.cpp +++ b/yql/essentials/public/purecalc/ut/test_pg.cpp @@ -7,65 +7,65 @@ #include <library/cpp/testing/unittest/registar.h> Y_UNIT_TEST_SUITE(TestPg) { - using namespace NYql::NPureCalc; +using namespace NYql::NPureCalc; - Y_UNIT_TEST(TestPgCompile) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestPgCompile) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( SELECT * FROM "Input"; )"); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullListProgram(FakeIS(1,true), FakeOS(true), sql, ETranslationMode::PG); - }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullListProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }()); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePullStreamProgram(FakeIS(1,true), FakeOS(true), sql, ETranslationMode::PG); - }(), TCompileError, "PullList mode"); + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "PullList mode"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePushStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); - }(), TCompileError, "PullList mode"); - } + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePushStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "PullList mode"); +} - Y_UNIT_TEST(TestSqlWrongTableName) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestSqlWrongTableName) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( SELECT * FROM WrongTable; )"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePullListProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); - }(), TCompileError, "Failed to optimize"); + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullListProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "Failed to optimize"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePullStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); - }(), TCompileError, "PullList mode"); + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "PullList mode"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePushStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); - }(), TCompileError, "PullList mode"); - } + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePushStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "PullList mode"); +} - Y_UNIT_TEST(TestInvalidSql) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestInvalidSql) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( Just some invalid SQL; )"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePullListProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); - }(), TCompileError, "failed to parse PG"); + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullListProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "failed to parse PG"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePullStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); - }(), TCompileError, "PullList mode"); + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "PullList mode"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePushStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); - }(), TCompileError, "PullList mode"); - } + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePushStreamProgram(FakeIS(1, true), FakeOS(true), sql, ETranslationMode::PG); + }(), TCompileError, "PullList mode"); } +} // Y_UNIT_TEST_SUITE(TestPg) diff --git a/yql/essentials/public/purecalc/ut/test_pool.cpp b/yql/essentials/public/purecalc/ut/test_pool.cpp index b3de36cbf5f..90ae69b8bc2 100644 --- a/yql/essentials/public/purecalc/ut/test_pool.cpp +++ b/yql/essentials/public/purecalc/ut/test_pool.cpp @@ -10,175 +10,172 @@ using namespace NYql::NPureCalc; namespace { - class TStringMessageStreamImpl: public IStream<NPureCalcProto::TStringMessage*> { - private: - ui32 I_ = 0; - NPureCalcProto::TStringMessage Message_{}; - - public: - NPureCalcProto::TStringMessage* Fetch() override { - if (I_ >= 3) { - return nullptr; - } else { - Message_.SetX(ToString(I_)); - ++I_; - return &Message_; - } +class TStringMessageStreamImpl: public IStream<NPureCalcProto::TStringMessage*> { +private: + ui32 I_ = 0; + NPureCalcProto::TStringMessage Message_{}; + +public: + NPureCalcProto::TStringMessage* Fetch() override { + if (I_ >= 3) { + return nullptr; + } else { + Message_.SetX(ToString(I_)); + ++I_; + return &Message_; } - }; + } +}; - class TStringMessageConsumerImpl: public IConsumer<NPureCalcProto::TStringMessage*> { - private: - TVector<TString>* Buf_; +class TStringMessageConsumerImpl: public IConsumer<NPureCalcProto::TStringMessage*> { +private: + TVector<TString>* Buf_; - public: - TStringMessageConsumerImpl(TVector<TString>* buf) - : Buf_(buf) - { - } +public: + TStringMessageConsumerImpl(TVector<TString>* buf) + : Buf_(buf) + { + } - public: - void OnObject(NPureCalcProto::TStringMessage* t) override { - Buf_->push_back(t->GetX()); - } +public: + void OnObject(NPureCalcProto::TStringMessage* t) override { + Buf_->push_back(t->GetX()); + } - void OnFinish() override { - } - }; + void OnFinish() override { + } +}; -} +} // namespace Y_UNIT_TEST_SUITE(TestWorkerPool) { - static TString sql = "SELECT 'abc'u || X AS X FROM Input"; +static TString sql = "SELECT 'abc'u || X AS X FROM Input"; - static TVector<TString> expected{"abc0", "abc1", "abc2"}; +static TVector<TString> expected{"abc0", "abc1", "abc2"}; - void TestPullStreamImpl(bool useWorkerPool) { - auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool)); +void TestPullStreamImpl(bool useWorkerPool) { + auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool)); - auto program = factory->MakePullStreamProgram( - TProtobufInputSpec<NPureCalcProto::TStringMessage>(), - TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), - sql, - ETranslationMode::SQL - ); + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + sql, + ETranslationMode::SQL); - auto check = [](IStream<NPureCalcProto::TStringMessage*>* output) { - TVector<TString> actual; - while (auto *x = output->Fetch()) { - actual.push_back(x->GetX()); - } + auto check = [](IStream<NPureCalcProto::TStringMessage*>* output) { + TVector<TString> actual; + while (auto* x = output->Fetch()) { + actual.push_back(x->GetX()); + } - UNIT_ASSERT_VALUES_EQUAL(expected, actual); - }; + UNIT_ASSERT_VALUES_EQUAL(expected, actual); + }; - // Sequential use - for (size_t i = 0; i < 2; ++i) { - auto output = program->Apply(MakeHolder<TStringMessageStreamImpl>()); - check(output.Get()); - } - // Parallel use - { - auto output1 = program->Apply(MakeHolder<TStringMessageStreamImpl>()); - auto output2 = program->Apply(MakeHolder<TStringMessageStreamImpl>()); - check(output1.Get()); - check(output2.Get()); - } + // Sequential use + for (size_t i = 0; i < 2; ++i) { + auto output = program->Apply(MakeHolder<TStringMessageStreamImpl>()); + check(output.Get()); } - - Y_UNIT_TEST(TestPullStreamUseWorkerPool) { - TestPullStreamImpl(true); + // Parallel use + { + auto output1 = program->Apply(MakeHolder<TStringMessageStreamImpl>()); + auto output2 = program->Apply(MakeHolder<TStringMessageStreamImpl>()); + check(output1.Get()); + check(output2.Get()); } +} - Y_UNIT_TEST(TestPullStreamNoWorkerPool) { - TestPullStreamImpl(false); - } +Y_UNIT_TEST(TestPullStreamUseWorkerPool) { + TestPullStreamImpl(true); +} - void TestPullListImpl(bool useWorkerPool) { - auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool)); - - auto program = factory->MakePullListProgram( - TProtobufInputSpec<NPureCalcProto::TStringMessage>(), - TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), - sql, - ETranslationMode::SQL - ); - - auto check = [](IStream<NPureCalcProto::TStringMessage*>* output) { - TVector<TString> actual; - while (auto *x = output->Fetch()) { - actual.push_back(x->GetX()); - } - - UNIT_ASSERT_VALUES_EQUAL(expected, actual); - }; - - // Sequential use - for (size_t i = 0; i < 2; ++i) { - auto output = program->Apply(MakeHolder<TStringMessageStreamImpl>()); - check(output.Get()); - } - // Parallel use - { - auto output1 = program->Apply(MakeHolder<TStringMessageStreamImpl>()); - auto output2 = program->Apply(MakeHolder<TStringMessageStreamImpl>()); - check(output1.Get()); - check(output2.Get()); +Y_UNIT_TEST(TestPullStreamNoWorkerPool) { + TestPullStreamImpl(false); +} + +void TestPullListImpl(bool useWorkerPool) { + auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool)); + + auto program = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + sql, + ETranslationMode::SQL); + + auto check = [](IStream<NPureCalcProto::TStringMessage*>* output) { + TVector<TString> actual; + while (auto* x = output->Fetch()) { + actual.push_back(x->GetX()); } - } - Y_UNIT_TEST(TestPullListUseWorkerPool) { - TestPullListImpl(true); - } + UNIT_ASSERT_VALUES_EQUAL(expected, actual); + }; - Y_UNIT_TEST(TestPullListNoWorkerPool) { - TestPullListImpl(false); + // Sequential use + for (size_t i = 0; i < 2; ++i) { + auto output = program->Apply(MakeHolder<TStringMessageStreamImpl>()); + check(output.Get()); } + // Parallel use + { + auto output1 = program->Apply(MakeHolder<TStringMessageStreamImpl>()); + auto output2 = program->Apply(MakeHolder<TStringMessageStreamImpl>()); + check(output1.Get()); + check(output2.Get()); + } +} - void TestPushStreamImpl(bool useWorkerPool) { - auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool)); - - auto program = factory->MakePushStreamProgram( - TProtobufInputSpec<NPureCalcProto::TStringMessage>(), - TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), - sql, - ETranslationMode::SQL - ); - - auto check = [](IConsumer<NPureCalcProto::TStringMessage*>* input, const TVector<TString>& result) { - NPureCalcProto::TStringMessage message; - for (auto s: {"0", "1", "2"}) { - message.SetX(s); - input->OnObject(&message); - } - input->OnFinish(); - - UNIT_ASSERT_VALUES_EQUAL(expected, result); - }; - - // Sequential use - for (size_t i = 0; i < 2; ++i) { - TVector<TString> actual; - auto input = program->Apply(MakeHolder<TStringMessageConsumerImpl>(&actual)); - check(input.Get(), actual); - } +Y_UNIT_TEST(TestPullListUseWorkerPool) { + TestPullListImpl(true); +} + +Y_UNIT_TEST(TestPullListNoWorkerPool) { + TestPullListImpl(false); +} + +void TestPushStreamImpl(bool useWorkerPool) { + auto factory = MakeProgramFactory(TProgramFactoryOptions().SetUseWorkerPool(useWorkerPool)); + + auto program = factory->MakePushStreamProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + sql, + ETranslationMode::SQL); - // Parallel use - { - TVector<TString> actual1; - auto input1 = program->Apply(MakeHolder<TStringMessageConsumerImpl>(&actual1)); - TVector<TString> actual2; - auto input2 = program->Apply(MakeHolder<TStringMessageConsumerImpl>(&actual2)); - check(input1.Get(), actual1); - check(input2.Get(), actual2); + auto check = [](IConsumer<NPureCalcProto::TStringMessage*>* input, const TVector<TString>& result) { + NPureCalcProto::TStringMessage message; + for (auto s : {"0", "1", "2"}) { + message.SetX(s); + input->OnObject(&message); } - } + input->OnFinish(); + + UNIT_ASSERT_VALUES_EQUAL(expected, result); + }; - Y_UNIT_TEST(TestPushStreamUseWorkerPool) { - TestPushStreamImpl(true); + // Sequential use + for (size_t i = 0; i < 2; ++i) { + TVector<TString> actual; + auto input = program->Apply(MakeHolder<TStringMessageConsumerImpl>(&actual)); + check(input.Get(), actual); } - Y_UNIT_TEST(TestPushStreamNoWorkerPool) { - TestPushStreamImpl(false); + // Parallel use + { + TVector<TString> actual1; + auto input1 = program->Apply(MakeHolder<TStringMessageConsumerImpl>(&actual1)); + TVector<TString> actual2; + auto input2 = program->Apply(MakeHolder<TStringMessageConsumerImpl>(&actual2)); + check(input1.Get(), actual1); + check(input2.Get(), actual2); } } + +Y_UNIT_TEST(TestPushStreamUseWorkerPool) { + TestPushStreamImpl(true); +} + +Y_UNIT_TEST(TestPushStreamNoWorkerPool) { + TestPushStreamImpl(false); +} +} // Y_UNIT_TEST_SUITE(TestWorkerPool) diff --git a/yql/essentials/public/purecalc/ut/test_sexpr.cpp b/yql/essentials/public/purecalc/ut/test_sexpr.cpp index 9c50dd1f291..5b46f685148 100644 --- a/yql/essentials/public/purecalc/ut/test_sexpr.cpp +++ b/yql/essentials/public/purecalc/ut/test_sexpr.cpp @@ -7,49 +7,49 @@ #include <library/cpp/testing/unittest/registar.h> Y_UNIT_TEST_SUITE(TestSExpr) { - Y_UNIT_TEST(TestSExprCompile) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST(TestSExprCompile) { + using namespace NYql::NPureCalc; - auto factory = MakeProgramFactory(); + auto factory = MakeProgramFactory(); - auto expr = TString(R"( + auto expr = TString(R"( ( (return (Self '0)) ) )"); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullStreamProgram(FakeIS(), FakeOS(), expr, ETranslationMode::SExpr); - }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullStreamProgram(FakeIS(), FakeOS(), expr, ETranslationMode::SExpr); + }()); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullListProgram(FakeIS(), FakeOS(), expr, ETranslationMode::SExpr); - }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullListProgram(FakeIS(), FakeOS(), expr, ETranslationMode::SExpr); + }()); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePushStreamProgram(FakeIS(), FakeOS(), expr, ETranslationMode::SExpr); - }()); - } + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePushStreamProgram(FakeIS(), FakeOS(), expr, ETranslationMode::SExpr); + }()); +} - Y_UNIT_TEST(TestInvalidSExpr) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST(TestInvalidSExpr) { + using namespace NYql::NPureCalc; - auto factory = MakeProgramFactory(); + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( Some totally invalid SExpr )"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SExpr); - }(), TCompileError, "failed to parse s-expression"); + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SExpr); + }(), TCompileError, "failed to parse s-expression"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SExpr); - }(), TCompileError, "failed to parse s-expression"); + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SExpr); + }(), TCompileError, "failed to parse s-expression"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SExpr); - }(), TCompileError, "failed to parse s-expression"); - } + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SExpr); + }(), TCompileError, "failed to parse s-expression"); } +} // Y_UNIT_TEST_SUITE(TestSExpr) diff --git a/yql/essentials/public/purecalc/ut/test_sql.cpp b/yql/essentials/public/purecalc/ut/test_sql.cpp index 1b6b69279a8..7b7917751a4 100644 --- a/yql/essentials/public/purecalc/ut/test_sql.cpp +++ b/yql/essentials/public/purecalc/ut/test_sql.cpp @@ -7,36 +7,36 @@ #include <library/cpp/testing/unittest/registar.h> Y_UNIT_TEST_SUITE(TestSql) { - using namespace NYql::NPureCalc; +using namespace NYql::NPureCalc; - Y_UNIT_TEST(TestSqlCompile) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestSqlCompile) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( SELECT * FROM Input; )"); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); - auto program = factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - auto expectedIssues = TString(R"(<main>: Warning: Type annotation, code: 1030 + auto program = factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + auto expectedIssues = TString(R"(<main>: Warning: Type annotation, code: 1030 generated.sql:2:13: Warning: At function: PersistableRepr generated.sql:2:13: Warning: Persistable required. Atom, key, world, datasink, datasource, callable, resource, stream and lambda are not persistable, code: 1104 )"); - UNIT_ASSERT_VALUES_EQUAL(expectedIssues, program->GetIssues().ToString()); - } + UNIT_ASSERT_VALUES_EQUAL(expectedIssues, program->GetIssues().ToString()); +} - Y_UNIT_TEST(TestStructCastMessage) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestStructCastMessage) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( $l = ($x) -> { return <| Id: $x.Name, Name: CAST($x.Name AS String), Body: Just('foo') |>; }; @@ -44,54 +44,54 @@ Y_UNIT_TEST_SUITE(TestSql) { SELECT $l(TableRow()) AS _r FROM Input )"); - try { - factory->MakePullListProgram(FakeIS(), FakeStructOS(), sql, ETranslationMode::SQL); - UNIT_ASSERT_C(false, "Unreachable"); - } catch (const NYql::NPureCalc::TCompileError& error) { - auto issue = error.GetIssues(); - UNIT_ASSERT_C(issue.Contains("Failed to convert 'Id': Int32 to Optional<Uint32>"), issue); - UNIT_ASSERT_C(!issue.Contains("Body"), issue); - } + try { + factory->MakePullListProgram(FakeIS(), FakeStructOS(), sql, ETranslationMode::SQL); + UNIT_ASSERT_C(false, "Unreachable"); + } catch (const NYql::NPureCalc::TCompileError& error) { + auto issue = error.GetIssues(); + UNIT_ASSERT_C(issue.Contains("Failed to convert 'Id': Int32 to Optional<Uint32>"), issue); + UNIT_ASSERT_C(!issue.Contains("Body"), issue); } +} - Y_UNIT_TEST(TestSqlCompileSingleUnnamedInput) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestSqlCompileSingleUnnamedInput) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( SELECT * FROM TABLES() )"); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }()); - } + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); +} - Y_UNIT_TEST(TestSqlCompileNamedMultiinputs) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestSqlCompileNamedMultiinputs) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( SELECT * FROM Input0 UNION ALL SELECT * FROM Input1 )"); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullListProgram(FakeIS(2), FakeOS(), sql, ETranslationMode::SQL); - }()); - } + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullListProgram(FakeIS(2), FakeOS(), sql, ETranslationMode::SQL); + }()); +} - Y_UNIT_TEST(TestSqlCompileUnnamedMultiinputs) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestSqlCompileUnnamedMultiinputs) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( $t0, $t1, $t2 = PROCESS TABLES(); SELECT * FROM $t0 UNION ALL @@ -100,117 +100,117 @@ Y_UNIT_TEST_SUITE(TestSql) { SELECT * FROM $t2 )"); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullListProgram(FakeIS(3), FakeOS(), sql, ETranslationMode::SQL); - }()); - } + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullListProgram(FakeIS(3), FakeOS(), sql, ETranslationMode::SQL); + }()); +} - Y_UNIT_TEST(TestSqlCompileWithWarning) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestSqlCompileWithWarning) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( $x = 1; $y = 2; SELECT $x as Name FROM Input; )"); - auto expectedIssues = TString(R"(generated.sql:3:13: Warning: Symbol $y is not used, code: 4527 + auto expectedIssues = TString(R"(generated.sql:3:13: Warning: Symbol $y is not used, code: 4527 <main>: Warning: Type annotation, code: 1030 generated.sql:4:13: Warning: At function: PersistableRepr generated.sql:4:13: Warning: Persistable required. Atom, key, world, datasink, datasource, callable, resource, stream and lambda are not persistable, code: 1104 )"); - auto program = factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - UNIT_ASSERT_VALUES_EQUAL(expectedIssues, program->GetIssues().ToString()); - } + auto program = factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + UNIT_ASSERT_VALUES_EQUAL(expectedIssues, program->GetIssues().ToString()); +} - Y_UNIT_TEST(TestSqlWrongTableName) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestSqlWrongTableName) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( SELECT * FROM WrongTable; )"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }(), TCompileError, "Failed to optimize"); + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }(), TCompileError, "Failed to optimize"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }(), TCompileError, "Failed to optimize"); + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }(), TCompileError, "Failed to optimize"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }(), TCompileError, "Failed to optimize"); - } + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }(), TCompileError, "Failed to optimize"); +} - Y_UNIT_TEST(TestAllocateLargeStringOnEvaluate) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestAllocateLargeStringOnEvaluate) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( $data = Length(EvaluateExpr("long string" || " very loooong string")); SELECT $data as Name FROM Input; )"); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }()); - } + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); +} - Y_UNIT_TEST(TestInvalidSql) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestInvalidSql) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( Just some invalid SQL; )"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }(), TCompileError, "failed to parse SQL"); + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }(), TCompileError, "failed to parse SQL"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }(), TCompileError, "failed to parse SQL"); + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }(), TCompileError, "failed to parse SQL"); - UNIT_ASSERT_EXCEPTION_CONTAINS([&](){ - factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }(), TCompileError, "failed to parse SQL"); - } + UNIT_ASSERT_EXCEPTION_CONTAINS([&]() { + factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }(), TCompileError, "failed to parse SQL"); +} - Y_UNIT_TEST(TestUseProcess) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestUseProcess) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( $processor = ($row) -> ($row); PROCESS Input using $processor(TableRow()); )"); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }()); + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }()); - } + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePushStreamProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); +} - Y_UNIT_TEST(TestUseCodegen) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestUseCodegen) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( $processor = ($row) -> { $lambda = EvaluateCode(LambdaCode(($row) -> ($row))); return $lambda($row); @@ -219,15 +219,15 @@ Y_UNIT_TEST_SUITE(TestSql) { PROCESS Input using $processor(TableRow()); )"); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }()); - } + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); +} - Y_UNIT_TEST(TestUseDefineSubquery) { - auto factory = MakeProgramFactory(); +Y_UNIT_TEST(TestUseDefineSubquery) { + auto factory = MakeProgramFactory(); - auto sql = TString(R"( + auto sql = TString(R"( DEFINE SUBQUERY $source() AS PROCESS Input; END DEFINE; @@ -239,8 +239,8 @@ Y_UNIT_TEST_SUITE(TestSql) { PROCESS $handler($source); )"); - UNIT_ASSERT_NO_EXCEPTION([&](){ - factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); - }()); - } + UNIT_ASSERT_NO_EXCEPTION([&]() { + factory->MakePullListProgram(FakeIS(), FakeOS(), sql, ETranslationMode::SQL); + }()); } +} // Y_UNIT_TEST_SUITE(TestSql) diff --git a/yql/essentials/public/purecalc/ut/test_udf.cpp b/yql/essentials/public/purecalc/ut/test_udf.cpp index 732917739e7..702225c656e 100644 --- a/yql/essentials/public/purecalc/ut/test_udf.cpp +++ b/yql/essentials/public/purecalc/ut/test_udf.cpp @@ -7,18 +7,19 @@ #include <yql/essentials/public/udf/udf_type_builder.h> #include <library/cpp/testing/unittest/registar.h> -class TMyModule : public NKikimr::NUdf::IUdfModule { +class TMyModule: public NKikimr::NUdf::IUdfModule { public: - class TFunc : public NKikimr::NUdf::TBoxedValue { + class TFunc: public NKikimr::NUdf::TBoxedValue { public: TFunc(NKikimr::NUdf::TCounter counter, NKikimr::NUdf::TScopedProbe scopedProbe) : Counter_(counter) , ScopedProbe_(scopedProbe) - {} + { + } NKikimr::NUdf::TUnboxedValue Run(const NKikimr::NUdf::IValueBuilder* valueBuilder, const NKikimr::NUdf::TUnboxedValuePod* args) const override { Y_UNUSED(valueBuilder); - with_lock(ScopedProbe_) { + with_lock (ScopedProbe_) { Counter_.Inc(); return NKikimr::NUdf::TUnboxedValuePod(args[0].Get<i32>()); } @@ -45,9 +46,8 @@ public: if (name == NKikimr::NUdf::TStringRef::Of("Func")) { builder.SimpleSignature<i32(i32)>(); builder.Implementation(new TFunc( - builder.GetCounter("FuncCalls",true), - builder.GetScopedProbe("FuncTime") - )); + builder.GetCounter("FuncCalls", true), + builder.GetScopedProbe("FuncTime"))); } } @@ -55,12 +55,13 @@ public: } }; -class TMyCountersProvider : public NKikimr::NUdf::ICountersProvider, public NKikimr::NUdf::IScopedProbeHost { +class TMyCountersProvider: public NKikimr::NUdf::ICountersProvider, public NKikimr::NUdf::IScopedProbeHost { public: TMyCountersProvider(i64* calls, TString* log) : Calls_(calls) , Log_(log) - {} + { + } NKikimr::NUdf::TCounter GetCounter(const NKikimr::NUdf::TStringRef& module, const NKikimr::NUdf::TStringRef& name, bool deriv) override { UNIT_ASSERT_VALUES_EQUAL(module, "MyModule"); @@ -91,11 +92,11 @@ private: }; namespace NPureCalcProto { - class TUnparsed; - class TParsed; -} +class TUnparsed; +class TParsed; +} // namespace NPureCalcProto -class TDocInput : public NYql::NPureCalc::IStream<NPureCalcProto::TUnparsed*> { +class TDocInput: public NYql::NPureCalc::IStream<NPureCalcProto::TUnparsed*> { public: NPureCalcProto::TUnparsed* Fetch() override { if (Extracted) { @@ -113,83 +114,83 @@ public: }; Y_UNIT_TEST_SUITE(TestUdf) { - Y_UNIT_TEST(TestCounters) { - using namespace NYql::NPureCalc; - - auto factory = MakeProgramFactory(); - - i64 callCounter = 0; - TMyCountersProvider myCountersProvider(&callCounter, nullptr); - factory->AddUdfModule("MyModule", new TMyModule); - factory->SetCountersProvider(&myCountersProvider); - - auto program = factory->MakePullStreamProgram( - TProtobufInputSpec<NPureCalcProto::TUnparsed>(), - TProtobufOutputSpec<NPureCalcProto::TParsed>(), - "select MyModule::Func(1) as A, 2 as B, 3 as C from Input", - ETranslationMode::SQL); - - auto out = program->Apply(MakeHolder<TDocInput>()); - auto* message = out->Fetch(); - UNIT_ASSERT(message); - UNIT_ASSERT_VALUES_EQUAL(message->GetA(), 1); - UNIT_ASSERT_VALUES_EQUAL(message->GetB(), 2); - UNIT_ASSERT_VALUES_EQUAL(message->GetC(), 3); - UNIT_ASSERT_VALUES_EQUAL(callCounter, 1); - UNIT_ASSERT(!out->Fetch()); - } +Y_UNIT_TEST(TestCounters) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + i64 callCounter = 0; + TMyCountersProvider myCountersProvider(&callCounter, nullptr); + factory->AddUdfModule("MyModule", new TMyModule); + factory->SetCountersProvider(&myCountersProvider); + + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TUnparsed>(), + TProtobufOutputSpec<NPureCalcProto::TParsed>(), + "select MyModule::Func(1) as A, 2 as B, 3 as C from Input", + ETranslationMode::SQL); + + auto out = program->Apply(MakeHolder<TDocInput>()); + auto* message = out->Fetch(); + UNIT_ASSERT(message); + UNIT_ASSERT_VALUES_EQUAL(message->GetA(), 1); + UNIT_ASSERT_VALUES_EQUAL(message->GetB(), 2); + UNIT_ASSERT_VALUES_EQUAL(message->GetC(), 3); + UNIT_ASSERT_VALUES_EQUAL(callCounter, 1); + UNIT_ASSERT(!out->Fetch()); +} - Y_UNIT_TEST(TestCountersFilteredColumns) { - using namespace NYql::NPureCalc; - - auto factory = MakeProgramFactory(); - - i64 callCounter = 0; - TMyCountersProvider myCountersProvider(&callCounter, nullptr); - factory->AddUdfModule("MyModule", new TMyModule); - factory->SetCountersProvider(&myCountersProvider); - - auto ospec = TProtobufOutputSpec<NPureCalcProto::TParsed>(); - ospec.SetOutputColumnsFilter(THashSet<TString>({"B", "C"})); - auto program = factory->MakePullStreamProgram( - TProtobufInputSpec<NPureCalcProto::TUnparsed>(), - ospec, - "select MyModule::Func(1) as A, 2 as B, 3 as C from Input", - ETranslationMode::SQL); - - auto out = program->Apply(MakeHolder<TDocInput>()); - auto* message = out->Fetch(); - UNIT_ASSERT(message); - UNIT_ASSERT_VALUES_EQUAL(message->GetA(), 0); - UNIT_ASSERT_VALUES_EQUAL(message->GetB(), 2); - UNIT_ASSERT_VALUES_EQUAL(message->GetC(), 3); - UNIT_ASSERT_VALUES_EQUAL(callCounter, 0); - UNIT_ASSERT(!out->Fetch()); - } +Y_UNIT_TEST(TestCountersFilteredColumns) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + i64 callCounter = 0; + TMyCountersProvider myCountersProvider(&callCounter, nullptr); + factory->AddUdfModule("MyModule", new TMyModule); + factory->SetCountersProvider(&myCountersProvider); + + auto ospec = TProtobufOutputSpec<NPureCalcProto::TParsed>(); + ospec.SetOutputColumnsFilter(THashSet<TString>({"B", "C"})); + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TUnparsed>(), + ospec, + "select MyModule::Func(1) as A, 2 as B, 3 as C from Input", + ETranslationMode::SQL); + + auto out = program->Apply(MakeHolder<TDocInput>()); + auto* message = out->Fetch(); + UNIT_ASSERT(message); + UNIT_ASSERT_VALUES_EQUAL(message->GetA(), 0); + UNIT_ASSERT_VALUES_EQUAL(message->GetB(), 2); + UNIT_ASSERT_VALUES_EQUAL(message->GetC(), 3); + UNIT_ASSERT_VALUES_EQUAL(callCounter, 0); + UNIT_ASSERT(!out->Fetch()); +} - Y_UNIT_TEST(TestScopedProbes) { - using namespace NYql::NPureCalc; - - auto factory = MakeProgramFactory(); - - TString log; - TMyCountersProvider myCountersProvider(nullptr, &log); - factory->AddUdfModule("MyModule", new TMyModule); - factory->SetCountersProvider(&myCountersProvider); - - auto program = factory->MakePullStreamProgram( - TProtobufInputSpec<NPureCalcProto::TUnparsed>(), - TProtobufOutputSpec<NPureCalcProto::TParsed>(), - "select MyModule::Func(1) as A, 2 as B, 3 as C from Input", - ETranslationMode::SQL); - - auto out = program->Apply(MakeHolder<TDocInput>()); - auto* message = out->Fetch(); - UNIT_ASSERT(message); - UNIT_ASSERT_VALUES_EQUAL(message->GetA(), 1); - UNIT_ASSERT_VALUES_EQUAL(message->GetB(), 2); - UNIT_ASSERT_VALUES_EQUAL(message->GetC(), 3); - UNIT_ASSERT_VALUES_EQUAL(log, "Enter\nExit\n"); - UNIT_ASSERT(!out->Fetch()); - } +Y_UNIT_TEST(TestScopedProbes) { + using namespace NYql::NPureCalc; + + auto factory = MakeProgramFactory(); + + TString log; + TMyCountersProvider myCountersProvider(nullptr, &log); + factory->AddUdfModule("MyModule", new TMyModule); + factory->SetCountersProvider(&myCountersProvider); + + auto program = factory->MakePullStreamProgram( + TProtobufInputSpec<NPureCalcProto::TUnparsed>(), + TProtobufOutputSpec<NPureCalcProto::TParsed>(), + "select MyModule::Func(1) as A, 2 as B, 3 as C from Input", + ETranslationMode::SQL); + + auto out = program->Apply(MakeHolder<TDocInput>()); + auto* message = out->Fetch(); + UNIT_ASSERT(message); + UNIT_ASSERT_VALUES_EQUAL(message->GetA(), 1); + UNIT_ASSERT_VALUES_EQUAL(message->GetB(), 2); + UNIT_ASSERT_VALUES_EQUAL(message->GetC(), 3); + UNIT_ASSERT_VALUES_EQUAL(log, "Enter\nExit\n"); + UNIT_ASSERT(!out->Fetch()); } +} // Y_UNIT_TEST_SUITE(TestUdf) diff --git a/yql/essentials/public/purecalc/ut/test_user_data.cpp b/yql/essentials/public/purecalc/ut/test_user_data.cpp index b87940ab6b2..164a3b4de33 100644 --- a/yql/essentials/public/purecalc/ut/test_user_data.cpp +++ b/yql/essentials/public/purecalc/ut/test_user_data.cpp @@ -6,57 +6,55 @@ #include <library/cpp/testing/unittest/registar.h> Y_UNIT_TEST_SUITE(TestUserData) { - Y_UNIT_TEST(TestUserData) { - using namespace NYql::NPureCalc; +Y_UNIT_TEST(TestUserData) { + using namespace NYql::NPureCalc; + auto options = TProgramFactoryOptions() + .AddFile(NYql::NUserData::EDisposition::INLINE, "my_file.txt", "my content!"); + + auto factory = MakeProgramFactory(options); + + auto program = factory->MakePullListProgram( + TProtobufInputSpec<NPureCalcProto::TStringMessage>(), + TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), + "SELECT UNWRAP(CAST(FileContent(\"my_file.txt\") AS Utf8)) AS X", + ETranslationMode::SQL); + + auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>()); + + NPureCalcProto::TStringMessage* message; + + UNIT_ASSERT(message = stream->Fetch()); + UNIT_ASSERT_EQUAL(message->GetX(), "my content!"); + UNIT_ASSERT(!stream->Fetch()); +} + +Y_UNIT_TEST(TestUserDataLibrary) { + using namespace NYql::NPureCalc; + + try { auto options = TProgramFactoryOptions() - .AddFile(NYql::NUserData::EDisposition::INLINE, "my_file.txt", "my content!"); + .AddLibrary(NYql::NUserData::EDisposition::INLINE, "a.sql", "$x = 1; EXPORT $x;") + .AddLibrary(NYql::NUserData::EDisposition::INLINE, "b.sql", "IMPORT a SYMBOLS $x; $y = CAST($x + 1 AS String); EXPORT $y;"); auto factory = MakeProgramFactory(options); auto program = factory->MakePullListProgram( TProtobufInputSpec<NPureCalcProto::TStringMessage>(), TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), - "SELECT UNWRAP(CAST(FileContent(\"my_file.txt\") AS Utf8)) AS X", - ETranslationMode::SQL - ); + "IMPORT b SYMBOLS $y; SELECT CAST($y AS Utf8) ?? '' AS X;", + ETranslationMode::SQL); auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>()); NPureCalcProto::TStringMessage* message; UNIT_ASSERT(message = stream->Fetch()); - UNIT_ASSERT_EQUAL(message->GetX(), "my content!"); + UNIT_ASSERT_EQUAL(message->GetX(), "2"); UNIT_ASSERT(!stream->Fetch()); - } - - Y_UNIT_TEST(TestUserDataLibrary) { - using namespace NYql::NPureCalc; - - try { - auto options = TProgramFactoryOptions() - .AddLibrary(NYql::NUserData::EDisposition::INLINE, "a.sql", "$x = 1; EXPORT $x;") - .AddLibrary(NYql::NUserData::EDisposition::INLINE, "b.sql", "IMPORT a SYMBOLS $x; $y = CAST($x + 1 AS String); EXPORT $y;"); - - auto factory = MakeProgramFactory(options); - - auto program = factory->MakePullListProgram( - TProtobufInputSpec<NPureCalcProto::TStringMessage>(), - TProtobufOutputSpec<NPureCalcProto::TStringMessage>(), - "IMPORT b SYMBOLS $y; SELECT CAST($y AS Utf8) ?? '' AS X;", - ETranslationMode::SQL - ); - - auto stream = program->Apply(EmptyStream<NPureCalcProto::TStringMessage*>()); - - NPureCalcProto::TStringMessage* message; - - UNIT_ASSERT(message = stream->Fetch()); - UNIT_ASSERT_EQUAL(message->GetX(), "2"); - UNIT_ASSERT(!stream->Fetch()); - } catch (const TCompileError& e) { - Cerr << e; - throw e; - } + } catch (const TCompileError& e) { + Cerr << e; + throw e; } } +} // Y_UNIT_TEST_SUITE(TestUserData) diff --git a/yql/essentials/public/purecalc/ut/ya.make b/yql/essentials/public/purecalc/ut/ya.make index 2f4882c1291..a9fbe4dd9ab 100644 --- a/yql/essentials/public/purecalc/ut/ya.make +++ b/yql/essentials/public/purecalc/ut/ya.make @@ -1,5 +1,7 @@ UNITTEST() +ENABLE(YQL_STYLE_CPP) + SRCS( empty_stream.h fake_spec.cpp diff --git a/yql/essentials/public/purecalc/ya.make b/yql/essentials/public/purecalc/ya.make index e7f3ff8818f..0d820f08348 100644 --- a/yql/essentials/public/purecalc/ya.make +++ b/yql/essentials/public/purecalc/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( purecalc.cpp ) |
