diff options
author | vvvv <[email protected]> | 2025-10-06 13:26:25 +0300 |
---|---|---|
committer | vvvv <[email protected]> | 2025-10-06 14:06:25 +0300 |
commit | eca8ce9cb1613d5c983185c4e43c20651a9638aa (patch) | |
tree | 61ee5ae779948e61af9a7691d19eaa2c09869121 | |
parent | 4adf7eecae16a9b228b28cc5f64c27ef69ad5ec2 (diff) |
YQL-20086 udfs
init
commit_hash:f9684778bf1ea956965f2360b80b91edb7d4ffbe
174 files changed, 16316 insertions, 16310 deletions
diff --git a/yql/essentials/udfs/common/compress_base/compress_udf.cpp b/yql/essentials/udfs/common/compress_base/compress_udf.cpp index efd2d0b3c54..2323f0a082a 100644 --- a/yql/essentials/udfs/common/compress_base/compress_udf.cpp +++ b/yql/essentials/udfs/common/compress_base/compress_udf.cpp @@ -3,15 +3,15 @@ using namespace NYql::NUdf; namespace NCompress { - SIMPLE_MODULE(TCompressModule, EXPORTED_COMPRESS_BASE_UDF); -} +SIMPLE_MODULE(TCompressModule, EXPORTED_COMPRESS_BASE_UDF); +} // namespace NCompress namespace NDecompress { - SIMPLE_MODULE(TDecompressModule, EXPORTED_DECOMPRESS_BASE_UDF); -} +SIMPLE_MODULE(TDecompressModule, EXPORTED_DECOMPRESS_BASE_UDF); +} // namespace NDecompress namespace NTryDecompress { - SIMPLE_MODULE(TTryDecompressModule, EXPORTED_TRY_DECOMPRESS_BASE_UDF); -} +SIMPLE_MODULE(TTryDecompressModule, EXPORTED_TRY_DECOMPRESS_BASE_UDF); +} // namespace NTryDecompress REGISTER_MODULES(NCompress::TCompressModule, NDecompress::TDecompressModule, NTryDecompress::TTryDecompressModule); diff --git a/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp index 237abe271eb..c9b6f7eb890 100644 --- a/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp +++ b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.cpp @@ -1 +1 @@ -#include "compress_base_udf.h"
\ No newline at end of file +#include "compress_base_udf.h" diff --git a/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h index 58709134d6a..9e655d96469 100644 --- a/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h +++ b/yql/essentials/udfs/common/compress_base/lib/compress_base_udf.h @@ -16,203 +16,210 @@ using namespace NYql::NUdf; namespace NCompress { - SIMPLE_UDF(TGzip, char*(TAutoMap<char*>, ui8)) { - TString result; - TStringOutput output(result); - TZLibCompress compress(&output, ZLib::GZip, args[1].Get<ui8>()); - compress.Write(args[0].AsStringRef()); - compress.Finish(); - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TGzip, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TZLibCompress compress(&output, ZLib::GZip, args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); +} - SIMPLE_UDF(TZlib, char*(TAutoMap<char*>, ui8)) { - TString result; - TStringOutput output(result); - TZLibCompress compress(&output, ZLib::ZLib, args[1].Get<ui8>()); - compress.Write(args[0].AsStringRef()); - compress.Finish(); - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TZlib, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TZLibCompress compress(&output, ZLib::ZLib, args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); +} - SIMPLE_UDF(TBrotli, char*(TAutoMap<char*>, ui8)) { - TString result; - TStringOutput output(result); - TBrotliCompress compress(&output, args[1].Get<ui8>()); - compress.Write(args[0].AsStringRef()); - compress.Finish(); - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TBrotli, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TBrotliCompress compress(&output, args[1].Get<ui8>()); + 
compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); +} - SIMPLE_UDF(TLzma, char*(TAutoMap<char*>, ui8)) { - TString result; - TStringOutput output(result); - TLzmaCompress compress(&output, args[1].Get<ui8>()); - compress.Write(args[0].AsStringRef()); - compress.Finish(); - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TLzma, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TLzmaCompress compress(&output, args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); +} - SIMPLE_UDF(TBZip2, char*(TAutoMap<char*>, ui8)) { - TString result; - TStringOutput output(result); - TBZipCompress compress(&output, args[1].Get<ui8>()); - compress.Write(args[0].AsStringRef()); - compress.Finish(); - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TBZip2, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TBZipCompress compress(&output, args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); +} - SIMPLE_UDF(TSnappy, char*(TAutoMap<char*>)) { - TString result; - const TStringRef& input = args[0].AsStringRef(); - snappy::Compress(input.Data(), input.Size(), &result); - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TSnappy, char*(TAutoMap<char*>)) { + TString result; + const TStringRef& input = args[0].AsStringRef(); + snappy::Compress(input.Data(), input.Size(), &result); + return valueBuilder->NewString(result); +} - SIMPLE_UDF(TZstd, char*(TAutoMap<char*>, ui8)) { - TString result; - TStringOutput output(result); - TZstdCompress compress(&output, args[1].Get<ui8>()); - compress.Write(args[0].AsStringRef()); - compress.Finish(); - return valueBuilder->NewString(result); - } +SIMPLE_UDF(TZstd, char*(TAutoMap<char*>, ui8)) { + TString result; + TStringOutput output(result); + TZstdCompress compress(&output, 
args[1].Get<ui8>()); + compress.Write(args[0].AsStringRef()); + compress.Finish(); + return valueBuilder->NewString(result); } +} // namespace NCompress namespace NDecompress { - SIMPLE_UDF(TGzip, char*(TAutoMap<char*>)) { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TZLibDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } +SIMPLE_UDF(TGzip, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZLibDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} - SIMPLE_UDF(TZlib, char*(TAutoMap<char*>)) { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TZLibDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } +SIMPLE_UDF(TZlib, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZLibDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} - SIMPLE_UDF(TBrotli, char*(TAutoMap<char*>)) { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TBrotliDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } +SIMPLE_UDF(TBrotli, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TBrotliDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} - SIMPLE_UDF(TLzma, char*(TAutoMap<char*>)) { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TLzmaDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } +SIMPLE_UDF(TLzma, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TLzmaDecompress decompress(&input); + return 
valueBuilder->NewString(decompress.ReadAll()); +} - SIMPLE_UDF(TBZip2, char*(TAutoMap<char*>)) { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TBZipDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } +SIMPLE_UDF(TBZip2, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TBZipDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} - SIMPLE_UDF(TSnappy, char*(TAutoMap<char*>)) { - TString result; - const auto& value = args->AsStringRef(); - if (snappy::Uncompress(value.Data(), value.Size(), &result)) { - return valueBuilder->NewString(result); - } - - ythrow yexception() << "failed to decompress message with snappy"; - } - - SIMPLE_UDF(TZstd, char*(TAutoMap<char*>)) { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TZstdDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); +SIMPLE_UDF(TSnappy, char*(TAutoMap<char*>)) { + TString result; + const auto& value = args->AsStringRef(); + if (snappy::Uncompress(value.Data(), value.Size(), &result)) { + return valueBuilder->NewString(result); } - SIMPLE_UDF(TXz, char*(TAutoMap<char*>)) { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TXzDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } + ythrow yexception() << "failed to decompress message with snappy"; } +SIMPLE_UDF(TZstd, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZstdDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} + +SIMPLE_UDF(TXz, char*(TAutoMap<char*>)) { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TXzDecompress decompress(&input); + return 
valueBuilder->NewString(decompress.ReadAll()); +} +} // namespace NDecompress + namespace NTryDecompress { - SIMPLE_UDF(TGzip, TOptional<char*>(TAutoMap<char*>)) try { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TZLibDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } catch (const std::exception&) { - return TUnboxedValuePod(); - } +SIMPLE_UDF(TGzip, TOptional<char*>(TAutoMap<char*>)) +try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZLibDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} catch (const std::exception&) { + return TUnboxedValuePod(); +} - SIMPLE_UDF(TZlib, TOptional<char*>(TAutoMap<char*>)) try { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TZLibDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } catch (const std::exception&) { - return TUnboxedValuePod(); - } +SIMPLE_UDF(TZlib, TOptional<char*>(TAutoMap<char*>)) +try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZLibDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} catch (const std::exception&) { + return TUnboxedValuePod(); +} - SIMPLE_UDF(TBrotli, TOptional<char*>(TAutoMap<char*>)) try { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TBrotliDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } catch (const std::exception&) { - return TUnboxedValuePod(); - } +SIMPLE_UDF(TBrotli, TOptional<char*>(TAutoMap<char*>)) +try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TBrotliDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} catch (const std::exception&) { + return TUnboxedValuePod(); +} - SIMPLE_UDF(TLzma, 
TOptional<char*>(TAutoMap<char*>)) try { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TLzmaDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } catch (const std::exception&) { - return TUnboxedValuePod(); - } +SIMPLE_UDF(TLzma, TOptional<char*>(TAutoMap<char*>)) +try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TLzmaDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} catch (const std::exception&) { + return TUnboxedValuePod(); +} - SIMPLE_UDF(TBZip2, TOptional<char*>(TAutoMap<char*>)) try { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TBZipDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } catch (const std::exception&) { - return TUnboxedValuePod(); - } +SIMPLE_UDF(TBZip2, TOptional<char*>(TAutoMap<char*>)) +try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TBZipDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} catch (const std::exception&) { + return TUnboxedValuePod(); +} - SIMPLE_UDF(TSnappy, TOptional<char*>(TAutoMap<char*>)) { - TString result; - const auto& value = args->AsStringRef(); - if (snappy::Uncompress(value.Data(), value.Size(), &result)) { - return valueBuilder->NewString(result); - } - return TUnboxedValuePod(); +SIMPLE_UDF(TSnappy, TOptional<char*>(TAutoMap<char*>)) { + TString result; + const auto& value = args->AsStringRef(); + if (snappy::Uncompress(value.Data(), value.Size(), &result)) { + return valueBuilder->NewString(result); } + return TUnboxedValuePod(); +} - SIMPLE_UDF(TZstd, TOptional<char*>(TAutoMap<char*>)) try { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TZstdDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } catch 
(const std::exception&) { - return TUnboxedValuePod(); - } +SIMPLE_UDF(TZstd, TOptional<char*>(TAutoMap<char*>)) +try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TZstdDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} catch (const std::exception&) { + return TUnboxedValuePod(); +} - SIMPLE_UDF(TXz, TOptional<char*>(TAutoMap<char*>)) try { - const auto& ref = args->AsStringRef(); - TMemoryInput input(ref.Data(), ref.Size()); - TXzDecompress decompress(&input); - return valueBuilder->NewString(decompress.ReadAll()); - } catch (const std::exception&) { - return TUnboxedValuePod(); - } +SIMPLE_UDF(TXz, TOptional<char*>(TAutoMap<char*>)) +try { + const auto& ref = args->AsStringRef(); + TMemoryInput input(ref.Data(), ref.Size()); + TXzDecompress decompress(&input); + return valueBuilder->NewString(decompress.ReadAll()); +} catch (const std::exception&) { + return TUnboxedValuePod(); } +} // namespace NTryDecompress -#define EXPORTED_COMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd -#define EXPORTED_DECOMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd, TXz +#define EXPORTED_COMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd +#define EXPORTED_DECOMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd, TXz #define EXPORTED_TRY_DECOMPRESS_BASE_UDF TGzip, TZlib, TBrotli, TLzma, TBZip2, TSnappy, TZstd, TXz diff --git a/yql/essentials/udfs/common/compress_base/lib/ya.make b/yql/essentials/udfs/common/compress_base/lib/ya.make index ca606d244a0..0d03f21c597 100644 --- a/yql/essentials/udfs/common/compress_base/lib/ya.make +++ b/yql/essentials/udfs/common/compress_base/lib/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( compress_base_udf.cpp ) diff --git a/yql/essentials/udfs/common/compress_base/ya.make b/yql/essentials/udfs/common/compress_base/ya.make index 4859a4e53cd..93861b3d98b 100644 
--- a/yql/essentials/udfs/common/compress_base/ya.make +++ b/yql/essentials/udfs/common/compress_base/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( compress_udf.cpp ) diff --git a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp index acc42ad2cbe..f6ae912cb79 100644 --- a/yql/essentials/udfs/common/datetime2/datetime_udf.cpp +++ b/yql/essentials/udfs/common/datetime2/datetime_udf.cpp @@ -61,25 +61,25 @@ extern const char TM64ResourceName[] = "DateTime2.TM64"; namespace { -template<typename Type> +template <typename Type> static void PrintTypeAlternatives(NUdf::IFunctionTypeInfoBuilder& builder, - ITypeInfoHelper::TPtr typeInfoHelper, TStringBuilder& strBuilder) + ITypeInfoHelper::TPtr typeInfoHelper, TStringBuilder& strBuilder) { TTypePrinter(*typeInfoHelper, builder.SimpleType<Type>()).Out(strBuilder.Out); } -template<typename Type, typename Head, typename... Tail> +template <typename Type, typename Head, typename... Tail> static void PrintTypeAlternatives(NUdf::IFunctionTypeInfoBuilder& builder, - ITypeInfoHelper::TPtr typeInfoHelper, TStringBuilder& strBuilder) + ITypeInfoHelper::TPtr typeInfoHelper, TStringBuilder& strBuilder) { PrintTypeAlternatives<Type>(builder, typeInfoHelper, strBuilder); strBuilder << " or "; PrintTypeAlternatives<Head, Tail...>(builder, typeInfoHelper, strBuilder); } -template<typename... Types> +template <typename... 
Types> static void SetInvalidTypeError(NUdf::IFunctionTypeInfoBuilder& builder, - ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType) + ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType) { ::TStringBuilder sb; sb << "Invalid argument type: got "; @@ -91,26 +91,25 @@ static void SetInvalidTypeError(NUdf::IFunctionTypeInfoBuilder& builder, } static void SetResourceExpectedError(NUdf::IFunctionTypeInfoBuilder& builder, - ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType) + ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType) { SetInvalidTypeError< TResource<TMResourceName>, - TResource<TM64ResourceName> - >(builder, typeInfoHelper, argType); + TResource<TM64ResourceName>>(builder, typeInfoHelper, argType); } static void SetIntervalExpectedError(NUdf::IFunctionTypeInfoBuilder& builder, - ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType) + ITypeInfoHelper::TPtr typeInfoHelper, const TType* argType) { SetInvalidTypeError<TInterval, TInterval64>(builder, typeInfoHelper, argType); } -template<const char* TResourceName> +template <const char* TResourceName> static void PrintTagAlternatives(TStringBuilder& strBuilder) { strBuilder << "'" << TResourceName << "'"; } -template<const char* TResourceName, const char* Head, const char*... Tail> +template <const char* TResourceName, const char* Head, const char*... 
Tail> static void PrintTagAlternatives(TStringBuilder& strBuilder) { PrintTagAlternatives<TResourceName>(strBuilder); strBuilder << " or "; @@ -118,7 +117,7 @@ static void PrintTagAlternatives(TStringBuilder& strBuilder) { } static void SetUnexpectedTagError(NUdf::IFunctionTypeInfoBuilder& builder, - TStringRef tag) + TStringRef tag) { ::TStringBuilder sb; sb << "Unexpected Resource tag: got '" << tag << "', but "; @@ -153,20 +152,20 @@ public: return value * i64(86400) * TWResult(ScaleAfterSeconds); } - template<typename TTzDate> + template <typename TTzDate> static TResult TzBlockCore(TBlockItem tzDate); - template<> + template <> static TResult TzBlockCore<TTzDate>(TBlockItem tzDate) { return DateCore(tzDate.Get<ui16>()); } - template<> + template <> static TResult TzBlockCore<TTzDatetime>(TBlockItem tzDate) { return DatetimeCore(tzDate.Get<ui32>()); } - template<> + template <> static TResult TzBlockCore<TTzTimestamp>(TBlockItem tzDate) { return TimestampCore(tzDate.Get<ui64>()); } @@ -200,7 +199,7 @@ public: return name; } - template<typename TTzDate, typename TOutput> + template <typename TTzDate, typename TOutput> static auto MakeTzBlockExec() { using TReader = TTzDateBlockReader<TTzDate, /*Nullable*/ false>; return UnaryPreallocatedReaderExecImpl<TReader, TOutput, TzBlockCore<TTzDate>>; @@ -253,7 +252,6 @@ public: isOptional = true; } - TDataTypeInspector data(*typeInfoHelper, argType); if (!data) { builder.SetError("Data type expected"); @@ -301,11 +299,11 @@ public: if (typeId == TDataType<TDate>::Id || typeId == TDataType<TTzDate>::Id) { if (block) { const auto exec = (typeId == TDataType<TTzDate>::Id) - ? MakeTzBlockExec<TTzDate, TResult>() - : UnaryPreallocatedExecImpl<ui16, TResult, DateCore>; + ? 
MakeTzBlockExec<TTzDate, TResult>() + : UnaryPreallocatedExecImpl<ui16, TResult, DateCore>; builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); } else { builder.Implementation(new TUnaryOverOptionalImpl<ui16, TResult, DateCore>()); } @@ -315,11 +313,11 @@ public: if (typeId == TDataType<TDatetime>::Id || typeId == TDataType<TTzDatetime>::Id) { if (block) { const auto exec = (typeId == TDataType<TTzDatetime>::Id) - ? MakeTzBlockExec<TTzDatetime, TResult>() - : UnaryPreallocatedExecImpl<ui32, TResult, DatetimeCore>; + ? MakeTzBlockExec<TTzDatetime, TResult>() + : UnaryPreallocatedExecImpl<ui32, TResult, DatetimeCore>; builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); } else { builder.Implementation(new TUnaryOverOptionalImpl<ui32, TResult, DatetimeCore>()); } @@ -329,11 +327,11 @@ public: if (typeId == TDataType<TTimestamp>::Id || typeId == TDataType<TTzTimestamp>::Id) { if (block) { const auto exec = (typeId == TDataType<TTzTimestamp>::Id) - ? MakeTzBlockExec<TTzTimestamp, TResult>() - : UnaryPreallocatedExecImpl<ui64, TResult, TimestampCore>; + ? 
MakeTzBlockExec<TTzTimestamp, TResult>() + : UnaryPreallocatedExecImpl<ui64, TResult, TimestampCore>; builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + exec, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); } else { builder.Implementation(new TUnaryOverOptionalImpl<ui64, TResult, TimestampCore>()); } @@ -343,7 +341,7 @@ public: if (typeId == TDataType<TInterval>::Id) { if (block) { builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - UnaryPreallocatedExecImpl<i64, TSignedResult, IntervalCore>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + UnaryPreallocatedExecImpl<i64, TSignedResult, IntervalCore>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); } else { builder.Implementation(new TUnaryOverOptionalImpl<i64, TSignedResult, IntervalCore>()); } @@ -485,7 +483,7 @@ struct TGetTimeComponent { if (typeId == TDataType<TDate>::Id) { if (block) { builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - UnaryPreallocatedExecImpl<ui16, TFieldStorage, Core<ui16, true, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + UnaryPreallocatedExecImpl<ui16, TFieldStorage, Core<ui16, true, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); } else { builder.Implementation(new TUnaryOverOptionalImpl<ui16, TFieldStorage, Core<ui16, true, false>>()); } @@ -494,7 +492,7 @@ struct TGetTimeComponent { if (typeId == TDataType<TDatetime>::Id) { if (block) { builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - UnaryPreallocatedExecImpl<ui32, TFieldStorage, Core<ui32, false, false>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + UnaryPreallocatedExecImpl<ui32, TFieldStorage, Core<ui32, false, false>>, 
builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); } else { builder.Implementation(new TUnaryOverOptionalImpl<ui32, TFieldStorage, Core<ui32, false, false>>()); } @@ -503,7 +501,7 @@ struct TGetTimeComponent { if (typeId == TDataType<TTimestamp>::Id) { if (block) { builder.Implementation(new TSimpleArrowUdfImpl(argBlockTypes, outputType, block.IsScalar(), - UnaryPreallocatedExecImpl<ui64, TFieldStorage, Core<ui64, false, true>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); + UnaryPreallocatedExecImpl<ui64, TFieldStorage, Core<ui64, false, true>>, builder, TString(name), arrow::compute::NullHandling::INTERSECTION)); } else { builder.Implementation(new TUnaryOverOptionalImpl<ui64, TFieldStorage, Core<ui64, false, true>>()); } @@ -531,6 +529,7 @@ struct TGetTimeComponent { SetUnexpectedTagError(builder, resource.GetTag()); return true; } + private: template <typename TInput, bool AlwaysZero, bool InputFractional> static TFieldStorage Core(TInput val) { @@ -553,8 +552,8 @@ private: } } - template<typename TResult, TResult (*Func)(const TUnboxedValuePod&)> - class TImpl : public TBoxedValue { + template <typename TResult, TResult (*Func)(const TUnboxedValuePod&)> + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { Y_UNUSED(valueBuilder); @@ -563,7 +562,7 @@ private: } }; - template<typename TResult, const char* TResourceName, TResult (*Func)(const TUnboxedValuePod&)> + template <typename TResult, const char* TResourceName, TResult (*Func)(const TUnboxedValuePod&)> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<TResult>(); builder.Args()->Add<TAutoMap<TResource<TResourceName>>>(); @@ -576,21 +575,21 @@ private: namespace { -template<const char* TResourceName, typename TValue, - typename TStorage = std::conditional_t<TResourceName == TMResourceName, - TTMStorage, TTM64Storage>> +template 
<const char* TResourceName, typename TValue, + typename TStorage = std::conditional_t<TResourceName == TMResourceName, + TTMStorage, TTM64Storage>> const TStorage& Reference(const TValue& value) { return *reinterpret_cast<const TStorage*>(value.GetRawPtr()); } -template<const char* TResourceName, typename TValue, - typename TStorage = std::conditional_t<TResourceName == TMResourceName, - TTMStorage, TTM64Storage>> +template <const char* TResourceName, typename TValue, + typename TStorage = std::conditional_t<TResourceName == TMResourceName, + TTMStorage, TTM64Storage>> TStorage& Reference(TValue& value) { return *reinterpret_cast<TStorage*>(value.GetRawPtr()); } -template<const char* TResourceName> +template <const char* TResourceName> TUnboxedValuePod DoAddMonths(const TUnboxedValuePod& date, i64 months, const NUdf::IDateBuilder& builder) { auto result = date; auto& storage = Reference<TResourceName>(result); @@ -600,12 +599,12 @@ TUnboxedValuePod DoAddMonths(const TUnboxedValuePod& date, i64 months, const NUd return result; } -template<const char* TResourceName> +template <const char* TResourceName> TUnboxedValuePod DoAddQuarters(const TUnboxedValuePod& date, i64 quarters, const NUdf::IDateBuilder& builder) { return DoAddMonths<TResourceName>(date, quarters * 3ll, builder); } -template<const char* TResourceName> +template <const char* TResourceName> TUnboxedValuePod DoAddYears(const TUnboxedValuePod& date, i64 years, const NUdf::IDateBuilder& builder) { auto result = date; auto& storage = Reference<TResourceName>(result); @@ -615,681 +614,671 @@ TUnboxedValuePod DoAddYears(const TUnboxedValuePod& date, i64 years, const NUdf: return result; } -#define ACCESSORS_POLY(field, type, wtype) \ - template<const char* TResourceName, typename TValue, typename TRetType \ - = std::conditional_t<TResourceName == TMResourceName, type, wtype>> \ - inline TRetType Get##field(const TValue& tm) { \ - return (TRetType)Reference<TResourceName>(tm).field; \ - } \ - template<const 
char* TResourceName, typename TValue, typename TArgType \ - = std::conditional_t<TResourceName == TMResourceName, type, wtype>> \ - inline void Set##field(TValue& tm, TArgType value) { \ - Reference<TResourceName>(tm).field = value; \ - } \ +#define ACCESSORS_POLY(field, type, wtype) \ + template <const char* TResourceName, typename TValue, typename TRetType = std::conditional_t<TResourceName == TMResourceName, type, wtype>> \ + inline TRetType Get##field(const TValue& tm) { \ + return (TRetType)Reference<TResourceName>(tm).field; \ + } \ + template <const char* TResourceName, typename TValue, typename TArgType = std::conditional_t<TResourceName == TMResourceName, type, wtype>> \ + inline void Set##field(TValue& tm, TArgType value) { \ + Reference<TResourceName>(tm).field = value; \ + } #define ACCESSORS(field, type) \ ACCESSORS_POLY(field, type, type) - ACCESSORS_POLY(Year, ui16, i32) - ACCESSORS(DayOfYear, ui16) - ACCESSORS(WeekOfYear, ui8) - ACCESSORS(WeekOfYearIso8601, ui8) - ACCESSORS(DayOfWeek, ui8) - ACCESSORS(Month, ui8) - ACCESSORS(Day, ui8) - ACCESSORS(Hour, ui8) - ACCESSORS(Minute, ui8) - ACCESSORS(Second, ui8) - ACCESSORS(Microsecond, ui32) - ACCESSORS(TimezoneId, ui16) +ACCESSORS_POLY(Year, ui16, i32) +ACCESSORS(DayOfYear, ui16) +ACCESSORS(WeekOfYear, ui8) +ACCESSORS(WeekOfYearIso8601, ui8) +ACCESSORS(DayOfWeek, ui8) +ACCESSORS(Month, ui8) +ACCESSORS(Day, ui8) +ACCESSORS(Hour, ui8) +ACCESSORS(Minute, ui8) +ACCESSORS(Second, ui8) +ACCESSORS(Microsecond, ui32) +ACCESSORS(TimezoneId, ui16) #undef ACCESSORS #undef ACCESSORS_POLY - template<const char* TResourceName> - inline bool ValidateYear(std::conditional_t<TResourceName == TMResourceName, ui16, i32> year) { - if constexpr (TResourceName == TMResourceName) { - return year >= NUdf::MIN_YEAR || year < NUdf::MAX_YEAR; - } else { - return year >= NUdf::MIN_YEAR32 || year < NUdf::MAX_YEAR32; - } +template <const char* TResourceName> +inline bool ValidateYear(std::conditional_t<TResourceName == 
TMResourceName, ui16, i32> year) { + if constexpr (TResourceName == TMResourceName) { + return year >= NUdf::MIN_YEAR || year < NUdf::MAX_YEAR; + } else { + return year >= NUdf::MIN_YEAR32 || year < NUdf::MAX_YEAR32; } +} - inline bool ValidateMonth(ui8 month) { - return month >= 1 && month <= 12; - } +inline bool ValidateMonth(ui8 month) { + return month >= 1 && month <= 12; +} - inline bool ValidateDay(ui8 day) { - return day >= 1 && day <= 31; - } +inline bool ValidateDay(ui8 day) { + return day >= 1 && day <= 31; +} - inline bool ValidateHour(ui8 hour) { - return hour < 24; - } +inline bool ValidateHour(ui8 hour) { + return hour < 24; +} - inline bool ValidateMinute(ui8 minute) { - return minute < 60; - } +inline bool ValidateMinute(ui8 minute) { + return minute < 60; +} - inline bool ValidateSecond(ui8 second) { - return second < 60; - } +inline bool ValidateSecond(ui8 second) { + return second < 60; +} - inline bool ValidateMicrosecond(ui32 microsecond) { - return microsecond < 1000000; - } +inline bool ValidateMicrosecond(ui32 microsecond) { + return microsecond < 1000000; +} - inline bool ValidateTimezoneId(ui16 timezoneId) { - const auto& zones = NTi::GetTimezones(); - return timezoneId < zones.size() && !zones[timezoneId].empty(); - } +inline bool ValidateTimezoneId(ui16 timezoneId) { + const auto& zones = NTi::GetTimezones(); + return timezoneId < zones.size() && !zones[timezoneId].empty(); +} - inline bool ValidateMonthShortName(const std::string_view& monthName, ui8& month) { - static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) { - int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size())); - if (cmp == 0) - return a.size() < b.size(); - return cmp < 0; - }; - static const std::map<std::string_view, ui8, decltype(cmp)> mp = { - {"jan", 1}, - {"feb", 2}, - {"mar", 3}, - {"apr", 4}, - {"may", 5}, - {"jun", 6}, - {"jul", 7}, - {"aug", 8}, - {"sep", 9}, - {"oct", 10}, - {"nov", 11}, - {"dec", 12} - }; - const 
auto& it = mp.find(monthName); - if (it != mp.end()) { - month = it -> second; - return true; +inline bool ValidateMonthShortName(const std::string_view& monthName, ui8& month) { + static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) { + int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size())); + if (cmp == 0) { + return a.size() < b.size(); } - return false; - } - - inline bool ValidateMonthFullName(const std::string_view& monthName, ui8& month) { - static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) { - int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size())); - if (cmp == 0) - return a.size() < b.size(); - return cmp < 0; - }; - static const std::map<std::string_view, ui8, decltype(cmp)> mp = { - {"january", 1}, - {"february", 2}, - {"march", 3}, - {"april", 4}, - {"may", 5}, - {"june", 6}, - {"july", 7}, - {"august", 8}, - {"september", 9}, - {"october", 10}, - {"november", 11}, - {"december", 12} - }; - const auto& it = mp.find(monthName); - if (it != mp.end()) { - month = it -> second; - return true; + return cmp < 0; + }; + static const std::map<std::string_view, ui8, decltype(cmp)> mp = { + {"jan", 1}, + {"feb", 2}, + {"mar", 3}, + {"apr", 4}, + {"may", 5}, + {"jun", 6}, + {"jul", 7}, + {"aug", 8}, + {"sep", 9}, + {"oct", 10}, + {"nov", 11}, + {"dec", 12}}; + const auto& it = mp.find(monthName); + if (it != mp.end()) { + month = it->second; + return true; + } + return false; +} + +inline bool ValidateMonthFullName(const std::string_view& monthName, ui8& month) { + static constexpr auto cmp = [](const std::string_view& a, const std::string_view& b) { + int cmp = strnicmp(a.data(), b.data(), std::min(a.size(), b.size())); + if (cmp == 0) { + return a.size() < b.size(); } - return false; + return cmp < 0; + }; + static const std::map<std::string_view, ui8, decltype(cmp)> mp = { + {"january", 1}, + {"february", 2}, + {"march", 3}, + {"april", 4}, + {"may", 5}, + {"june", 
6}, + {"july", 7}, + {"august", 8}, + {"september", 9}, + {"october", 10}, + {"november", 11}, + {"december", 12}}; + const auto& it = mp.find(monthName); + if (it != mp.end()) { + month = it->second; + return true; } + return false; +} - template<typename TType> - inline bool Validate(typename TDataType<TType>::TLayout arg); +template <typename TType> +inline bool Validate(typename TDataType<TType>::TLayout arg); - template<> - inline bool Validate<TTimestamp>(ui64 timestamp) { - return timestamp < MAX_TIMESTAMP; - } +template <> +inline bool Validate<TTimestamp>(ui64 timestamp) { + return timestamp < MAX_TIMESTAMP; +} - template<> - inline bool Validate<TTimestamp64>(i64 timestamp) { - return timestamp >= MIN_TIMESTAMP64 && timestamp <= MAX_TIMESTAMP64; - } +template <> +inline bool Validate<TTimestamp64>(i64 timestamp) { + return timestamp >= MIN_TIMESTAMP64 && timestamp <= MAX_TIMESTAMP64; +} - template<> - inline bool Validate<TInterval>(i64 interval) { - return interval > -i64(MAX_TIMESTAMP) && interval < i64(MAX_TIMESTAMP); - } +template <> +inline bool Validate<TInterval>(i64 interval) { + return interval > -i64(MAX_TIMESTAMP) && interval < i64(MAX_TIMESTAMP); +} - template<> - inline bool Validate<TInterval64>(i64 interval) { - return interval >= -MAX_INTERVAL64 && interval <= MAX_INTERVAL64; - } +template <> +inline bool Validate<TInterval64>(i64 interval) { + return interval >= -MAX_INTERVAL64 && interval <= MAX_INTERVAL64; +} - // Split +// Split - template<typename TUserDataType, bool Nullable> - using TSplitArgReader = std::conditional_t<TTzDataType<TUserDataType>::Result, - TTzDateBlockReader<TUserDataType, Nullable>, - TFixedSizeBlockReader<typename TDataType<TUserDataType>::TLayout, Nullable>>; +template <typename TUserDataType, bool Nullable> +using TSplitArgReader = std::conditional_t<TTzDataType<TUserDataType>::Result, + TTzDateBlockReader<TUserDataType, Nullable>, + TFixedSizeBlockReader<typename TDataType<TUserDataType>::TLayout, Nullable>>; - 
template<typename TUserDataType> - struct TSplitKernelExec : TUnaryKernelExec<TSplitKernelExec<TUserDataType>, TSplitArgReader<TUserDataType, false>, TResourceArrayBuilder<false>> { - static void Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& valueBuilder); +template <typename TUserDataType> +struct TSplitKernelExec: TUnaryKernelExec<TSplitKernelExec<TUserDataType>, TSplitArgReader<TUserDataType, false>, TResourceArrayBuilder<false>> { + static void Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& valueBuilder); - template<typename TSink> - static void Process(const IValueBuilder* valueBuilder, TBlockItem arg, const TSink& sink) { - try { - TBlockItem res {0}; - Split(arg, Reference<TMResourceName>(res), *valueBuilder); - sink(res); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << e.what()).c_str()); - } + template <typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem arg, const TSink& sink) { + try { + TBlockItem res{0}; + Split(arg, Reference<TMResourceName>(res), *valueBuilder); + sink(res); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << e.what()).c_str()); } - }; + } +}; - template <typename TUserDataType> - class TSplit : public TBoxedValue { - const TSourcePosition Pos_; +template <typename TUserDataType> +class TSplit: public TBoxedValue { + const TSourcePosition Pos_; - public: - explicit TSplit(TSourcePosition pos) - : Pos_(pos) - {} +public: + explicit TSplit(TSourcePosition pos) + : Pos_(pos) + { + } - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override; + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; - static bool DeclareSignature( - TStringRef name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) - { - const auto typeInfoHelper = builder.TypeInfoHelper(); + static bool DeclareSignature( + TStringRef name, + 
TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + const auto typeInfoHelper = builder.TypeInfoHelper(); - TTupleTypeInspector tuple(*typeInfoHelper, userType); - Y_ENSURE(tuple); - Y_ENSURE(tuple.GetElementsCount() > 0); - TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); - Y_ENSURE(argsTuple); + TTupleTypeInspector tuple(*typeInfoHelper, userType); + Y_ENSURE(tuple); + Y_ENSURE(tuple.GetElementsCount() > 0); + TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); + Y_ENSURE(argsTuple); - if (argsTuple.GetElementsCount() != 1) { - builder.SetError("Expected one argument"); - return true; - } - auto argType = argsTuple.GetElementType(0); + if (argsTuple.GetElementsCount() != 1) { + builder.SetError("Expected one argument"); + return true; + } + auto argType = argsTuple.GetElementType(0); - builder.UserType(userType); - builder.SupportsBlocks(); - builder.IsStrict(); + builder.UserType(userType); + builder.SupportsBlocks(); + builder.IsStrict(); - TBlockTypeInspector block(*typeInfoHelper, argType); - if (block) { - const auto* blockArgType = builder.Block(false)->Item<TUserDataType>().Build(); - builder.Args()->Add(blockArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap); - const auto* retType = builder.Resource(TMResourceName); - const auto* blockRetType = builder.Block(false)->Item(retType).Build(); - builder.Returns(blockRetType); + TBlockTypeInspector block(*typeInfoHelper, argType); + if (block) { + const auto* blockArgType = builder.Block(false)->Item<TUserDataType>().Build(); + builder.Args()->Add(blockArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap); + const auto* retType = builder.Resource(TMResourceName); + const auto* blockRetType = builder.Block(false)->Item(retType).Build(); + builder.Returns(blockRetType); - if (!typesOnly) { - builder.Implementation(new TSimpleArrowUdfImpl({blockArgType}, retType, block.IsScalar(), - TSplitKernelExec<TUserDataType>::Do, builder, 
TString(name), arrow::compute::NullHandling::COMPUTED_NO_PREALLOCATE)); - } + if (!typesOnly) { + builder.Implementation(new TSimpleArrowUdfImpl({blockArgType}, retType, block.IsScalar(), + TSplitKernelExec<TUserDataType>::Do, builder, TString(name), arrow::compute::NullHandling::COMPUTED_NO_PREALLOCATE)); + } + } else { + builder.Args()->Add<TUserDataType>().Flags(ICallablePayload::TArgumentFlags::AutoMap); + if constexpr (NUdf::TDataType<TUserDataType>::Features & NYql::NUdf::ExtDateType) { + builder.Returns(builder.Resource(TM64ResourceName)); } else { - builder.Args()->Add<TUserDataType>().Flags(ICallablePayload::TArgumentFlags::AutoMap); - if constexpr (NUdf::TDataType<TUserDataType>::Features & NYql::NUdf::ExtDateType) { - builder.Returns(builder.Resource(TM64ResourceName)); - } else { - builder.Returns(builder.Resource(TMResourceName)); - } - - if (!typesOnly) { - builder.Implementation(new TSplit<TUserDataType>(builder.GetSourcePosition())); - } + builder.Returns(builder.Resource(TMResourceName)); } - return true; + if (!typesOnly) { + builder.Implementation(new TSplit<TUserDataType>(builder.GetSourcePosition())); + } } - }; - template <> - void TSplitKernelExec<TDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { - storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>()); - } - - template <> - void TSplitKernelExec<TDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { - storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>()); + return true; } +}; - template <> - void TSplitKernelExec<TTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { - storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>()); - } +template <> +void TSplitKernelExec<TDate>::Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& builder) { + storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>()); +} - template <> - void 
TSplitKernelExec<TTzDate>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { - storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>(), arg.GetTimezoneId()); - } +template <> +void TSplitKernelExec<TDatetime>::Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& builder) { + storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>()); +} - template <> - void TSplitKernelExec<TTzDatetime>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { - storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>(), arg.GetTimezoneId()); - } +template <> +void TSplitKernelExec<TTimestamp>::Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& builder) { + storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>()); +} - template <> - void TSplitKernelExec<TTzTimestamp>::Split(TBlockItem arg, TTMStorage &storage, const IValueBuilder& builder) { - storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>(), arg.GetTimezoneId()); - } +template <> +void TSplitKernelExec<TTzDate>::Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& builder) { + storage.FromDate(builder.GetDateBuilder(), arg.Get<ui16>(), arg.GetTimezoneId()); +} - template <> - void TSplitKernelExec<TDate32>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { - ythrow yexception() << "Not implemented"; - } +template <> +void TSplitKernelExec<TTzDatetime>::Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& builder) { + storage.FromDatetime(builder.GetDateBuilder(), arg.Get<ui32>(), arg.GetTimezoneId()); +} - template <> - void TSplitKernelExec<TDatetime64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { - ythrow yexception() << "Not implemented"; - } +template <> +void TSplitKernelExec<TTzTimestamp>::Split(TBlockItem arg, TTMStorage& storage, const IValueBuilder& builder) { + storage.FromTimestamp(builder.GetDateBuilder(), arg.Get<ui64>(), arg.GetTimezoneId()); +} - template <> - void 
TSplitKernelExec<TTimestamp64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { - ythrow yexception() << "Not implemented"; - } +template <> +void TSplitKernelExec<TDate32>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; +} - template <> - void TSplitKernelExec<TTzDate32>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { - ythrow yexception() << "Not implemented"; - } +template <> +void TSplitKernelExec<TDatetime64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; +} - template <> - void TSplitKernelExec<TTzDatetime64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { - ythrow yexception() << "Not implemented"; - } +template <> +void TSplitKernelExec<TTimestamp64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; +} - template <> - void TSplitKernelExec<TTzTimestamp64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { - ythrow yexception() << "Not implemented"; - } +template <> +void TSplitKernelExec<TTzDate32>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; +} - template <> - TUnboxedValue TSplit<TDate>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +void TSplitKernelExec<TTzDatetime64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; +} - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TMResourceName>(result); - storage.FromDate(builder, args[0].Get<ui16>()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - } +template <> +void TSplitKernelExec<TTzTimestamp64>::Split(TBlockItem, TTMStorage&, const IValueBuilder&) { + ythrow yexception() << "Not implemented"; 
+} - template <> - TUnboxedValue TSplit<TDate32>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +TUnboxedValue TSplit<TDate>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - TUnboxedValuePod result(0); - auto& storage = Reference<TM64ResourceName>(result); - storage.FromDate32(valueBuilder->GetDateBuilder(), args[0].Get<i32>()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TMResourceName>(result); + storage.FromDate(builder, args[0].Get<ui16>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TDatetime>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +TUnboxedValue TSplit<TDate32>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TMResourceName>(result); - storage.FromDatetime(builder, args[0].Get<ui32>()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + TUnboxedValuePod result(0); + auto& storage = Reference<TM64ResourceName>(result); + storage.FromDate32(valueBuilder->GetDateBuilder(), args[0].Get<i32>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TDatetime64>::Run( - const IValueBuilder* valueBuilder, - const 
TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +TUnboxedValue TSplit<TDatetime>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - TUnboxedValuePod result(0); - auto& storage = Reference<TM64ResourceName>(result); - storage.FromDatetime64(valueBuilder->GetDateBuilder(), args[0].Get<i64>()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TMResourceName>(result); + storage.FromDatetime(builder, args[0].Get<ui32>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TTimestamp>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +TUnboxedValue TSplit<TDatetime64>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TMResourceName>(result); - storage.FromTimestamp(builder, args[0].Get<ui64>()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + TUnboxedValuePod result(0); + auto& storage = Reference<TM64ResourceName>(result); + storage.FromDatetime64(valueBuilder->GetDateBuilder(), args[0].Get<i64>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TTimestamp64>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> 
+TUnboxedValue TSplit<TTimestamp>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - TUnboxedValuePod result(0); - auto& storage = Reference<TM64ResourceName>(result); - storage.FromTimestamp64(valueBuilder->GetDateBuilder(), args[0].Get<i64>()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TMResourceName>(result); + storage.FromTimestamp(builder, args[0].Get<ui64>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TTzDate>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +TUnboxedValue TSplit<TTimestamp64>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TMResourceName>(result); - storage.FromDate(builder, args[0].Get<ui16>(), args[0].GetTimezoneId()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + TUnboxedValuePod result(0); + auto& storage = Reference<TM64ResourceName>(result); + storage.FromTimestamp64(valueBuilder->GetDateBuilder(), args[0].Get<i64>()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TTzDate32>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +TUnboxedValue TSplit<TTzDate>::Run( + const IValueBuilder* 
valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TM64ResourceName>(result); - storage.FromDate32(builder, args[0].Get<i32>(), args[0].GetTimezoneId()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TMResourceName>(result); + storage.FromDate(builder, args[0].Get<ui16>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} +template <> +TUnboxedValue TSplit<TTzDate32>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - template <> - TUnboxedValue TSplit<TTzDatetime>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); - - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TMResourceName>(result); - storage.FromDatetime(builder, args[0].Get<ui32>(), args[0].GetTimezoneId()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TM64ResourceName>(result); + storage.FromDate32(builder, args[0].Get<i32>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TTzDatetime64>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> 
+TUnboxedValue TSplit<TTzDatetime>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TM64ResourceName>(result); - storage.FromDatetime64(builder, args[0].Get<i64>(), args[0].GetTimezoneId()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TMResourceName>(result); + storage.FromDatetime(builder, args[0].Get<ui32>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} +template <> +TUnboxedValue TSplit<TTzDatetime64>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - template <> - TUnboxedValue TSplit<TTzTimestamp>::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); - - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TMResourceName>(result); - storage.FromTimestamp(builder, args[0].Get<ui64>(), args[0].GetTimezoneId()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TM64ResourceName>(result); + storage.FromDatetime64(builder, args[0].Get<i64>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - template <> - TUnboxedValue TSplit<TTzTimestamp64>::Run( - const IValueBuilder* valueBuilder, - const 
TUnboxedValuePod* args) const - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); +template <> +TUnboxedValue TSplit<TTzTimestamp>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - auto& builder = valueBuilder->GetDateBuilder(); - TUnboxedValuePod result(0); - auto& storage = Reference<TM64ResourceName>(result); - storage.FromTimestamp64(builder, args[0].Get<i64>(), args[0].GetTimezoneId()); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TMResourceName>(result); + storage.FromTimestamp(builder, args[0].Get<ui64>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } +} - // Make* +template <> +TUnboxedValue TSplit<TTzTimestamp64>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - template<typename TUserDataType, bool Nullable> - using TMakeResBuilder = std::conditional_t<TTzDataType<TUserDataType>::Result, - TTzDateArrayBuilder<TUserDataType, Nullable>, - TFixedSizeArrayBuilder<typename TDataType<TUserDataType>::TLayout, Nullable>>; + auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TM64ResourceName>(result); + storage.FromTimestamp64(builder, args[0].Get<i64>(), args[0].GetTimezoneId()); + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } +} - template<typename TUserDataType> - struct TMakeDateKernelExec : TUnaryKernelExec<TMakeDateKernelExec<TUserDataType>, TReaderTraits::TResource<false>, TMakeResBuilder<TUserDataType, false>> { - static TBlockItem Make(TTMStorage& storage, const IValueBuilder& 
valueBuilder); +// Make* - template<typename TSink> - static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { - auto& storage = Reference<TMResourceName>(item); - sink(TBlockItem(Make(storage, *valueBuilder))); - } - }; +template <typename TUserDataType, bool Nullable> +using TMakeResBuilder = std::conditional_t<TTzDataType<TUserDataType>::Result, + TTzDateArrayBuilder<TUserDataType, Nullable>, + TFixedSizeArrayBuilder<typename TDataType<TUserDataType>::TLayout, Nullable>>; - template<> TBlockItem TMakeDateKernelExec<TDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { - TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ false)); - return res; - } +template <typename TUserDataType> +struct TMakeDateKernelExec: TUnaryKernelExec<TMakeDateKernelExec<TUserDataType>, TReaderTraits::TResource<false>, TMakeResBuilder<TUserDataType, false>> { + static TBlockItem Make(TTMStorage& storage, const IValueBuilder& valueBuilder); - template<> TBlockItem TMakeDateKernelExec<TDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { - TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder())); - return res; + template <typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { + auto& storage = Reference<TMResourceName>(item); + sink(TBlockItem(Make(storage, *valueBuilder))); } +}; - template<> TBlockItem TMakeDateKernelExec<TTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { - TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder())); - return res; - } +template <> +TBlockItem TMakeDateKernelExec<TDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ false)); + return res; +} - template<> TBlockItem TMakeDateKernelExec<TTzDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { - TBlockItem 
res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ true)); - res.SetTimezoneId(storage.TimezoneId); - return res; - } +template <> +TBlockItem TMakeDateKernelExec<TDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder())); + return res; +} - template<> TBlockItem TMakeDateKernelExec<TTzDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { - TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder())); - res.SetTimezoneId(storage.TimezoneId); - return res; - } +template <> +TBlockItem TMakeDateKernelExec<TTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder())); + return res; +} - template<> TBlockItem TMakeDateKernelExec<TTzTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { - TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder())); - res.SetTimezoneId(storage.TimezoneId); - return res; - } +template <> +TBlockItem TMakeDateKernelExec<TTzDate>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToDate(valueBuilder.GetDateBuilder(), /*local*/ true)); + res.SetTimezoneId(storage.TimezoneId); + return res; +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDate, TDate(TAutoMap<TResource<TMResourceName>>)) { - auto& builder = valueBuilder->GetDateBuilder(); - auto& storage = Reference<TMResourceName>(args[0]); - return TUnboxedValuePod(storage.ToDate(builder, false)); - } - END_SIMPLE_ARROW_UDF(TMakeDate, TMakeDateKernelExec<TDate>::Do); +template <> +TBlockItem TMakeDateKernelExec<TTzDatetime>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToDatetime(valueBuilder.GetDateBuilder())); + res.SetTimezoneId(storage.TimezoneId); + return res; +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDatetime, TDatetime(TAutoMap<TResource<TMResourceName>>)) { - auto& builder = 
valueBuilder->GetDateBuilder(); - auto& storage = Reference<TMResourceName>(args[0]); - return TUnboxedValuePod(storage.ToDatetime(builder)); - } - END_SIMPLE_ARROW_UDF(TMakeDatetime, TMakeDateKernelExec<TDatetime>::Do); +template <> +TBlockItem TMakeDateKernelExec<TTzTimestamp>::Make(TTMStorage& storage, const IValueBuilder& valueBuilder) { + TBlockItem res(storage.ToTimestamp(valueBuilder.GetDateBuilder())); + res.SetTimezoneId(storage.TimezoneId); + return res; +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTimestamp, TTimestamp(TAutoMap<TResource<TMResourceName>>)) { - auto& builder = valueBuilder->GetDateBuilder(); - auto& storage = Reference<TMResourceName>(args[0]); - return TUnboxedValuePod(storage.ToTimestamp(builder)); - } - END_SIMPLE_ARROW_UDF(TMakeTimestamp, TMakeDateKernelExec<TTimestamp>::Do); +BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDate, TDate(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TMResourceName>(args[0]); + return TUnboxedValuePod(storage.ToDate(builder, false)); +} +END_SIMPLE_ARROW_UDF(TMakeDate, TMakeDateKernelExec<TDate>::Do); - BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDate, TTzDate(TAutoMap<TResource<TMResourceName>>)) { - auto& builder = valueBuilder->GetDateBuilder(); - auto& storage = Reference<TMResourceName>(args[0]); - try { - TUnboxedValuePod result(storage.ToDate(builder, true)); - result.SetTimezoneId(storage.TimezoneId); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << "Timestamp " - << storage.ToString() - << " cannot be casted to TzDate" - ).c_str()); - } - } - END_SIMPLE_ARROW_UDF(TMakeTzDate, TMakeDateKernelExec<TTzDate>::Do); +BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeDatetime, TDatetime(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TMResourceName>(args[0]); + return TUnboxedValuePod(storage.ToDatetime(builder)); +} +END_SIMPLE_ARROW_UDF(TMakeDatetime, 
TMakeDateKernelExec<TDatetime>::Do); - BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDatetime, TTzDatetime(TAutoMap<TResource<TMResourceName>>)) { - auto& builder = valueBuilder->GetDateBuilder(); - auto& storage = Reference<TMResourceName>(args[0]); - TUnboxedValuePod result(storage.ToDatetime(builder)); - result.SetTimezoneId(storage.TimezoneId); - return result; - } - END_SIMPLE_ARROW_UDF(TMakeTzDatetime, TMakeDateKernelExec<TTzDatetime>::Do); +BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTimestamp, TTimestamp(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TMResourceName>(args[0]); + return TUnboxedValuePod(storage.ToTimestamp(builder)); +} +END_SIMPLE_ARROW_UDF(TMakeTimestamp, TMakeDateKernelExec<TTimestamp>::Do); - BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzTimestamp, TTzTimestamp(TAutoMap<TResource<TMResourceName>>)) { - auto& builder = valueBuilder->GetDateBuilder(); - auto& storage = Reference<TMResourceName>(args[0]); - TUnboxedValuePod result(storage.ToTimestamp(builder)); +BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDate, TTzDate(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TMResourceName>(args[0]); + try { + TUnboxedValuePod result(storage.ToDate(builder, true)); result.SetTimezoneId(storage.TimezoneId); return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << "Timestamp " + << storage.ToString() + << " cannot be casted to TzDate") + .c_str()); } - END_SIMPLE_ARROW_UDF(TMakeTzTimestamp, TMakeDateKernelExec<TTzTimestamp>::Do); +} +END_SIMPLE_ARROW_UDF(TMakeTzDate, TMakeDateKernelExec<TTzDate>::Do); +BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzDatetime, TTzDatetime(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TMResourceName>(args[0]); + TUnboxedValuePod result(storage.ToDatetime(builder)); + result.SetTimezoneId(storage.TimezoneId); + return 
result; +} +END_SIMPLE_ARROW_UDF(TMakeTzDatetime, TMakeDateKernelExec<TTzDatetime>::Do); - SIMPLE_STRICT_UDF(TConvert, TResource<TM64ResourceName>(TAutoMap<TResource<TMResourceName>>)) { - Y_UNUSED(valueBuilder); - TUnboxedValuePod result(0); - auto& arg = Reference<TMResourceName>(args[0]); - auto& storage = Reference<TM64ResourceName>(result); - storage.From(arg); - return result; - } +BEGIN_SIMPLE_STRICT_ARROW_UDF(TMakeTzTimestamp, TTzTimestamp(TAutoMap<TResource<TMResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TMResourceName>(args[0]); + TUnboxedValuePod result(storage.ToTimestamp(builder)); + result.SetTimezoneId(storage.TimezoneId); + return result; +} +END_SIMPLE_ARROW_UDF(TMakeTzTimestamp, TMakeDateKernelExec<TTzTimestamp>::Do); - SIMPLE_STRICT_UDF(TMakeDate32, TDate32(TAutoMap<TResource<TM64ResourceName>>)) { - auto& storage = Reference<TM64ResourceName>(args[0]); - return TUnboxedValuePod(storage.ToDate32(valueBuilder->GetDateBuilder(), false)); - } +SIMPLE_STRICT_UDF(TConvert, TResource<TM64ResourceName>(TAutoMap<TResource<TMResourceName>>)) { + Y_UNUSED(valueBuilder); + TUnboxedValuePod result(0); + auto& arg = Reference<TMResourceName>(args[0]); + auto& storage = Reference<TM64ResourceName>(result); + storage.From(arg); + return result; +} - SIMPLE_STRICT_UDF(TMakeDatetime64, TDatetime64(TAutoMap<TResource<TM64ResourceName>>)) { - auto& storage = Reference<TM64ResourceName>(args[0]); - return TUnboxedValuePod(storage.ToDatetime64(valueBuilder->GetDateBuilder())); - } +SIMPLE_STRICT_UDF(TMakeDate32, TDate32(TAutoMap<TResource<TM64ResourceName>>)) { + auto& storage = Reference<TM64ResourceName>(args[0]); + return TUnboxedValuePod(storage.ToDate32(valueBuilder->GetDateBuilder(), false)); +} - SIMPLE_STRICT_UDF(TMakeTimestamp64, TTimestamp64(TAutoMap<TResource<TM64ResourceName>>)) { - auto& storage = Reference<TM64ResourceName>(args[0]); - return 
TUnboxedValuePod(storage.ToTimestamp64(valueBuilder->GetDateBuilder())); - } +SIMPLE_STRICT_UDF(TMakeDatetime64, TDatetime64(TAutoMap<TResource<TM64ResourceName>>)) { + auto& storage = Reference<TM64ResourceName>(args[0]); + return TUnboxedValuePod(storage.ToDatetime64(valueBuilder->GetDateBuilder())); +} - SIMPLE_STRICT_UDF(TMakeTzDate32, TTzDate32(TAutoMap<TResource<TM64ResourceName>>)) { - auto& builder = valueBuilder->GetDateBuilder(); - auto& storage = Reference<TM64ResourceName>(args[0]); - try { - TUnboxedValuePod result(storage.ToDate32(builder, true)); - result.SetTimezoneId(storage.TimezoneId); - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << "Timestamp " - << storage.ToString() - << " cannot be casted to TzDate32" - ).c_str()); - } - } +SIMPLE_STRICT_UDF(TMakeTimestamp64, TTimestamp64(TAutoMap<TResource<TM64ResourceName>>)) { + auto& storage = Reference<TM64ResourceName>(args[0]); + return TUnboxedValuePod(storage.ToTimestamp64(valueBuilder->GetDateBuilder())); +} - SIMPLE_STRICT_UDF(TMakeTzDatetime64, TTzDatetime64(TAutoMap<TResource<TM64ResourceName>>)) { - auto& storage = Reference<TM64ResourceName>(args[0]); - TUnboxedValuePod result(storage.ToDatetime64(valueBuilder->GetDateBuilder())); +SIMPLE_STRICT_UDF(TMakeTzDate32, TTzDate32(TAutoMap<TResource<TM64ResourceName>>)) { + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TM64ResourceName>(args[0]); + try { + TUnboxedValuePod result(storage.ToDate32(builder, true)); result.SetTimezoneId(storage.TimezoneId); return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << "Timestamp " + << storage.ToString() + << " cannot be casted to TzDate32") + .c_str()); } +} - SIMPLE_STRICT_UDF(TMakeTzTimestamp64, TTzTimestamp64(TAutoMap<TResource<TM64ResourceName>>)) { - auto& storage = Reference<TM64ResourceName>(args[0]); - TUnboxedValuePod result(storage.ToTimestamp64(valueBuilder->GetDateBuilder())); 
- result.SetTimezoneId(storage.TimezoneId); - return result; - } +SIMPLE_STRICT_UDF(TMakeTzDatetime64, TTzDatetime64(TAutoMap<TResource<TM64ResourceName>>)) { + auto& storage = Reference<TM64ResourceName>(args[0]); + TUnboxedValuePod result(storage.ToDatetime64(valueBuilder->GetDateBuilder())); + result.SetTimezoneId(storage.TimezoneId); + return result; +} + +SIMPLE_STRICT_UDF(TMakeTzTimestamp64, TTzTimestamp64(TAutoMap<TResource<TM64ResourceName>>)) { + auto& storage = Reference<TM64ResourceName>(args[0]); + TUnboxedValuePod result(storage.ToTimestamp64(valueBuilder->GetDateBuilder())); + result.SetTimezoneId(storage.TimezoneId); + return result; +} - // Get* +// Get* // #define GET_METHOD(field, type) \ // struct TGet##field##KernelExec : TUnaryKernelExec<TGet##field##KernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<type, false>> { \ @@ -1305,9 +1294,9 @@ TUnboxedValuePod DoAddYears(const TUnboxedValuePod& date, i64 years, const NUdf: // } \ // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGet##field, TGet##field##KernelExec::Do, arrow::compute::NullHandling::INTERSECTION); -template<const char* TUdfName, - typename TResultType, TResultType (*Accessor)(const TUnboxedValuePod&), - typename TResultWType, TResultWType (*WAccessor)(const TUnboxedValuePod&)> +template <const char* TUdfName, + typename TResultType, TResultType (*Accessor)(const TUnboxedValuePod&), + typename TResultWType, TResultWType (*WAccessor)(const TUnboxedValuePod&)> class TGetDateComponent: public ::NYql::NUdf::TBoxedValue { public: typedef bool TTypeAwareMarker; @@ -1393,9 +1382,10 @@ public: SetUnexpectedTagError(builder, resource.GetTag()); return true; } + private: - template<typename TResult, TResult (*Func)(const TUnboxedValuePod&)> - class TImpl : public TBoxedValue { + template <typename TResult, TResult (*Func)(const TUnboxedValuePod&)> + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const 
final { Y_UNUSED(valueBuilder); @@ -1404,7 +1394,7 @@ private: } }; - template<typename TResult, const char* TResourceName, TResult (*Func)(const TUnboxedValuePod&)> + template <typename TResult, const char* TResourceName, TResult (*Func)(const TUnboxedValuePod&)> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<TResult>(); builder.Args()->Add<TAutoMap<TResource<TResourceName>>>(); @@ -1416,7 +1406,7 @@ private: }; // TODO: Merge this with <TGetDateComponent> class. -template<const char* TUdfName, auto Accessor, auto WAccessor> +template <const char* TUdfName, auto Accessor, auto WAccessor> class TGetDateComponentName: public ::NYql::NUdf::TBoxedValue { public: typedef bool TTypeAwareMarker; @@ -1502,9 +1492,10 @@ public: SetUnexpectedTagError(builder, resource.GetTag()); return true; } + private: - template<auto Func> - class TImpl : public TBoxedValue { + template <auto Func> + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { EMPTY_RESULT_ON_EMPTY_ARG(0); @@ -1512,7 +1503,7 @@ private: } }; - template<const char* TResourceName, auto Func> + template <const char* TResourceName, auto Func> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<char*>(); builder.Args()->Add<TAutoMap<TResource<TResourceName>>>(); @@ -1523,115 +1514,111 @@ private: } }; - // template<typename TValue> - // TValue GetMonthNameValue(size_t idx) { - // static const std::array<TValue, 12U> monthNames = {{ - // TValue::Embedded(TStringRef::Of("January")), - // TValue::Embedded(TStringRef::Of("February")), - // TValue::Embedded(TStringRef::Of("March")), - // TValue::Embedded(TStringRef::Of("April")), - // TValue::Embedded(TStringRef::Of("May")), - // TValue::Embedded(TStringRef::Of("June")), - // TValue::Embedded(TStringRef::Of("July")), - // TValue::Embedded(TStringRef::Of("August")), - // 
TValue::Embedded(TStringRef::Of("September")), - // TValue::Embedded(TStringRef::Of("October")), - // TValue::Embedded(TStringRef::Of("November")), - // TValue::Embedded(TStringRef::Of("December")) - // }}; - // return monthNames.at(idx); - // } - - // struct TGetMonthNameKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> { - // template<typename TSink> - // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { - // Y_UNUSED(valueBuilder); - // sink(GetMonthNameValue<TBlockItem>(GetMonth(item) - 1U)); - // } - // }; - - // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetMonthName, char*(TAutoMap<TResource<TMResourceName>>)) { - // Y_UNUSED(valueBuilder); - // return GetMonthNameValue<TUnboxedValue>(GetMonth(*args) - 1U); - // } - // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetMonthName, TGetMonthNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); - -template<const char* TResourceName> +// template<typename TValue> +// TValue GetMonthNameValue(size_t idx) { +// static const std::array<TValue, 12U> monthNames = {{ +// TValue::Embedded(TStringRef::Of("January")), +// TValue::Embedded(TStringRef::Of("February")), +// TValue::Embedded(TStringRef::Of("March")), +// TValue::Embedded(TStringRef::Of("April")), +// TValue::Embedded(TStringRef::Of("May")), +// TValue::Embedded(TStringRef::Of("June")), +// TValue::Embedded(TStringRef::Of("July")), +// TValue::Embedded(TStringRef::Of("August")), +// TValue::Embedded(TStringRef::Of("September")), +// TValue::Embedded(TStringRef::Of("October")), +// TValue::Embedded(TStringRef::Of("November")), +// TValue::Embedded(TStringRef::Of("December")) +// }}; +// return monthNames.at(idx); +// } + +// struct TGetMonthNameKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> { +// template<typename TSink> +// static void Process(const 
IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { +// Y_UNUSED(valueBuilder); +// sink(GetMonthNameValue<TBlockItem>(GetMonth(item) - 1U)); +// } +// }; + +// BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetMonthName, char*(TAutoMap<TResource<TMResourceName>>)) { +// Y_UNUSED(valueBuilder); +// return GetMonthNameValue<TUnboxedValue>(GetMonth(*args) - 1U); +// } +// END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetMonthName, TGetMonthNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); + +template <const char* TResourceName> TUnboxedValue GetMonthName(const IValueBuilder* valueBuilder, const TUnboxedValuePod& arg) { Y_UNUSED(valueBuilder); - static const std::array<TUnboxedValue, 12U> monthNames = {{ - TUnboxedValuePod::Embedded(TStringRef::Of("January")), - TUnboxedValuePod::Embedded(TStringRef::Of("February")), - TUnboxedValuePod::Embedded(TStringRef::Of("March")), - TUnboxedValuePod::Embedded(TStringRef::Of("April")), - TUnboxedValuePod::Embedded(TStringRef::Of("May")), - TUnboxedValuePod::Embedded(TStringRef::Of("June")), - TUnboxedValuePod::Embedded(TStringRef::Of("July")), - TUnboxedValuePod::Embedded(TStringRef::Of("August")), - TUnboxedValuePod::Embedded(TStringRef::Of("September")), - TUnboxedValuePod::Embedded(TStringRef::Of("October")), - TUnboxedValuePod::Embedded(TStringRef::Of("November")), - TUnboxedValuePod::Embedded(TStringRef::Of("December")) - }}; + static const std::array<TUnboxedValue, 12U> monthNames = {{TUnboxedValuePod::Embedded(TStringRef::Of("January")), + TUnboxedValuePod::Embedded(TStringRef::Of("February")), + TUnboxedValuePod::Embedded(TStringRef::Of("March")), + TUnboxedValuePod::Embedded(TStringRef::Of("April")), + TUnboxedValuePod::Embedded(TStringRef::Of("May")), + TUnboxedValuePod::Embedded(TStringRef::Of("June")), + TUnboxedValuePod::Embedded(TStringRef::Of("July")), + TUnboxedValuePod::Embedded(TStringRef::Of("August")), + TUnboxedValuePod::Embedded(TStringRef::Of("September")), + 
TUnboxedValuePod::Embedded(TStringRef::Of("October")), + TUnboxedValuePod::Embedded(TStringRef::Of("November")), + TUnboxedValuePod::Embedded(TStringRef::Of("December"))}}; return monthNames.at(GetMonth<TResourceName>(arg) - 1U); } - // struct TGetDayOfMonthKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<ui8, false>> { - // template<typename TSink> - // static void Process(TBlockItem item, const TSink& sink) { - // sink(GetDay(item)); - // } - // }; +// struct TGetDayOfMonthKernelExec : TUnaryKernelExec<TGetMonthNameKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<ui8, false>> { +// template<typename TSink> +// static void Process(TBlockItem item, const TSink& sink) { +// sink(GetDay(item)); +// } +// }; - // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfMonth, ui8(TAutoMap<TResource<TMResourceName>>)) { - // Y_UNUSED(valueBuilder); - // return TUnboxedValuePod(GetDay(args[0])); - // } - // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfMonth, TGetDayOfMonthKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); +// BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfMonth, ui8(TAutoMap<TResource<TMResourceName>>)) { +// Y_UNUSED(valueBuilder); +// return TUnboxedValuePod(GetDay(args[0])); +// } +// END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfMonth, TGetDayOfMonthKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); -template<const char* TResourceName> +template <const char* TResourceName> TUnboxedValue GetDayOfWeekName(const IValueBuilder* valueBuilder, const TUnboxedValuePod& arg) { Y_UNUSED(valueBuilder); - static const std::array<TUnboxedValue, 7U> dayNames = {{ - TUnboxedValuePod::Embedded(TStringRef::Of("Monday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Tuesday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Wednesday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Thursday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Friday")), - 
TUnboxedValuePod::Embedded(TStringRef::Of("Saturday")), - TUnboxedValuePod::Embedded(TStringRef::Of("Sunday")) - }}; + static const std::array<TUnboxedValue, 7U> dayNames = {{TUnboxedValuePod::Embedded(TStringRef::Of("Monday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Tuesday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Wednesday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Thursday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Friday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Saturday")), + TUnboxedValuePod::Embedded(TStringRef::Of("Sunday"))}}; return dayNames.at(GetDayOfWeek<TResourceName>(arg) - 1U); } - // struct TGetDayOfWeekNameKernelExec : TUnaryKernelExec<TGetDayOfWeekNameKernelExec, TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> { - // template<typename TSink> - // static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { - // Y_UNUSED(valueBuilder); - // sink(GetDayNameValue<TBlockItem>(GetDayOfWeek(item) - 1U)); - // } - // }; - - // BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfWeekName, char*(TAutoMap<TResource<TMResourceName>>)) { - // Y_UNUSED(valueBuilder); - // return GetDayNameValue<TUnboxedValuePod>(GetDayOfWeek(*args) - 1U); - // } - // END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfWeekName, TGetDayOfWeekNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); - - struct TTGetTimezoneNameKernelExec : TUnaryKernelExec<TTGetTimezoneNameKernelExec, TReaderTraits::TResource<false>, TStringArrayBuilder<arrow::BinaryType, false>> { - template<typename TSink> - static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { - Y_UNUSED(valueBuilder); - auto timezoneId = GetTimezoneId<TMResourceName>(item); - if (timezoneId >= NTi::GetTimezones().size()) { - sink(TBlockItem{}); - } else { - sink(TBlockItem{NTi::GetTimezones()[timezoneId]}); - } +// struct TGetDayOfWeekNameKernelExec : TUnaryKernelExec<TGetDayOfWeekNameKernelExec, 
TReaderTraits::TResource<true>, TStringArrayBuilder<arrow::StringType, false>> { +// template<typename TSink> +// static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { +// Y_UNUSED(valueBuilder); +// sink(GetDayNameValue<TBlockItem>(GetDayOfWeek(item) - 1U)); +// } +// }; + +// BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetDayOfWeekName, char*(TAutoMap<TResource<TMResourceName>>)) { +// Y_UNUSED(valueBuilder); +// return GetDayNameValue<TUnboxedValuePod>(GetDayOfWeek(*args) - 1U); +// } +// END_SIMPLE_ARROW_UDF_WITH_NULL_HANDLING(TGetDayOfWeekName, TGetDayOfWeekNameKernelExec::Do, arrow::compute::NullHandling::INTERSECTION); + +struct TTGetTimezoneNameKernelExec: TUnaryKernelExec<TTGetTimezoneNameKernelExec, TReaderTraits::TResource<false>, TStringArrayBuilder<arrow::BinaryType, false>> { + template <typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { + Y_UNUSED(valueBuilder); + auto timezoneId = GetTimezoneId<TMResourceName>(item); + if (timezoneId >= NTi::GetTimezones().size()) { + sink(TBlockItem{}); + } else { + sink(TBlockItem{NTi::GetTimezones()[timezoneId]}); } - }; + } +}; -template<const char* TResourceName> +template <const char* TResourceName> TUnboxedValue GetTimezoneName(const IValueBuilder* valueBuilder, const TUnboxedValuePod& arg) { const ui16 tzId = GetTimezoneId<TResourceName>(arg); const auto& tzNames = NTi::GetTimezones(); @@ -1641,212 +1628,202 @@ TUnboxedValue GetTimezoneName(const IValueBuilder* valueBuilder, const TUnboxedV return valueBuilder->NewString(tzNames[tzId]); } - // Update +// Update - class TUpdate : public TBoxedValue { - public: - typedef bool TTypeAwareMarker; - static const TStringRef& Name() { - static auto name = TStringRef::Of("Update"); - return name; - } - - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) - { - if (Name() != name) { - return false; - } - - 
if (!userType) { - // XXX: Function became polymorphic when overload for - // wide resources was implemented. Hence, to make it - // backward compatible with previous versions, the - // absence of the userType is considered as using the - // old version (i.e. without type awareness) that - // provides implementation only for narrow dates. - BuildSignature<TMResourceName>(builder, typesOnly); - return true; - } +class TUpdate: public TBoxedValue { +public: + typedef bool TTypeAwareMarker; + static const TStringRef& Name() { + static auto name = TStringRef::Of("Update"); + return name; + } - builder.UserType(userType); + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + if (Name() != name) { + return false; + } - const auto typeInfoHelper = builder.TypeInfoHelper(); - TTupleTypeInspector tuple(*typeInfoHelper, userType); - Y_ENSURE(tuple, "Tuple with args and options tuples expected"); - Y_ENSURE(tuple.GetElementsCount() > 0, - "Tuple has to contain positional arguments"); + if (!userType) { + // XXX: Function became polymorphic when overload for + // wide resources was implemented. Hence, to make it + // backward compatible with previous versions, the + // absence of the userType is considered as using the + // old version (i.e. without type awareness) that + // provides implementation only for narrow dates. 
+ BuildSignature<TMResourceName>(builder, typesOnly); + return true; + } - TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); - Y_ENSURE(argsTuple, "Tuple with args expected"); - if (argsTuple.GetElementsCount() == 0) { - builder.SetError("At least one argument expected"); - return true; - } + builder.UserType(userType); - auto argType = argsTuple.GetElementType(0); + const auto typeInfoHelper = builder.TypeInfoHelper(); + TTupleTypeInspector tuple(*typeInfoHelper, userType); + Y_ENSURE(tuple, "Tuple with args and options tuples expected"); + Y_ENSURE(tuple.GetElementsCount() > 0, + "Tuple has to contain positional arguments"); - if (const auto optType = TOptionalTypeInspector(*typeInfoHelper, argType)) { - argType = optType.GetItemType(); - } + TTupleTypeInspector argsTuple(*typeInfoHelper, tuple.GetElementType(0)); + Y_ENSURE(argsTuple, "Tuple with args expected"); + if (argsTuple.GetElementsCount() == 0) { + builder.SetError("At least one argument expected"); + return true; + } - TResourceTypeInspector resource(*typeInfoHelper, argType); - if (!resource) { - TDataTypeInspector data(*typeInfoHelper, argType); - if (!data) { - SetResourceExpectedError(builder, typeInfoHelper, argType); - return true; - } + auto argType = argsTuple.GetElementType(0); - const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; - if (features & NUdf::ExtDateType) { - BuildSignature<TM64ResourceName>(builder, typesOnly); - return true; - } - if (features & (NUdf::DateType | NUdf::TzDateType)) { - BuildSignature<TMResourceName>(builder, typesOnly); - return true; - } + if (const auto optType = TOptionalTypeInspector(*typeInfoHelper, argType)) { + argType = optType.GetItemType(); + } + TResourceTypeInspector resource(*typeInfoHelper, argType); + if (!resource) { + TDataTypeInspector data(*typeInfoHelper, argType); + if (!data) { SetResourceExpectedError(builder, typeInfoHelper, argType); return true; } - if (resource.GetTag() == 
TStringRef::Of(TM64ResourceName)) { + const auto features = NUdf::GetDataTypeInfo(NUdf::GetDataSlot(data.GetTypeId())).Features; + if (features & NUdf::ExtDateType) { BuildSignature<TM64ResourceName>(builder, typesOnly); return true; } - - if (resource.GetTag() == TStringRef::Of(TMResourceName)) { + if (features & (NUdf::DateType | NUdf::TzDateType)) { BuildSignature<TMResourceName>(builder, typesOnly); return true; } - SetUnexpectedTagError(builder, resource.GetTag()); + SetResourceExpectedError(builder, typeInfoHelper, argType); return true; } - private: - template<const char* TResourceName> - class TImpl : public TBoxedValue { - public: - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); - auto result = args[0]; - - if (args[1]) { - auto year = args[1].Get<std::conditional_t<TResourceName == TMResourceName, ui16, i32>>(); - if (!ValidateYear<TResourceName>(year)) { - return TUnboxedValuePod(); - } - SetYear<TResourceName>(result, year); - } - if (args[2]) { - auto month = args[2].Get<ui8>(); - if (!ValidateMonth(month)) { - return TUnboxedValuePod(); - } - SetMonth<TResourceName>(result, month); + + if (resource.GetTag() == TStringRef::Of(TM64ResourceName)) { + BuildSignature<TM64ResourceName>(builder, typesOnly); + return true; + } + + if (resource.GetTag() == TStringRef::Of(TMResourceName)) { + BuildSignature<TMResourceName>(builder, typesOnly); + return true; + } + + SetUnexpectedTagError(builder, resource.GetTag()); + return true; + } + +private: + template <const char* TResourceName> + class TImpl: public TBoxedValue { + public: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + auto result = args[0]; + + if (args[1]) { + auto year = args[1].Get<std::conditional_t<TResourceName == TMResourceName, ui16, i32>>(); + if (!ValidateYear<TResourceName>(year)) { + return TUnboxedValuePod(); 
} - if (args[3]) { - auto day = args[3].Get<ui8>(); - if (!ValidateDay(day)) { - return TUnboxedValuePod(); - } - SetDay<TResourceName>(result, day); + SetYear<TResourceName>(result, year); + } + if (args[2]) { + auto month = args[2].Get<ui8>(); + if (!ValidateMonth(month)) { + return TUnboxedValuePod(); } - if (args[4]) { - auto hour = args[4].Get<ui8>(); - if (!ValidateHour(hour)) { - return TUnboxedValuePod(); - } - SetHour<TResourceName>(result, hour); + SetMonth<TResourceName>(result, month); + } + if (args[3]) { + auto day = args[3].Get<ui8>(); + if (!ValidateDay(day)) { + return TUnboxedValuePod(); } - if (args[5]) { - auto minute = args[5].Get<ui8>(); - if (!ValidateMinute(minute)) { - return TUnboxedValuePod(); - } - SetMinute<TResourceName>(result, minute); + SetDay<TResourceName>(result, day); + } + if (args[4]) { + auto hour = args[4].Get<ui8>(); + if (!ValidateHour(hour)) { + return TUnboxedValuePod(); } - if (args[6]) { - auto second = args[6].Get<ui8>(); - if (!ValidateSecond(second)) { - return TUnboxedValuePod(); - } - SetSecond<TResourceName>(result, second); + SetHour<TResourceName>(result, hour); + } + if (args[5]) { + auto minute = args[5].Get<ui8>(); + if (!ValidateMinute(minute)) { + return TUnboxedValuePod(); } - if (args[7]) { - auto microsecond = args[7].Get<ui32>(); - if (!ValidateMicrosecond(microsecond)) { - return TUnboxedValuePod(); - } - SetMicrosecond<TResourceName>(result, microsecond); + SetMinute<TResourceName>(result, minute); + } + if (args[6]) { + auto second = args[6].Get<ui8>(); + if (!ValidateSecond(second)) { + return TUnboxedValuePod(); } - if (args[8]) { - auto timezoneId = args[8].Get<ui16>(); - if (!ValidateTimezoneId(timezoneId)) { - return TUnboxedValuePod(); - } - SetTimezoneId<TResourceName>(result, timezoneId); + SetSecond<TResourceName>(result, second); + } + if (args[7]) { + auto microsecond = args[7].Get<ui32>(); + if (!ValidateMicrosecond(microsecond)) { + return TUnboxedValuePod(); } - - auto& builder = 
valueBuilder->GetDateBuilder(); - auto& storage = Reference<TResourceName>(result); - if (!storage.Validate(builder)) { + SetMicrosecond<TResourceName>(result, microsecond); + } + if (args[8]) { + auto timezoneId = args[8].Get<ui16>(); + if (!ValidateTimezoneId(timezoneId)) { return TUnboxedValuePod(); } - return result; - } catch (const std::exception& e) { - TStringBuilder sb; - sb << CurrentExceptionMessage(); - sb << Endl << "[" << TStringBuf(Name()) << "]" ; - UdfTerminate(sb.c_str()); + SetTimezoneId<TResourceName>(result, timezoneId); } - } - }; - - template<const char* TResourceName> - static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { - builder.Returns<TOptional<TResource<TResourceName>>>(); - builder.OptionalArgs(8).Args()->Add<TAutoMap<TResource<TResourceName>>>() - .template Add<TOptional<std::conditional_t<TResourceName == TMResourceName, ui16, i32>>>().Name("Year") - .template Add<TOptional<ui8>>().Name("Month") - .template Add<TOptional<ui8>>().Name("Day") - .template Add<TOptional<ui8>>().Name("Hour") - .template Add<TOptional<ui8>>().Name("Minute") - .template Add<TOptional<ui8>>().Name("Second") - .template Add<TOptional<ui32>>().Name("Microsecond") - .template Add<TOptional<ui16>>().Name("TimezoneId"); - builder.IsStrict(); - if (!typesOnly) { - builder.Implementation(new TImpl<TResourceName>()); + auto& builder = valueBuilder->GetDateBuilder(); + auto& storage = Reference<TResourceName>(result); + if (!storage.Validate(builder)) { + return TUnboxedValuePod(); + } + return result; + } catch (const std::exception& e) { + TStringBuilder sb; + sb << CurrentExceptionMessage(); + sb << Endl << "[" << TStringBuf(Name()) << "]"; + UdfTerminate(sb.c_str()); } - } + } }; - // From* - - template<typename TInput, typename TOutput, i64 UsecMultiplier> - inline TUnboxedValuePod TFromConverter(TInput arg) { - using TLayout = TDataType<TOutput>::TLayout; - const TLayout usec = TLayout(arg) * UsecMultiplier; - return 
Validate<TOutput>(usec) ? TUnboxedValuePod(usec) : TUnboxedValuePod(); + template <const char* TResourceName> + static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { + builder.Returns<TOptional<TResource<TResourceName>>>(); + builder.OptionalArgs(8).Args()->Add<TAutoMap<TResource<TResourceName>>>().template Add<TOptional<std::conditional_t<TResourceName == TMResourceName, ui16, i32>>>().Name("Year").template Add<TOptional<ui8>>().Name("Month").template Add<TOptional<ui8>>().Name("Day").template Add<TOptional<ui8>>().Name("Hour").template Add<TOptional<ui8>>().Name("Minute").template Add<TOptional<ui8>>().Name("Second").template Add<TOptional<ui32>>().Name("Microsecond").template Add<TOptional<ui16>>().Name("TimezoneId"); + builder.IsStrict(); + if (!typesOnly) { + builder.Implementation(new TImpl<TResourceName>()); + } } +}; +// From* - template<typename TInput, typename TOutput, i64 UsecMultiplier> - using TFromConverterKernel = TUnaryUnsafeFixedSizeFilterKernel<TInput, - typename TDataType<TOutput>::TLayout, [] (TInput arg) { - using TLayout = TDataType<TOutput>::TLayout; - const TLayout usec = TLayout(arg) * UsecMultiplier; - return std::make_pair(usec, Validate<TOutput>(usec)); - }>; +template <typename TInput, typename TOutput, i64 UsecMultiplier> +inline TUnboxedValuePod TFromConverter(TInput arg) { + using TLayout = TDataType<TOutput>::TLayout; + const TLayout usec = TLayout(arg) * UsecMultiplier; + return Validate<TOutput>(usec) ? 
TUnboxedValuePod(usec) : TUnboxedValuePod(); +} +template <typename TInput, typename TOutput, i64 UsecMultiplier> +using TFromConverterKernel = TUnaryUnsafeFixedSizeFilterKernel<TInput, + typename TDataType<TOutput>::TLayout, [](TInput arg) { + using TLayout = TDataType<TOutput>::TLayout; + const TLayout usec = TLayout(arg) * UsecMultiplier; + return std::make_pair(usec, Validate<TOutput>(usec)); + }>; #define DATETIME_FROM_CONVERTER_UDF(name, retType, argType, usecMultiplier) \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##name, TOptional<retType>(TAutoMap<argType>)) { \ @@ -1856,38 +1833,38 @@ TUnboxedValue GetTimezoneName(const IValueBuilder* valueBuilder, const TUnboxedV \ END_SIMPLE_ARROW_UDF(T##name, (TFromConverterKernel<argType, retType, usecMultiplier>::Do)) -#define DATETIME_FROM_CONVERTER_UDF_N(space, name, retType, argType, usecMultiplier) \ - namespace N##space { \ - DATETIME_FROM_CONVERTER_UDF(name, retType, argType, usecMultiplier); \ +#define DATETIME_FROM_CONVERTER_UDF_N(space, name, retType, argType, usecMultiplier) \ + namespace N##space { \ + DATETIME_FROM_CONVERTER_UDF(name, retType, argType, usecMultiplier); \ } - DATETIME_FROM_CONVERTER_UDF(FromSeconds, TTimestamp, ui32, UsecondsInSecond); - DATETIME_FROM_CONVERTER_UDF(FromMilliseconds, TTimestamp, ui64, UsecondsInMilliseconds); - DATETIME_FROM_CONVERTER_UDF(FromMicroseconds, TTimestamp, ui64, 1); +DATETIME_FROM_CONVERTER_UDF(FromSeconds, TTimestamp, ui32, UsecondsInSecond); +DATETIME_FROM_CONVERTER_UDF(FromMilliseconds, TTimestamp, ui64, UsecondsInMilliseconds); +DATETIME_FROM_CONVERTER_UDF(FromMicroseconds, TTimestamp, ui64, 1); - DATETIME_FROM_CONVERTER_UDF(FromSeconds64, TTimestamp64, i64, UsecondsInSecond); - DATETIME_FROM_CONVERTER_UDF(FromMilliseconds64, TTimestamp64, i64, UsecondsInMilliseconds); - DATETIME_FROM_CONVERTER_UDF(FromMicroseconds64, TTimestamp64, i64, 1); +DATETIME_FROM_CONVERTER_UDF(FromSeconds64, TTimestamp64, i64, UsecondsInSecond); +DATETIME_FROM_CONVERTER_UDF(FromMilliseconds64, 
TTimestamp64, i64, UsecondsInMilliseconds); +DATETIME_FROM_CONVERTER_UDF(FromMicroseconds64, TTimestamp64, i64, 1); - DATETIME_FROM_CONVERTER_UDF(IntervalFromDays, TInterval, i32, UsecondsInDay); - DATETIME_FROM_CONVERTER_UDF(IntervalFromHours, TInterval, i32, UsecondsInHour); - DATETIME_FROM_CONVERTER_UDF(IntervalFromMinutes, TInterval, i32, UsecondsInMinute); - DATETIME_FROM_CONVERTER_UDF_N(Legacy, IntervalFromSeconds, TInterval, i32, UsecondsInSecond); - DATETIME_FROM_CONVERTER_UDF_N(Actual, IntervalFromSeconds, TInterval, i64, UsecondsInSecond); - DATETIME_FROM_CONVERTER_UDF(IntervalFromMilliseconds, TInterval, i64, UsecondsInMilliseconds); - DATETIME_FROM_CONVERTER_UDF(IntervalFromMicroseconds, TInterval, i64, 1); +DATETIME_FROM_CONVERTER_UDF(IntervalFromDays, TInterval, i32, UsecondsInDay); +DATETIME_FROM_CONVERTER_UDF(IntervalFromHours, TInterval, i32, UsecondsInHour); +DATETIME_FROM_CONVERTER_UDF(IntervalFromMinutes, TInterval, i32, UsecondsInMinute); +DATETIME_FROM_CONVERTER_UDF_N(Legacy, IntervalFromSeconds, TInterval, i32, UsecondsInSecond); +DATETIME_FROM_CONVERTER_UDF_N(Actual, IntervalFromSeconds, TInterval, i64, UsecondsInSecond); +DATETIME_FROM_CONVERTER_UDF(IntervalFromMilliseconds, TInterval, i64, UsecondsInMilliseconds); +DATETIME_FROM_CONVERTER_UDF(IntervalFromMicroseconds, TInterval, i64, 1); - DATETIME_FROM_CONVERTER_UDF(Interval64FromDays, TInterval64, i32, UsecondsInDay); - DATETIME_FROM_CONVERTER_UDF(Interval64FromHours, TInterval64, i64, UsecondsInHour); - DATETIME_FROM_CONVERTER_UDF(Interval64FromMinutes, TInterval64, i64, UsecondsInMinute); - DATETIME_FROM_CONVERTER_UDF(Interval64FromSeconds, TInterval64, i64, UsecondsInSecond); - DATETIME_FROM_CONVERTER_UDF(Interval64FromMilliseconds, TInterval64, i64, UsecondsInMilliseconds); - DATETIME_FROM_CONVERTER_UDF(Interval64FromMicroseconds, TInterval64, i64, 1); +DATETIME_FROM_CONVERTER_UDF(Interval64FromDays, TInterval64, i32, UsecondsInDay); +DATETIME_FROM_CONVERTER_UDF(Interval64FromHours, 
TInterval64, i64, UsecondsInHour); +DATETIME_FROM_CONVERTER_UDF(Interval64FromMinutes, TInterval64, i64, UsecondsInMinute); +DATETIME_FROM_CONVERTER_UDF(Interval64FromSeconds, TInterval64, i64, UsecondsInSecond); +DATETIME_FROM_CONVERTER_UDF(Interval64FromMilliseconds, TInterval64, i64, UsecondsInMilliseconds); +DATETIME_FROM_CONVERTER_UDF(Interval64FromMicroseconds, TInterval64, i64, 1); - // To* +// To* -template<const char* TUdfName, typename TResult, typename TWResult, i64 ScaleSeconds> -class TToConverter : public TBoxedValue { +template <const char* TUdfName, typename TResult, typename TWResult, i64 ScaleSeconds> +class TToConverter: public TBoxedValue { public: typedef bool TTypeAwareMarker; static const ::NYql::NUdf::TStringRef& Name() { @@ -1955,8 +1932,9 @@ public: SetIntervalExpectedError(builder, typeInfoHelper, argType); return true; } + private: - class TImpl : public TBoxedValue { + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { try { @@ -1965,14 +1943,13 @@ private: } catch (const std::exception& e) { TStringBuilder sb; sb << CurrentExceptionMessage(); - sb << Endl << "[" << TStringBuf(Name()) << "]" ; + sb << Endl << "[" << TStringBuf(Name()) << "]"; UdfTerminate(sb.c_str()); } } - }; - template<typename TInput, typename TOutput> + template <typename TInput, typename TOutput> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<TOutput>(); builder.Args()->Add<TAutoMap<TInput>>(); @@ -1983,34 +1960,33 @@ private: } }; - // StartOf* - - template<auto Core> - struct TStartOfKernelExec : TUnaryKernelExec<TStartOfKernelExec<Core>, TResourceBlockReader<false>, TResourceArrayBuilder<true>> { - template<typename TSink> - static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { - if (auto res = Core(Reference<TMResourceName>(item), *valueBuilder)) { - Reference<TMResourceName>(item) 
= res.GetRef(); - sink(item); - } else { - sink(TBlockItem{}); - } +// StartOf* +template <auto Core> +struct TStartOfKernelExec: TUnaryKernelExec<TStartOfKernelExec<Core>, TResourceBlockReader<false>, TResourceArrayBuilder<true>> { + template <typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { + if (auto res = Core(Reference<TMResourceName>(item), *valueBuilder)) { + Reference<TMResourceName>(item) = res.GetRef(); + sink(item); + } else { + sink(TBlockItem{}); } - }; + } +}; - template<const char* TResourceName, auto Core> - TUnboxedValue SimpleDatetimeToDatetimeUdf(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) { - auto result = args[0]; - auto& storage = Reference<TResourceName>(result); - if (auto res = Core(storage, *valueBuilder)) { - storage = res.GetRef(); - return result; - } - return TUnboxedValuePod{}; +template <const char* TResourceName, auto Core> +TUnboxedValue SimpleDatetimeToDatetimeUdf(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) { + auto result = args[0]; + auto& storage = Reference<TResourceName>(result); + if (auto res = Core(storage, *valueBuilder)) { + storage = res.GetRef(); + return result; } + return TUnboxedValuePod{}; +} -template<const char* TUdfName, auto Boundary, auto WBoundary> +template <const char* TUdfName, auto Boundary, auto WBoundary> class TBoundaryOf: public ::NYql::NUdf::TBoxedValue { public: typedef bool TTypeAwareMarker; @@ -2096,23 +2072,24 @@ public: SetUnexpectedTagError(builder, resource.GetTag()); return true; } + private: - template<auto Func> - class TImpl : public TBoxedValue { + template <auto Func> + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { try { return Func(valueBuilder, args); } catch (const std::exception&) { - TStringBuilder sb; - sb << CurrentExceptionMessage(); - sb << Endl << "[" << TStringBuf(Name()) << "]" ; - 
UdfTerminate(sb.c_str()); + TStringBuilder sb; + sb << CurrentExceptionMessage(); + sb << Endl << "[" << TStringBuf(Name()) << "]"; + UdfTerminate(sb.c_str()); } } }; - template< const char* TResourceName, auto Func> + template <const char* TResourceName, auto Func> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<TOptional<TResource<TResourceName>>>(); builder.Args()->Add<TAutoMap<TResource<TResourceName>>>(); @@ -2123,222 +2100,222 @@ private: } }; - template<typename TStorage> - void SetStartOfDay(TStorage& storage) { - storage.Hour = 0; - storage.Minute = 0; - storage.Second = 0; - storage.Microsecond = 0; - } +template <typename TStorage> +void SetStartOfDay(TStorage& storage) { + storage.Hour = 0; + storage.Minute = 0; + storage.Second = 0; + storage.Microsecond = 0; +} - template<typename TStorage> - void SetEndOfDay(TStorage& storage) { - storage.Hour = 23; - storage.Minute = 59; - storage.Second = 59; - storage.Microsecond = 999999; - } +template <typename TStorage> +void SetEndOfDay(TStorage& storage) { + storage.Hour = 23; + storage.Minute = 59; + storage.Second = 59; + storage.Microsecond = 999999; +} - template<typename TStorage> - TMaybe<TStorage> StartOfYear(TStorage storage, const IValueBuilder& valueBuilder) { - storage.Month = 1; - storage.Day = 1; - SetStartOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { - return {}; - } - return storage; +template <typename TStorage> +TMaybe<TStorage> StartOfYear(TStorage storage, const IValueBuilder& valueBuilder) { + storage.Month = 1; + storage.Day = 1; + SetStartOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> EndOfYear(TStorage storage, const IValueBuilder& valueBuilder) { - storage.Month = 12; - storage.Day = 31; - SetEndOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { - return {}; - } - return 
storage; +template <typename TStorage> +TMaybe<TStorage> EndOfYear(TStorage storage, const IValueBuilder& valueBuilder) { + storage.Month = 12; + storage.Day = 31; + SetEndOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> StartOfQuarter(TStorage storage, const IValueBuilder& valueBuilder) { - storage.Month = (storage.Month - 1) / 3 * 3 + 1; - storage.Day = 1; - SetStartOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { - return {}; - } - return storage; +template <typename TStorage> +TMaybe<TStorage> StartOfQuarter(TStorage storage, const IValueBuilder& valueBuilder) { + storage.Month = (storage.Month - 1) / 3 * 3 + 1; + storage.Day = 1; + SetStartOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> EndOfQuarter(TStorage storage, const IValueBuilder& valueBuilder) { - storage.Month = ((storage.Month - 1) / 3 + 1) * 3; - storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year)); - SetEndOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { - return {}; - } - return storage; +template <typename TStorage> +TMaybe<TStorage> EndOfQuarter(TStorage storage, const IValueBuilder& valueBuilder) { + storage.Month = ((storage.Month - 1) / 3 + 1) * 3; + storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year)); + SetEndOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> StartOfMonth(TStorage storage, const IValueBuilder& valueBuilder) { - storage.Day = 1; - SetStartOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { - return {}; - } - return storage; +template <typename TStorage> +TMaybe<TStorage> StartOfMonth(TStorage storage, const 
IValueBuilder& valueBuilder) { + storage.Day = 1; + SetStartOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> EndOfMonth(TStorage storage, const IValueBuilder& valueBuilder) { - storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year)); - SetEndOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { - return {}; - } - return storage; +template <typename TStorage> +TMaybe<TStorage> EndOfMonth(TStorage storage, const IValueBuilder& valueBuilder) { + storage.Day = NMiniKQL::GetMonthLength(storage.Month, NMiniKQL::IsLeapYear(storage.Year)); + SetEndOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> StartOfWeek(TStorage storage, const IValueBuilder& valueBuilder) { - const ui32 shift = 86400u * (storage.DayOfWeek - 1u); - if constexpr (std::is_same_v<TStorage, TTMStorage>) { - if (shift > storage.ToDatetime(valueBuilder.GetDateBuilder())) { - return {}; - } - storage.FromDatetime(valueBuilder.GetDateBuilder(), storage.ToDatetime(valueBuilder.GetDateBuilder()) - shift, storage.TimezoneId); - } else { - if (shift > storage.ToDatetime64(valueBuilder.GetDateBuilder())) { - return {}; - } - storage.FromDatetime64(valueBuilder.GetDateBuilder(), storage.ToDatetime64(valueBuilder.GetDateBuilder()) - shift, storage.TimezoneId); - } - SetStartOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { +template <typename TStorage> +TMaybe<TStorage> StartOfWeek(TStorage storage, const IValueBuilder& valueBuilder) { + const ui32 shift = 86400u * (storage.DayOfWeek - 1u); + if constexpr (std::is_same_v<TStorage, TTMStorage>) { + if (shift > storage.ToDatetime(valueBuilder.GetDateBuilder())) { return {}; } - return storage; - } - - template<typename TStorage> - TMaybe<TStorage> EndOfWeek(TStorage storage, 
const IValueBuilder& valueBuilder) { - const ui32 shift = 86400u * (7u - storage.DayOfWeek); - if constexpr (std::is_same_v<TStorage, TTMStorage>) { - auto dt = storage.ToDatetime(valueBuilder.GetDateBuilder()); - if (NUdf::MAX_DATETIME - shift <= dt) { - return {}; - } - storage.FromDatetime(valueBuilder.GetDateBuilder(), dt + shift, storage.TimezoneId); - } else { - auto dt = storage.ToDatetime64(valueBuilder.GetDateBuilder()); - if (NUdf::MAX_DATETIME64 - shift <= dt) { - return {}; - } - storage.FromDatetime64(valueBuilder.GetDateBuilder(), dt + shift, storage.TimezoneId); - } - SetEndOfDay(storage); - if (!storage.Validate(valueBuilder.GetDateBuilder())) { + storage.FromDatetime(valueBuilder.GetDateBuilder(), storage.ToDatetime(valueBuilder.GetDateBuilder()) - shift, storage.TimezoneId); + } else { + if (shift > storage.ToDatetime64(valueBuilder.GetDateBuilder())) { return {}; } - return storage; + storage.FromDatetime64(valueBuilder.GetDateBuilder(), storage.ToDatetime64(valueBuilder.GetDateBuilder()) - shift, storage.TimezoneId); + } + SetStartOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> StartOfDay(TStorage storage, const IValueBuilder& valueBuilder) { - SetStartOfDay(storage); - auto& builder = valueBuilder.GetDateBuilder(); - if (!storage.Validate(builder)) { +template <typename TStorage> +TMaybe<TStorage> EndOfWeek(TStorage storage, const IValueBuilder& valueBuilder) { + const ui32 shift = 86400u * (7u - storage.DayOfWeek); + if constexpr (std::is_same_v<TStorage, TTMStorage>) { + auto dt = storage.ToDatetime(valueBuilder.GetDateBuilder()); + if (NUdf::MAX_DATETIME - shift <= dt) { return {}; } - return storage; - } - - template<typename TStorage> - TMaybe<TStorage> EndOfDay(TStorage storage, const IValueBuilder& valueBuilder) { - SetEndOfDay(storage); - auto& builder = valueBuilder.GetDateBuilder(); - if (!storage.Validate(builder)) { + 
storage.FromDatetime(valueBuilder.GetDateBuilder(), dt + shift, storage.TimezoneId); + } else { + auto dt = storage.ToDatetime64(valueBuilder.GetDateBuilder()); + if (NUdf::MAX_DATETIME64 - shift <= dt) { return {}; } - return storage; + storage.FromDatetime64(valueBuilder.GetDateBuilder(), dt + shift, storage.TimezoneId); + } + SetEndOfDay(storage); + if (!storage.Validate(valueBuilder.GetDateBuilder())) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> StartOf(TStorage storage, ui64 interval, const IValueBuilder& valueBuilder) { - if (interval >= 86400000000ull) { - // treat as StartOfDay - SetStartOfDay(storage); - } else { - auto current = storage.ToTimeOfDay(); - auto rounded = current / interval * interval; - storage.FromTimeOfDay(rounded); - } +template <typename TStorage> +TMaybe<TStorage> StartOfDay(TStorage storage, const IValueBuilder& valueBuilder) { + SetStartOfDay(storage); + auto& builder = valueBuilder.GetDateBuilder(); + if (!storage.Validate(builder)) { + return {}; + } + return storage; +} - auto& builder = valueBuilder.GetDateBuilder(); - if (!storage.Validate(builder)) { - return {}; - } - return storage; +template <typename TStorage> +TMaybe<TStorage> EndOfDay(TStorage storage, const IValueBuilder& valueBuilder) { + SetEndOfDay(storage); + auto& builder = valueBuilder.GetDateBuilder(); + if (!storage.Validate(builder)) { + return {}; } + return storage; +} - template<typename TStorage> - TMaybe<TStorage> EndOf(TStorage storage, ui64 interval, const IValueBuilder& valueBuilder) { - if (interval >= 86400000000ull) { - // treat as EndOfDay - SetEndOfDay(storage); - } else { - auto current = storage.ToTimeOfDay(); - auto rounded = current / interval * interval + interval - 1; - storage.FromTimeOfDay(rounded); - } +template <typename TStorage> +TMaybe<TStorage> StartOf(TStorage storage, ui64 interval, const IValueBuilder& valueBuilder) { + if (interval >= 86400000000ull) { + // treat as StartOfDay + 
SetStartOfDay(storage); + } else { + auto current = storage.ToTimeOfDay(); + auto rounded = current / interval * interval; + storage.FromTimeOfDay(rounded); + } - auto& builder = valueBuilder.GetDateBuilder(); - if (!storage.Validate(builder)) { - return {}; - } - return storage; + auto& builder = valueBuilder.GetDateBuilder(); + if (!storage.Validate(builder)) { + return {}; } + return storage; +} - template<bool UseEnd> - struct TStartEndOfBinaryKernelExec : TBinaryKernelExec<TStartEndOfBinaryKernelExec<UseEnd>> { - template<typename TSink> - static void Process(const IValueBuilder* valueBuilder, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - auto& storage = Reference<TMResourceName>(arg1); - ui64 interval = std::abs(arg2.Get<i64>()); - if (interval == 0) { - sink(arg1); - return; - } +template <typename TStorage> +TMaybe<TStorage> EndOf(TStorage storage, ui64 interval, const IValueBuilder& valueBuilder) { + if (interval >= 86400000000ull) { + // treat as EndOfDay + SetEndOfDay(storage); + } else { + auto current = storage.ToTimeOfDay(); + auto rounded = current / interval * interval + interval - 1; + storage.FromTimeOfDay(rounded); + } - if (auto res = (UseEnd ? 
EndOf<TTMStorage> : StartOf<TTMStorage>)(storage, interval, *valueBuilder)) { - storage = res.GetRef(); - sink(arg1); - } else { - sink(TBlockItem{}); - } - } - }; + auto& builder = valueBuilder.GetDateBuilder(); + if (!storage.Validate(builder)) { + return {}; + } + return storage; +} - template<const char* TResourceName, auto Core> - TUnboxedValue SimpleDatetimeToIntervalUdf(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) { - auto result = args[0]; - ui64 interval = std::abs(args[1].Get<i64>()); +template <bool UseEnd> +struct TStartEndOfBinaryKernelExec: TBinaryKernelExec<TStartEndOfBinaryKernelExec<UseEnd>> { + template <typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + auto& storage = Reference<TMResourceName>(arg1); + ui64 interval = std::abs(arg2.Get<i64>()); if (interval == 0) { - return result; + sink(arg1); + return; } - auto& storage = Reference<TResourceName>(result); - if (auto res = Core(storage, interval, *valueBuilder)) { + + if (auto res = (UseEnd ? 
EndOf<TTMStorage> : StartOf<TTMStorage>)(storage, interval, *valueBuilder)) { storage = res.GetRef(); - return result; + sink(arg1); + } else { + sink(TBlockItem{}); } - return TUnboxedValuePod{}; } +}; + +template <const char* TResourceName, auto Core> +TUnboxedValue SimpleDatetimeToIntervalUdf(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) { + auto result = args[0]; + ui64 interval = std::abs(args[1].Get<i64>()); + if (interval == 0) { + return result; + } + auto& storage = Reference<TResourceName>(result); + if (auto res = Core(storage, interval, *valueBuilder)) { + storage = res.GetRef(); + return result; + } + return TUnboxedValuePod{}; +} -template<const char* TUdfName, auto Boundary, auto WBoundary> +template <const char* TUdfName, auto Boundary, auto WBoundary> class TBoundaryOfInterval: public ::NYql::NUdf::TBoxedValue { public: typedef bool TTypeAwareMarker; @@ -2424,27 +2401,27 @@ public: SetUnexpectedTagError(builder, resource.GetTag()); return true; } + private: - template<auto Func> - class TImpl : public TBoxedValue { + template <auto Func> + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { try { return Func(valueBuilder, args); } catch (const std::exception&) { - TStringBuilder sb; - sb << CurrentExceptionMessage(); - sb << Endl << "[" << TStringBuf(Name()) << "]" ; - UdfTerminate(sb.c_str()); + TStringBuilder sb; + sb << CurrentExceptionMessage(); + sb << Endl << "[" << TStringBuf(Name()) << "]"; + UdfTerminate(sb.c_str()); } } }; - template<const char* TResourceName, auto Func> + template <const char* TResourceName, auto Func> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<TOptional<TResource<TResourceName>>>(); - builder.Args()->Add<TAutoMap<TResource<TResourceName>>>() - .template Add<TAutoMap<std::conditional_t<TResourceName == TMResourceName, TInterval, TInterval64>>>(); + 
builder.Args()->Add<TAutoMap<TResource<TResourceName>>>().template Add<TAutoMap<std::conditional_t<TResourceName == TMResourceName, TInterval, TInterval64>>>(); builder.IsStrict(); if (!typesOnly) { builder.Implementation(new TImpl<Func>()); @@ -2452,14 +2429,14 @@ private: } }; - struct TTimeOfDayKernelExec : TUnaryKernelExec<TTimeOfDayKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<TDataType<TInterval>::TLayout, false>> { - template<typename TSink> - static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { - Y_UNUSED(valueBuilder); - auto& storage = Reference<TMResourceName>(item); - sink(TBlockItem{(TDataType<TInterval>::TLayout)storage.ToTimeOfDay()}); - } - }; +struct TTimeOfDayKernelExec: TUnaryKernelExec<TTimeOfDayKernelExec, TReaderTraits::TResource<false>, TFixedSizeArrayBuilder<TDataType<TInterval>::TLayout, false>> { + template <typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem item, const TSink& sink) { + Y_UNUSED(valueBuilder); + auto& storage = Reference<TMResourceName>(item); + sink(TBlockItem{(TDataType<TInterval>::TLayout)storage.ToTimeOfDay()}); + } +}; class TTimeOfDay: public ::NYql::NUdf::TBoxedValue { public: @@ -2546,9 +2523,10 @@ public: SetUnexpectedTagError(builder, resource.GetTag()); return true; } + private: - template<const char* TResourceName> - class TImpl : public TBoxedValue { + template <const char* TResourceName> + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { try { @@ -2556,15 +2534,15 @@ private: auto& storage = Reference<TResourceName>(args[0]); return TUnboxedValuePod((i64)storage.ToTimeOfDay()); } catch (const std::exception&) { - TStringBuilder sb; - sb << CurrentExceptionMessage(); - sb << Endl << "[" << TStringBuf(Name()) << "]" ; - UdfTerminate(sb.c_str()); + TStringBuilder sb; + sb << CurrentExceptionMessage(); + sb << Endl << "[" << 
TStringBuf(Name()) << "]"; + UdfTerminate(sb.c_str()); } } }; - template< const char* TResourceName> + template <const char* TResourceName> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<std::conditional_t<TResourceName == TMResourceName, TInterval, TInterval64>>(); builder.Args()->Add<TAutoMap<TResource<TResourceName>>>(); @@ -2575,19 +2553,18 @@ private: } }; +// Add ... - // Add ... - - template<auto Core> - struct TAddKernelExec : TBinaryKernelExec<TAddKernelExec<Core>> { - template<typename TSink> - static void Process(const IValueBuilder* valueBuilder, TBlockItem date, TBlockItem arg, const TSink& sink) { - sink(Core(date, arg.Get<i32>(), valueBuilder->GetDateBuilder())); - } - }; +template <auto Core> +struct TAddKernelExec: TBinaryKernelExec<TAddKernelExec<Core>> { + template <typename TSink> + static void Process(const IValueBuilder* valueBuilder, TBlockItem date, TBlockItem arg, const TSink& sink) { + sink(Core(date, arg.Get<i32>(), valueBuilder->GetDateBuilder())); + } +}; -template<const char* TUdfName, auto Shifter, auto WShifter> -class TShift : public TBoxedValue { +template <const char* TUdfName, auto Shifter, auto WShifter> +class TShift: public TBoxedValue { public: typedef bool TTypeAwareMarker; @@ -2673,16 +2650,17 @@ public: SetUnexpectedTagError(builder, resource.GetTag()); return true; } + private: - template<auto ShiftHanler> - class TImpl : public TBoxedValue { + template <auto ShiftHanler> + class TImpl: public TBoxedValue { public: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { return ShiftHanler(args[0], args[1].Get<i32>(), valueBuilder->GetDateBuilder()); } }; - template<const char* TResourceName, auto ShiftHandler> + template <const char* TResourceName, auto ShiftHandler> static void BuildSignature(NUdf::IFunctionTypeInfoBuilder& builder, bool typesOnly) { builder.Returns<TOptional<TResource<TResourceName>>>(); 
builder.Args()->Add<TAutoMap<TResource<TResourceName>>>().template Add<i32>(); @@ -2693,154 +2671,155 @@ private: } }; - template<size_t Digits, bool Trailing = true, bool Leading = true> - struct PrintNDigits; +template <size_t Digits, bool Trailing = true, bool Leading = true> +struct PrintNDigits; - template<bool Trailing, bool Leading> - struct PrintNDigits<0U, Trailing, Leading> { - static constexpr ui32 Miltiplier = 1U; +template <bool Trailing, bool Leading> +struct PrintNDigits<0U, Trailing, Leading> { + static constexpr ui32 Miltiplier = 1U; - template <typename T> - static constexpr size_t Do(T, char*) { return 0U; } - }; + template <typename T> + static constexpr size_t Do(T, char*) { + return 0U; + } +}; - template<size_t Digits, bool Trailing, bool Leading> - struct PrintNDigits { - using TNextNoLeadPrint = PrintNDigits<Digits - 1U, Trailing, false>; - using TNextCommonPrint = PrintNDigits<Digits - 1U, Trailing, true>; - static_assert(TNextNoLeadPrint::Miltiplier == TNextCommonPrint::Miltiplier); - static constexpr ui32 Miltiplier = TNextCommonPrint::Miltiplier * 10U; - - template <typename T> - static constexpr size_t Do(T in, char* out) { - in %= Miltiplier; - if (!Trailing && in == 0) { - return 0U; - } - const auto digit = in / TNextCommonPrint::Miltiplier; - if (!Leading && digit == 0) { - return TNextNoLeadPrint::Do(in, out); - } - *out = "0123456789"[digit]; - return 1U + TNextCommonPrint::Do(in, ++out); - } - }; +template <size_t Digits, bool Trailing, bool Leading> +struct PrintNDigits { + using TNextNoLeadPrint = PrintNDigits<Digits - 1U, Trailing, false>; + using TNextCommonPrint = PrintNDigits<Digits - 1U, Trailing, true>; + static_assert(TNextNoLeadPrint::Miltiplier == TNextCommonPrint::Miltiplier); + static constexpr ui32 Miltiplier = TNextCommonPrint::Miltiplier * 10U; - // Format + template <typename T> + static constexpr size_t Do(T in, char* out) { + in %= Miltiplier; + if (!Trailing && in == 0) { + return 0U; + } + const auto digit = 
in / TNextCommonPrint::Miltiplier; + if (!Leading && digit == 0) { + return TNextNoLeadPrint::Do(in, out); + } + *out = "0123456789"[digit]; + return 1U + TNextCommonPrint::Do(in, ++out); + } +}; - class TFormat : public TBoxedValue { - public: - explicit TFormat(TSourcePosition pos) - : Pos_(pos) - {} +// Format - static const TStringRef& Name() { - static auto name = TStringRef::Of("Format"); - return name; - } +class TFormat: public TBoxedValue { +public: + explicit TFormat(TSourcePosition pos) + : Pos_(pos) + { + } - static bool DeclareSignature( - const TStringRef& name, - TType*, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) - { - if (Name() != name) { - return false; - } + static const TStringRef& Name() { + static auto name = TStringRef::Of("Format"); + return name; + } - builder.OptionalArgs(1).Args()->Add<char*>() - .Add<TOptional<bool>>().Name("AlwaysWriteFractionalSeconds"); - builder.Returns( - builder.SimpleSignatureType<char*(TAutoMap<TResource<TM64ResourceName>>)>()); - if (!typesOnly) { - builder.Implementation(new TFormat(builder.GetSourcePosition())); - } + static bool DeclareSignature( + const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + if (Name() != name) { + return false; + } - return true; + builder.OptionalArgs(1).Args()->Add<char*>().Add<TOptional<bool>>().Name("AlwaysWriteFractionalSeconds"); + builder.Returns( + builder.SimpleSignatureType<char*(TAutoMap<TResource<TM64ResourceName>>)>()); + if (!typesOnly) { + builder.Implementation(new TFormat(builder.GetSourcePosition())); } - private: - using TPrintersList = std::vector<std::function<size_t(char*, const TUnboxedValuePod&, const IDateBuilder&)>>; + return true; + } - struct TDataPrinter { - const std::string_view Data; +private: + using TPrintersList = std::vector<std::function<size_t(char*, const TUnboxedValuePod&, const IDateBuilder&)>>; - size_t operator()(char* out, const TUnboxedValuePod&, const IDateBuilder&) const { - 
std::memcpy(out, Data.data(), Data.size()); - return Data.size(); - } - }; + struct TDataPrinter { + const std::string_view Data; - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { - bool alwaysWriteFractionalSeconds = false; - if (auto val = args[1]) { - alwaysWriteFractionalSeconds = val.Get<bool>(); - } + size_t operator()(char* out, const TUnboxedValuePod&, const IDateBuilder&) const { + std::memcpy(out, Data.data(), Data.size()); + return Data.size(); + } + }; - return TUnboxedValuePod(new TImpl(Pos_, args[0], alwaysWriteFractionalSeconds)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { + bool alwaysWriteFractionalSeconds = false; + if (auto val = args[1]) { + alwaysWriteFractionalSeconds = val.Get<bool>(); } - class TImpl : public TBoxedValue { - public: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); - const auto value = args[0]; + return TUnboxedValuePod(new TImpl(Pos_, args[0], alwaysWriteFractionalSeconds)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } - auto& builder = valueBuilder->GetDateBuilder(); + class TImpl: public TBoxedValue { + public: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); + const auto value = args[0]; - auto result = valueBuilder->NewStringNotFilled(ReservedSize_); - auto pos = result.AsStringRef().Data(); - ui32 size = 0U; + auto& builder = valueBuilder->GetDateBuilder(); - for (const auto& printer : Printers_) { - if (const auto plus = printer(pos, value, builder)) { - size += plus; - pos += plus; - } - } + auto result = valueBuilder->NewStringNotFilled(ReservedSize_); + auto pos 
= result.AsStringRef().Data(); + ui32 size = 0U; - if (size < ReservedSize_) { - result = valueBuilder->SubString(result.Release(), 0U, size); + for (const auto& printer : Printers_) { + if (const auto plus = printer(pos, value, builder)) { + size += plus; + pos += plus; } + } - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + if (size < ReservedSize_) { + result = valueBuilder->SubString(result.Release(), 0U, size); } + + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - TImpl(TSourcePosition pos, TUnboxedValue format, bool alwaysWriteFractionalSeconds) - : Pos_(pos) - , Format_(format) - { - const std::string_view formatView(Format_.AsStringRef()); - auto dataStart = formatView.begin(); - size_t dataSize = 0U; - - for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) { - if (*ptr != '%') { - ++dataSize; - continue; - } + TImpl(TSourcePosition pos, TUnboxedValue format, bool alwaysWriteFractionalSeconds) + : Pos_(pos) + , Format_(format) + { + const std::string_view formatView(Format_.AsStringRef()); + auto dataStart = formatView.begin(); + size_t dataSize = 0U; - if (dataSize) { - Printers_.emplace_back(TDataPrinter{std::string_view(&*dataStart, dataSize)}); - ReservedSize_ += dataSize; - dataSize = 0U; - } + for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) { + if (*ptr != '%') { + ++dataSize; + continue; + } - if (formatView.end() == ++ptr) { - ythrow yexception() << "format string ends with single %%"; - } + if (dataSize) { + Printers_.emplace_back(TDataPrinter{std::string_view(&*dataStart, dataSize)}); + ReservedSize_ += dataSize; + dataSize = 0U; + } + + if (formatView.end() == ++ptr) { + ythrow yexception() << "format string ends with single %%"; + } - switch (*ptr) { + switch (*ptr) { case '%': { static constexpr size_t size = 1; Printers_.emplace_back([](char* out, 
const TUnboxedValuePod&, const IDateBuilder&) { @@ -2904,9 +2883,7 @@ private: out += PrintNDigits<size>::Do(GetSecond<TM64ResourceName>(value), out); *out++ = '.'; constexpr size_t msize = 6; - auto addSz = alwaysWriteFractionalSeconds ? - PrintNDigits<msize, true>::Do(microsecond, out) : - PrintNDigits<msize, false>::Do(microsecond, out); + auto addSz = alwaysWriteFractionalSeconds ? PrintNDigits<msize, true>::Do(microsecond, out) : PrintNDigits<msize, false>::Do(microsecond, out); return size + 1U + addSz; } return PrintNDigits<size>::Do(GetSecond<TM64ResourceName>(value), out); @@ -2956,7 +2933,7 @@ private: case 'b': { static constexpr size_t size = 3; Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) { - static constexpr std::string_view mp[] { + static constexpr std::string_view mp[]{ "Jan", "Feb", "Mar", @@ -2968,8 +2945,7 @@ private: "Sep", "Oct", "Nov", - "Dec" - }; + "Dec"}; auto month = GetMonth<TM64ResourceName>(value); Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value"); std::memcpy(out, mp[month - 1].data(), size); @@ -2980,7 +2956,7 @@ private: } case 'B': { Printers_.emplace_back([](char* out, const TUnboxedValuePod& value, const IDateBuilder&) { - static constexpr std::string_view mp[] { + static constexpr std::string_view mp[]{ "January", "February", "March", @@ -2992,8 +2968,7 @@ private: "September", "October", "November", - "December" - }; + "December"}; auto month = GetMonth<TM64ResourceName>(value); Y_ENSURE(month > 0 && month <= sizeof(mp) / sizeof(mp[0]), "Invalid month value"); const std::string_view monthFullName = mp[month - 1]; @@ -3006,181 +2981,180 @@ private: default: throw yexception() << "character '" << *ptr << "' is not a valid format specifier." 
<< "\nSee documentation for valid format characters"; - } - - dataStart = ptr + 1U; } - if (dataSize) { - Printers_.emplace_back(TDataPrinter{std::string_view(dataStart, dataSize)}); - ReservedSize_ += dataSize; - } + dataStart = ptr + 1U; } - private: - const TSourcePosition Pos_; - - TUnboxedValue Format_; - TPrintersList Printers_{}; - size_t ReservedSize_ = 0; - }; + if (dataSize) { + Printers_.emplace_back(TDataPrinter{std::string_view(dataStart, dataSize)}); + ReservedSize_ += dataSize; + } + } + private: const TSourcePosition Pos_; + + TUnboxedValue Format_; + TPrintersList Printers_{}; + size_t ReservedSize_ = 0; }; - template<size_t Digits, bool Variable = false> - struct ParseNDigits; + const TSourcePosition Pos_; +}; - template<bool Variable> - struct ParseNDigits<0U, Variable> { - template <typename T> - static constexpr bool Do(std::string_view::const_iterator&, T&) { - return true; - } - }; +template <size_t Digits, bool Variable = false> +struct ParseNDigits; - template<size_t Digits, bool Variable> - struct ParseNDigits { - template <typename T> - static constexpr bool Do(std::string_view::const_iterator& it, T& out) { - const auto d = *it; - if (!std::isdigit(d)) { - // XXX: If the current char is not a digit, the - // parsing succeeds iff there are no more digits - // to be parsed (see the class specialization - // above) or there are given less than N digits - // to be parsed. 
- if constexpr (Variable) { - return true; - } - return false; +template <bool Variable> +struct ParseNDigits<0U, Variable> { + template <typename T> + static constexpr bool Do(std::string_view::const_iterator&, T&) { + return true; + } +}; + +template <size_t Digits, bool Variable> +struct ParseNDigits { + template <typename T> + static constexpr bool Do(std::string_view::const_iterator& it, T& out) { + const auto d = *it; + if (!std::isdigit(d)) { + // XXX: If the current char is not a digit, the + // parsing succeeds iff there are no more digits + // to be parsed (see the class specialization + // above) or there are given less than N digits + // to be parsed. + if constexpr (Variable) { + return true; } - out *= 10U; - out += d - '0'; - return ParseNDigits<Digits - 1U, Variable>::Do(++it, out); + return false; } - }; + out *= 10U; + out += d - '0'; + return ParseNDigits<Digits - 1U, Variable>::Do(++it, out); + } +}; - // Parse +// Parse - template<const char* TUdfName, const char* TResourceName> - class TParse : public TBoxedValue { +template <const char* TUdfName, const char* TResourceName> +class TParse: public TBoxedValue { +public: + class TFactory: public TBoxedValue { public: - class TFactory : public TBoxedValue { - public: - explicit TFactory(TSourcePosition pos) - : Pos_(pos) - {} - - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { - return TUnboxedValuePod(new TParse(args[0], Pos_)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - - const TSourcePosition Pos_; - }; + explicit TFactory(TSourcePosition pos) + : Pos_(pos) + { + } - static const TStringRef& Name() { - static auto name = TStringRef(TUdfName, std::strlen(TUdfName)); - return name; + private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { + return TUnboxedValuePod(new TParse(args[0], Pos_)); + } catch (const std::exception& e) { + 
UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - static bool DeclareSignature( - const TStringRef& name, - TType*, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) - { - if (Name() != name) { - return false; - } + const TSourcePosition Pos_; + }; - builder.OptionalArgs(1).Args()->Add<char*>() - .template Add<TOptional<ui16>>(); - builder.Returns( - builder.SimpleSignatureType<TOptional<TResource<TResourceName>>(TAutoMap<char*>)>()); - if (!typesOnly) { - builder.Implementation(new TParse::TFactory(builder.GetSourcePosition())); - } + static const TStringRef& Name() { + static auto name = TStringRef(TUdfName, std::strlen(TUdfName)); + return name; + } - return true; + static bool DeclareSignature( + const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + if (Name() != name) { + return false; } - private: - const TSourcePosition Pos_; - const TUnboxedValue Format_; + builder.OptionalArgs(1).Args()->Add<char*>().template Add<TOptional<ui16>>(); + builder.Returns( + builder.SimpleSignatureType<TOptional<TResource<TResourceName>>(TAutoMap<char*>)>()); + if (!typesOnly) { + builder.Implementation(new TParse::TFactory(builder.GetSourcePosition())); + } - std::vector<std::function<bool(std::string_view::const_iterator& it, size_t, TUnboxedValuePod&, const IDateBuilder&)>> Scanners_; + return true; + } - struct TDataScanner { - const std::string_view Data; +private: + const TSourcePosition Pos_; + const TUnboxedValue Format_; - bool operator()(std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) const { - if (limit < Data.size() || !std::equal(Data.begin(), Data.end(), it)) { - return false; - } - std::advance(it, Data.size()); - return true; - } - }; + std::vector<std::function<bool(std::string_view::const_iterator& it, size_t, TUnboxedValuePod&, const IDateBuilder&)>> Scanners_; - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* 
args) const override - { - try { - EMPTY_RESULT_ON_EMPTY_ARG(0); + struct TDataScanner { + const std::string_view Data; + + bool operator()(std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) const { + if (limit < Data.size() || !std::equal(Data.begin(), Data.end(), it)) { + return false; + } + std::advance(it, Data.size()); + return true; + } + }; - const std::string_view buffer = args[0].AsStringRef(); + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + EMPTY_RESULT_ON_EMPTY_ARG(0); - TUnboxedValuePod result(0); - auto& storage = Reference<TResourceName>(result); - storage.MakeDefault(); + const std::string_view buffer = args[0].AsStringRef(); - auto& builder = valueBuilder->GetDateBuilder(); + TUnboxedValuePod result(0); + auto& storage = Reference<TResourceName>(result); + storage.MakeDefault(); - auto it = buffer.begin(); - for (const auto& scanner : Scanners_) { - if (!scanner(it, std::distance(it, buffer.end()), result, builder)) { - return TUnboxedValuePod(); - } - } + auto& builder = valueBuilder->GetDateBuilder(); - if (buffer.end() != it || !storage.Validate(builder)) { + auto it = buffer.begin(); + for (const auto& scanner : Scanners_) { + if (!scanner(it, std::distance(it, buffer.end()), result, builder)) { return TUnboxedValuePod(); } - return result; - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + + if (buffer.end() != it || !storage.Validate(builder)) { + return TUnboxedValuePod(); + } + return result; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - TParse(const TUnboxedValuePod& runConfig, TSourcePosition pos) - : Pos_(pos) - , Format_(runConfig) - { - const std::string_view formatView(Format_.AsStringRef()); - auto dataStart = formatView.begin(); - size_t dataSize = 0U; + TParse(const TUnboxedValuePod& runConfig, 
TSourcePosition pos) + : Pos_(pos) + , Format_(runConfig) + { + const std::string_view formatView(Format_.AsStringRef()); + auto dataStart = formatView.begin(); + size_t dataSize = 0U; - for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) { - if (*ptr != '%') { - ++dataSize; - continue; - } + for (auto ptr = formatView.begin(); formatView.end() != ptr; ++ptr) { + if (*ptr != '%') { + ++dataSize; + continue; + } - if (dataSize) { - Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)}); - dataSize = 0; - } + if (dataSize) { + Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)}); + dataSize = 0; + } - if (++ptr == formatView.end()) { - ythrow yexception() << "format string ends with single %%"; - } + if (++ptr == formatView.end()) { + ythrow yexception() << "format string ends with single %%"; + } - switch (*ptr) { + switch (*ptr) { case '%': Scanners_.emplace_back([](std::string_view::const_iterator& it, size_t limit, TUnboxedValuePod&, const IDateBuilder&) { return limit > 0U && *it++ == '%'; @@ -3313,7 +3287,8 @@ private: ++it; --digits; } - for (; !digits && limit && std::isdigit(*it); --limit, ++it); + for (; !digits && limit && std::isdigit(*it); --limit, ++it) + ; while (digits--) { usec *= 10U; } @@ -3381,16 +3356,16 @@ private: } default: ythrow yexception() << "invalid format character: " << *ptr; - } - - dataStart = ptr + 1U; } - if (dataSize) { - Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)}); - } + dataStart = ptr + 1U; } - }; + + if (dataSize) { + Scanners_.emplace_back(TDataScanner{std::string_view(&*dataStart, dataSize)}); + } + } +}; #define PARSE_SPECIFIC_FORMAT(format) \ SIMPLE_STRICT_UDF(TParse##format, TOptional<TResource<TMResourceName>>(TAutoMap<char*>)) { \ @@ -3406,140 +3381,139 @@ private: return result; \ } - PARSE_SPECIFIC_FORMAT(Rfc822); - PARSE_SPECIFIC_FORMAT(Iso8601); - PARSE_SPECIFIC_FORMAT(Http); - PARSE_SPECIFIC_FORMAT(X509); - - 
SIMPLE_MODULE(TDateTime2Module, - TUserDataTypeFuncFactory<true, true, SplitUDF, TSplit, - TDate, - TDatetime, - TTimestamp, - TTzDate, - TTzDatetime, - TTzTimestamp, - TDate32, - TDatetime64, - TTimestamp64, - TTzDate32, - TTzDatetime64, - TTzTimestamp64>, - - TMakeDate, - TMakeDatetime, - TMakeTimestamp, - TMakeTzDate, - TMakeTzDatetime, - TMakeTzTimestamp, - - TConvert, - - TMakeDate32, - TMakeDatetime64, - TMakeTimestamp64, - TMakeTzDate32, - TMakeTzDatetime64, - TMakeTzTimestamp64, - - TGetDateComponent<GetYearUDF, ui16, GetYear<TMResourceName>, i32, GetYear<TM64ResourceName>>, - TGetDateComponent<GetDayOfYearUDF, ui16, GetDayOfYear<TMResourceName>, ui16, GetDayOfYear<TM64ResourceName>>, - TGetDateComponent<GetMonthUDF, ui8, GetMonth<TMResourceName>, ui8, GetMonth<TM64ResourceName>>, - TGetDateComponentName<GetMonthNameUDF, GetMonthName<TMResourceName>, GetMonthName<TM64ResourceName>>, - TGetDateComponent<GetWeekOfYearUDF, ui8, GetWeekOfYear<TMResourceName>, ui8, GetWeekOfYear<TM64ResourceName>>, - TGetDateComponent<GetWeekOfYearIso8601UDF, ui8, GetWeekOfYearIso8601<TMResourceName>, ui8, GetWeekOfYearIso8601<TM64ResourceName>>, - TGetDateComponent<GetDayOfMonthUDF, ui8, GetDay<TMResourceName>, ui8, GetDay<TM64ResourceName>>, - TGetDateComponent<GetDayOfWeekUDF, ui8, GetDayOfWeek<TMResourceName>, ui8, GetDayOfWeek<TM64ResourceName>>, - TGetDateComponentName<GetDayOfWeekNameUDF, GetDayOfWeekName<TMResourceName>, GetDayOfWeekName<TM64ResourceName>>, - TGetTimeComponent<GetHourUDF, ui8, GetHour<TMResourceName>, GetHour<TM64ResourceName>, 1u, 3600u, 24u, false>, - TGetTimeComponent<GetMinuteUDF, ui8, GetMinute<TMResourceName>, GetMinute<TM64ResourceName>, 1u, 60u, 60u, false>, - TGetTimeComponent<GetSecondUDF, ui8, GetSecond<TMResourceName>, GetSecond<TM64ResourceName>, 1u, 1u, 60u, false>, - TGetTimeComponent<GetMillisecondOfSecondUDF, ui32, GetMicrosecond<TMResourceName>, GetMicrosecond<TM64ResourceName>, 1000u, 1000u, 1000u, true>, - 
TGetTimeComponent<GetMicrosecondOfSecondUDF, ui32, GetMicrosecond<TMResourceName>, GetMicrosecond<TM64ResourceName>, 1u, 1u, 1000000u, true>, - TGetDateComponent<GetTimezoneIdUDF, ui16, GetTimezoneId<TMResourceName>, ui16, GetTimezoneId<TM64ResourceName>>, - TGetDateComponentName<GetTimezoneNameUDF, GetTimezoneName<TMResourceName>, GetTimezoneName<TM64ResourceName>>, - - TUpdate, - - TFromSeconds, - TFromMilliseconds, - TFromMicroseconds, - - TFromSeconds64, - TFromMilliseconds64, - TFromMicroseconds64, - - TIntervalFromDays, - TIntervalFromHours, - TIntervalFromMinutes, - - TLangVerForked< - NYql::MakeLangVersion(2025, 03), - NLegacy::TIntervalFromSeconds, - NActual::TIntervalFromSeconds>, - - TIntervalFromMilliseconds, - TIntervalFromMicroseconds, - - TInterval64FromDays, - TInterval64FromHours, - TInterval64FromMinutes, - TInterval64FromSeconds, - TInterval64FromMilliseconds, - TInterval64FromMicroseconds, - - TToConverter<ToDaysUDF, i32, i32, UsecondsInDay>, - TToConverter<ToHoursUDF, i32, i64, UsecondsInHour>, - TToConverter<ToMinutesUDF, i32, i64, UsecondsInMinute>, - - TBoundaryOf<StartOfYearUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfYear<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfYear<TTM64Storage>>>, - TBoundaryOf<StartOfQuarterUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfQuarter<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfQuarter<TTM64Storage>>>, - TBoundaryOf<StartOfMonthUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfMonth<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfMonth<TTM64Storage>>>, - TBoundaryOf<StartOfWeekUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfWeek<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfWeek<TTM64Storage>>>, - TBoundaryOf<StartOfDayUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfDay<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfDay<TTM64Storage>>>, - 
TBoundaryOfInterval<StartOfUDF, SimpleDatetimeToIntervalUdf<TMResourceName, StartOf<TTMStorage>>, - SimpleDatetimeToIntervalUdf<TM64ResourceName, StartOf<TTM64Storage>>>, - TTimeOfDay, - - TShift<ShiftYearsUDF, DoAddYears<TMResourceName>, DoAddYears<TM64ResourceName>>, - TShift<ShiftQuartersUDF, DoAddQuarters<TMResourceName>, DoAddQuarters<TM64ResourceName>>, - TShift<ShiftMonthsUDF, DoAddMonths<TMResourceName>, DoAddMonths<TM64ResourceName>>, - - TBoundaryOf<EndOfYearUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfYear<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfYear<TTM64Storage>>>, - TBoundaryOf<EndOfQuarterUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfQuarter<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfQuarter<TTM64Storage>>>, - TBoundaryOf<EndOfMonthUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfMonth<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfMonth<TTM64Storage>>>, - TBoundaryOf<EndOfWeekUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfWeek<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfWeek<TTM64Storage>>>, - TBoundaryOf<EndOfDayUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfDay<TTMStorage>>, - SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfDay<TTM64Storage>>>, - TBoundaryOfInterval<EndOfUDF, SimpleDatetimeToIntervalUdf<TMResourceName, EndOf<TTMStorage>>, - SimpleDatetimeToIntervalUdf<TM64ResourceName, EndOf<TTM64Storage>>>, - - TLangVerForked< - NYql::MakeLangVersion(2025, 03), - TToUnits<ToSecondsUDF, /* TResult = */ ui32, /* TSignedResult = */ i32, /* TWResult = */ i64, 1>, - TToUnits<ToSecondsUDF, /* TResult = */ ui32, /* TSignedResult = */ i64, /* TWResult = */ i64, 1>>, - - TToUnits<ToMillisecondsUDF, /* TResult = */ ui64, /* TSignedResult = */ i64, /* TWResult = */ i64, 1000>, - TToUnits<ToMicrosecondsUDF, /* TResult = */ ui64, /* TSignedResult = */ i64, /* TWResult = */ i64, 1000000>, - - TFormat, - TParse<ParseUDF, 
TMResourceName>, - TParse<Parse64UDF, TM64ResourceName>, - - TParseRfc822, - TParseIso8601, - TParseHttp, - TParseX509 - ) -} +PARSE_SPECIFIC_FORMAT(Rfc822); +PARSE_SPECIFIC_FORMAT(Iso8601); +PARSE_SPECIFIC_FORMAT(Http); +PARSE_SPECIFIC_FORMAT(X509); + +SIMPLE_MODULE(TDateTime2Module, + TUserDataTypeFuncFactory<true, true, SplitUDF, TSplit, + TDate, + TDatetime, + TTimestamp, + TTzDate, + TTzDatetime, + TTzTimestamp, + TDate32, + TDatetime64, + TTimestamp64, + TTzDate32, + TTzDatetime64, + TTzTimestamp64>, + + TMakeDate, + TMakeDatetime, + TMakeTimestamp, + TMakeTzDate, + TMakeTzDatetime, + TMakeTzTimestamp, + + TConvert, + + TMakeDate32, + TMakeDatetime64, + TMakeTimestamp64, + TMakeTzDate32, + TMakeTzDatetime64, + TMakeTzTimestamp64, + + TGetDateComponent<GetYearUDF, ui16, GetYear<TMResourceName>, i32, GetYear<TM64ResourceName>>, + TGetDateComponent<GetDayOfYearUDF, ui16, GetDayOfYear<TMResourceName>, ui16, GetDayOfYear<TM64ResourceName>>, + TGetDateComponent<GetMonthUDF, ui8, GetMonth<TMResourceName>, ui8, GetMonth<TM64ResourceName>>, + TGetDateComponentName<GetMonthNameUDF, GetMonthName<TMResourceName>, GetMonthName<TM64ResourceName>>, + TGetDateComponent<GetWeekOfYearUDF, ui8, GetWeekOfYear<TMResourceName>, ui8, GetWeekOfYear<TM64ResourceName>>, + TGetDateComponent<GetWeekOfYearIso8601UDF, ui8, GetWeekOfYearIso8601<TMResourceName>, ui8, GetWeekOfYearIso8601<TM64ResourceName>>, + TGetDateComponent<GetDayOfMonthUDF, ui8, GetDay<TMResourceName>, ui8, GetDay<TM64ResourceName>>, + TGetDateComponent<GetDayOfWeekUDF, ui8, GetDayOfWeek<TMResourceName>, ui8, GetDayOfWeek<TM64ResourceName>>, + TGetDateComponentName<GetDayOfWeekNameUDF, GetDayOfWeekName<TMResourceName>, GetDayOfWeekName<TM64ResourceName>>, + TGetTimeComponent<GetHourUDF, ui8, GetHour<TMResourceName>, GetHour<TM64ResourceName>, 1u, 3600u, 24u, false>, + TGetTimeComponent<GetMinuteUDF, ui8, GetMinute<TMResourceName>, GetMinute<TM64ResourceName>, 1u, 60u, 60u, false>, + TGetTimeComponent<GetSecondUDF, ui8, 
GetSecond<TMResourceName>, GetSecond<TM64ResourceName>, 1u, 1u, 60u, false>, + TGetTimeComponent<GetMillisecondOfSecondUDF, ui32, GetMicrosecond<TMResourceName>, GetMicrosecond<TM64ResourceName>, 1000u, 1000u, 1000u, true>, + TGetTimeComponent<GetMicrosecondOfSecondUDF, ui32, GetMicrosecond<TMResourceName>, GetMicrosecond<TM64ResourceName>, 1u, 1u, 1000000u, true>, + TGetDateComponent<GetTimezoneIdUDF, ui16, GetTimezoneId<TMResourceName>, ui16, GetTimezoneId<TM64ResourceName>>, + TGetDateComponentName<GetTimezoneNameUDF, GetTimezoneName<TMResourceName>, GetTimezoneName<TM64ResourceName>>, + + TUpdate, + + TFromSeconds, + TFromMilliseconds, + TFromMicroseconds, + + TFromSeconds64, + TFromMilliseconds64, + TFromMicroseconds64, + + TIntervalFromDays, + TIntervalFromHours, + TIntervalFromMinutes, + + TLangVerForked< + NYql::MakeLangVersion(2025, 03), + NLegacy::TIntervalFromSeconds, + NActual::TIntervalFromSeconds>, + + TIntervalFromMilliseconds, + TIntervalFromMicroseconds, + + TInterval64FromDays, + TInterval64FromHours, + TInterval64FromMinutes, + TInterval64FromSeconds, + TInterval64FromMilliseconds, + TInterval64FromMicroseconds, + + TToConverter<ToDaysUDF, i32, i32, UsecondsInDay>, + TToConverter<ToHoursUDF, i32, i64, UsecondsInHour>, + TToConverter<ToMinutesUDF, i32, i64, UsecondsInMinute>, + + TBoundaryOf<StartOfYearUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfYear<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfYear<TTM64Storage>>>, + TBoundaryOf<StartOfQuarterUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfQuarter<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfQuarter<TTM64Storage>>>, + TBoundaryOf<StartOfMonthUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfMonth<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfMonth<TTM64Storage>>>, + TBoundaryOf<StartOfWeekUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfWeek<TTMStorage>>, + 
SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfWeek<TTM64Storage>>>, + TBoundaryOf<StartOfDayUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, StartOfDay<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, StartOfDay<TTM64Storage>>>, + TBoundaryOfInterval<StartOfUDF, SimpleDatetimeToIntervalUdf<TMResourceName, StartOf<TTMStorage>>, + SimpleDatetimeToIntervalUdf<TM64ResourceName, StartOf<TTM64Storage>>>, + TTimeOfDay, + + TShift<ShiftYearsUDF, DoAddYears<TMResourceName>, DoAddYears<TM64ResourceName>>, + TShift<ShiftQuartersUDF, DoAddQuarters<TMResourceName>, DoAddQuarters<TM64ResourceName>>, + TShift<ShiftMonthsUDF, DoAddMonths<TMResourceName>, DoAddMonths<TM64ResourceName>>, + + TBoundaryOf<EndOfYearUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfYear<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfYear<TTM64Storage>>>, + TBoundaryOf<EndOfQuarterUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfQuarter<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfQuarter<TTM64Storage>>>, + TBoundaryOf<EndOfMonthUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfMonth<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfMonth<TTM64Storage>>>, + TBoundaryOf<EndOfWeekUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfWeek<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfWeek<TTM64Storage>>>, + TBoundaryOf<EndOfDayUDF, SimpleDatetimeToDatetimeUdf<TMResourceName, EndOfDay<TTMStorage>>, + SimpleDatetimeToDatetimeUdf<TM64ResourceName, EndOfDay<TTM64Storage>>>, + TBoundaryOfInterval<EndOfUDF, SimpleDatetimeToIntervalUdf<TMResourceName, EndOf<TTMStorage>>, + SimpleDatetimeToIntervalUdf<TM64ResourceName, EndOf<TTM64Storage>>>, + + TLangVerForked< + NYql::MakeLangVersion(2025, 03), + TToUnits<ToSecondsUDF, /* TResult = */ ui32, /* TSignedResult = */ i32, /* TWResult = */ i64, 1>, + TToUnits<ToSecondsUDF, /* TResult = */ ui32, /* TSignedResult = */ i64, /* TWResult = */ i64, 1>>, + + 
TToUnits<ToMillisecondsUDF, /* TResult = */ ui64, /* TSignedResult = */ i64, /* TWResult = */ i64, 1000>, + TToUnits<ToMicrosecondsUDF, /* TResult = */ ui64, /* TSignedResult = */ i64, /* TWResult = */ i64, 1000000>, + + TFormat, + TParse<ParseUDF, TMResourceName>, + TParse<Parse64UDF, TM64ResourceName>, + + TParseRfc822, + TParseIso8601, + TParseHttp, + TParseX509) +} // namespace REGISTER_MODULES(TDateTime2Module) diff --git a/yql/essentials/udfs/common/datetime2/ya.make b/yql/essentials/udfs/common/datetime2/ya.make index 1ca69ae4bce..ca50b15e73f 100644 --- a/yql/essentials/udfs/common/datetime2/ya.make +++ b/yql/essentials/udfs/common/datetime2/ya.make @@ -4,6 +4,7 @@ YQL_UDF_CONTRIB(datetime2_udf) 43 0 ) + ENABLE(YQL_STYLE_CPP) SRCS( datetime_udf.cpp ) diff --git a/yql/essentials/udfs/common/digest/digest_udf.cpp b/yql/essentials/udfs/common/digest/digest_udf.cpp index 834d38aeaf6..63aa748e56c 100644 --- a/yql/essentials/udfs/common/digest/digest_udf.cpp +++ b/yql/essentials/udfs/common/digest/digest_udf.cpp @@ -27,456 +27,465 @@ using namespace NKikimr; using namespace NUdf; namespace { - enum EDigestType { - CRC32C, CRC64, FNV32, FNV64, MURMUR, MURMUR32, MURMUR2A, MURMUR2A32, CITY - }; - const char* DigestNames[] = { - "Crc32c", "Crc64", "Fnv32", "Fnv64", "MurMurHash", "MurMurHash32", "MurMurHash2A", "MurMurHash2A32", "CityHash" - }; - - template<typename TResult> - using TDigestGenerator = TResult(const TStringRef&, TMaybe<TResult> init); - - template<EDigestType DigestType, typename TResult, TDigestGenerator<TResult>* Generator> - class TDigestFunctionUdf: public TBoxedValue { - public: - TDigestFunctionUdf(TSourcePosition pos) : Pos_(pos) {} - - static TStringRef Name() { - static TString name = DigestNames[DigestType]; - return TStringRef(name); - } - - static bool DeclareSignature( - const TStringRef& name, - TType*, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) - { - if (Name() != name) { - return false; - } - - auto args = builder.Args(); - 
args->Add(builder.SimpleType<char *>()).Flags(ICallablePayload::TArgumentFlags::AutoMap); - args->Add(builder.Optional()->Item(builder.SimpleType<TResult>()).Build()).Name("Init"); - args->Done(); - builder.OptionalArgs(1); - builder.Returns(builder.SimpleType<TResult>()); - builder.IsStrict(); - - if (!typesOnly) { - builder.Implementation(new TDigestFunctionUdf<DigestType, TResult, Generator>(GetSourcePosition(builder))); - } - - return true; - } - - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { - TMaybe<TResult> init = Nothing(); - if (auto val = args[1]) { - init = val.Get<TResult>(); - } - return TUnboxedValuePod(Generator(args[0].AsStringRef(), init)); - } catch (const std ::exception&) { - TStringBuilder sb; - sb << Pos_ << " "; - sb << CurrentExceptionMessage(); - sb << Endl << "[" << TStringBuf(Name()) << "]"; - UdfTerminate(sb.c_str()); - } - - TSourcePosition Pos_; - }; - - SIMPLE_STRICT_UDF(TCrc32c, ui32(TAutoMap<char*>)) { - Y_UNUSED(valueBuilder); - const auto& inputRef = args[0].AsStringRef(); - ui32 hash = Crc32c(inputRef.Data(), inputRef.Size()); - return TUnboxedValuePod(hash); +enum EDigestType { + CRC32C, + CRC64, + FNV32, + FNV64, + MURMUR, + MURMUR32, + MURMUR2A, + MURMUR2A32, + CITY +}; +const char* DigestNames[] = { + "Crc32c", "Crc64", "Fnv32", "Fnv64", "MurMurHash", "MurMurHash32", "MurMurHash2A", "MurMurHash2A32", "CityHash"}; + +template <typename TResult> +using TDigestGenerator = TResult(const TStringRef&, TMaybe<TResult> init); + +template <EDigestType DigestType, typename TResult, TDigestGenerator<TResult>* Generator> +class TDigestFunctionUdf: public TBoxedValue { +public: + TDigestFunctionUdf(TSourcePosition pos) + : Pos_(pos) + { } - using TCrc64 = TDigestFunctionUdf<CRC64, ui64, [](auto& inputRef, auto init) { - return crc64(inputRef.Data(), inputRef.Size(), init.GetOrElse(CRC64INIT)); - }>; + static TStringRef Name() { + static TString name = DigestNames[DigestType]; + return 
TStringRef(name); + } - using TFnv32 = TDigestFunctionUdf<FNV32, ui32, [](auto& inputRef, auto init) { - if (init) { - return FnvHash<ui32>(inputRef.Data(), inputRef.Size(), *init); - } else { - return FnvHash<ui32>(inputRef.Data(), inputRef.Size()); + static bool DeclareSignature( + const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + if (Name() != name) { + return false; } - }>; - using TFnv64 = TDigestFunctionUdf<FNV64, ui64, [](auto& inputRef, auto init) { - if (init) { - return FnvHash<ui64>(inputRef.Data(), inputRef.Size(), *init); - } else { - return FnvHash<ui64>(inputRef.Data(), inputRef.Size()); - } - }>; + auto args = builder.Args(); + args->Add(builder.SimpleType<char*>()).Flags(ICallablePayload::TArgumentFlags::AutoMap); + args->Add(builder.Optional()->Item(builder.SimpleType<TResult>()).Build()).Name("Init"); + args->Done(); + builder.OptionalArgs(1); + builder.Returns(builder.SimpleType<TResult>()); + builder.IsStrict(); - using TMurMurHash = TDigestFunctionUdf<MURMUR, ui64, [](auto& inputRef, auto init) { - if (init) { - return MurmurHash<ui64>(inputRef.Data(), inputRef.Size(), *init); - } else { - return MurmurHash<ui64>(inputRef.Data(), inputRef.Size()); + if (!typesOnly) { + builder.Implementation(new TDigestFunctionUdf<DigestType, TResult, Generator>(GetSourcePosition(builder))); } - }>; - using TMurMurHash32 = TDigestFunctionUdf<MURMUR32, ui32, [] (auto& inputRef, auto init) { - if (init) { - return MurmurHash<ui32>(inputRef.Data(), inputRef.Size(), *init); - } else { - return MurmurHash<ui32>(inputRef.Data(), inputRef.Size()); - } - }>; + return true; + } - using TMurMurHash2A = TDigestFunctionUdf<MURMUR2A, ui64, [] (auto& inputRef, auto init) { - if (init) { - return TMurmurHash2A<ui64>{*init}.Update(inputRef.Data(), inputRef.Size()).Value(); - } else { - return TMurmurHash2A<ui64>{}.Update(inputRef.Data(), inputRef.Size()).Value(); +private: + TUnboxedValue Run(const IValueBuilder*, const 
TUnboxedValuePod* args) const final try { + TMaybe<TResult> init = Nothing(); + if (auto val = args[1]) { + init = val.Get<TResult>(); } - }>; + return TUnboxedValuePod(Generator(args[0].AsStringRef(), init)); + } catch (const std ::exception&) { + TStringBuilder sb; + sb << Pos_ << " "; + sb << CurrentExceptionMessage(); + sb << Endl << "[" << TStringBuf(Name()) << "]"; + UdfTerminate(sb.c_str()); + } - using TMurMurHash2A32 = TDigestFunctionUdf<MURMUR2A32, ui32, [] (auto& inputRef, auto init) { - if (init) { - return TMurmurHash2A<ui32>{*init}.Update(inputRef.Data(), inputRef.Size()).Value(); - } else { - return TMurmurHash2A<ui32>{}.Update(inputRef.Data(), inputRef.Size()).Value(); - } - }>; + TSourcePosition Pos_; +}; - using TCityHash = TDigestFunctionUdf<CITY, ui64, [] (auto& inputRef, auto init) { - if (init) { - return CityHash64WithSeed(inputRef.Data(), inputRef.Size(), *init); - } else { - return CityHash64(inputRef.Data(), inputRef.Size()); - } - }>; +SIMPLE_STRICT_UDF(TCrc32c, ui32(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + ui32 hash = Crc32c(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); +} - class TCityHash128: public TBoxedValue { - public: - static TStringRef Name() { - static auto name = TStringRef::Of("CityHash128"); - return name; - } +using TCrc64 = TDigestFunctionUdf<CRC64, ui64, [](auto& inputRef, auto init) { + return crc64(inputRef.Data(), inputRef.Size(), init.GetOrElse(CRC64INIT)); +}>; - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); - builder.Args(1)->Add<TAutoMap<char*>>(); - builder.Returns(type); - if (!typesOnly) { - builder.Implementation(new TCityHash128); - } - builder.IsStrict(); - return true; - } else { - return false; - } - } +using TFnv32 = 
TDigestFunctionUdf<FNV32, ui32, [](auto& inputRef, auto init) { + if (init) { + return FnvHash<ui32>(inputRef.Data(), inputRef.Size(), *init); + } else { + return FnvHash<ui32>(inputRef.Data(), inputRef.Size()); + } +}>; - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - TUnboxedValue* items = nullptr; - auto val = valueBuilder->NewArray(2U, items); - const auto& inputRef = args[0].AsStringRef(); - uint128 hash = CityHash128(inputRef.Data(), inputRef.Size()); - items[0] = TUnboxedValuePod(hash.first); - items[1] = TUnboxedValuePod(hash.second); - return val; - } - }; +using TFnv64 = TDigestFunctionUdf<FNV64, ui64, [](auto& inputRef, auto init) { + if (init) { + return FnvHash<ui64>(inputRef.Data(), inputRef.Size(), *init); + } else { + return FnvHash<ui64>(inputRef.Data(), inputRef.Size()); + } +}>; - SIMPLE_STRICT_UDF(TNumericHash, ui64(TAutoMap<ui64>)) { - Y_UNUSED(valueBuilder); - ui64 input = args[0].Get<ui64>(); - ui64 hash = (ui64)NumericHash(input); - return TUnboxedValuePod(hash); +using TMurMurHash = TDigestFunctionUdf<MURMUR, ui64, [](auto& inputRef, auto init) { + if (init) { + return MurmurHash<ui64>(inputRef.Data(), inputRef.Size(), *init); + } else { + return MurmurHash<ui64>(inputRef.Data(), inputRef.Size()); } +}>; - SIMPLE_STRICT_UDF(TMd5Hex, char*(TAutoMap<char*>)) { - const auto& inputRef = args[0].AsStringRef(); - MD5 md5; - const TString& hash = md5.Calc(inputRef); - return valueBuilder->NewString(hash); +using TMurMurHash32 = TDigestFunctionUdf<MURMUR32, ui32, [](auto& inputRef, auto init) { + if (init) { + return MurmurHash<ui32>(inputRef.Data(), inputRef.Size(), *init); + } else { + return MurmurHash<ui32>(inputRef.Data(), inputRef.Size()); } +}>; - SIMPLE_STRICT_UDF(TMd5Raw, char*(TAutoMap<char*>)) { - const auto& inputRef = args[0].AsStringRef(); - MD5 md5; - const TString& hash = md5.CalcRaw(inputRef); - return valueBuilder->NewString(hash); +using TMurMurHash2A = 
TDigestFunctionUdf<MURMUR2A, ui64, [](auto& inputRef, auto init) { + if (init) { + return TMurmurHash2A<ui64>{*init}.Update(inputRef.Data(), inputRef.Size()).Value(); + } else { + return TMurmurHash2A<ui64>{}.Update(inputRef.Data(), inputRef.Size()).Value(); } +}>; - SIMPLE_STRICT_UDF(TMd5HalfMix, ui64(TAutoMap<char*>)) { - Y_UNUSED(valueBuilder); - return TUnboxedValuePod(MD5::CalcHalfMix(args[0].AsStringRef())); +using TMurMurHash2A32 = TDigestFunctionUdf<MURMUR2A32, ui32, [](auto& inputRef, auto init) { + if (init) { + return TMurmurHash2A<ui32>{*init}.Update(inputRef.Data(), inputRef.Size()).Value(); + } else { + return TMurmurHash2A<ui32>{}.Update(inputRef.Data(), inputRef.Size()).Value(); } +}>; - SIMPLE_STRICT_UDF(TArgon2, char*(TAutoMap<char*>, TAutoMap<char*>)) { - const static ui32 outSize = 32; - const static NArgonish::TArgon2Factory afactory; - const static THolder<NArgonish::IArgon2Base> argon2 = afactory.Create( - NArgonish::EArgon2Type::Argon2d, 1, 32, 1); - - const TStringRef inputRef = args[0].AsStringRef(); - const TStringRef saltRef = args[1].AsStringRef(); - ui8 out[outSize]; - argon2->Hash(reinterpret_cast<const ui8*>(inputRef.Data()), inputRef.Size(), - reinterpret_cast<const ui8*>(saltRef.Data()), saltRef.Size(), - out, outSize); - return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(&out[0]), outSize)); +using TCityHash = TDigestFunctionUdf<CITY, ui64, [](auto& inputRef, auto init) { + if (init) { + return CityHash64WithSeed(inputRef.Data(), inputRef.Size(), *init); + } else { + return CityHash64(inputRef.Data(), inputRef.Size()); } +}>; - SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TBlake2B, char*(TAutoMap<char*>, TOptional<char*>), 1) { - const static ui32 outSize = 32; - const static NArgonish::TBlake2BFactory bfactory; - const TStringRef inputRef = args[0].AsStringRef(); - - THolder<NArgonish::IBlake2Base> blake2b; - if (args[1]) { - const TStringRef keyRef = args[1].AsStringRef(); - if (keyRef.Size() == 0) { - blake2b = 
bfactory.Create(outSize); - } else { - blake2b = bfactory.Create(outSize, reinterpret_cast<const ui8*>(keyRef.Data()), keyRef.Size()); +class TCityHash128: public TBoxedValue { +public: + static TStringRef Name() { + static auto name = TStringRef::Of("CityHash128"); + return name; + } + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); + builder.Args(1)->Add<TAutoMap<char*>>(); + builder.Returns(type); + if (!typesOnly) { + builder.Implementation(new TCityHash128); } + builder.IsStrict(); + return true; } else { - blake2b = bfactory.Create(outSize); + return false; } - - ui8 out[outSize]; - blake2b->Update(inputRef.Data(), inputRef.Size()); - blake2b->Final(out, outSize); - return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(&out[0]), outSize)); } - SIMPLE_STRICT_UDF(TSipHash, ui64(ui64, ui64, TAutoMap<char*>)) { - using namespace highwayhash; - Y_UNUSED(valueBuilder); - const TStringRef inputRef = args[2].AsStringRef(); - const HH_U64 state[2] = {args[0].Get<ui64>(), args[1].Get<ui64>()}; - ui64 hash = SipHash(state, inputRef.Data(), inputRef.Size()); - return TUnboxedValuePod(hash); +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* items = nullptr; + auto val = valueBuilder->NewArray(2U, items); + const auto& inputRef = args[0].AsStringRef(); + uint128 hash = CityHash128(inputRef.Data(), inputRef.Size()); + items[0] = TUnboxedValuePod(hash.first); + items[1] = TUnboxedValuePod(hash.second); + return val; } +}; - SIMPLE_STRICT_UDF(THighwayHash, ui64(ui64, ui64, ui64, ui64, TAutoMap<char*>)) { - using namespace highwayhash; - Y_UNUSED(valueBuilder); - const TStringRef inputRef = args[4].AsStringRef(); - const uint64_t key[4] = { - args[0].Get<ui64>(), - args[1].Get<ui64>(), - 
args[2].Get<ui64>(), - args[3].Get<ui64>()}; - ui64 hash = HighwayHash64(key, inputRef.Data(), inputRef.Size()); - return TUnboxedValuePod(hash); - } +SIMPLE_STRICT_UDF(TNumericHash, ui64(TAutoMap<ui64>)) { + Y_UNUSED(valueBuilder); + ui64 input = args[0].Get<ui64>(); + ui64 hash = (ui64)NumericHash(input); + return TUnboxedValuePod(hash); +} - SIMPLE_STRICT_UDF(TFarmHashFingerprint, ui64(TAutoMap<ui64>)) { - Y_UNUSED(valueBuilder); - ui64 input = args[0].Get<ui64>(); - ui64 hash = util::Fingerprint(input); - return TUnboxedValuePod(hash); - } +SIMPLE_STRICT_UDF(TMd5Hex, char*(TAutoMap<char*>)) { + const auto& inputRef = args[0].AsStringRef(); + MD5 md5; + const TString& hash = md5.Calc(inputRef); + return valueBuilder->NewString(hash); +} - SIMPLE_STRICT_UDF(TFarmHashFingerprint2, ui64(TAutoMap<ui64>, TAutoMap<ui64>)) { - Y_UNUSED(valueBuilder); - ui64 low = args[0].Get<ui64>(); - ui64 high = args[1].Get<ui64>(); - ui64 hash = util::Fingerprint(util::Uint128(low, high)); - return TUnboxedValuePod(hash); - } +SIMPLE_STRICT_UDF(TMd5Raw, char*(TAutoMap<char*>)) { + const auto& inputRef = args[0].AsStringRef(); + MD5 md5; + const TString& hash = md5.CalcRaw(inputRef); + return valueBuilder->NewString(hash); +} - SIMPLE_STRICT_UDF(TFarmHashFingerprint32, ui32(TAutoMap<char*>)) { - Y_UNUSED(valueBuilder); - const auto& inputRef = args[0].AsStringRef(); - auto hash = util::Fingerprint32(inputRef.Data(), inputRef.Size()); - return TUnboxedValuePod(ui32(hash)); - } +SIMPLE_STRICT_UDF(TMd5HalfMix, ui64(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + return TUnboxedValuePod(MD5::CalcHalfMix(args[0].AsStringRef())); +} - SIMPLE_STRICT_UDF(TFarmHashFingerprint64, ui64(TAutoMap<char*>)) { - Y_UNUSED(valueBuilder); - const auto& inputRef = args[0].AsStringRef(); - auto hash = util::Fingerprint64(inputRef.Data(), inputRef.Size()); - return TUnboxedValuePod(ui64(hash)); - } +SIMPLE_STRICT_UDF(TArgon2, char*(TAutoMap<char*>, TAutoMap<char*>)) { + const static ui32 outSize = 32; + 
const static NArgonish::TArgon2Factory afactory; + const static THolder<NArgonish::IArgon2Base> argon2 = afactory.Create( + NArgonish::EArgon2Type::Argon2d, 1, 32, 1); + + const TStringRef inputRef = args[0].AsStringRef(); + const TStringRef saltRef = args[1].AsStringRef(); + ui8 out[outSize]; + argon2->Hash(reinterpret_cast<const ui8*>(inputRef.Data()), inputRef.Size(), + reinterpret_cast<const ui8*>(saltRef.Data()), saltRef.Size(), + out, outSize); + return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(&out[0]), outSize)); +} - class TFarmHashFingerprint128: public TBoxedValue { - public: - static TStringRef Name() { - static auto name = TStringRef::Of("FarmHashFingerprint128"); - return name; - } +SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TBlake2B, char*(TAutoMap<char*>, TOptional<char*>), 1) { + const static ui32 outSize = 32; + const static NArgonish::TBlake2BFactory bfactory; + const TStringRef inputRef = args[0].AsStringRef(); - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); - builder.Args(1)->Add<TAutoMap<char*>>(); - builder.Returns(type); - if (!typesOnly) { - builder.Implementation(new TFarmHashFingerprint128); - } - builder.IsStrict(); - return true; - } else { - return false; - } + THolder<NArgonish::IBlake2Base> blake2b; + if (args[1]) { + const TStringRef keyRef = args[1].AsStringRef(); + if (keyRef.Size() == 0) { + blake2b = bfactory.Create(outSize); + } else { + blake2b = bfactory.Create(outSize, reinterpret_cast<const ui8*>(keyRef.Data()), keyRef.Size()); } + } else { + blake2b = bfactory.Create(outSize); + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - TUnboxedValue* items = nullptr; - auto val = valueBuilder->NewArray(2U, items); - const auto& inputRef = 
args[0].AsStringRef(); - auto hash = util::Fingerprint128(inputRef.Data(), inputRef.Size()); - items[0] = TUnboxedValuePod(static_cast<ui64>(hash.first)); - items[1] = TUnboxedValuePod(static_cast<ui64>(hash.second)); - return val; - } - }; + ui8 out[outSize]; + blake2b->Update(inputRef.Data(), inputRef.Size()); + blake2b->Final(out, outSize); + return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(&out[0]), outSize)); +} - SIMPLE_STRICT_UDF(TSuperFastHash, ui32(TAutoMap<char*>)) { - Y_UNUSED(valueBuilder); - const auto& inputRef = args[0].AsStringRef(); - ui32 hash = SuperFastHash(inputRef.Data(), inputRef.Size()); - return TUnboxedValuePod(hash); - } +SIMPLE_STRICT_UDF(TSipHash, ui64(ui64, ui64, TAutoMap<char*>)) { + using namespace highwayhash; + Y_UNUSED(valueBuilder); + const TStringRef inputRef = args[2].AsStringRef(); + const HH_U64 state[2] = {args[0].Get<ui64>(), args[1].Get<ui64>()}; + ui64 hash = SipHash(state, inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); +} - SIMPLE_STRICT_UDF(TSha1, char*(TAutoMap<char*>)) { - const auto& inputRef = args[0].AsStringRef(); - SHA_CTX sha; - SHA1_Init(&sha); - SHA1_Update(&sha, inputRef.Data(), inputRef.Size()); - unsigned char hash[SHA_DIGEST_LENGTH]; - SHA1_Final(hash, &sha); - return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash))); - } +SIMPLE_STRICT_UDF(THighwayHash, ui64(ui64, ui64, ui64, ui64, TAutoMap<char*>)) { + using namespace highwayhash; + Y_UNUSED(valueBuilder); + const TStringRef inputRef = args[4].AsStringRef(); + const uint64_t key[4] = { + args[0].Get<ui64>(), + args[1].Get<ui64>(), + args[2].Get<ui64>(), + args[3].Get<ui64>()}; + ui64 hash = HighwayHash64(key, inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); +} - SIMPLE_STRICT_UDF(TSha256, char*(TAutoMap<char*>)) { - const auto& inputRef = args[0].AsStringRef(); - SHA256_CTX sha; - SHA256_Init(&sha); - SHA256_Update(&sha, inputRef.Data(), inputRef.Size()); - unsigned 
char hash[SHA256_DIGEST_LENGTH]; - SHA256_Final(hash, &sha); - return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash))); - } +SIMPLE_STRICT_UDF(TFarmHashFingerprint, ui64(TAutoMap<ui64>)) { + Y_UNUSED(valueBuilder); + ui64 input = args[0].Get<ui64>(); + ui64 hash = util::Fingerprint(input); + return TUnboxedValuePod(hash); +} - SIMPLE_STRICT_UDF_OPTIONS(TSha512, char*(TAutoMap<char*>), builder.SetMinLangVer(NYql::MakeLangVersion(2025, 3));) { - const auto& inputRef = args[0].AsStringRef(); - SHA512_CTX sha; - SHA512_Init(&sha); - SHA512_Update(&sha, inputRef.Data(), inputRef.Size()); - unsigned char hash[SHA512_DIGEST_LENGTH]; - SHA512_Final(hash, &sha); - return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash))); +SIMPLE_STRICT_UDF(TFarmHashFingerprint2, ui64(TAutoMap<ui64>, TAutoMap<ui64>)) { + Y_UNUSED(valueBuilder); + ui64 low = args[0].Get<ui64>(); + ui64 high = args[1].Get<ui64>(); + ui64 hash = util::Fingerprint(util::Uint128(low, high)); + return TUnboxedValuePod(hash); +} + +SIMPLE_STRICT_UDF(TFarmHashFingerprint32, ui32(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + auto hash = util::Fingerprint32(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(ui32(hash)); +} + +SIMPLE_STRICT_UDF(TFarmHashFingerprint64, ui64(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + auto hash = util::Fingerprint64(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(ui64(hash)); +} + +class TFarmHashFingerprint128: public TBoxedValue { +public: + static TStringRef Name() { + static auto name = TStringRef::Of("FarmHashFingerprint128"); + return name; } - SIMPLE_STRICT_UDF(TIntHash64, ui64(TAutoMap<ui64>)) { - Y_UNUSED(valueBuilder); - ui64 x = args[0].Get<ui64>(); - x ^= 0x4CF2D2BAAE6DA887ULL; - x ^= x >> 33; - x *= 0xff51afd7ed558ccdULL; - x ^= x >> 33; - x *= 0xc4ceb9fe1a85ec53ULL; - x ^= x >> 33; - 
return TUnboxedValuePod(x); + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); + builder.Args(1)->Add<TAutoMap<char*>>(); + builder.Returns(type); + if (!typesOnly) { + builder.Implementation(new TFarmHashFingerprint128); + } + builder.IsStrict(); + return true; + } else { + return false; + } } - SIMPLE_STRICT_UDF(TXXH3, ui64(TAutoMap<char*>)) { - Y_UNUSED(valueBuilder); +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* items = nullptr; + auto val = valueBuilder->NewArray(2U, items); const auto& inputRef = args[0].AsStringRef(); - const ui64 hash = XXH3_64bits(inputRef.Data(), inputRef.Size()); - return TUnboxedValuePod(hash); + auto hash = util::Fingerprint128(inputRef.Data(), inputRef.Size()); + items[0] = TUnboxedValuePod(static_cast<ui64>(hash.first)); + items[1] = TUnboxedValuePod(static_cast<ui64>(hash.second)); + return val; } +}; - class TXXH3_128: public TBoxedValue { // NOLINT(readability-identifier-naming) - public: - static TStringRef Name() { - static auto name = TStringRef::Of("XXH3_128"); - return name; - } +SIMPLE_STRICT_UDF(TSuperFastHash, ui32(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + ui32 hash = SuperFastHash(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); +} - static bool DeclareSignature(const TStringRef& name, TType*, IFunctionTypeInfoBuilder& builder, bool typesOnly) { - if (Name() == name) { - const auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); - builder.Args(1)->Add<TAutoMap<char*>>(); - builder.Returns(type); - if (!typesOnly) { - builder.Implementation(new TXXH3_128); - } - builder.IsStrict(); - return true; - } else { - return false; - } - } +SIMPLE_STRICT_UDF(TSha1, 
char*(TAutoMap<char*>)) { + const auto& inputRef = args[0].AsStringRef(); + SHA_CTX sha; + SHA1_Init(&sha); + SHA1_Update(&sha, inputRef.Data(), inputRef.Size()); + unsigned char hash[SHA_DIGEST_LENGTH]; + SHA1_Final(hash, &sha); + return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash))); +} - private: - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { - TUnboxedValue* items = nullptr; - auto val = valueBuilder->NewArray(2U, items); - const auto& inputRef = args[0].AsStringRef(); - const auto hash = XXH3_128bits(inputRef.Data(), inputRef.Size()); - items[0] = TUnboxedValuePod(ui64(hash.low64)); - items[1] = TUnboxedValuePod(ui64(hash.high64)); - return val; - } - }; - - SIMPLE_MODULE(TDigestModule, - TCrc32c, - TCrc64, - TFnv32, - TFnv64, - TMurMurHash, - TMurMurHash32, - TMurMurHash2A, - TMurMurHash2A32, - TCityHash, - TCityHash128, - TNumericHash, - TMd5Hex, - TMd5Raw, - TMd5HalfMix, - TArgon2, - TBlake2B, - TSipHash, - THighwayHash, - TFarmHashFingerprint, - TFarmHashFingerprint2, - TFarmHashFingerprint32, - TFarmHashFingerprint64, - TFarmHashFingerprint128, - TSuperFastHash, - TSha1, - TSha256, - TSha512, - TIntHash64, - TXXH3, - TXXH3_128 - ) +SIMPLE_STRICT_UDF(TSha256, char*(TAutoMap<char*>)) { + const auto& inputRef = args[0].AsStringRef(); + SHA256_CTX sha; + SHA256_Init(&sha); + SHA256_Update(&sha, inputRef.Data(), inputRef.Size()); + unsigned char hash[SHA256_DIGEST_LENGTH]; + SHA256_Final(hash, &sha); + return valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash))); +} + +SIMPLE_STRICT_UDF_OPTIONS(TSha512, char*(TAutoMap<char*>), builder.SetMinLangVer(NYql::MakeLangVersion(2025, 3));) { + const auto& inputRef = args[0].AsStringRef(); + SHA512_CTX sha; + SHA512_Init(&sha); + SHA512_Update(&sha, inputRef.Data(), inputRef.Size()); + unsigned char hash[SHA512_DIGEST_LENGTH]; + SHA512_Final(hash, &sha); + return 
valueBuilder->NewString(TStringRef(reinterpret_cast<char*>(hash), sizeof(hash))); +} + +SIMPLE_STRICT_UDF(TIntHash64, ui64(TAutoMap<ui64>)) { + Y_UNUSED(valueBuilder); + ui64 x = args[0].Get<ui64>(); + x ^= 0x4CF2D2BAAE6DA887ULL; + x ^= x >> 33; + x *= 0xff51afd7ed558ccdULL; + x ^= x >> 33; + x *= 0xc4ceb9fe1a85ec53ULL; + x ^= x >> 33; + return TUnboxedValuePod(x); +} +SIMPLE_STRICT_UDF(TXXH3, ui64(TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const auto& inputRef = args[0].AsStringRef(); + const ui64 hash = XXH3_64bits(inputRef.Data(), inputRef.Size()); + return TUnboxedValuePod(hash); } +class TXXH3_128: public TBoxedValue { // NOLINT(readability-identifier-naming) +public: + static TStringRef Name() { + static auto name = TStringRef::Of("XXH3_128"); + return name; + } + + static bool DeclareSignature(const TStringRef& name, TType*, IFunctionTypeInfoBuilder& builder, bool typesOnly) { + if (Name() == name) { + const auto type = builder.Tuple(2)->Add<ui64>().Add<ui64>().Build(); + builder.Args(1)->Add<TAutoMap<char*>>(); + builder.Returns(type); + if (!typesOnly) { + builder.Implementation(new TXXH3_128); + } + builder.IsStrict(); + return true; + } else { + return false; + } + } + +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { + TUnboxedValue* items = nullptr; + auto val = valueBuilder->NewArray(2U, items); + const auto& inputRef = args[0].AsStringRef(); + const auto hash = XXH3_128bits(inputRef.Data(), inputRef.Size()); + items[0] = TUnboxedValuePod(ui64(hash.low64)); + items[1] = TUnboxedValuePod(ui64(hash.high64)); + return val; + } +}; + +SIMPLE_MODULE(TDigestModule, + TCrc32c, + TCrc64, + TFnv32, + TFnv64, + TMurMurHash, + TMurMurHash32, + TMurMurHash2A, + TMurMurHash2A32, + TCityHash, + TCityHash128, + TNumericHash, + TMd5Hex, + TMd5Raw, + TMd5HalfMix, + TArgon2, + TBlake2B, + TSipHash, + THighwayHash, + TFarmHashFingerprint, + TFarmHashFingerprint2, + TFarmHashFingerprint32, + 
TFarmHashFingerprint64, + TFarmHashFingerprint128, + TSuperFastHash, + TSha1, + TSha256, + TSha512, + TIntHash64, + TXXH3, + TXXH3_128) + +} // namespace + REGISTER_MODULES(TDigestModule) diff --git a/yql/essentials/udfs/common/digest/ya.make b/yql/essentials/udfs/common/digest/ya.make index 565e77a3013..9daa7f25318 100644 --- a/yql/essentials/udfs/common/digest/ya.make +++ b/yql/essentials/udfs/common/digest/ya.make @@ -6,6 +6,8 @@ YQL_UDF_CONTRIB(digest_udf) 0 ) + ENABLE(YQL_STYLE_CPP) + SRCS( digest_udf.cpp ) diff --git a/yql/essentials/udfs/common/file/file_udf.cpp b/yql/essentials/udfs/common/file/file_udf.cpp index d499e85529e..c06da057dee 100644 --- a/yql/essentials/udfs/common/file/file_udf.cpp +++ b/yql/essentials/udfs/common/file/file_udf.cpp @@ -16,563 +16,566 @@ extern const char ByLineFuncName[]; const char ByLineFuncName[] = "ByLines"; namespace { - namespace Helper { - template <class TUserType> - inline bool ConvertToUnboxed(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { - Y_UNUSED(valueBuilder); - TUserType userType; - if (!TryFromString<TUserType>(curLine, userType)) { - return false; - } - result = TUnboxedValuePod(userType); - return true; - } +namespace Helper { +template <class TUserType> +inline bool ConvertToUnboxed(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + Y_UNUSED(valueBuilder); + TUserType userType; + if (!TryFromString<TUserType>(curLine, userType)) { + return false; + } + result = TUnboxedValuePod(userType); + return true; +} - template <> - inline bool ConvertToUnboxed<const char*>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { - result = valueBuilder.NewString(curLine); - return true; - } +template <> +inline bool ConvertToUnboxed<const char*>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + result = valueBuilder.NewString(curLine); + return true; +} - template <> - inline bool 
ConvertToUnboxed<TUtf8>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { - result = valueBuilder.NewString(curLine); - return true; - } +template <> +inline bool ConvertToUnboxed<TUtf8>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + result = valueBuilder.NewString(curLine); + return true; +} - template <> - inline bool ConvertToUnboxed<TYson>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { - result = valueBuilder.NewString(curLine); - return true; - } +template <> +inline bool ConvertToUnboxed<TYson>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + result = valueBuilder.NewString(curLine); + return true; +} - template <> - inline bool ConvertToUnboxed<TJson>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { - result = valueBuilder.NewString(curLine); - return true; - } +template <> +inline bool ConvertToUnboxed<TJson>(const IValueBuilder& valueBuilder, const TString& curLine, TUnboxedValue& result) { + result = valueBuilder.NewString(curLine); + return true; +} - template <typename T> - struct TypeToTypeName { - static const char* Name() { - return "Unknown"; - } - }; - template <> - struct TypeToTypeName<bool> { - static constexpr const char* Name() { - return "Bool"; - } - }; - template <> - struct TypeToTypeName<i8> { - static constexpr const char* Name() { - return "Int8"; - } - }; - template <> - struct TypeToTypeName<ui8> { - static constexpr const char* Name() { - return "Uint8"; - } - }; - template <> - struct TypeToTypeName<i16> { - static constexpr const char* Name() { - return "Int16"; - } - }; - template <> - struct TypeToTypeName<ui16> { - static constexpr const char* Name() { - return "Uint16"; - } - }; - template <> - struct TypeToTypeName<ui32> { - static constexpr const char* Name() { - return "Uint32"; - } - }; - template <> - struct TypeToTypeName<ui64> { - static 
constexpr const char* Name() { - return "Uint64"; - } - }; - template <> - struct TypeToTypeName<i32> { - static constexpr const char* Name() { - return "Int32"; - } - }; - template <> - struct TypeToTypeName<i64> { - static constexpr const char* Name() { - return "Int64"; - } - }; - template <> - struct TypeToTypeName<float> { - static constexpr const char* Name() { - return "Float"; - } - }; - template <> - struct TypeToTypeName<double> { - static constexpr const char* Name() { - return "Double"; - } - }; - template <> - struct TypeToTypeName<const char*> { - static constexpr const char* Name() { - return "String"; - } - }; - template <> - struct TypeToTypeName<TUtf8> { - static constexpr const char* Name() { - return "Utf8"; - } - }; - template <> - struct TypeToTypeName<TYson> { - static constexpr const char* Name() { - return "Yson"; - } - }; - template <> - struct TypeToTypeName<TJson> { - static constexpr const char* Name() { - return "Json"; - } - }; +template <typename T> +struct TypeToTypeName { + static const char* Name() { + return "Unknown"; + } +}; +template <> +struct TypeToTypeName<bool> { + static constexpr const char* Name() { + return "Bool"; + } +}; +template <> +struct TypeToTypeName<i8> { + static constexpr const char* Name() { + return "Int8"; + } +}; +template <> +struct TypeToTypeName<ui8> { + static constexpr const char* Name() { + return "Uint8"; + } +}; +template <> +struct TypeToTypeName<i16> { + static constexpr const char* Name() { + return "Int16"; + } +}; +template <> +struct TypeToTypeName<ui16> { + static constexpr const char* Name() { + return "Uint16"; + } +}; +template <> +struct TypeToTypeName<ui32> { + static constexpr const char* Name() { + return "Uint32"; + } +}; +template <> +struct TypeToTypeName<ui64> { + static constexpr const char* Name() { + return "Uint64"; + } +}; +template <> +struct TypeToTypeName<i32> { + static constexpr const char* Name() { + return "Int32"; + } +}; +template <> +struct TypeToTypeName<i64> { + 
static constexpr const char* Name() { + return "Int64"; + } +}; +template <> +struct TypeToTypeName<float> { + static constexpr const char* Name() { + return "Float"; + } +}; +template <> +struct TypeToTypeName<double> { + static constexpr const char* Name() { + return "Double"; + } +}; +template <> +struct TypeToTypeName<const char*> { + static constexpr const char* Name() { + return "String"; } +}; +template <> +struct TypeToTypeName<TUtf8> { + static constexpr const char* Name() { + return "Utf8"; + } +}; +template <> +struct TypeToTypeName<TYson> { + static constexpr const char* Name() { + return "Yson"; + } +}; +template <> +struct TypeToTypeName<TJson> { + static constexpr const char* Name() { + return "Json"; + } +}; +} // namespace Helper - static const ui64 TAKE_UNLIM = -1; +static const ui64 TAKE_UNLIM = -1; - bool SkipElements(IBoxedValue& iter, ui64 skip) { - for (; skip > 0; --skip) { - if (!TBoxedValueAccessor::Skip(iter)) { - return false; - } +bool SkipElements(IBoxedValue& iter, ui64 skip) { + for (; skip > 0; --skip) { + if (!TBoxedValueAccessor::Skip(iter)) { + return false; } - return true; } + return true; +} - typedef std::function<void(const TString& message)> TTerminateFunc; - - class TStreamMeta: public TThrRefBase { - public: - typedef TBuffered<TUnbufferedFileInput> TStream; - typedef TIntrusivePtr<TStreamMeta> TPtr; +typedef std::function<void(const TString& message)> TTerminateFunc; - TStreamMeta(TString filePath) - : FilePath_(filePath) - { - // work in greedy mode to catch error on creation - Cached_ = DoCreateStream(); - } +class TStreamMeta: public TThrRefBase { +public: + typedef TBuffered<TUnbufferedFileInput> TStream; + typedef TIntrusivePtr<TStreamMeta> TPtr; - std::unique_ptr<TStream> CreateStream(TTerminateFunc terminateFunc) { - if (Cached_) { - return std::move(Cached_); - } + TStreamMeta(TString filePath) + : FilePath_(filePath) + { + // work in greedy mode to catch error on creation + Cached_ = DoCreateStream(); + } - 
terminateFunc("The file iterator was already created. To scan file data multiple times please use ListCollect either over ParseFile or over some lazy function over it, e.g. ListMap"); - Y_ABORT("Terminate unstoppable!"); + std::unique_ptr<TStream> CreateStream(TTerminateFunc terminateFunc) { + if (Cached_) { + return std::move(Cached_); } - bool GetLinesCount(ui64& count) const { - if (LinesCount_ == Unknown) - return false; - count = LinesCount_; - return true; - } - void SetLinesCount(ui64 count) { - Y_DEBUG_ABORT_UNLESS(LinesCount_ == Unknown || count == LinesCount_, "Set another value of count lines"); - if (LinesCount_ == Unknown) { - LinesCount_ = count; - } - } + terminateFunc("The file iterator was already created. To scan file data multiple times please use ListCollect either over ParseFile or over some lazy function over it, e.g. ListMap"); + Y_ABORT("Terminate unstoppable!"); + } - const TString& GetFilePath() const { - return FilePath_; + bool GetLinesCount(ui64& count) const { + if (LinesCount_ == Unknown) { + return false; } - - private: - std::unique_ptr<TStream> DoCreateStream() { - static const auto bufferSize = 1 << 12; - TFile file(FilePath_, OpenExisting | RdOnly | Seq); - if (FileSize_ == Unknown) { - FileSize_ = file.GetLength(); - } - return std::make_unique<TBuffered<TUnbufferedFileInput>>(bufferSize, file); + count = LinesCount_; + return true; + } + void SetLinesCount(ui64 count) { + Y_DEBUG_ABORT_UNLESS(LinesCount_ == Unknown || count == LinesCount_, "Set another value of count lines"); + if (LinesCount_ == Unknown) { + LinesCount_ = count; } + } - TString FilePath_; - static const ui64 Unknown = -1; - ui64 FileSize_ = Unknown; - ui64 LinesCount_ = Unknown; - std::unique_ptr<TStream> Cached_; - }; + const TString& GetFilePath() const { + return FilePath_; + } - class TEmptyIter: public TBoxedValue { - private: - bool Skip() override { - return false; - } - bool Next(TUnboxedValue&) override { - return false; +private: + 
std::unique_ptr<TStream> DoCreateStream() { + static const auto bufferSize = 1 << 12; + TFile file(FilePath_, OpenExisting | RdOnly | Seq); + if (FileSize_ == Unknown) { + FileSize_ = file.GetLength(); } + return std::make_unique<TBuffered<TUnbufferedFileInput>>(bufferSize, file); + } - public: - TEmptyIter(TTerminateFunc terminateFunc) - : TerminateFunc_(terminateFunc) - { - } + TString FilePath_; + static const ui64 Unknown = -1; + ui64 FileSize_ = Unknown; + ui64 LinesCount_ = Unknown; + std::unique_ptr<TStream> Cached_; +}; + +class TEmptyIter: public TBoxedValue { +private: + bool Skip() override { + return false; + } + bool Next(TUnboxedValue&) override { + return false; + } - private: - const TTerminateFunc TerminateFunc_; - }; +public: + TEmptyIter(TTerminateFunc terminateFunc) + : TerminateFunc_(terminateFunc) + { + } - template <class TUserType> - class TLineByLineBoxedValueIterator: public TBoxedValue { - public: - TLineByLineBoxedValueIterator(TStreamMeta::TPtr metaPtr, std::unique_ptr<TStreamMeta::TStream>&& stream, const IValueBuilder& valueBuilder, TTerminateFunc terminateFunc) - : MetaPtr_(metaPtr) - , ValueBuilder_(valueBuilder) - , Stream_(std::move(stream)) - , Splitter_(*Stream_) - , TerminateFunc_(terminateFunc) - { - } +private: + const TTerminateFunc TerminateFunc_; +}; + +template <class TUserType> +class TLineByLineBoxedValueIterator: public TBoxedValue { +public: + TLineByLineBoxedValueIterator(TStreamMeta::TPtr metaPtr, std::unique_ptr<TStreamMeta::TStream>&& stream, const IValueBuilder& valueBuilder, TTerminateFunc terminateFunc) + : MetaPtr_(metaPtr) + , ValueBuilder_(valueBuilder) + , Stream_(std::move(stream)) + , Splitter_(*Stream_) + , TerminateFunc_(terminateFunc) + { + } - void SetLimit(ui64 limit = TAKE_UNLIM) { - Limit_ = limit; - } + void SetLimit(ui64 limit = TAKE_UNLIM) { + Limit_ = limit; + } - private: - bool SkipLimit() { - if (Limit_ != TAKE_UNLIM) { - if (Limit_ == 0) { - return false; - } - --Limit_; +private: + bool 
SkipLimit() { + if (Limit_ != TAKE_UNLIM) { + if (Limit_ == 0) { + return false; } - return true; + --Limit_; } + return true; + } - bool Skip() final { - ++CurLineNum_; - return Splitter_.Next(CurLine_) && SkipLimit(); - } + bool Skip() final { + ++CurLineNum_; + return Splitter_.Next(CurLine_) && SkipLimit(); + } - bool Next(TUnboxedValue& value) override { - if (!Skip()) { - return false; - } - if (!Helper::ConvertToUnboxed<TUserType>(ValueBuilder_, CurLine_, value)) { - TStringBuilder sb; - sb << "File::ByLines failed to cast string '" << CurLine_ << "' to " << Helper::TypeToTypeName<TUserType>::Name() << Endl; - sb << "- path: " << MetaPtr_->GetFilePath() << Endl; - sb << "- line: " << CurLineNum_ << Endl; - TerminateFunc_(sb); - Y_ABORT("Terminate unstoppable!"); - } - return true; + bool Next(TUnboxedValue& value) override { + if (!Skip()) { + return false; } + if (!Helper::ConvertToUnboxed<TUserType>(ValueBuilder_, CurLine_, value)) { + TStringBuilder sb; + sb << "File::ByLines failed to cast string '" << CurLine_ << "' to " << Helper::TypeToTypeName<TUserType>::Name() << Endl; + sb << "- path: " << MetaPtr_->GetFilePath() << Endl; + sb << "- line: " << CurLineNum_ << Endl; + TerminateFunc_(sb); + Y_ABORT("Terminate unstoppable!"); + } + return true; + } - TStreamMeta::TPtr MetaPtr_; - const IValueBuilder& ValueBuilder_; - - std::unique_ptr<TStreamMeta::TStream> Stream_; - TLineSplitter Splitter_; - TTerminateFunc TerminateFunc_; - TString CurLine_; - ui64 CurLineNum_ = 0; - ui64 Limit_ = TAKE_UNLIM; - TUnboxedValue Result_; - }; + TStreamMeta::TPtr MetaPtr_; + const IValueBuilder& ValueBuilder_; + + std::unique_ptr<TStreamMeta::TStream> Stream_; + TLineSplitter Splitter_; + TTerminateFunc TerminateFunc_; + TString CurLine_; + ui64 CurLineNum_ = 0; + ui64 Limit_ = TAKE_UNLIM; + TUnboxedValue Result_; +}; + +template <class TUserType> +class TListByLineBoxedValue: public TBoxedValue { +public: + TListByLineBoxedValue(TStreamMeta::TPtr metaPtr, const 
IValueBuilder& valueBuilder, TTerminateFunc terminateFunc, ui64 skip = 0ULL, ui64 take = TAKE_UNLIM) + : MetaPtr_(metaPtr) + , ValueBuilder_(valueBuilder) + , TerminateFunc_(terminateFunc) + , Skip_(skip) + , Take_(take) + { + } - template <class TUserType> - class TListByLineBoxedValue: public TBoxedValue { - public: - TListByLineBoxedValue(TStreamMeta::TPtr metaPtr, const IValueBuilder& valueBuilder, TTerminateFunc terminateFunc, ui64 skip = 0ULL, ui64 take = TAKE_UNLIM) - : MetaPtr_(metaPtr) - , ValueBuilder_(valueBuilder) - , TerminateFunc_(terminateFunc) - , Skip_(skip) - , Take_(take) - {} - private: - bool HasFastListLength() const override { - ui64 tmp; - return MetaPtr_->GetLinesCount(tmp); - } - ui64 GetListLength() const override { - ui64 length; - if (!MetaPtr_->GetLinesCount(length)) { - length = Skip_; - for (const auto iter = GetListIterator(); iter.Skip(); ++length) - continue; - if (Take_ == TAKE_UNLIM) { - MetaPtr_->SetLinesCount(length); - } +private: + bool HasFastListLength() const override { + ui64 tmp; + return MetaPtr_->GetLinesCount(tmp); + } + ui64 GetListLength() const override { + ui64 length; + if (!MetaPtr_->GetLinesCount(length)) { + length = Skip_; + for (const auto iter = GetListIterator(); iter.Skip(); ++length) { + continue; } - if (length <= Skip_) { - return 0; + if (Take_ == TAKE_UNLIM) { + MetaPtr_->SetLinesCount(length); } - return Min(length - Skip_, Take_); } - ui64 GetEstimatedListLength() const override { - /// \todo some optimisation? 
- return GetListLength(); - } - - TUnboxedValue GetListIterator() const override { - try { - auto stream = MetaPtr_->CreateStream(TerminateFunc_); - IBoxedValuePtr iter(new TLineByLineBoxedValueIterator<TUserType>(MetaPtr_, std::move(stream), ValueBuilder_, TerminateFunc_)); - if (!Take_ || !SkipElements(*iter, Skip_)) { - return TUnboxedValuePod(new TEmptyIter(TerminateFunc_)); - } - static_cast<TLineByLineBoxedValueIterator<TUserType>*>(iter.Get())->SetLimit(Take_); - return TUnboxedValuePod(std::move(iter)); - } catch (const std::exception& e) { - TerminateFunc_(CurrentExceptionMessage()); - Y_ABORT("Terminate unstoppable!"); - } + if (length <= Skip_) { + return 0; } + return Min(length - Skip_, Take_); + } + ui64 GetEstimatedListLength() const override { + /// \todo some optimisation? + return GetListLength(); + } - IBoxedValuePtr SkipListImpl(const IValueBuilder& builder, ui64 count) const override { - return new TListByLineBoxedValue(MetaPtr_, builder, TerminateFunc_, Skip_ + count, Take_ == TAKE_UNLIM ? 
TAKE_UNLIM : Take_ - std::min(Take_, count)); - } - IBoxedValuePtr TakeListImpl(const IValueBuilder& builder, ui64 count) const override { - return new TListByLineBoxedValue(MetaPtr_, builder, TerminateFunc_, Skip_, std::min(Take_, count)); + TUnboxedValue GetListIterator() const override { + try { + auto stream = MetaPtr_->CreateStream(TerminateFunc_); + IBoxedValuePtr iter(new TLineByLineBoxedValueIterator<TUserType>(MetaPtr_, std::move(stream), ValueBuilder_, TerminateFunc_)); + if (!Take_ || !SkipElements(*iter, Skip_)) { + return TUnboxedValuePod(new TEmptyIter(TerminateFunc_)); + } + static_cast<TLineByLineBoxedValueIterator<TUserType>*>(iter.Get())->SetLimit(Take_); + return TUnboxedValuePod(std::move(iter)); + } catch (const std::exception& e) { + TerminateFunc_(CurrentExceptionMessage()); + Y_ABORT("Terminate unstoppable!"); } + } - bool HasListItems() const override { - return true; - } + IBoxedValuePtr SkipListImpl(const IValueBuilder& builder, ui64 count) const override { + return new TListByLineBoxedValue(MetaPtr_, builder, TerminateFunc_, Skip_ + count, Take_ == TAKE_UNLIM ? 
TAKE_UNLIM : Take_ - std::min(Take_, count)); + } + IBoxedValuePtr TakeListImpl(const IValueBuilder& builder, ui64 count) const override { + return new TListByLineBoxedValue(MetaPtr_, builder, TerminateFunc_, Skip_, std::min(Take_, count)); + } - TStreamMeta::TPtr MetaPtr_; - const IValueBuilder& ValueBuilder_; - TTerminateFunc TerminateFunc_; - ui64 Skip_ = 0ULL; - ui64 Take_ = TAKE_UNLIM; - }; + bool HasListItems() const override { + return true; + } - template <class TUserType> - class TByLinesFunc: public TBoxedValue { - private: - TSourcePosition Pos_; + TStreamMeta::TPtr MetaPtr_; + const IValueBuilder& ValueBuilder_; + TTerminateFunc TerminateFunc_; + ui64 Skip_ = 0ULL; + ui64 Take_ = TAKE_UNLIM; +}; + +template <class TUserType> +class TByLinesFunc: public TBoxedValue { +private: + TSourcePosition Pos_; + + TByLinesFunc(TSourcePosition pos) + : Pos_(pos) + { + } - TByLinesFunc(TSourcePosition pos) - : Pos_(pos) - {} + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + try { + TString filePath(args[0].AsStringRef()); + TStreamMeta::TPtr metaPtr(new TStreamMeta(filePath)); + auto pos = Pos_; + auto terminateFunc = [pos](const TString& message) { + UdfTerminate((TStringBuilder() << pos << " " << message).c_str()); + }; + return TUnboxedValuePod(new TListByLineBoxedValue<TUserType>(metaPtr, *valueBuilder, terminateFunc)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + } - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { - try { - TString filePath(args[0].AsStringRef()); - TStreamMeta::TPtr metaPtr(new TStreamMeta(filePath)); - auto pos = Pos_; - auto terminateFunc = [pos](const TString& message) { - UdfTerminate((TStringBuilder() << pos << " " << message).c_str()); - }; - return TUnboxedValuePod(new TListByLineBoxedValue<TUserType>(metaPtr, *valueBuilder, terminateFunc)); - } catch (const 
std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } +public: + static void DeclareSignature( + TStringRef name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + Y_UNUSED(name); + builder.UserType(userType); + builder.SimpleSignature<TListType<TUserType>(char*)>(); + if (!typesOnly) { + builder.Implementation(new TByLinesFunc<TUserType>(builder.GetSourcePosition())); } + } +}; +class TFolderListFromFile: public TBoxedValue { +private: + class TIterator: public TBoxedValue { public: - static void DeclareSignature( - TStringRef name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) + TIterator(ui32 indexP, ui32 indexT, ui32 indexA, const IValueBuilder& valueBuilder, const TSourcePosition& pos, TString filePath) + : IndexP_(indexP) + , IndexT_(indexT) + , IndexA_(indexA) + , ValueBuilder_(valueBuilder) + , Pos_(pos) + , Input_(filePath) { - Y_UNUSED(name); - builder.UserType(userType); - builder.SimpleSignature<TListType<TUserType>(char*)>(); - if (!typesOnly) { - builder.Implementation(new TByLinesFunc<TUserType>(builder.GetSourcePosition())); - } } - }; - class TFolderListFromFile: public TBoxedValue { private: - class TIterator : public TBoxedValue { - public: - TIterator(ui32 indexP, ui32 indexT, ui32 indexA, const IValueBuilder& valueBuilder, const TSourcePosition& pos, TString filePath) - : IndexP_(indexP) - , IndexT_(indexT) - , IndexA_(indexA) - , ValueBuilder_(valueBuilder) - , Pos_(pos) - , Input_(filePath) - { - } - - private: - bool Next(NUdf::TUnboxedValue& value) override { - try { - TString type; - TString path; - TString attrs; - ::Load(&Input_, type); - if (!type) { - return false; - } - ::Load(&Input_, path); - ::Load(&Input_, attrs); - - NUdf::TUnboxedValue* items = nullptr; - value = ValueBuilder_.NewArray(3, items); - items[IndexT_] = ValueBuilder_.NewString(type); - items[IndexP_] = ValueBuilder_.NewString(path); - items[IndexA_] = 
ValueBuilder_.NewString(attrs); - } - catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - return true; - } - - private: - const ui32 IndexP_; - const ui32 IndexT_; - const ui32 IndexA_; - const IValueBuilder& ValueBuilder_; - const TSourcePosition Pos_; - TIFStream Input_; - }; - - class TList: public TBoxedValue { - public: - TList(ui32 indexP, ui32 indexT, ui32 indexA, const IValueBuilder& valueBuilder, const TSourcePosition& pos, TString filePath) - : IndexP_(indexP) - , IndexT_(indexT) - , IndexA_(indexA) - , ValueBuilder_(valueBuilder) - , Pos_(pos) - , FilePath_(std::move(filePath)) - { - } - - protected: - NUdf::TUnboxedValue GetListIterator() const override { - return NUdf::TUnboxedValuePod(new TIterator(IndexP_, IndexT_, IndexA_, ValueBuilder_, Pos_, FilePath_)); - } - - bool HasFastListLength() const override { - return bool(Length_); - } - - ui64 GetListLength() const override { - if (!Length_) { - ui64 length = 0ULL; - for (const auto it = GetListIterator(); it.Skip();) { - ++length; - } - - Length_ = length; - } - - return *Length_; - } - - ui64 GetEstimatedListLength() const override { - return GetListLength(); - } - - bool HasListItems() const override { - if (HasItems_) { - return *HasItems_; - } - - if (Length_) { - HasItems_ = (*Length_ != 0); - return *HasItems_; + bool Next(NUdf::TUnboxedValue& value) override { + try { + TString type; + TString path; + TString attrs; + ::Load(&Input_, type); + if (!type) { + return false; } - - auto iter = GetListIterator(); - HasItems_ = iter.Skip(); - return *HasItems_; + ::Load(&Input_, path); + ::Load(&Input_, attrs); + + NUdf::TUnboxedValue* items = nullptr; + value = ValueBuilder_.NewArray(3, items); + items[IndexT_] = ValueBuilder_.NewString(type); + items[IndexP_] = ValueBuilder_.NewString(path); + items[IndexA_] = ValueBuilder_.NewString(attrs); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << 
e.what()).c_str()); } + return true; + } - protected: - const ui32 IndexP_; - const ui32 IndexT_; - const ui32 IndexA_; - const IValueBuilder& ValueBuilder_; - const TSourcePosition Pos_; - const TString FilePath_; - mutable TMaybe<ui64> Length_; - mutable TMaybe<bool> HasItems_; - }; + private: + const ui32 IndexP_; + const ui32 IndexT_; + const ui32 IndexA_; + const IValueBuilder& ValueBuilder_; + const TSourcePosition Pos_; + TIFStream Input_; + }; + class TList: public TBoxedValue { public: - TFolderListFromFile(ui32 indexP, ui32 indexT, ui32 indexA, const TSourcePosition& pos) + TList(ui32 indexP, ui32 indexT, ui32 indexA, const IValueBuilder& valueBuilder, const TSourcePosition& pos, TString filePath) : IndexP_(indexP) , IndexT_(indexT) , IndexA_(indexA) + , ValueBuilder_(valueBuilder) , Pos_(pos) + , FilePath_(std::move(filePath)) { } - static const ::NYql::NUdf::TStringRef& Name() { - static auto name = ::NYql::NUdf::TStringRef::Of("FolderListFromFile"); - return name; + protected: + NUdf::TUnboxedValue GetListIterator() const override { + return NUdf::TUnboxedValuePod(new TIterator(IndexP_, IndexT_, IndexA_, ValueBuilder_, Pos_, FilePath_)); } - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { - try { - TString filePath(args[0].AsStringRef()); - return TUnboxedValuePod(new TList(IndexP_, IndexT_, IndexA_, *valueBuilder, Pos_, filePath)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + bool HasFastListLength() const override { + return bool(Length_); } - static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { - if (Name() != name) { - // the only case when we return false - return false; + ui64 GetListLength() const override { + if (!Length_) { + ui64 length = 0ULL; + for (const auto it = GetListIterator(); it.Skip();) { + ++length; + } + + Length_ = length; } - 
builder.UserType(userType); + return *Length_; + } + + ui64 GetEstimatedListLength() const override { + return GetListLength(); + } - ui32 indexP, indexT, indexA; - auto itemType = builder.Struct() - ->AddField<const char*>("Path", &indexP) - .AddField<const char*>("Type", &indexT) - .AddField<TYson>("Attributes", &indexA) - .Build(); - auto resultType = builder.List()->Item(itemType).Build(); + bool HasListItems() const override { + if (HasItems_) { + return *HasItems_; + } - builder.Args()->Add<const char*>().Done().Returns(resultType); - if (!typesOnly) { - builder.Implementation(new TFolderListFromFile(indexP, indexT, indexA, builder.GetSourcePosition())); + if (Length_) { + HasItems_ = (*Length_ != 0); + return *HasItems_; } - return true; + + auto iter = GetListIterator(); + HasItems_ = iter.Skip(); + return *HasItems_; } - private: + protected: const ui32 IndexP_; const ui32 IndexT_; const ui32 IndexA_; + const IValueBuilder& ValueBuilder_; const TSourcePosition Pos_; + const TString FilePath_; + mutable TMaybe<ui64> Length_; + mutable TMaybe<bool> HasItems_; }; - SIMPLE_MODULE(TFileModule, - TUserDataTypeFuncFactory<false, false, ByLineFuncName, TByLinesFunc, const char*, TUtf8, TYson, TJson, i8, ui8, i16, ui16, ui32, ui64, i32, i64, float, double, bool>, - TFolderListFromFile - ) +public: + TFolderListFromFile(ui32 indexP, ui32 indexT, ui32 indexA, const TSourcePosition& pos) + : IndexP_(indexP) + , IndexT_(indexT) + , IndexA_(indexA) + , Pos_(pos) + { + } -} + static const ::NYql::NUdf::TStringRef& Name() { + static auto name = ::NYql::NUdf::TStringRef::Of("FolderListFromFile"); + return name; + } + + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + try { + TString filePath(args[0].AsStringRef()); + return TUnboxedValuePod(new TList(IndexP_, IndexT_, IndexA_, *valueBuilder, Pos_, filePath)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } 
+ } + + static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { + if (Name() != name) { + // the only case when we return false + return false; + } + + builder.UserType(userType); + + ui32 indexP, indexT, indexA; + auto itemType = builder.Struct() + ->AddField<const char*>("Path", &indexP) + .AddField<const char*>("Type", &indexT) + .AddField<TYson>("Attributes", &indexA) + .Build(); + auto resultType = builder.List()->Item(itemType).Build(); + + builder.Args()->Add<const char*>().Done().Returns(resultType); + if (!typesOnly) { + builder.Implementation(new TFolderListFromFile(indexP, indexT, indexA, builder.GetSourcePosition())); + } + return true; + } + +private: + const ui32 IndexP_; + const ui32 IndexT_; + const ui32 IndexA_; + const TSourcePosition Pos_; +}; + +SIMPLE_MODULE(TFileModule, + TUserDataTypeFuncFactory<false, false, ByLineFuncName, TByLinesFunc, const char*, TUtf8, TYson, TJson, i8, ui8, i16, ui16, ui32, ui64, i32, i64, float, double, bool>, + TFolderListFromFile) + +} // namespace REGISTER_MODULES(TFileModule) diff --git a/yql/essentials/udfs/common/file/ya.make b/yql/essentials/udfs/common/file/ya.make index 250f0722d8e..9a4f1863132 100644 --- a/yql/essentials/udfs/common/file/ya.make +++ b/yql/essentials/udfs/common/file/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( file_udf.cpp ) diff --git a/yql/essentials/udfs/common/histogram/histogram_udf.cpp b/yql/essentials/udfs/common/histogram/histogram_udf.cpp index 731b5956ed8..283e243396b 100644 --- a/yql/essentials/udfs/common/histogram/histogram_udf.cpp +++ b/yql/essentials/udfs/common/histogram/histogram_udf.cpp @@ -43,461 +43,220 @@ namespace { XX(Merge, arg) #define DECLARE_HISTOGRAM_RESOURCE_NAME(name) extern const char name##HistogramResourceName[] = "Histogram." 
#name; - HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_RESOURCE_NAME) - DECLARE_HISTOGRAM_RESOURCE_NAME(Linear) - DECLARE_HISTOGRAM_RESOURCE_NAME(Logarithmic) - - class TLinearHistogram: public TAdaptiveWardHistogram { - public: - TLinearHistogram(double step, double begin, double end) - : TAdaptiveWardHistogram(1ULL << 24) - , Step_(step) - , Begin_(begin) - , End_(end) - { - } - - void Add(double value, double weight) override { - if (value < Begin_) { - value = Begin_; - } else if (value > End_) { - value = End_; - } else { - value = std::floor(value / Step_ + 0.5) * Step_; - } - TAdaptiveWardHistogram::Add(value, weight); - } - - void Add(const THistoRec&) override { - Y_ABORT("Not implemented"); - } - - protected: - double Step_; - double Begin_; - double End_; - }; - - class TLogarithmicHistogram: public TLinearHistogram { - public: - TLogarithmicHistogram(double step, double begin, double end) - : TLinearHistogram(step, begin, end) - { - } - - void Add(double value, double weight) override { - double base = std::log(value) / std::log(Step_); - double prev = std::pow(Step_, std::floor(base)); - double next = std::pow(Step_, std::ceil(base)); - if (std::abs(value - next) > std::abs(value - prev)) { - value = prev; - } else { - value = next; - } - - if (value < Begin_) { - value = Begin_; - } else if (value > End_) { - value = End_; - } - - if (!std::isnan(value)) { - TAdaptiveWardHistogram::Add(value, weight); - } - } - - void Add(const THistoRec&) override { - Y_ABORT("Not implemented"); - } - }; - - template <typename THistogramType, const char* ResourceName> - class THistogram_Create: public TBoxedValue { - public: - THistogram_Create(TSourcePosition pos) - : Pos_(pos) - {} - - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_Create"; - static auto nameRef = TStringRef(name); - return nameRef; - } - - private: - TUnboxedValue Run( - 
const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - THolder<THistogramResource> histogram(new THistogramResource(args[2].Get<ui32>())); - histogram->Get()->Add(args[0].Get<double>(), args[1].Get<double>()); - return TUnboxedValuePod(histogram.Release()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - } +HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_RESOURCE_NAME) +DECLARE_HISTOGRAM_RESOURCE_NAME(Linear) +DECLARE_HISTOGRAM_RESOURCE_NAME(Logarithmic) + +class TLinearHistogram: public TAdaptiveWardHistogram { +public: + TLinearHistogram(double step, double begin, double end) + : TAdaptiveWardHistogram(1ULL << 24) + , Step_(step) + , Begin_(begin) + , End_(end) + { + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<ResourceName>(double, double, ui32)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_Create<THistogramType, ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } + void Add(double value, double weight) override { + if (value < Begin_) { + value = Begin_; + } else if (value > End_) { + value = End_; + } else { + value = std::floor(value / Step_ + 0.5) * Step_; } + TAdaptiveWardHistogram::Add(value, weight); + } - private: - TSourcePosition Pos_; - }; + void Add(const THistoRec&) override { + Y_ABORT("Not implemented"); + } - template <typename THistogramType, const char* ResourceName> - class THistogram_AddValue: public TBoxedValue { - public: - THistogram_AddValue(TSourcePosition pos) - : Pos_(pos) - {} - - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_AddValue"; - 
static auto nameRef = TStringRef(name); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - THistogramResource* resource = static_cast<THistogramResource*>(args[0].AsBoxed().Get()); - resource->Get()->Add(args[1].Get<double>(), args[2].Get<double>()); - return TUnboxedValuePod(args[0]); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - } +protected: + double Step_; + double Begin_; + double End_; +}; + +class TLogarithmicHistogram: public TLinearHistogram { +public: + TLogarithmicHistogram(double step, double begin, double end) + : TLinearHistogram(step, begin, end) + { + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, double, double)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_AddValue<THistogramType, ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } + void Add(double value, double weight) override { + double base = std::log(value) / std::log(Step_); + double prev = std::pow(Step_, std::floor(base)); + double next = std::pow(Step_, std::ceil(base)); + if (std::abs(value - next) > std::abs(value - prev)) { + value = prev; + } else { + value = next; } - private: - TSourcePosition Pos_; - }; - - template <typename THistogramType, const char* ResourceName> - class THistogram_Serialize: public TBoxedValue { - public: - THistogram_Serialize(TSourcePosition pos) - : Pos_(pos) - {} - - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_Serialize"; - static auto nameRef = 
TStringRef(name); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - THistogram proto; - TString result; - static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto); - Y_PROTOBUF_SUPPRESS_NODISCARD proto.SerializeToString(&result); - return valueBuilder->NewString(result); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + if (value < Begin_) { + value = Begin_; + } else if (value > End_) { + value = End_; } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<char*(TResource<ResourceName>)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_Serialize<THistogramType, ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } + if (!std::isnan(value)) { + TAdaptiveWardHistogram::Add(value, weight); } + } - private: - TSourcePosition Pos_; - }; + void Add(const THistoRec&) override { + Y_ABORT("Not implemented"); + } +}; + +template <typename THistogramType, const char* ResourceName> +class THistogram_Create: public TBoxedValue { +public: + THistogram_Create(TSourcePosition pos) + : Pos_(pos) + { + } - template <typename THistogramType, const char* ResourceName> - class THistogram_Deserialize: public TBoxedValue { - public: - THistogram_Deserialize(TSourcePosition pos) - : Pos_(pos) - {} - - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_Deserialize"; - static auto nameRef = TStringRef(name); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - 
Y_UNUSED(valueBuilder); - THistogram proto; - Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); - THolder<THistogramResource> histogram(new THistogramResource(args[1].Get<ui32>())); - histogram->Get()->FromProto(proto); - return TUnboxedValuePod(histogram.Release()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - } + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<ResourceName>(char*, ui32)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_Deserialize<THistogramType, ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } - } - - private: - TSourcePosition Pos_; - }; + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Create"; + static auto nameRef = TStringRef(name); + return nameRef; + } - template <typename THistogramType, const char* ResourceName> - class THistogram_Merge: public TBoxedValue { - public: - THistogram_Merge(TSourcePosition pos) - : Pos_(pos) - {} - - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_Merge"; - static auto nameRef = TStringRef(name); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - THistogram proto; - static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto); - static_cast<THistogramResource*>(args[1].AsBoxed().Get())->Get()->Merge(proto, 1.0); - return TUnboxedValuePod(args[1]); - } catch (const 
std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + THolder<THistogramResource> histogram(new THistogramResource(args[2].Get<ui32>())); + histogram->Get()->Add(args[0].Get<double>(), args[1].Get<double>()); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, TResource<ResourceName>)>(); - if (!typesOnly) { - builder.Implementation(new THistogram_Merge<THistogramType, ResourceName>(builder.GetSourcePosition())); - } - return true; - } else { - return false; +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<ResourceName>(double, double, ui32)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Create<THistogramType, ResourceName>(builder.GetSourcePosition())); } + return true; + } else { + return false; } + } - private: - TSourcePosition Pos_; - }; - - struct THistogramIndexes { - static constexpr ui32 BinFieldsCount = 2U; - static constexpr ui32 ResultFieldsCount = 5U; - - THistogramIndexes(IFunctionTypeInfoBuilder& builder) { - const auto binStructType = builder.Struct(BinFieldsCount)->AddField<double>("Position", &Position).AddField<double>("Frequency", &Frequency).Build(); - const auto binsList = builder.List()->Item(binStructType).Build(); - ResultStructType = 
builder.Struct(ResultFieldsCount)->AddField<char*>("Kind", &Kind).AddField<double>("Min", &Min).AddField<double>("Max", &Max).AddField<double>("WeightsSum", &WeightsSum).AddField("Bins", binsList, &Bins).Build(); - } - - ui32 Kind; - ui32 Min; - ui32 Max; - ui32 WeightsSum; - ui32 Bins; - - ui32 Position; - ui32 Frequency; +private: + TSourcePosition Pos_; +}; - TType* ResultStructType; - }; +template <typename THistogramType, const char* ResourceName> +class THistogram_AddValue: public TBoxedValue { +public: + THistogram_AddValue(TSourcePosition pos) + : Pos_(pos) + { + } - template <typename THistogramType, const char* ResourceName> - class THistogram_GetResult: public TBoxedValue { - public: - typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - THistogram_GetResult(const THistogramIndexes& histogramIndexes, TSourcePosition pos) - : HistogramIndexes_(histogramIndexes) - , Pos_(pos) - { - } + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_AddValue"; + static auto nameRef = TStringRef(name); + return nameRef; + } - static const TStringRef& Name() { - static auto name = TString(ResourceName).substr(10) + "Histogram_GetResult"; - static auto nameRef = TStringRef(name); - return nameRef; +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + THistogramResource* resource = static_cast<THistogramResource*>(args[0].AsBoxed().Get()); + resource->Get()->Add(args[1].Get<double>(), args[2].Get<double>()); + return TUnboxedValuePod(args[0]); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - THistogram proto; - auto histogram = 
static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get(); - histogram->ToProto(proto); - - auto size = proto.FreqSize(); - TUnboxedValue* fields = nullptr; - auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); - fields[HistogramIndexes_.Kind] = valueBuilder->NewString(TStringBuf(ResourceName).Skip(10)); - if (size) { - TUnboxedValue* items = nullptr; - fields[HistogramIndexes_.Bins] = valueBuilder->NewArray(size, items); - fields[HistogramIndexes_.Min] = TUnboxedValuePod(static_cast<double>(histogram->GetMinValue())); - fields[HistogramIndexes_.Max] = TUnboxedValuePod(static_cast<double>(histogram->GetMaxValue())); - fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(static_cast<double>(histogram->GetSum())); - for (ui64 i = 0; i < size; ++i) { - TUnboxedValue* binFields = nullptr; - *items++ = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); - binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(static_cast<double>(proto.GetFreq(i))); - binFields[HistogramIndexes_.Position] = TUnboxedValuePod(static_cast<double>(proto.GetPosition(i))); - } - } else { - fields[HistogramIndexes_.Bins] = valueBuilder->NewEmptyList(); - fields[HistogramIndexes_.Min] = TUnboxedValuePod(0.0); - fields[HistogramIndexes_.Max] = TUnboxedValuePod(0.0); - fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(0.0); +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_AddValue<THistogramType, ResourceName>(builder.GetSourcePosition())); } - - return result; + return true; + } else { + return false; } + } - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool 
typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - auto resource = builder.Resource(TStringRef(ResourceName, std::strlen(ResourceName))); +private: + TSourcePosition Pos_; +}; - THistogramIndexes histogramIndexes(builder); +template <typename THistogramType, const char* ResourceName> +class THistogram_Serialize: public TBoxedValue { +public: + THistogram_Serialize(TSourcePosition pos) + : Pos_(pos) + { + } - builder.Args()->Add(resource).Done().Returns(histogramIndexes.ResultStructType); + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; - if (!typesOnly) { - builder.Implementation(new THistogram_GetResult<THistogramType, ResourceName>(histogramIndexes, builder.GetSourcePosition())); - } - return true; - } else { - return false; - } - } - - private: - const THistogramIndexes HistogramIndexes_; - TSourcePosition Pos_; - }; + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Serialize"; + static auto nameRef = TStringRef(name); + return nameRef; + } - template <> - TUnboxedValue THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::Run( +private: + TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - using THistogramResource = THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::THistogramResource; + const TUnboxedValuePod* args) const override { try { - Y_UNUSED(valueBuilder); - THolder<THistogramResource> histogram(new THistogramResource( - args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); - histogram->Get()->Add(args[0].Get<double>(), 1.0); - return TUnboxedValuePod(histogram.Release()); + THistogram proto; + TString result; + static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto); + Y_PROTOBUF_SUPPRESS_NODISCARD proto.SerializeToString(&result); + return valueBuilder->NewString(result); } catch (const std::exception& e) { UdfTerminate((TStringBuilder() << Pos_ << " " 
<< e.what()).c_str()); } } - template <> - bool THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature( +public: + static bool DeclareSignature( const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { Y_UNUSED(userType); if (Name() == name) { - builder.SimpleSignature<TResource<LinearHistogramResourceName>(double, double, double, double)>(); + builder.SimpleSignature<char*(TResource<ResourceName>)>(); if (!typesOnly) { - builder.Implementation(new THistogram_Create<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition())); + builder.Implementation(new THistogram_Serialize<THistogramType, ResourceName>(builder.GetSourcePosition())); } return true; } else { @@ -505,17 +264,35 @@ namespace { } } - template <> - TUnboxedValue THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::Run( +private: + TSourcePosition Pos_; +}; + +template <typename THistogramType, const char* ResourceName> +class THistogram_Deserialize: public TBoxedValue { +public: + THistogram_Deserialize(TSourcePosition pos) + : Pos_(pos) + { + } + + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Deserialize"; + static auto nameRef = TStringRef(name); + return nameRef; + } + +private: + TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - using THistogramResource = THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::THistogramResource; + const TUnboxedValuePod* args) const override { try { Y_UNUSED(valueBuilder); THistogram proto; Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); - THolder<THistogramResource> histogram( - new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + THolder<THistogramResource> histogram(new 
THistogramResource(args[1].Get<ui32>())); histogram->Get()->FromProto(proto); return TUnboxedValuePod(histogram.Release()); } catch (const std::exception& e) { @@ -523,17 +300,17 @@ namespace { } } - template <> - bool THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature( +public: + static bool DeclareSignature( const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { Y_UNUSED(userType); if (Name() == name) { - builder.SimpleSignature<TResource<LinearHistogramResourceName>(char*, double, double, double)>(); + builder.SimpleSignature<TResource<ResourceName>(char*, ui32)>(); if (!typesOnly) { - builder.Implementation(new THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition())); + builder.Implementation(new THistogram_Deserialize<THistogramType, ResourceName>(builder.GetSourcePosition())); } return true; } else { @@ -541,33 +318,52 @@ namespace { } } - template <> - TUnboxedValue THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run( +private: + TSourcePosition Pos_; +}; + +template <typename THistogramType, const char* ResourceName> +class THistogram_Merge: public TBoxedValue { +public: + THistogram_Merge(TSourcePosition pos) + : Pos_(pos) + { + } + + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_Merge"; + static auto nameRef = TStringRef(name); + return nameRef; + } + +private: + TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - using THistogramResource = THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource; + const TUnboxedValuePod* args) const override { try { Y_UNUSED(valueBuilder); - THolder<THistogramResource> histogram(new THistogramResource( - args[1].Get<double>(), args[2].Get<double>(), 
args[3].Get<double>())); - histogram->Get()->Add(args[0].Get<double>(), 1.0); - return TUnboxedValuePod(histogram.Release()); + THistogram proto; + static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get()->ToProto(proto); + static_cast<THistogramResource*>(args[1].AsBoxed().Get())->Get()->Merge(proto, 1.0); + return TUnboxedValuePod(args[1]); } catch (const std::exception& e) { UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } } - template <> - bool THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature( +public: + static bool DeclareSignature( const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { Y_UNUSED(userType); if (Name() == name) { - builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(double, double, double, double)>(); + builder.SimpleSignature<TResource<ResourceName>(TResource<ResourceName>, TResource<ResourceName>)>(); if (!typesOnly) { - builder.Implementation(new THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition())); + builder.Implementation(new THistogram_Merge<THistogramType, ResourceName>(builder.GetSourcePosition())); } return true; } else { @@ -575,35 +371,99 @@ namespace { } } - template <> - TUnboxedValue THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run( +private: + TSourcePosition Pos_; +}; + +struct THistogramIndexes { + static constexpr ui32 BinFieldsCount = 2U; + static constexpr ui32 ResultFieldsCount = 5U; + + THistogramIndexes(IFunctionTypeInfoBuilder& builder) { + const auto binStructType = builder.Struct(BinFieldsCount)->AddField<double>("Position", &Position).AddField<double>("Frequency", &Frequency).Build(); + const auto binsList = builder.List()->Item(binStructType).Build(); + ResultStructType = builder.Struct(ResultFieldsCount)->AddField<char*>("Kind", &Kind).AddField<double>("Min", &Min).AddField<double>("Max", 
&Max).AddField<double>("WeightsSum", &WeightsSum).AddField("Bins", binsList, &Bins).Build(); + } + + ui32 Kind; + ui32 Min; + ui32 Max; + ui32 WeightsSum; + ui32 Bins; + + ui32 Position; + ui32 Frequency; + + TType* ResultStructType; +}; + +template <typename THistogramType, const char* ResourceName> +class THistogram_GetResult: public TBoxedValue { +public: + typedef TBoxedResource<THistogramType, ResourceName> THistogramResource; + + THistogram_GetResult(const THistogramIndexes& histogramIndexes, TSourcePosition pos) + : HistogramIndexes_(histogramIndexes) + , Pos_(pos) + { + } + + static const TStringRef& Name() { + static auto name = TString(ResourceName).substr(10) + "Histogram_GetResult"; + static auto nameRef = TStringRef(name); + return nameRef; + } + +private: + TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - using THistogramResource = THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource; - try { - Y_UNUSED(valueBuilder); - THistogram proto; - Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); - THolder<THistogramResource> histogram( - new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); - histogram->Get()->FromProto(proto); - return TUnboxedValuePod(histogram.Release()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + const TUnboxedValuePod* args) const override { + THistogram proto; + auto histogram = static_cast<THistogramResource*>(args[0].AsBoxed().Get())->Get(); + histogram->ToProto(proto); + + auto size = proto.FreqSize(); + TUnboxedValue* fields = nullptr; + auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); + fields[HistogramIndexes_.Kind] = valueBuilder->NewString(TStringBuf(ResourceName).Skip(10)); + if (size) { + TUnboxedValue* items = nullptr; + fields[HistogramIndexes_.Bins] = 
valueBuilder->NewArray(size, items); + fields[HistogramIndexes_.Min] = TUnboxedValuePod(static_cast<double>(histogram->GetMinValue())); + fields[HistogramIndexes_.Max] = TUnboxedValuePod(static_cast<double>(histogram->GetMaxValue())); + fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(static_cast<double>(histogram->GetSum())); + for (ui64 i = 0; i < size; ++i) { + TUnboxedValue* binFields = nullptr; + *items++ = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); + binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(static_cast<double>(proto.GetFreq(i))); + binFields[HistogramIndexes_.Position] = TUnboxedValuePod(static_cast<double>(proto.GetPosition(i))); + } + } else { + fields[HistogramIndexes_.Bins] = valueBuilder->NewEmptyList(); + fields[HistogramIndexes_.Min] = TUnboxedValuePod(0.0); + fields[HistogramIndexes_.Max] = TUnboxedValuePod(0.0); + fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(0.0); } + + return result; } - template <> - bool THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature( +public: + static bool DeclareSignature( const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { Y_UNUSED(userType); if (Name() == name) { - builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(char*, double, double, double)>(); + auto resource = builder.Resource(TStringRef(ResourceName, std::strlen(ResourceName))); + + THistogramIndexes histogramIndexes(builder); + + builder.Args()->Add(resource).Done().Returns(histogramIndexes.ResultStructType); + if (!typesOnly) { - builder.Implementation(new THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition())); + builder.Implementation(new THistogram_GetResult<THistogramType, ResourceName>(histogramIndexes, builder.GetSourcePosition())); } return true; } else { @@ -611,352 +471,499 @@ namespace { } } - class THistogramPrint: public TBoxedValue { 
- public: - THistogramPrint(const THistogramIndexes& histogramIndexes) - : HistogramIndexes_(histogramIndexes) - { +private: + const THistogramIndexes HistogramIndexes_; + TSourcePosition Pos_; +}; + +template <> +TUnboxedValue THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THolder<THistogramResource> histogram(new THistogramResource( + args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + histogram->Get()->Add(args[0].Get<double>(), 1.0); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } +} + +template <> +bool THistogram_Create<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LinearHistogramResourceName>(double, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Create<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } +} + +template <> +TUnboxedValue THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THistogram proto; + Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); + THolder<THistogramResource> histogram( + new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), 
args[3].Get<double>())); + histogram->Get()->FromProto(proto); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } +} + +template <> +bool THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LinearHistogramResourceName>(char*, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Deserialize<TLinearHistogram, LinearHistogramResourceName>(builder.GetSourcePosition())); } + return true; + } else { + return false; + } +} - static const TStringRef& Name() { - static auto name = TStringRef::Of("Print"); - return name; +template <> +TUnboxedValue THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THolder<THistogramResource> histogram(new THistogramResource( + args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + histogram->Get()->Add(args[0].Get<double>(), 1.0); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } +} + +template <> +bool THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(double, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new 
THistogram_Create<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition())); } + return true; + } else { + return false; + } +} - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - auto kind = args[0].GetElement(HistogramIndexes_.Kind); - auto bins = args[0].GetElement(HistogramIndexes_.Bins); - double min = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); - double max = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); - double weightsSum = args[0].GetElement(HistogramIndexes_.WeightsSum).Get<double>(); - auto binsIterator = bins.GetListIterator(); +template <> +TUnboxedValue THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + using THistogramResource = THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::THistogramResource; + try { + Y_UNUSED(valueBuilder); + THistogram proto; + Y_PROTOBUF_SUPPRESS_NODISCARD proto.ParseFromString(TString(args[0].AsStringRef())); + THolder<THistogramResource> histogram( + new THistogramResource(args[1].Get<double>(), args[2].Get<double>(), args[3].Get<double>())); + histogram->Get()->FromProto(proto); + return TUnboxedValuePod(histogram.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } +} - TStringBuilder result; - result << "Kind: " << (TStringBuf)kind.AsStringRef() << ' '; - result << Sprintf("Bins: %" PRIu64 " WeightsSum: %.3f Min: %.3f Max: %.3f", - bins.GetListLength(), weightsSum, min, max); - double maxFrequency = 0.0; - size_t maxPositionLength = 0; - size_t maxFrequencyLength = 0; - const ui8 bars = args[1].GetOrDefault<ui8>(25); +template <> +bool THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>::DeclareSignature( + const TStringRef& name, + TType* userType, + 
IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<LogarithmicHistogramResourceName>(char*, double, double, double)>(); + if (!typesOnly) { + builder.Implementation(new THistogram_Deserialize<TLogarithmicHistogram, LogarithmicHistogramResourceName>(builder.GetSourcePosition())); + } + return true; + } else { + return false; + } +} - for (TUnboxedValue current; binsIterator.Next(current);) { - if (bars) { - double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - if (frequency > maxFrequency) { - maxFrequency = frequency; - } - } - size_t positionLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Position).Get<double>()).length(); - size_t frequencyLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Frequency).Get<double>()).length(); +class THistogramPrint: public TBoxedValue { +public: + THistogramPrint(const THistogramIndexes& histogramIndexes) + : HistogramIndexes_(histogramIndexes) + { + } - if (positionLength > maxPositionLength) { - maxPositionLength = positionLength; - } - if (frequencyLength > maxFrequencyLength) { - maxFrequencyLength = frequencyLength; - } - } + static const TStringRef& Name() { + static auto name = TStringRef::Of("Print"); + return name; + } - binsIterator = bins.GetListIterator(); - for (TUnboxedValue current; binsIterator.Next(current);) { - double position = current.GetElement(HistogramIndexes_.Position).Get<double>(); + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + auto kind = args[0].GetElement(HistogramIndexes_.Kind); + auto bins = args[0].GetElement(HistogramIndexes_.Bins); + double min = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); + double max = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); + double weightsSum = args[0].GetElement(HistogramIndexes_.WeightsSum).Get<double>(); + auto binsIterator = 
bins.GetListIterator(); + + TStringBuilder result; + result << "Kind: " << (TStringBuf)kind.AsStringRef() << ' '; + result << Sprintf("Bins: %" PRIu64 " WeightsSum: %.3f Min: %.3f Max: %.3f", + bins.GetListLength(), weightsSum, min, max); + double maxFrequency = 0.0; + size_t maxPositionLength = 0; + size_t maxFrequencyLength = 0; + const ui8 bars = args[1].GetOrDefault<ui8>(25); + + for (TUnboxedValue current; binsIterator.Next(current);) { + if (bars) { double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - result << "\n"; - if (bars && maxFrequency > 0) { - ui8 filledBars = static_cast<ui8>(bars * frequency / maxFrequency); - for (ui8 i = 0; i < bars; ++i) { - if (i < filledBars) { - result << "█"; - } else { - result << "░"; - } - } + if (frequency > maxFrequency) { + maxFrequency = frequency; } - result << " P: " << LeftPad(Sprintf("%.3f", position), maxPositionLength); - result << " F: " << LeftPad(Sprintf("%.3f", frequency), maxFrequencyLength); } + size_t positionLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Position).Get<double>()).length(); + size_t frequencyLength = Sprintf("%.3f", current.GetElement(HistogramIndexes_.Frequency).Get<double>()).length(); - return valueBuilder->NewString(result); + if (positionLength > maxPositionLength) { + maxPositionLength = positionLength; + } + if (frequencyLength > maxFrequencyLength) { + maxFrequencyLength = frequencyLength; + } } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - THistogramIndexes histogramIndexes(builder); - auto optionalUi8 = builder.Optional()->Item<ui8>().Build(); - - builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalUi8).Done().OptionalArgs(1).Returns<char*>(); - - if (!typesOnly) { - builder.Implementation(new THistogramPrint(histogramIndexes)); + 
binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + double position = current.GetElement(HistogramIndexes_.Position).Get<double>(); + double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + result << "\n"; + if (bars && maxFrequency > 0) { + ui8 filledBars = static_cast<ui8>(bars * frequency / maxFrequency); + for (ui8 i = 0; i < bars; ++i) { + if (i < filledBars) { + result << "█"; + } else { + result << "░"; + } } - builder.IsStrict(); - return true; - } else { - return false; } + result << " P: " << LeftPad(Sprintf("%.3f", position), maxPositionLength); + result << " F: " << LeftPad(Sprintf("%.3f", frequency), maxFrequencyLength); } - private: - const THistogramIndexes HistogramIndexes_; - }; + return valueBuilder->NewString(result); + } - class THistogramToCumulativeDistributionFunction: public TBoxedValue { - public: - THistogramToCumulativeDistributionFunction(const THistogramIndexes& histogramIndexes) - : HistogramIndexes_(histogramIndexes) - { - } + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + THistogramIndexes histogramIndexes(builder); + auto optionalUi8 = builder.Optional()->Item<ui8>().Build(); - static const TStringRef& Name() { - static auto name = TStringRef::Of("ToCumulativeDistributionFunction"); - return name; - } + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalUi8).Done().OptionalArgs(1).Returns<char*>(); - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - TUnboxedValue* fields = nullptr; - auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); - auto bins = args[0].GetElement(HistogramIndexes_.Bins); - double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); - double 
maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); - double sum = 0.0; - double weightsSum = 0.0; - std::vector<TUnboxedValue> resultBins; - if (bins.HasFastListLength()) - resultBins.reserve(bins.GetListLength()); - const auto binsIterator = bins.GetListIterator(); - for (TUnboxedValue current; binsIterator.Next(current);) { - TUnboxedValue* binFields = nullptr; - auto resultCurrent = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); - const auto frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - sum += frequency; - weightsSum += sum; - binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(sum); - binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position); - resultBins.emplace_back(std::move(resultCurrent)); + if (!typesOnly) { + builder.Implementation(new THistogramPrint(histogramIndexes)); } - - auto kind = args[0].GetElement(HistogramIndexes_.Kind); - fields[HistogramIndexes_.Kind] = valueBuilder->AppendString(kind, "Cdf"); - fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size()); - fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue); - fields[HistogramIndexes_.Min] = TUnboxedValuePod(minValue); - fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum); - return result; + builder.IsStrict(); + return true; + } else { + return false; } + } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - THistogramIndexes histogramIndexes(builder); +private: + const THistogramIndexes HistogramIndexes_; +}; - builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Done().Returns(histogramIndexes.ResultStructType); +class THistogramToCumulativeDistributionFunction: public TBoxedValue { +public: + THistogramToCumulativeDistributionFunction(const 
THistogramIndexes& histogramIndexes) + : HistogramIndexes_(histogramIndexes) + { + } - if (!typesOnly) { - builder.Implementation(new THistogramToCumulativeDistributionFunction(histogramIndexes)); - } - builder.IsStrict(); - return true; - } else { - return false; - } + static const TStringRef& Name() { + static auto name = TStringRef::Of("ToCumulativeDistributionFunction"); + return name; + } + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* fields = nullptr; + auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); + auto bins = args[0].GetElement(HistogramIndexes_.Bins); + double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); + double maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); + double sum = 0.0; + double weightsSum = 0.0; + std::vector<TUnboxedValue> resultBins; + if (bins.HasFastListLength()) { + resultBins.reserve(bins.GetListLength()); + } + const auto binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + TUnboxedValue* binFields = nullptr; + auto resultCurrent = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); + const auto frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + sum += frequency; + weightsSum += sum; + binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(sum); + binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position); + resultBins.emplace_back(std::move(resultCurrent)); } - private: - const THistogramIndexes HistogramIndexes_; - }; + auto kind = args[0].GetElement(HistogramIndexes_.Kind); + fields[HistogramIndexes_.Kind] = valueBuilder->AppendString(kind, "Cdf"); + fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size()); + fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue); + fields[HistogramIndexes_.Min] = 
TUnboxedValuePod(minValue); + fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum); + return result; + } - class THistogramNormalize: public TBoxedValue { - public: - THistogramNormalize(const THistogramIndexes& histogramIndexes) - : HistogramIndexes_(histogramIndexes) - { - } + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + THistogramIndexes histogramIndexes(builder); - static const TStringRef& Name() { - static auto name = TStringRef::Of("Normalize"); - return name; - } + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Done().Returns(histogramIndexes.ResultStructType); - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - TUnboxedValue* fields = nullptr; - auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); - auto bins = args[0].GetElement(HistogramIndexes_.Bins); - double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); - double maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); - double area = args[1].GetOrDefault<double>(100.0); - bool cdfNormalization = args[2].GetOrDefault<bool>(false); - double sum = 0.0; - double weightsSum = 0.0; - double lastBinFrequency = 0.0; - std::vector<TUnboxedValue> resultBins; - if (bins.HasFastListLength()) - resultBins.reserve(bins.GetListLength()); - auto binsIterator = bins.GetListIterator(); - for (TUnboxedValue current; binsIterator.Next(current);) { - sum += current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - lastBinFrequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - } - binsIterator = bins.GetListIterator(); - for (TUnboxedValue current; binsIterator.Next(current);) { - TUnboxedValue* binFields = nullptr; - auto resultCurrent = 
valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); - double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); - if (cdfNormalization) { - frequency = area * frequency / lastBinFrequency; - } else { - frequency = area * frequency / sum; - } - weightsSum += frequency; - binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(frequency); - binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position); - resultBins.emplace_back(std::move(resultCurrent)); + if (!typesOnly) { + builder.Implementation(new THistogramToCumulativeDistributionFunction(histogramIndexes)); } + builder.IsStrict(); + return true; + } else { + return false; + } + } - TUnboxedValue kind = args[0].GetElement(HistogramIndexes_.Kind); - if (cdfNormalization) { - kind = valueBuilder->AppendString(kind, "Cdf"); - } +private: + const THistogramIndexes HistogramIndexes_; +}; - fields[HistogramIndexes_.Kind] = kind; - fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size()); - fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue); - fields[HistogramIndexes_.Min] = TUnboxedValuePod(minValue); - fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum); - return result; - } - - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - THistogramIndexes histogramIndexes(builder); - auto optionalDouble = builder.Optional()->Item<double>().Build(); - auto optionalCdfNormalization = builder.Optional()->Item<bool>().Build(); - builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalDouble).Add(optionalCdfNormalization).Done().Returns(histogramIndexes.ResultStructType); - builder.OptionalArgs(1); - builder.OptionalArgs(2); - if (!typesOnly) { - builder.Implementation(new THistogramNormalize(histogramIndexes)); - } - 
builder.IsStrict(); - return true; +class THistogramNormalize: public TBoxedValue { +public: + THistogramNormalize(const THistogramIndexes& histogramIndexes) + : HistogramIndexes_(histogramIndexes) + { + } + + static const TStringRef& Name() { + static auto name = TStringRef::Of("Normalize"); + return name; + } + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* fields = nullptr; + auto result = valueBuilder->NewArray(HistogramIndexes_.ResultFieldsCount, fields); + auto bins = args[0].GetElement(HistogramIndexes_.Bins); + double minValue = args[0].GetElement(HistogramIndexes_.Min).Get<double>(); + double maxValue = args[0].GetElement(HistogramIndexes_.Max).Get<double>(); + double area = args[1].GetOrDefault<double>(100.0); + bool cdfNormalization = args[2].GetOrDefault<bool>(false); + double sum = 0.0; + double weightsSum = 0.0; + double lastBinFrequency = 0.0; + std::vector<TUnboxedValue> resultBins; + if (bins.HasFastListLength()) { + resultBins.reserve(bins.GetListLength()); + } + auto binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + sum += current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + lastBinFrequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + } + binsIterator = bins.GetListIterator(); + for (TUnboxedValue current; binsIterator.Next(current);) { + TUnboxedValue* binFields = nullptr; + auto resultCurrent = valueBuilder->NewArray(HistogramIndexes_.BinFieldsCount, binFields); + double frequency = current.GetElement(HistogramIndexes_.Frequency).Get<double>(); + if (cdfNormalization) { + frequency = area * frequency / lastBinFrequency; } else { - return false; + frequency = area * frequency / sum; } + weightsSum += frequency; + binFields[HistogramIndexes_.Frequency] = TUnboxedValuePod(frequency); + binFields[HistogramIndexes_.Position] = current.GetElement(HistogramIndexes_.Position); + 
resultBins.emplace_back(std::move(resultCurrent)); } - private: - const THistogramIndexes HistogramIndexes_; - }; + TUnboxedValue kind = args[0].GetElement(HistogramIndexes_.Kind); + if (cdfNormalization) { + kind = valueBuilder->AppendString(kind, "Cdf"); + } - template <bool twoArgs> - class THistogramMethodBase: public TBoxedValue { - public: - THistogramMethodBase(const THistogramIndexes& histogramIndexes, TSourcePosition pos) - : HistogramIndexes_(histogramIndexes) - , Pos_(pos) - { - } - - virtual TUnboxedValue GetResult( - const THistogram& input, - const TUnboxedValuePod* args) const = 0; - - TUnboxedValue Run( - const IValueBuilder*, - const TUnboxedValuePod* args) const override { - try { - auto bins = args[0].GetElement(HistogramIndexes_.Bins); - double min = args[0].GetElement(HistogramIndexes_.Min).template Get<double>(); - double max = args[0].GetElement(HistogramIndexes_.Max).template Get<double>(); - auto binsIterator = bins.GetListIterator(); - - THistogram histogram; - histogram.SetType(HT_ADAPTIVE_HISTOGRAM); - histogram.SetMinValue(min); - histogram.SetMaxValue(max); - for (TUnboxedValue current; binsIterator.Next(current);) { - double frequency = current.GetElement(HistogramIndexes_.Frequency).template Get<double>(); - double position = current.GetElement(HistogramIndexes_.Position).template Get<double>(); - histogram.AddFreq(frequency); - histogram.AddPosition(position); - } + fields[HistogramIndexes_.Kind] = kind; + fields[HistogramIndexes_.Bins] = valueBuilder->NewList(resultBins.data(), resultBins.size()); + fields[HistogramIndexes_.Max] = TUnboxedValuePod(maxValue); + fields[HistogramIndexes_.Min] = TUnboxedValuePod(minValue); + fields[HistogramIndexes_.WeightsSum] = TUnboxedValuePod(weightsSum); + return result; + } - return GetResult(histogram, args); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + 
IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + THistogramIndexes histogramIndexes(builder); + auto optionalDouble = builder.Optional()->Item<double>().Build(); + auto optionalCdfNormalization = builder.Optional()->Item<bool>().Build(); + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalDouble).Add(optionalCdfNormalization).Done().Returns(histogramIndexes.ResultStructType); + builder.OptionalArgs(1); + builder.OptionalArgs(2); + if (!typesOnly) { + builder.Implementation(new THistogramNormalize(histogramIndexes)); } + builder.IsStrict(); + return true; + } else { + return false; } + } - static THistogramIndexes DeclareSignatureBase(IFunctionTypeInfoBuilder& builder) { - THistogramIndexes histogramIndexes(builder); +private: + const THistogramIndexes HistogramIndexes_; +}; + +template <bool twoArgs> +class THistogramMethodBase: public TBoxedValue { +public: + THistogramMethodBase(const THistogramIndexes& histogramIndexes, TSourcePosition pos) + : HistogramIndexes_(histogramIndexes) + , Pos_(pos) + { + } - if (twoArgs) { - builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Add<double>().Done().Returns<double>(); - } else { - builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Done().Returns<double>(); + virtual TUnboxedValue GetResult( + const THistogram& input, + const TUnboxedValuePod* args) const = 0; + + TUnboxedValue Run( + const IValueBuilder*, + const TUnboxedValuePod* args) const override { + try { + auto bins = args[0].GetElement(HistogramIndexes_.Bins); + double min = args[0].GetElement(HistogramIndexes_.Min).template Get<double>(); + double max = args[0].GetElement(HistogramIndexes_.Max).template Get<double>(); + auto binsIterator = bins.GetListIterator(); + + THistogram histogram; + 
histogram.SetType(HT_ADAPTIVE_HISTOGRAM); + histogram.SetMinValue(min); + histogram.SetMaxValue(max); + for (TUnboxedValue current; binsIterator.Next(current);) { + double frequency = current.GetElement(HistogramIndexes_.Frequency).template Get<double>(); + double position = current.GetElement(HistogramIndexes_.Position).template Get<double>(); + histogram.AddFreq(frequency); + histogram.AddPosition(position); } - return histogramIndexes; + + return GetResult(histogram, args); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - protected: - const THistogramIndexes HistogramIndexes_; - TSourcePosition Pos_; - }; + static THistogramIndexes DeclareSignatureBase(IFunctionTypeInfoBuilder& builder) { + THistogramIndexes histogramIndexes(builder); + + if (twoArgs) { + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Add<double>().Done().Returns<double>(); + } else { + builder.Args()->Add(histogramIndexes.ResultStructType).Flags(ICallablePayload::TArgumentFlags::AutoMap).Add<double>().Done().Returns<double>(); + } + return histogramIndexes; + } -#define DECLARE_ONE_DOUBLE_ARG_METHOD_UDF(name) \ - class T##name: public THistogramMethodBase<false> { \ - public: \ - T##name(const THistogramIndexes& histogramIndexes, TSourcePosition pos) \ - : THistogramMethodBase<false>(histogramIndexes, pos) { \ - } \ - static const TStringRef& Name() { \ - static auto name = TStringRef::Of(#name); \ - return name; \ - } \ - static bool DeclareSignature( \ - const TStringRef& name, \ - TType* userType, \ - IFunctionTypeInfoBuilder& builder, \ - bool typesOnly) { \ - Y_UNUSED(userType); \ - if (Name() == name) { \ - const auto& histogramIndexes = DeclareSignatureBase(builder); \ - if (!typesOnly) { \ - builder.Implementation(new T##name(histogramIndexes, \ - builder.GetSourcePosition())); \ - } \ - return true; \ - } else { \ - return false; \ - } \ - } \ - 
TUnboxedValue GetResult( \ - const THistogram& input, \ - const TUnboxedValuePod* args) const override { \ - TAdaptiveWardHistogram histo(input, input.FreqSize()); \ - double result = histo.name(args[1].Get<double>()); \ - return TUnboxedValuePod(result); \ - } \ +protected: + const THistogramIndexes HistogramIndexes_; + TSourcePosition Pos_; +}; + +#define DECLARE_ONE_DOUBLE_ARG_METHOD_UDF(name) \ + class T##name: public THistogramMethodBase<false> { \ + public: \ + T##name(const THistogramIndexes& histogramIndexes, TSourcePosition pos) \ + : THistogramMethodBase<false>(histogramIndexes, pos) { \ + } \ + static const TStringRef& Name() { \ + static auto name = TStringRef::Of(#name); \ + return name; \ + } \ + static bool DeclareSignature( \ + const TStringRef& name, \ + TType* userType, \ + IFunctionTypeInfoBuilder& builder, \ + bool typesOnly) { \ + Y_UNUSED(userType); \ + if (Name() == name) { \ + const auto& histogramIndexes = DeclareSignatureBase(builder); \ + if (!typesOnly) { \ + builder.Implementation(new T##name(histogramIndexes, \ + builder.GetSourcePosition())); \ + } \ + return true; \ + } else { \ + return false; \ + } \ + } \ + TUnboxedValue GetResult( \ + const THistogram& input, \ + const TUnboxedValuePod* args) const override { \ + TAdaptiveWardHistogram histo(input, input.FreqSize()); \ + double result = histo.name(args[1].Get<double>()); \ + return TUnboxedValuePod(result); \ + } \ }; #define DECLARE_TWO_DOUBLE_ARG_METHOD_UDF(name) \ @@ -979,7 +986,7 @@ namespace { const auto& histogramIndexes = DeclareSignatureBase(builder); \ if (!typesOnly) { \ builder.Implementation(new T##name(histogramIndexes, \ - builder.GetSourcePosition())); \ + builder.GetSourcePosition())); \ } \ return true; \ } else { \ @@ -1001,18 +1008,18 @@ namespace { #define DECLARE_HISTOGRAM_UDFS(name) \ HISTOGRAM_FUNCTION_MAP(DECLARE_HISTOGRAM_UDF, name) - HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(DECLARE_ONE_DOUBLE_ARG_METHOD_UDF) - 
HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(DECLARE_TWO_DOUBLE_ARG_METHOD_UDF) - - SIMPLE_MODULE(THistogramModule, - HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_UDFS) - HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) - HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) - DECLARE_HISTOGRAM_UDFS(Linear) - DECLARE_HISTOGRAM_UDFS(Logarithmic) - THistogramPrint, - THistogramNormalize, - THistogramToCumulativeDistributionFunction) -} +HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(DECLARE_ONE_DOUBLE_ARG_METHOD_UDF) +HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(DECLARE_TWO_DOUBLE_ARG_METHOD_UDF) + +SIMPLE_MODULE(THistogramModule, + HISTOGRAM_ALGORITHMS_MAP(DECLARE_HISTOGRAM_UDFS) + HISTOGRAM_ONE_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) + HISTOGRAM_TWO_DOUBLE_ARG_METHODS_MAP(REGISTER_METHOD_UDF) + DECLARE_HISTOGRAM_UDFS(Linear) + DECLARE_HISTOGRAM_UDFS(Logarithmic) + THistogramPrint, + THistogramNormalize, + THistogramToCumulativeDistributionFunction) +} // namespace REGISTER_MODULES(THistogramModule) diff --git a/yql/essentials/udfs/common/histogram/ya.make b/yql/essentials/udfs/common/histogram/ya.make index 51b4a241002..659a3ba4406 100644 --- a/yql/essentials/udfs/common/histogram/ya.make +++ b/yql/essentials/udfs/common/histogram/ya.make @@ -6,6 +6,8 @@ YQL_UDF_CONTRIB(histogram_udf) 0 ) + ENABLE(YQL_STYLE_CPP) + SRCS( histogram_udf.cpp ) @@ -18,4 +20,5 @@ YQL_UDF_CONTRIB(histogram_udf) RECURSE_FOR_TESTS( test -)
\ No newline at end of file +) + diff --git a/yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp b/yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp index 39d17f2ec44..f0e2ad69149 100644 --- a/yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp +++ b/yql/essentials/udfs/common/hyperloglog/hyperloglog_udf.cpp @@ -10,414 +10,423 @@ using namespace NKikimr; using namespace NUdf; namespace { - class THybridHyperLogLog { - private: - using THybridSet = THashSet<ui64, std::hash<ui64>, std::equal_to<ui64>, TStdAllocatorForUdf<ui64>>; - using THybridHll = THyperLogLogWithAlloc<TStdAllocatorForUdf<ui8>>; - - explicit THybridHyperLogLog(unsigned precision) - : Var_(THybridSet()), SizeLimit_((1u << precision) / 8), Precision_(precision) - { } - - THybridHll ConvertToHyperLogLog() const { - auto res = THybridHll::Create(Precision_); - for (auto& el : GetSetRef()) { - res.Update(el); - } - return res; +class THybridHyperLogLog { +private: + using THybridSet = THashSet<ui64, std::hash<ui64>, std::equal_to<ui64>, TStdAllocatorForUdf<ui64>>; + using THybridHll = THyperLogLogWithAlloc<TStdAllocatorForUdf<ui8>>; + + explicit THybridHyperLogLog(unsigned precision) + : Var_(THybridSet()) + , SizeLimit_((1u << precision) / 8) + , Precision_(precision) + { + } + + THybridHll ConvertToHyperLogLog() const { + auto res = THybridHll::Create(Precision_); + for (auto& el : GetSetRef()) { + res.Update(el); } + return res; + } - bool IsSet() const { - return Var_.index() == 1; - } + bool IsSet() const { + return Var_.index() == 1; + } - const THybridSet& GetSetRef() const { - return std::get<1>(Var_); - } + const THybridSet& GetSetRef() const { + return std::get<1>(Var_); + } - THybridSet& GetMutableSetRef() { - return std::get<1>(Var_); - } + THybridSet& GetMutableSetRef() { + return std::get<1>(Var_); + } - const THybridHll& GetHllRef() const { - return std::get<0>(Var_); - } + const THybridHll& GetHllRef() const { + return std::get<0>(Var_); + } - THybridHll& 
GetMutableHllRef() { - return std::get<0>(Var_); - } + THybridHll& GetMutableHllRef() { + return std::get<0>(Var_); + } - public: - THybridHyperLogLog (THybridHyperLogLog&&) = default; +public: + THybridHyperLogLog(THybridHyperLogLog&&) = default; - THybridHyperLogLog& operator=(THybridHyperLogLog&&) = default; + THybridHyperLogLog& operator=(THybridHyperLogLog&&) = default; - void Update(ui64 hash) { - if (IsSet()) { - GetMutableSetRef().insert(hash); - if (GetSetRef().size() >= SizeLimit_) { - Var_ = ConvertToHyperLogLog(); - } - } else { - GetMutableHllRef().Update(hash); + void Update(ui64 hash) { + if (IsSet()) { + GetMutableSetRef().insert(hash); + if (GetSetRef().size() >= SizeLimit_) { + Var_ = ConvertToHyperLogLog(); } + } else { + GetMutableHllRef().Update(hash); } + } - void Merge(const THybridHyperLogLog& rh) { - if (IsSet() && rh.IsSet()) { - GetMutableSetRef().insert(rh.GetSetRef().begin(), rh.GetSetRef().end()); - if (GetSetRef().size() >= SizeLimit_) { - Var_ = ConvertToHyperLogLog(); - } - } else { - if (IsSet()) { - Var_ = ConvertToHyperLogLog(); - } - if (rh.IsSet()) { - GetMutableHllRef().Merge(rh.ConvertToHyperLogLog()); - } else { - GetMutableHllRef().Merge(rh.GetHllRef()); - } + void Merge(const THybridHyperLogLog& rh) { + if (IsSet() && rh.IsSet()) { + GetMutableSetRef().insert(rh.GetSetRef().begin(), rh.GetSetRef().end()); + if (GetSetRef().size() >= SizeLimit_) { + Var_ = ConvertToHyperLogLog(); } - } - - void Save(IOutputStream& out) const { - out.Write(static_cast<char>(Var_.index())); - out.Write(static_cast<char>(Precision_)); + } else { if (IsSet()) { - ::Save(&out, GetSetRef()); - } else { - GetHllRef().Save(out); + Var_ = ConvertToHyperLogLog(); } - } - - ui64 Estimate() const { - if (IsSet()) { - return GetSetRef().size(); - } - return GetHllRef().Estimate(); - } - - static THybridHyperLogLog Create(unsigned precision) { - Y_ENSURE(precision >= THyperLogLog::PRECISION_MIN && precision <= THyperLogLog::PRECISION_MAX); - return 
THybridHyperLogLog(precision); - } - - static THybridHyperLogLog Load(IInputStream& in) { - char type; - Y_ENSURE(in.ReadChar(type)); - char precision; - Y_ENSURE(in.ReadChar(precision)); - auto res = Create(precision); - if (type) { - ::Load(&in, res.GetMutableSetRef()); + if (rh.IsSet()) { + GetMutableHllRef().Merge(rh.ConvertToHyperLogLog()); } else { - res.Var_ = THybridHll::Load(in); + GetMutableHllRef().Merge(rh.GetHllRef()); } - return res; } - - private: - std::variant<THybridHll, THybridSet> Var_; - - size_t SizeLimit_; - - unsigned Precision_; - }; - - extern const char HyperLogLogResourceName[] = "HyperLogLog.State"; - - using THyperLogLogResource = TBoxedResource<THybridHyperLogLog, HyperLogLogResourceName>; - - class THyperLogLogCreate: public TBoxedValue { - public: - THyperLogLogCreate(TSourcePosition pos) - : Pos_(pos) - {} - - static const TStringRef& Name() { - static auto nameRef = TStringRef::Of("Create"); - return nameRef; + } + + void Save(IOutputStream& out) const { + out.Write(static_cast<char>(Var_.index())); + out.Write(static_cast<char>(Precision_)); + if (IsSet()) { + ::Save(&out, GetSetRef()); + } else { + GetHllRef().Save(out); } + } - private: - TUnboxedValue Run( - const IValueBuilder*, - const TUnboxedValuePod* args) const override { - try { - THolder<THyperLogLogResource> hll(new THyperLogLogResource(THybridHyperLogLog::Create(args[1].Get<ui32>()))); - hll->Get()->Update(args[0].Get<ui64>()); - return TUnboxedValuePod(hll.Release()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + ui64 Estimate() const { + if (IsSet()) { + return GetSetRef().size(); } - - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<HyperLogLogResourceName>(ui64, ui32)>(); - if (!typesOnly) { - builder.Implementation(new 
THyperLogLogCreate(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } + return GetHllRef().Estimate(); + } + + static THybridHyperLogLog Create(unsigned precision) { + Y_ENSURE(precision >= THyperLogLog::PRECISION_MIN && precision <= THyperLogLog::PRECISION_MAX); + return THybridHyperLogLog(precision); + } + + static THybridHyperLogLog Load(IInputStream& in) { + char type; + Y_ENSURE(in.ReadChar(type)); + char precision; + Y_ENSURE(in.ReadChar(precision)); + auto res = Create(precision); + if (type) { + ::Load(&in, res.GetMutableSetRef()); + } else { + res.Var_ = THybridHll::Load(in); } - - private: - TSourcePosition Pos_; - }; - - class THyperLogLogAddValue: public TBoxedValue { - public: - THyperLogLogAddValue(TSourcePosition pos) - : Pos_(pos) - {} - - static const TStringRef& Name() { - static auto nameRef = TStringRef::Of("AddValue"); - return nameRef; + return res; + } + +private: + std::variant<THybridHll, THybridSet> Var_; + + size_t SizeLimit_; + + unsigned Precision_; +}; + +extern const char HyperLogLogResourceName[] = "HyperLogLog.State"; + +using THyperLogLogResource = TBoxedResource<THybridHyperLogLog, HyperLogLogResourceName>; + +class THyperLogLogCreate: public TBoxedValue { +public: + THyperLogLogCreate(TSourcePosition pos) + : Pos_(pos) + { + } + + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("Create"); + return nameRef; + } + +private: + TUnboxedValue Run( + const IValueBuilder*, + const TUnboxedValuePod* args) const override { + try { + THolder<THyperLogLogResource> hll(new THyperLogLogResource(THybridHyperLogLog::Create(args[1].Get<ui32>()))); + hll->Get()->Update(args[0].Get<ui64>()); + return TUnboxedValuePod(hll.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); 
- THyperLogLogResource* resource = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get()); - resource->Get()->Update(args[1].Get<ui64>()); - return TUnboxedValuePod(args[0]); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<HyperLogLogResourceName>(ui64, ui32)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLogCreate(builder.GetSourcePosition())); } + return true; + } else { + return false; } - - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<HyperLogLogResourceName>(TResource<HyperLogLogResourceName>, ui64)>(); - if (!typesOnly) { - builder.Implementation(new THyperLogLogAddValue(builder.GetSourcePosition())); - } - builder.IsStrict(); - return true; - } else { - return false; - } - } - - private: - TSourcePosition Pos_; - }; - - class THyperLogLogSerialize: public TBoxedValue { - public: - THyperLogLogSerialize(TSourcePosition pos) - : Pos_(pos) - {} - - public: - static const TStringRef& Name() { - static auto nameRef = TStringRef::Of("Serialize"); - return nameRef; + } + +private: + TSourcePosition Pos_; +}; + +class THyperLogLogAddValue: public TBoxedValue { +public: + THyperLogLogAddValue(TSourcePosition pos) + : Pos_(pos) + { + } + + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("AddValue"); + return nameRef; + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + THyperLogLogResource* resource = 
static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get()); + resource->Get()->Update(args[1].Get<ui64>()); + return TUnboxedValuePod(args[0]); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - TStringStream result; - static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get()->Save(result); - return valueBuilder->NewString(result.Str()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<HyperLogLogResourceName>(TResource<HyperLogLogResourceName>, ui64)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLogAddValue(builder.GetSourcePosition())); } + builder.IsStrict(); + return true; + } else { + return false; } - - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<char*(TResource<HyperLogLogResourceName>)>(); - if (!typesOnly) { - builder.Implementation(new THyperLogLogSerialize(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } + } + +private: + TSourcePosition Pos_; +}; + +class THyperLogLogSerialize: public TBoxedValue { +public: + THyperLogLogSerialize(TSourcePosition pos) + : Pos_(pos) + { + } + +public: + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("Serialize"); + return nameRef; + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + TStringStream result; + 
static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get()->Save(result); + return valueBuilder->NewString(result.Str()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - - private: - TSourcePosition Pos_; - }; - - class THyperLogLogDeserialize: public TBoxedValue { - public: - THyperLogLogDeserialize(TSourcePosition pos) - : Pos_(pos) - {} - - static const TStringRef& Name() { - static auto nameRef = TStringRef::Of("Deserialize"); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - const TString arg(args[0].AsStringRef()); - TStringInput input(arg); - THolder<THyperLogLogResource> hll(new THyperLogLogResource(THybridHyperLogLog::Load(input))); - return TUnboxedValuePod(hll.Release()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<char*(TResource<HyperLogLogResourceName>)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLogSerialize(builder.GetSourcePosition())); } + return true; + } else { + return false; } - - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<HyperLogLogResourceName>(char*)>(); - if (!typesOnly) { - builder.Implementation(new THyperLogLogDeserialize(builder.GetSourcePosition())); - } - return true; - } else { - return false; - } - } - - private: - TSourcePosition Pos_; - }; - - class THyperLogLogMerge: public TBoxedValue { - public: - THyperLogLogMerge(TSourcePosition pos) - : Pos_(pos) - 
{} - - static const TStringRef& Name() { - static auto nameRef = TStringRef::Of("Merge"); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - try { - Y_UNUSED(valueBuilder); - auto left = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get(); - static_cast<THyperLogLogResource*>(args[1].AsBoxed().Get())->Get()->Merge(*left); - return TUnboxedValuePod(args[1]); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + } + +private: + TSourcePosition Pos_; +}; + +class THyperLogLogDeserialize: public TBoxedValue { +public: + THyperLogLogDeserialize(TSourcePosition pos) + : Pos_(pos) + { + } + + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("Deserialize"); + return nameRef; + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { + Y_UNUSED(valueBuilder); + const TString arg(args[0].AsStringRef()); + TStringInput input(arg); + THolder<THyperLogLogResource> hll(new THyperLogLogResource(THybridHyperLogLog::Load(input))); + return TUnboxedValuePod(hll.Release()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.SimpleSignature<TResource<HyperLogLogResourceName>(TResource<HyperLogLogResourceName>, TResource<HyperLogLogResourceName>)>(); - if (!typesOnly) { - builder.Implementation(new THyperLogLogMerge(builder.GetSourcePosition())); - } - builder.IsStrict(); - return true; - } else { - return false; + } + +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + 
Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<HyperLogLogResourceName>(char*)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLogDeserialize(builder.GetSourcePosition())); } + return true; + } else { + return false; } - - private: - TSourcePosition Pos_; - }; - - class THyperLogLogGetResult: public TBoxedValue { - public: - THyperLogLogGetResult(TSourcePosition pos) - : Pos_(pos) - {} - - static const TStringRef& Name() { - static auto nameRef = TStringRef::Of("GetResult"); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { + } + +private: + TSourcePosition Pos_; +}; + +class THyperLogLogMerge: public TBoxedValue { +public: + THyperLogLogMerge(TSourcePosition pos) + : Pos_(pos) + { + } + + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("Merge"); + return nameRef; + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + try { Y_UNUSED(valueBuilder); - auto hll = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get(); - return TUnboxedValuePod(hll->Estimate()); + auto left = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get(); + static_cast<THyperLogLogResource*>(args[1].AsBoxed().Get())->Get()->Merge(*left); + return TUnboxedValuePod(args[1]); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - auto resource = builder.Resource(HyperLogLogResourceName); - builder.Args()->Add(resource).Done().Returns<ui64>(); - - if (!typesOnly) { - builder.Implementation(new THyperLogLogGetResult(builder.GetSourcePosition())); - } - builder.IsStrict(); - return true; - } else { - 
return false; + } + +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.SimpleSignature<TResource<HyperLogLogResourceName>(TResource<HyperLogLogResourceName>, TResource<HyperLogLogResourceName>)>(); + if (!typesOnly) { + builder.Implementation(new THyperLogLogMerge(builder.GetSourcePosition())); } + builder.IsStrict(); + return true; + } else { + return false; } - - private: - TSourcePosition Pos_; - }; - - SIMPLE_MODULE(THyperLogLogModule, - THyperLogLogCreate, - THyperLogLogAddValue, - THyperLogLogSerialize, - THyperLogLogDeserialize, - THyperLogLogMerge, - THyperLogLogGetResult) -} + } + +private: + TSourcePosition Pos_; +}; + +class THyperLogLogGetResult: public TBoxedValue { +public: + THyperLogLogGetResult(TSourcePosition pos) + : Pos_(pos) + { + } + + static const TStringRef& Name() { + static auto nameRef = TStringRef::Of("GetResult"); + return nameRef; + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + Y_UNUSED(valueBuilder); + auto hll = static_cast<THyperLogLogResource*>(args[0].AsBoxed().Get())->Get(); + return TUnboxedValuePod(hll->Estimate()); + } + +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + auto resource = builder.Resource(HyperLogLogResourceName); + builder.Args()->Add(resource).Done().Returns<ui64>(); + + if (!typesOnly) { + builder.Implementation(new THyperLogLogGetResult(builder.GetSourcePosition())); + } + builder.IsStrict(); + return true; + } else { + return false; + } + } + +private: + TSourcePosition Pos_; +}; + +SIMPLE_MODULE(THyperLogLogModule, + THyperLogLogCreate, + THyperLogLogAddValue, + THyperLogLogSerialize, + THyperLogLogDeserialize, + THyperLogLogMerge, + 
THyperLogLogGetResult) +} // namespace REGISTER_MODULES(THyperLogLogModule) diff --git a/yql/essentials/udfs/common/hyperloglog/ya.make b/yql/essentials/udfs/common/hyperloglog/ya.make index 2becaf7388d..4c295580a9f 100644 --- a/yql/essentials/udfs/common/hyperloglog/ya.make +++ b/yql/essentials/udfs/common/hyperloglog/ya.make @@ -6,6 +6,8 @@ YQL_UDF_CONTRIB(hyperloglog_udf) 0 ) + ENABLE(YQL_STYLE_CPP) + SRCS( hyperloglog_udf.cpp ) @@ -18,4 +20,5 @@ YQL_UDF_CONTRIB(hyperloglog_udf) RECURSE_FOR_TESTS( test -)
\ No newline at end of file +) + diff --git a/yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp b/yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp index c641407bc4c..afa8d65f6d3 100644 --- a/yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp +++ b/yql/essentials/udfs/common/hyperscan/hyperscan_udf.cpp @@ -17,461 +17,460 @@ using namespace NKikimr; using namespace NUdf; namespace { - using TOptions = ui32; - class THyperscanUdfBase: public TBoxedValue { - protected: - constexpr static const char* IGNORE_CASE_PREFIX = "(?i)"; - static void SetCommonOptions(TString& regex, TOptions& options) { - options |= HS_FLAG_ALLOWEMPTY; - if (regex.StartsWith(IGNORE_CASE_PREFIX)) { - options |= HS_FLAG_CASELESS; - regex = regex.substr(4); - } - if (UTF8Detect(regex) == UTF8) { - options |= HS_FLAG_UTF8; - } - if (NX86::HaveAVX2()) { - options |= HS_CPU_FEATURES_AVX2; - } +using TOptions = ui32; +class THyperscanUdfBase: public TBoxedValue { +protected: + constexpr static const char* IGNORE_CASE_PREFIX = "(?i)"; + static void SetCommonOptions(TString& regex, TOptions& options) { + options |= HS_FLAG_ALLOWEMPTY; + if (regex.StartsWith(IGNORE_CASE_PREFIX)) { + options |= HS_FLAG_CASELESS; + regex = regex.substr(4); + } + if (UTF8Detect(regex) == UTF8) { + options |= HS_FLAG_UTF8; + } + if (NX86::HaveAVX2()) { + options |= HS_CPU_FEATURES_AVX2; } + } +}; + +class THyperscanMatch: public THyperscanUdfBase { +public: + enum class EMode { + NORMAL, + BACKTRACKING, + MULTI }; - class THyperscanMatch: public THyperscanUdfBase { + class TFactory: public THyperscanUdfBase { public: - enum class EMode { - NORMAL, - BACKTRACKING, - MULTI - }; - - class TFactory: public THyperscanUdfBase { - public: - TFactory( - TSourcePosition pos, - bool surroundMode, - THyperscanMatch::EMode mode, - size_t regexpsCount = 0) - : Pos_(pos) - , SurroundMode_(surroundMode) - , Mode_(mode) - , RegexpsCount_(regexpsCount) - { - } - - private: - TUnboxedValue Run( - const IValueBuilder* 
valueBuilder, - const TUnboxedValuePod* args) const override { - return TUnboxedValuePod( - new THyperscanMatch( - valueBuilder, - args[0], - SurroundMode_, - Mode_, - Pos_, - RegexpsCount_)); - } - - TSourcePosition Pos_; - bool SurroundMode_; - THyperscanMatch::EMode Mode_; - size_t RegexpsCount_; - }; - - static const TStringRef& Name(bool isGrep, THyperscanMatch::EMode mode) { - static auto match = TStringRef::Of("Match"); - static auto grep = TStringRef::Of("Grep"); - static auto backtrackingMatch = TStringRef::Of("BacktrackingMatch"); - static auto backtrackingGrep = TStringRef::Of("BacktrackingGrep"); - static auto multiMatch = TStringRef::Of("MultiMatch"); - static auto multiGrep = TStringRef::Of("MultiGrep"); - if (isGrep) { - switch (mode) { - case THyperscanMatch::EMode::NORMAL: - return grep; - case THyperscanMatch::EMode::BACKTRACKING: - return backtrackingGrep; - case THyperscanMatch::EMode::MULTI: - return multiGrep; - } - } else { - switch (mode) { - case THyperscanMatch::EMode::NORMAL: - return match; - case THyperscanMatch::EMode::BACKTRACKING: - return backtrackingMatch; - case THyperscanMatch::EMode::MULTI: - return multiMatch; - } - } - - Y_ABORT("Unexpected"); - } - - THyperscanMatch( - const IValueBuilder*, - const TUnboxedValuePod& runConfig, + TFactory( + TSourcePosition pos, bool surroundMode, THyperscanMatch::EMode mode, - TSourcePosition pos, - size_t regexpsCount) - : Regex_(runConfig.AsStringRef()) + size_t regexpsCount = 0) + : Pos_(pos) + , SurroundMode_(surroundMode) , Mode_(mode) - , Pos_(pos) , RegexpsCount_(regexpsCount) { - try { - TOptions options = 0; - int pcreOptions = REG_EXTENDED; - if (Mode_ == THyperscanMatch::EMode::BACKTRACKING && Regex_.StartsWith(IGNORE_CASE_PREFIX)) { - pcreOptions |= REG_ICASE; - } - auto regex = Regex_; - SetCommonOptions(regex, options); - switch (mode) { - case THyperscanMatch::EMode::NORMAL: { - if (!surroundMode) { - regex = TStringBuilder() << '^' << regex << '$'; - } - Database_ = 
Compile(regex, options); - break; - } - case THyperscanMatch::EMode::BACKTRACKING: { - if (!surroundMode) { - regex = TStringBuilder() << '^' << regex << '$'; - } - try { - Database_ = Compile(regex, options); - Mode_ = THyperscanMatch::EMode::NORMAL; - } catch (const TCompileException&) { - options |= HS_FLAG_PREFILTER; - Database_ = Compile(regex, options); - Fallback_ = TRegExMatch(regex, pcreOptions); - } - break; - } - case THyperscanMatch::EMode::MULTI: { - std::vector<TString> regexes; - TVector<const char*> cregexes; - TVector<TOptions> flags; - TVector<TOptions> ids; - - const auto func = [®exes, &flags, surroundMode](const std::string_view& token) { - TString regex(token); - - TOptions opt = 0; - SetCommonOptions(regex, opt); - - if (!surroundMode) { - regex = TStringBuilder() << '^' << regex << '$'; - } - - regexes.emplace_back(std::move(regex)); - flags.emplace_back(opt); - }; - StringSplitter(Regex_).Split('\n').Consume(func); - - std::transform(regexes.cbegin(), regexes.cend(), std::back_inserter(cregexes), std::bind(&TString::c_str, std::placeholders::_1)); - ids.resize(regexes.size()); - std::iota(ids.begin(), ids.end(), 0); - - Database_ = CompileMulti(cregexes, flags, ids); - break; - } - } - Scratch_ = MakeScratch(Database_); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } } private: TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try { - TUnboxedValue* items = nullptr; - TUnboxedValue tuple; - size_t i = 0; - - if (Mode_ == THyperscanMatch::EMode::MULTI) { - tuple = valueBuilder->NewArray(RegexpsCount_, items); - for (i = 0; i < RegexpsCount_; ++i) { - items[i] = TUnboxedValuePod(false); - } + const TUnboxedValuePod* args) const override { + return TUnboxedValuePod( + new THyperscanMatch( + valueBuilder, + args[0], + SurroundMode_, + Mode_, + Pos_, + RegexpsCount_)); + } + + TSourcePosition Pos_; + bool SurroundMode_; + 
THyperscanMatch::EMode Mode_; + size_t RegexpsCount_; + }; + + static const TStringRef& Name(bool isGrep, THyperscanMatch::EMode mode) { + static auto match = TStringRef::Of("Match"); + static auto grep = TStringRef::Of("Grep"); + static auto backtrackingMatch = TStringRef::Of("BacktrackingMatch"); + static auto backtrackingGrep = TStringRef::Of("BacktrackingGrep"); + static auto multiMatch = TStringRef::Of("MultiMatch"); + static auto multiGrep = TStringRef::Of("MultiGrep"); + if (isGrep) { + switch (mode) { + case THyperscanMatch::EMode::NORMAL: + return grep; + case THyperscanMatch::EMode::BACKTRACKING: + return backtrackingGrep; + case THyperscanMatch::EMode::MULTI: + return multiGrep; + } + } else { + switch (mode) { + case THyperscanMatch::EMode::NORMAL: + return match; + case THyperscanMatch::EMode::BACKTRACKING: + return backtrackingMatch; + case THyperscanMatch::EMode::MULTI: + return multiMatch; } + } - if (args[0]) { - // XXX: StringRef data might not be a NTBS, though the function - // <TRegExMatch::Match> expects ASCIIZ string. Explicitly copy - // the given argument string and append the NUL terminator to it. 
- const TString input(args[0].AsStringRef()); - if (Y_UNLIKELY(Mode_ == THyperscanMatch::EMode::MULTI)) { - auto callback = [items] (TOptions id, ui64 /* from */, ui64 /* to */) { - items[id] = TUnboxedValuePod(true); - }; - Scan(Database_, Scratch_, input, callback); - return tuple; - } else { - bool matches = Matches(Database_, Scratch_, input); - if (matches && Mode_ == THyperscanMatch::EMode::BACKTRACKING) { - matches = Fallback_.Match(input.data()); + Y_ABORT("Unexpected"); + } + + THyperscanMatch( + const IValueBuilder*, + const TUnboxedValuePod& runConfig, + bool surroundMode, + THyperscanMatch::EMode mode, + TSourcePosition pos, + size_t regexpsCount) + : Regex_(runConfig.AsStringRef()) + , Mode_(mode) + , Pos_(pos) + , RegexpsCount_(regexpsCount) + { + try { + TOptions options = 0; + int pcreOptions = REG_EXTENDED; + if (Mode_ == THyperscanMatch::EMode::BACKTRACKING && Regex_.StartsWith(IGNORE_CASE_PREFIX)) { + pcreOptions |= REG_ICASE; + } + auto regex = Regex_; + SetCommonOptions(regex, options); + switch (mode) { + case THyperscanMatch::EMode::NORMAL: { + if (!surroundMode) { + regex = TStringBuilder() << '^' << regex << '$'; + } + Database_ = Compile(regex, options); + break; + } + case THyperscanMatch::EMode::BACKTRACKING: { + if (!surroundMode) { + regex = TStringBuilder() << '^' << regex << '$'; } - return TUnboxedValuePod(matches); + try { + Database_ = Compile(regex, options); + Mode_ = THyperscanMatch::EMode::NORMAL; + } catch (const TCompileException&) { + options |= HS_FLAG_PREFILTER; + Database_ = Compile(regex, options); + Fallback_ = TRegExMatch(regex, pcreOptions); + } + break; } + case THyperscanMatch::EMode::MULTI: { + std::vector<TString> regexes; + TVector<const char*> cregexes; + TVector<TOptions> flags; + TVector<TOptions> ids; - } else { - return Mode_ == THyperscanMatch::EMode::MULTI ? 
tuple : TUnboxedValue(TUnboxedValuePod(false)); + const auto func = [®exes, &flags, surroundMode](const std::string_view& token) { + TString regex(token); + + TOptions opt = 0; + SetCommonOptions(regex, opt); + + if (!surroundMode) { + regex = TStringBuilder() << '^' << regex << '$'; + } + + regexes.emplace_back(std::move(regex)); + flags.emplace_back(opt); + }; + StringSplitter(Regex_).Split('\n').Consume(func); + + std::transform(regexes.cbegin(), regexes.cend(), std::back_inserter(cregexes), std::bind(&TString::c_str, std::placeholders::_1)); + ids.resize(regexes.size()); + std::iota(ids.begin(), ids.end(), 0); + + Database_ = CompileMulti(cregexes, flags, ids); + break; + } } + Scratch_ = MakeScratch(Database_); } catch (const std::exception& e) { UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - - private: - const TString Regex_; - THyperscanMatch::EMode Mode_; - const TSourcePosition Pos_; - const size_t RegexpsCount_; - TDatabase Database_; - TScratch Scratch_; - TRegExMatch Fallback_; - }; - - class THyperscanCapture: public THyperscanUdfBase { - public: - class TFactory: public THyperscanUdfBase { - public: - TFactory(TSourcePosition pos) - : Pos_(pos) - {} - - private: - TUnboxedValue Run(const IValueBuilder*, - const TUnboxedValuePod* args) const final try { - return TUnboxedValuePod(new THyperscanCapture(args[0], Pos_)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + TUnboxedValue* items = nullptr; + TUnboxedValue tuple; + size_t i = 0; + + if (Mode_ == THyperscanMatch::EMode::MULTI) { + tuple = valueBuilder->NewArray(RegexpsCount_, items); + for (i = 0; i < RegexpsCount_; ++i) { + items[i] = TUnboxedValuePod(false); } + } - private: - TSourcePosition Pos_; - }; + if (args[0]) { + // XXX: StringRef data might not be a NTBS, though the function + 
// <TRegExMatch::Match> expects ASCIIZ string. Explicitly copy + // the given argument string and append the NUL terminator to it. + const TString input(args[0].AsStringRef()); + if (Y_UNLIKELY(Mode_ == THyperscanMatch::EMode::MULTI)) { + auto callback = [items](TOptions id, ui64 /* from */, ui64 /* to */) { + items[id] = TUnboxedValuePod(true); + }; + Scan(Database_, Scratch_, input, callback); + return tuple; + } else { + bool matches = Matches(Database_, Scratch_, input); + if (matches && Mode_ == THyperscanMatch::EMode::BACKTRACKING) { + matches = Fallback_.Match(input.data()); + } + return TUnboxedValuePod(matches); + } - static const TStringRef& Name() { - static auto name = TStringRef::Of("Capture"); - return name; + } else { + return Mode_ == THyperscanMatch::EMode::MULTI ? tuple : TUnboxedValue(TUnboxedValuePod(false)); } - - THyperscanCapture(const TUnboxedValuePod& runConfig, TSourcePosition pos) + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + +private: + const TString Regex_; + THyperscanMatch::EMode Mode_; + const TSourcePosition Pos_; + const size_t RegexpsCount_; + TDatabase Database_; + TScratch Scratch_; + TRegExMatch Fallback_; +}; + +class THyperscanCapture: public THyperscanUdfBase { +public: + class TFactory: public THyperscanUdfBase { + public: + TFactory(TSourcePosition pos) : Pos_(pos) { - Regex_ = runConfig.AsStringRef(); - TOptions options = HS_FLAG_SOM_LEFTMOST; - - SetCommonOptions(Regex_, options); - - Database_ = Compile(Regex_, options); - Scratch_ = MakeScratch(Database_); } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try { - if (const auto arg = args[0]) { - - TUnboxedValue result; - auto callback = [valueBuilder, arg, &result] (TOptions id, ui64 from, ui64 to) { - Y_UNUSED(id); - if (!result) { - result = valueBuilder->SubString(arg, from, to); - } - }; - Scan(Database_, Scratch_, 
arg.AsStringRef(), callback); - return result; - } - - return TUnboxedValue(); + TUnboxedValue Run(const IValueBuilder*, + const TUnboxedValuePod* args) const final try { + return TUnboxedValuePod(new THyperscanCapture(args[0], Pos_)); } catch (const std::exception& e) { UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + private: TSourcePosition Pos_; - TString Regex_; - TDatabase Database_; - TScratch Scratch_; }; - class THyperscanReplace: public THyperscanUdfBase { - public: - class TFactory: public THyperscanUdfBase { - public: - TFactory(TSourcePosition pos) - : Pos_(pos) - {} - - private: - TUnboxedValue Run(const IValueBuilder*, - const TUnboxedValuePod* args) const final try { - return TUnboxedValuePod(new THyperscanReplace(args[0], Pos_)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } + static const TStringRef& Name() { + static auto name = TStringRef::Of("Capture"); + return name; + } + + THyperscanCapture(const TUnboxedValuePod& runConfig, TSourcePosition pos) + : Pos_(pos) + { + Regex_ = runConfig.AsStringRef(); + TOptions options = HS_FLAG_SOM_LEFTMOST; + + SetCommonOptions(Regex_, options); + + Database_ = Compile(Regex_, options); + Scratch_ = MakeScratch(Database_); + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + if (const auto arg = args[0]) { + TUnboxedValue result; + auto callback = [valueBuilder, arg, &result](TOptions id, ui64 from, ui64 to) { + Y_UNUSED(id); + if (!result) { + result = valueBuilder->SubString(arg, from, to); + } + }; + Scan(Database_, Scratch_, arg.AsStringRef(), callback); + return result; + } - private: - TSourcePosition Pos_; - }; + return TUnboxedValue(); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } - static const TStringRef& Name() { - static auto name = TStringRef::Of("Replace"); - return 
name; - } + TSourcePosition Pos_; + TString Regex_; + TDatabase Database_; + TScratch Scratch_; +}; - THyperscanReplace(const TUnboxedValuePod& runConfig, TSourcePosition pos) +class THyperscanReplace: public THyperscanUdfBase { +public: + class TFactory: public THyperscanUdfBase { + public: + TFactory(TSourcePosition pos) : Pos_(pos) { - Regex_ = runConfig.AsStringRef(); - TOptions options = HS_FLAG_SOM_LEFTMOST; - - SetCommonOptions(Regex_, options); - - - Database_ = Compile(Regex_, options); - Scratch_ = MakeScratch(Database_); } private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try { - if (args[0]) { - const std::string_view input(args[0].AsStringRef()); - const std::string_view replacement(args[1].AsStringRef()); - - ui64 index = 0; - TStringBuilder result; - auto callback = [input, replacement, &index, &result] (TOptions id, ui64 from, ui64 to) { - Y_UNUSED(id); - if (index != from) { - result << input.substr(index, from - index); - } - result << replacement; - index = to; - }; - Scan(Database_, Scratch_, input, callback); - - if (!index) { - return args[0]; - } - - result << input.substr(index); - return valueBuilder->NewString(result); - } - - return TUnboxedValue(); + TUnboxedValue Run(const IValueBuilder*, + const TUnboxedValuePod* args) const final try { + return TUnboxedValuePod(new THyperscanReplace(args[0], Pos_)); } catch (const std::exception& e) { UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + private: TSourcePosition Pos_; - TString Regex_; - TDatabase Database_; - TScratch Scratch_; }; - class THyperscanModule: public IUdfModule { - public: - TStringRef Name() const { - return TStringRef::Of("Hyperscan"); - } + static const TStringRef& Name() { + static auto name = TStringRef::Of("Replace"); + return name; + } + + THyperscanReplace(const TUnboxedValuePod& runConfig, TSourcePosition pos) + : Pos_(pos) + { + Regex_ = runConfig.AsStringRef(); + TOptions options = 
HS_FLAG_SOM_LEFTMOST; + + SetCommonOptions(Regex_, options); + + Database_ = Compile(Regex_, options); + Scratch_ = MakeScratch(Database_); + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + if (args[0]) { + const std::string_view input(args[0].AsStringRef()); + const std::string_view replacement(args[1].AsStringRef()); + + ui64 index = 0; + TStringBuilder result; + auto callback = [input, replacement, &index, &result](TOptions id, ui64 from, ui64 to) { + Y_UNUSED(id); + if (index != from) { + result << input.substr(index, from - index); + } + result << replacement; + index = to; + }; + Scan(Database_, Scratch_, input, callback); - void CleanupOnTerminate() const final { - } + if (!index) { + return args[0]; + } - void GetAllFunctions(IFunctionsSink& sink) const final { - sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::NORMAL)); - sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::NORMAL)); - sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::BACKTRACKING)); - sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::BACKTRACKING)); - sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::MULTI))->SetTypeAwareness(); - sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::MULTI))->SetTypeAwareness(); - sink.Add(THyperscanCapture::Name()); - sink.Add(THyperscanReplace::Name()); + result << input.substr(index); + return valueBuilder->NewString(result); } - void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final { - try { - Y_UNUSED(userType); - - bool typesOnly = (flags & TFlags::TypesOnly); - bool isMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::NORMAL) == name); - bool isGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::NORMAL) == name); - bool isBacktrackingMatch = (THyperscanMatch::Name(false, 
THyperscanMatch::EMode::BACKTRACKING) == name); - bool isBacktrackingGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::BACKTRACKING) == name); - bool isMultiMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::MULTI) == name); - bool isMultiGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::MULTI) == name); - - if (isMatch || isGrep) { - builder.SimpleSignature<bool(TOptional<char*>)>() - .RunConfig<const char*>(); - - if (!typesOnly) { - builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isGrep, THyperscanMatch::EMode::NORMAL)); - } - } else if (isBacktrackingMatch || isBacktrackingGrep) { - builder.SimpleSignature<bool(TOptional<char*>)>() - .RunConfig<const char*>(); + return TUnboxedValue(); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + + TSourcePosition Pos_; + TString Regex_; + TDatabase Database_; + TScratch Scratch_; +}; + +class THyperscanModule: public IUdfModule { +public: + TStringRef Name() const { + return TStringRef::Of("Hyperscan"); + } + + void CleanupOnTerminate() const final { + } + + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::NORMAL)); + sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::NORMAL)); + sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::BACKTRACKING)); + sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::BACKTRACKING)); + sink.Add(THyperscanMatch::Name(true, THyperscanMatch::EMode::MULTI))->SetTypeAwareness(); + sink.Add(THyperscanMatch::Name(false, THyperscanMatch::EMode::MULTI))->SetTypeAwareness(); + sink.Add(THyperscanCapture::Name()); + sink.Add(THyperscanReplace::Name()); + } + + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { + try { + Y_UNUSED(userType); + + bool typesOnly 
= (flags & TFlags::TypesOnly); + bool isMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::NORMAL) == name); + bool isGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::NORMAL) == name); + bool isBacktrackingMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::BACKTRACKING) == name); + bool isBacktrackingGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::BACKTRACKING) == name); + bool isMultiMatch = (THyperscanMatch::Name(false, THyperscanMatch::EMode::MULTI) == name); + bool isMultiGrep = (THyperscanMatch::Name(true, THyperscanMatch::EMode::MULTI) == name); + + if (isMatch || isGrep) { + builder.SimpleSignature<bool(TOptional<char*>)>() + .RunConfig<const char*>(); + + if (!typesOnly) { + builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isGrep, THyperscanMatch::EMode::NORMAL)); + } + } else if (isBacktrackingMatch || isBacktrackingGrep) { + builder.SimpleSignature<bool(TOptional<char*>)>() + .RunConfig<const char*>(); - if (!typesOnly) { - builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isBacktrackingGrep, THyperscanMatch::EMode::BACKTRACKING)); - } - } else if (isMultiMatch || isMultiGrep) { - auto boolType = builder.SimpleType<bool>(); - auto optionalStringType = builder.Optional()->Item<char*>().Build(); - const std::string_view regexp(typeConfig); - size_t regexpCount = std::count(regexp.begin(), regexp.end(), '\n') + 1; - auto tuple = builder.Tuple(); - for (size_t i = 0; i < regexpCount; ++i) { - tuple->Add(boolType); - } - auto tupleType = tuple->Build(); - builder.Args(1)->Add(optionalStringType).Done().Returns(tupleType).RunConfig<char*>(); + if (!typesOnly) { + builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isBacktrackingGrep, THyperscanMatch::EMode::BACKTRACKING)); + } + } else if (isMultiMatch || isMultiGrep) { + auto boolType = builder.SimpleType<bool>(); + auto optionalStringType = 
builder.Optional()->Item<char*>().Build(); + const std::string_view regexp(typeConfig); + size_t regexpCount = std::count(regexp.begin(), regexp.end(), '\n') + 1; + auto tuple = builder.Tuple(); + for (size_t i = 0; i < regexpCount; ++i) { + tuple->Add(boolType); + } + auto tupleType = tuple->Build(); + builder.Args(1)->Add(optionalStringType).Done().Returns(tupleType).RunConfig<char*>(); - if (!typesOnly) { - builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isMultiGrep, THyperscanMatch::EMode::MULTI, regexpCount)); - } - } else if (THyperscanCapture::Name() == name) { - builder.SimpleSignature<TOptional<char*>(TOptional<char*>)>() - .RunConfig<char*>(); + if (!typesOnly) { + builder.Implementation(new THyperscanMatch::TFactory(builder.GetSourcePosition(), isMultiGrep, THyperscanMatch::EMode::MULTI, regexpCount)); + } + } else if (THyperscanCapture::Name() == name) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>)>() + .RunConfig<char*>(); - if (!typesOnly) { - builder.Implementation(new THyperscanCapture::TFactory(builder.GetSourcePosition())); - } - } else if (THyperscanReplace::Name() == name) { - builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() - .RunConfig<char*>(); + if (!typesOnly) { + builder.Implementation(new THyperscanCapture::TFactory(builder.GetSourcePosition())); + } + } else if (THyperscanReplace::Name() == name) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() + .RunConfig<char*>(); - if (!typesOnly) { - builder.Implementation(new THyperscanReplace::TFactory(builder.GetSourcePosition())); - } + if (!typesOnly) { + builder.Implementation(new THyperscanReplace::TFactory(builder.GetSourcePosition())); } - } catch (const std::exception& e) { - builder.SetError(CurrentExceptionMessage()); } + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); } - }; - - class TPcreModule : public THyperscanModule { - public: - TStringRef Name() const 
{ - return TStringRef::Of("Pcre"); - } - }; -} + } +}; + +class TPcreModule: public THyperscanModule { +public: + TStringRef Name() const { + return TStringRef::Of("Pcre"); + } +}; +} // namespace REGISTER_MODULES(THyperscanModule, TPcreModule) diff --git a/yql/essentials/udfs/common/hyperscan/ya.make b/yql/essentials/udfs/common/hyperscan/ya.make index 9217280ba76..574458d246a 100644 --- a/yql/essentials/udfs/common/hyperscan/ya.make +++ b/yql/essentials/udfs/common/hyperscan/ya.make @@ -14,6 +14,8 @@ YQL_UDF_CONTRIB(hyperscan_udf) 0 ) + ENABLE(YQL_STYLE_CPP) + SRCS( hyperscan_udf.cpp ) @@ -27,4 +29,5 @@ YQL_UDF_CONTRIB(hyperscan_udf) RECURSE_FOR_TESTS( test -)
\ No newline at end of file +) + diff --git a/yql/essentials/udfs/common/ip_base/ip_base.cpp b/yql/essentials/udfs/common/ip_base/ip_base.cpp index 1c017e2a5d2..fbab4c25941 100644 --- a/yql/essentials/udfs/common/ip_base/ip_base.cpp +++ b/yql/essentials/udfs/common/ip_base/ip_base.cpp @@ -4,4 +4,3 @@ SIMPLE_MODULE(TIpModule, EXPORTED_IP_BASE_UDF) REGISTER_MODULES(TIpModule) - diff --git a/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp index a0617e77283..dbd58d3e25e 100644 --- a/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp +++ b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.cpp @@ -1 +1 @@ -#include "ip_base_udf.h"
\ No newline at end of file +#include "ip_base_udf.h" diff --git a/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h index 2bb4f987190..5e4e9cb9b00 100644 --- a/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h +++ b/yql/essentials/udfs/common/ip_base/lib/ip_base_udf.h @@ -8,376 +8,369 @@ #include <util/generic/buffer.h> namespace { - using TAutoMapString = NKikimr::NUdf::TAutoMap<char*>; - using TAutoMapUint32 = NKikimr::NUdf::TAutoMap<ui32>; - using TOptionalString = NKikimr::NUdf::TOptional<char*>; - using TOptionalUint32 = NKikimr::NUdf::TOptional<ui32>; - using TOptionalByte = NKikimr::NUdf::TOptional<ui8>; - using TStringRef = NKikimr::NUdf::TStringRef; - using TUnboxedValue = NKikimr::NUdf::TUnboxedValue; - using TUnboxedValuePod = NKikimr::NUdf::TUnboxedValuePod; - - ui8 GetAddressRangePrefix(const TIpAddressRange& range) { - if (range.Contains(TIpv6Address(ui128(0), TIpv6Address::Ipv6)) && range.Contains(TIpv6Address(ui128(-1), TIpv6Address::Ipv6))) { - return 0; - } - if (range.Size() == 0) { - return range.Type() == TIpv6Address::Ipv4 ? 32 : 128; - } - ui128 size = range.Size(); - size_t sizeLog = MostSignificantBit(size); - return ui8((range.Type() == TIpv6Address::Ipv4 ? 
32 : 128) - sizeLog); +using TAutoMapString = NKikimr::NUdf::TAutoMap<char*>; +using TAutoMapUint32 = NKikimr::NUdf::TAutoMap<ui32>; +using TOptionalString = NKikimr::NUdf::TOptional<char*>; +using TOptionalUint32 = NKikimr::NUdf::TOptional<ui32>; +using TOptionalByte = NKikimr::NUdf::TOptional<ui8>; +using TStringRef = NKikimr::NUdf::TStringRef; +using TUnboxedValue = NKikimr::NUdf::TUnboxedValue; +using TUnboxedValuePod = NKikimr::NUdf::TUnboxedValuePod; + +ui8 GetAddressRangePrefix(const TIpAddressRange& range) { + if (range.Contains(TIpv6Address(ui128(0), TIpv6Address::Ipv6)) && range.Contains(TIpv6Address(ui128(-1), TIpv6Address::Ipv6))) { + return 0; + } + if (range.Size() == 0) { + return range.Type() == TIpv6Address::Ipv4 ? 32 : 128; } + ui128 size = range.Size(); + size_t sizeLog = MostSignificantBit(size); + return ui8((range.Type() == TIpv6Address::Ipv4 ? 32 : 128) - sizeLog); +} - struct TRawIp4 { - ui8 A, B, C, D; - - static TRawIp4 FromIpAddress(const TIpv6Address& addr) { - ui128 x = addr; - return { - ui8(x >> 24 & 0xff), - ui8(x >> 16 & 0xff), - ui8(x >> 8 & 0xff), - ui8(x & 0xff) - }; - } +struct TRawIp4 { + ui8 A, B, C, D; - static TRawIp4 MaskFromPrefix(ui8 prefix) { - ui128 x = ui128(-1) << int(32 - prefix); - x &= ui128(ui32(-1)); - return FromIpAddress({x, TIpv6Address::Ipv4}); - } + static TRawIp4 FromIpAddress(const TIpv6Address& addr) { + ui128 x = addr; + return { + ui8(x >> 24 & 0xff), + ui8(x >> 16 & 0xff), + ui8(x >> 8 & 0xff), + ui8(x & 0xff)}; + } - TIpv6Address ToIpAddress() const { - return {A, B, C, D}; - } + static TRawIp4 MaskFromPrefix(ui8 prefix) { + ui128 x = ui128(-1) << int(32 - prefix); + x &= ui128(ui32(-1)); + return FromIpAddress({x, TIpv6Address::Ipv4}); + } - std::pair<TRawIp4, TRawIp4> ApplyMask(const TRawIp4& mask) const { - return {{ - ui8(A & mask.A), - ui8(B & mask.B), - ui8(C & mask.C), - ui8(D & mask.D) - },{ - ui8(A | ~mask.A), - ui8(B | ~mask.B), - ui8(C | ~mask.C), - ui8(D | ~mask.D) - }}; - } - }; + 
TIpv6Address ToIpAddress() const { + return {A, B, C, D}; + } - struct TRawIp4Subnet { - TRawIp4 Base, Mask; + std::pair<TRawIp4, TRawIp4> ApplyMask(const TRawIp4& mask) const { + return {{ui8(A & mask.A), + ui8(B & mask.B), + ui8(C & mask.C), + ui8(D & mask.D)}, + {ui8(A | ~mask.A), + ui8(B | ~mask.B), + ui8(C | ~mask.C), + ui8(D | ~mask.D)}}; + } +}; - static TRawIp4Subnet FromIpRange(const TIpAddressRange& range) { - return {TRawIp4::FromIpAddress(*range.Begin()), TRawIp4::MaskFromPrefix(GetAddressRangePrefix(range))}; - } +struct TRawIp4Subnet { + TRawIp4 Base, Mask; - TIpAddressRange ToIpRange() const { - auto range = Base.ApplyMask(Mask); - return {range.first.ToIpAddress(), range.second.ToIpAddress()}; - } - }; - - struct TRawIp6 { - ui8 A1, A0, B1, B0, C1, C0, D1, D0, E1, E0, F1, F0, G1, G0, H1, H0; - - static TRawIp6 FromIpAddress(const TIpv6Address& addr) { - ui128 x = addr; - return { - ui8(x >> 120 & 0xff), ui8(x >> 112 & 0xff), - ui8(x >> 104 & 0xff), ui8(x >> 96 & 0xff), - ui8(x >> 88 & 0xff), ui8(x >> 80 & 0xff), - ui8(x >> 72 & 0xff), ui8(x >> 64 & 0xff), - ui8(x >> 56 & 0xff), ui8(x >> 48 & 0xff), - ui8(x >> 40 & 0xff), ui8(x >> 32 & 0xff), - ui8(x >> 24 & 0xff), ui8(x >> 16 & 0xff), - ui8(x >> 8 & 0xff), ui8(x & 0xff) - }; - } + static TRawIp4Subnet FromIpRange(const TIpAddressRange& range) { + return {TRawIp4::FromIpAddress(*range.Begin()), TRawIp4::MaskFromPrefix(GetAddressRangePrefix(range))}; + } - static TRawIp6 MaskFromPrefix(ui8 prefix) { - ui128 x = prefix == 0 ? 
ui128(0) : ui128(-1) << int(128 - prefix); - return FromIpAddress({x, TIpv6Address::Ipv6}); - } + TIpAddressRange ToIpRange() const { + auto range = Base.ApplyMask(Mask); + return {range.first.ToIpAddress(), range.second.ToIpAddress()}; + } +}; + +struct TRawIp6 { + ui8 A1, A0, B1, B0, C1, C0, D1, D0, E1, E0, F1, F0, G1, G0, H1, H0; + + static TRawIp6 FromIpAddress(const TIpv6Address& addr) { + ui128 x = addr; + return { + ui8(x >> 120 & 0xff), ui8(x >> 112 & 0xff), + ui8(x >> 104 & 0xff), ui8(x >> 96 & 0xff), + ui8(x >> 88 & 0xff), ui8(x >> 80 & 0xff), + ui8(x >> 72 & 0xff), ui8(x >> 64 & 0xff), + ui8(x >> 56 & 0xff), ui8(x >> 48 & 0xff), + ui8(x >> 40 & 0xff), ui8(x >> 32 & 0xff), + ui8(x >> 24 & 0xff), ui8(x >> 16 & 0xff), + ui8(x >> 8 & 0xff), ui8(x & 0xff)}; + } - TIpv6Address ToIpAddress() const { - return {ui16(ui32(A1) << ui32(8) | ui32(A0)), - ui16(ui32(B1) << ui32(8) | ui32(B0)), - ui16(ui32(C1) << ui32(8) | ui32(C0)), - ui16(ui32(D1) << ui32(8) | ui32(D0)), - ui16(ui32(E1) << ui32(8) | ui32(E0)), - ui16(ui32(F1) << ui32(8) | ui32(F0)), - ui16(ui32(G1) << ui32(8) | ui32(G0)), - ui16(ui32(H1) << ui32(8) | ui32(H0)), - }; - } + static TRawIp6 MaskFromPrefix(ui8 prefix) { + ui128 x = prefix == 0 ? 
ui128(0) : ui128(-1) << int(128 - prefix); + return FromIpAddress({x, TIpv6Address::Ipv6}); + } - std::pair<TRawIp6, TRawIp6> ApplyMask(const TRawIp6& mask) const { - return { { - ui8(A1 & mask.A1), - ui8(A0 & mask.A0), - ui8(B1 & mask.B1), - ui8(B0 & mask.B0), - ui8(C1 & mask.C1), - ui8(C0 & mask.C0), - ui8(D1 & mask.D1), - ui8(D0 & mask.D0), - ui8(E1 & mask.E1), - ui8(E0 & mask.E0), - ui8(F1 & mask.F1), - ui8(F0 & mask.F0), - ui8(G1 & mask.G1), - ui8(G0 & mask.G0), - ui8(H1 & mask.H1), - ui8(H0 & mask.H0) - }, { - ui8(A1 | ~mask.A1), - ui8(A0 | ~mask.A0), - ui8(B1 | ~mask.B1), - ui8(B0 | ~mask.B0), - ui8(C1 | ~mask.C1), - ui8(C0 | ~mask.C0), - ui8(D1 | ~mask.D1), - ui8(D0 | ~mask.D0), - ui8(E1 | ~mask.E1), - ui8(E0 | ~mask.E0), - ui8(F1 | ~mask.F1), - ui8(F0 | ~mask.F0), - ui8(G1 | ~mask.G1), - ui8(G0 | ~mask.G0), - ui8(H1 | ~mask.H1), - ui8(H0 | ~mask.H0) - }}; - } - }; + TIpv6Address ToIpAddress() const { + return { + ui16(ui32(A1) << ui32(8) | ui32(A0)), + ui16(ui32(B1) << ui32(8) | ui32(B0)), + ui16(ui32(C1) << ui32(8) | ui32(C0)), + ui16(ui32(D1) << ui32(8) | ui32(D0)), + ui16(ui32(E1) << ui32(8) | ui32(E0)), + ui16(ui32(F1) << ui32(8) | ui32(F0)), + ui16(ui32(G1) << ui32(8) | ui32(G0)), + ui16(ui32(H1) << ui32(8) | ui32(H0)), + }; + } - struct TRawIp6Subnet { - TRawIp6 Base, Mask; + std::pair<TRawIp6, TRawIp6> ApplyMask(const TRawIp6& mask) const { + return {{ui8(A1 & mask.A1), + ui8(A0 & mask.A0), + ui8(B1 & mask.B1), + ui8(B0 & mask.B0), + ui8(C1 & mask.C1), + ui8(C0 & mask.C0), + ui8(D1 & mask.D1), + ui8(D0 & mask.D0), + ui8(E1 & mask.E1), + ui8(E0 & mask.E0), + ui8(F1 & mask.F1), + ui8(F0 & mask.F0), + ui8(G1 & mask.G1), + ui8(G0 & mask.G0), + ui8(H1 & mask.H1), + ui8(H0 & mask.H0)}, + {ui8(A1 | ~mask.A1), + ui8(A0 | ~mask.A0), + ui8(B1 | ~mask.B1), + ui8(B0 | ~mask.B0), + ui8(C1 | ~mask.C1), + ui8(C0 | ~mask.C0), + ui8(D1 | ~mask.D1), + ui8(D0 | ~mask.D0), + ui8(E1 | ~mask.E1), + ui8(E0 | ~mask.E0), + ui8(F1 | ~mask.F1), + ui8(F0 | ~mask.F0), + ui8(G1 
| ~mask.G1), + ui8(G0 | ~mask.G0), + ui8(H1 | ~mask.H1), + ui8(H0 | ~mask.H0)}}; + } +}; - static TRawIp6Subnet FromIpRange(const TIpAddressRange& range) { - return {TRawIp6::FromIpAddress(*range.Begin()), TRawIp6::MaskFromPrefix(GetAddressRangePrefix(range))}; - } +struct TRawIp6Subnet { + TRawIp6 Base, Mask; - TIpAddressRange ToIpRange() const { - auto range = Base.ApplyMask(Mask); - return {range.first.ToIpAddress(), range.second.ToIpAddress()}; - } - }; - - TIpv6Address DeserializeAddress(const TStringRef& str) { - TIpv6Address addr; - if (str.Size() == 4) { - TRawIp4 addr4; - memcpy(&addr4, str.Data(), sizeof addr4); - addr = addr4.ToIpAddress(); - } else if (str.Size() == 16) { - TRawIp6 addr6; - memcpy(&addr6, str.Data(), sizeof addr6); - addr = addr6.ToIpAddress(); - } else { - ythrow yexception() << "Incorrect size of input, expected " - << "4 or 16, got " << str.Size(); - } - return addr; + static TRawIp6Subnet FromIpRange(const TIpAddressRange& range) { + return {TRawIp6::FromIpAddress(*range.Begin()), TRawIp6::MaskFromPrefix(GetAddressRangePrefix(range))}; } - TIpAddressRange DeserializeSubnet(const TStringRef& str) { - TIpAddressRange range; - if (str.Size() == sizeof(TRawIp4Subnet)) { - TRawIp4Subnet subnet4; - memcpy(&subnet4, str.Data(), sizeof subnet4); - range = subnet4.ToIpRange(); - } else if (str.Size() == sizeof(TRawIp6Subnet)) { - TRawIp6Subnet subnet6; - memcpy(&subnet6, str.Data(), sizeof subnet6); - range = subnet6.ToIpRange(); - } else { - ythrow yexception() << "Invalid binary representation"; - } - return range; + TIpAddressRange ToIpRange() const { + auto range = Base.ApplyMask(Mask); + return {range.first.ToIpAddress(), range.second.ToIpAddress()}; } - - TString SerializeAddress(const TIpv6Address& addr) { - Y_ENSURE(addr.Type() == TIpv6Address::Ipv4 || addr.Type() == TIpv6Address::Ipv6); - TString res; - if (addr.Type() == TIpv6Address::Ipv4) { - auto addr4 = TRawIp4::FromIpAddress(addr); - res = TString(reinterpret_cast<const char 
*>(&addr4), sizeof addr4); - } else if (addr.Type() == TIpv6Address::Ipv6) { - auto addr6 = TRawIp6::FromIpAddress(addr); - res = TString(reinterpret_cast<const char *>(&addr6), sizeof addr6); - } - return res; +}; + +TIpv6Address DeserializeAddress(const TStringRef& str) { + TIpv6Address addr; + if (str.Size() == 4) { + TRawIp4 addr4; + memcpy(&addr4, str.Data(), sizeof addr4); + addr = addr4.ToIpAddress(); + } else if (str.Size() == 16) { + TRawIp6 addr6; + memcpy(&addr6, str.Data(), sizeof addr6); + addr = addr6.ToIpAddress(); + } else { + ythrow yexception() << "Incorrect size of input, expected " + << "4 or 16, got " << str.Size(); } + return addr; +} - TString SerializeSubnet(const TIpAddressRange& range) { - TString res; - if (range.Type() == TIpv6Address::Ipv4) { - auto subnet4 = TRawIp4Subnet::FromIpRange(range); - res = TString(reinterpret_cast<const char *>(&subnet4), sizeof subnet4); - } else if (range.Type() == TIpv6Address::Ipv6) { - auto subnet6 = TRawIp6Subnet::FromIpRange(range); - res = TString(reinterpret_cast<const char *>(&subnet6), sizeof subnet6); - } - return res; +TIpAddressRange DeserializeSubnet(const TStringRef& str) { + TIpAddressRange range; + if (str.Size() == sizeof(TRawIp4Subnet)) { + TRawIp4Subnet subnet4; + memcpy(&subnet4, str.Data(), sizeof subnet4); + range = subnet4.ToIpRange(); + } else if (str.Size() == sizeof(TRawIp6Subnet)) { + TRawIp6Subnet subnet6; + memcpy(&subnet6, str.Data(), sizeof subnet6); + range = subnet6.ToIpRange(); + } else { + ythrow yexception() << "Invalid binary representation"; } + return range; +} - SIMPLE_STRICT_UDF(TFromString, TOptionalString(TAutoMapString)) { - TIpv6Address addr = TIpv6Address::FromString(args[0].AsStringRef()); - if (addr.Type() != TIpv6Address::Ipv4 && addr.Type() != TIpv6Address::Ipv6) { - return TUnboxedValue(); - } - return valueBuilder->NewString(SerializeAddress(addr)); +TString SerializeAddress(const TIpv6Address& addr) { + Y_ENSURE(addr.Type() == TIpv6Address::Ipv4 || 
addr.Type() == TIpv6Address::Ipv6); + TString res; + if (addr.Type() == TIpv6Address::Ipv4) { + auto addr4 = TRawIp4::FromIpAddress(addr); + res = TString(reinterpret_cast<const char*>(&addr4), sizeof addr4); + } else if (addr.Type() == TIpv6Address::Ipv6) { + auto addr6 = TRawIp6::FromIpAddress(addr); + res = TString(reinterpret_cast<const char*>(&addr6), sizeof addr6); } + return res; +} - SIMPLE_STRICT_UDF_OPTIONS(TIpv4FromUint32, char*(TAutoMapUint32), builder.SetMinLangVer(NYql::MakeLangVersion(2025, 3))) { - // in_addr expects bytes in network byte order. - in_addr addr; - addr.s_addr = htonl(args[0].Get<ui32>()); - return valueBuilder->NewString(SerializeAddress(TIpv6Address{addr})); +TString SerializeSubnet(const TIpAddressRange& range) { + TString res; + if (range.Type() == TIpv6Address::Ipv4) { + auto subnet4 = TRawIp4Subnet::FromIpRange(range); + res = TString(reinterpret_cast<const char*>(&subnet4), sizeof subnet4); + } else if (range.Type() == TIpv6Address::Ipv6) { + auto subnet6 = TRawIp6Subnet::FromIpRange(range); + res = TString(reinterpret_cast<const char*>(&subnet6), sizeof subnet6); } + return res; +} - SIMPLE_STRICT_UDF(TSubnetFromString, TOptionalString(TAutoMapString)) { - TIpAddressRange range = TIpAddressRange::FromCompactString(args[0].AsStringRef()); - auto res = SerializeSubnet(range); - return res ? 
valueBuilder->NewString(res) : TUnboxedValue(TUnboxedValuePod()); +SIMPLE_STRICT_UDF(TFromString, TOptionalString(TAutoMapString)) { + TIpv6Address addr = TIpv6Address::FromString(args[0].AsStringRef()); + if (addr.Type() != TIpv6Address::Ipv4 && addr.Type() != TIpv6Address::Ipv6) { + return TUnboxedValue(); } + return valueBuilder->NewString(SerializeAddress(addr)); +} - SIMPLE_UDF(TToString, char*(TAutoMapString)) { - return valueBuilder->NewString(DeserializeAddress(args[0].AsStringRef()).ToString(false)); - } +SIMPLE_STRICT_UDF_OPTIONS(TIpv4FromUint32, char*(TAutoMapUint32), builder.SetMinLangVer(NYql::MakeLangVersion(2025, 3))) { + // in_addr expects bytes in network byte order. + in_addr addr; + addr.s_addr = htonl(args[0].Get<ui32>()); + return valueBuilder->NewString(SerializeAddress(TIpv6Address{addr})); +} - SIMPLE_UDF_OPTIONS(TIpv4ToUint32, TOptionalUint32(TAutoMapString), builder.SetMinLangVer(NYql::MakeLangVersion(2025, 3))) { - Y_UNUSED(valueBuilder); - TIpv6Address addr = DeserializeAddress(args[0].AsStringRef()); - if (addr.Type() != TIpv6Address::Ipv4) { - return TUnboxedValue(); - } +SIMPLE_STRICT_UDF(TSubnetFromString, TOptionalString(TAutoMapString)) { + TIpAddressRange range = TIpAddressRange::FromCompactString(args[0].AsStringRef()); + auto res = SerializeSubnet(range); + return res ? 
valueBuilder->NewString(res) : TUnboxedValue(TUnboxedValuePod()); +} - in_addr tmp; - addr.ToInAddr(tmp); - ui32 ret = ntohl(tmp.s_addr); - return TUnboxedValuePod(ret); - } +SIMPLE_UDF(TToString, char*(TAutoMapString)) { + return valueBuilder->NewString(DeserializeAddress(args[0].AsStringRef()).ToString(false)); +} - SIMPLE_UDF(TSubnetToString, char*(TAutoMapString)) { - TStringBuilder result; - auto range = DeserializeSubnet(args[0].AsStringRef()); - result << (*range.Begin()).ToString(false); - result << '/'; - result << ToString(GetAddressRangePrefix(range)); - return valueBuilder->NewString(result); +SIMPLE_UDF_OPTIONS(TIpv4ToUint32, TOptionalUint32(TAutoMapString), builder.SetMinLangVer(NYql::MakeLangVersion(2025, 3))) { + Y_UNUSED(valueBuilder); + TIpv6Address addr = DeserializeAddress(args[0].AsStringRef()); + if (addr.Type() != TIpv6Address::Ipv4) { + return TUnboxedValue(); } - SIMPLE_UDF(TSubnetMatch, bool(TAutoMapString, TAutoMapString)) { - Y_UNUSED(valueBuilder); - auto range1 = DeserializeSubnet(args[0].AsStringRef()); - if (args[1].AsStringRef().Size() == sizeof(TRawIp4) || args[1].AsStringRef().Size() == sizeof(TRawIp6)) { - auto addr2 = DeserializeAddress(args[1].AsStringRef()); - return TUnboxedValuePod(range1.Contains(addr2)); - } else { // second argument is a whole subnet, not a single address - auto range2 = DeserializeSubnet(args[1].AsStringRef()); - return TUnboxedValuePod(range1.Contains(range2)); - } - } + in_addr tmp; + addr.ToInAddr(tmp); + ui32 ret = ntohl(tmp.s_addr); + return TUnboxedValuePod(ret); +} - SIMPLE_STRICT_UDF(TIsIPv4, bool(TOptionalString)) { - Y_UNUSED(valueBuilder); - bool result = false; - if (args[0]) { - const auto ref = args[0].AsStringRef(); - result = ref.Size() == 4; - } - return TUnboxedValuePod(result); +SIMPLE_UDF(TSubnetToString, char*(TAutoMapString)) { + TStringBuilder result; + auto range = DeserializeSubnet(args[0].AsStringRef()); + result << (*range.Begin()).ToString(false); + result << '/'; + result << 
ToString(GetAddressRangePrefix(range)); + return valueBuilder->NewString(result); +} + +SIMPLE_UDF(TSubnetMatch, bool(TAutoMapString, TAutoMapString)) { + Y_UNUSED(valueBuilder); + auto range1 = DeserializeSubnet(args[0].AsStringRef()); + if (args[1].AsStringRef().Size() == sizeof(TRawIp4) || args[1].AsStringRef().Size() == sizeof(TRawIp6)) { + auto addr2 = DeserializeAddress(args[1].AsStringRef()); + return TUnboxedValuePod(range1.Contains(addr2)); + } else { // second argument is a whole subnet, not a single address + auto range2 = DeserializeSubnet(args[1].AsStringRef()); + return TUnboxedValuePod(range1.Contains(range2)); } +} - SIMPLE_STRICT_UDF(TIsIPv6, bool(TOptionalString)) { - Y_UNUSED(valueBuilder); - bool result = false; - if (args[0]) { - const auto ref = args[0].AsStringRef(); - result = ref.Size() == 16; - } - return TUnboxedValuePod(result); +SIMPLE_STRICT_UDF(TIsIPv4, bool(TOptionalString)) { + Y_UNUSED(valueBuilder); + bool result = false; + if (args[0]) { + const auto ref = args[0].AsStringRef(); + result = ref.Size() == 4; } + return TUnboxedValuePod(result); +} - SIMPLE_STRICT_UDF(TIsEmbeddedIPv4, bool(TOptionalString)) { - Y_UNUSED(valueBuilder); - bool result = false; - if (args[0]) { - const auto ref = args[0].AsStringRef(); - if (ref.Size() == 16) { - result = DeserializeAddress(ref).Isv4MappedTov6(); - } - } - return TUnboxedValuePod(result); +SIMPLE_STRICT_UDF(TIsIPv6, bool(TOptionalString)) { + Y_UNUSED(valueBuilder); + bool result = false; + if (args[0]) { + const auto ref = args[0].AsStringRef(); + result = ref.Size() == 16; } + return TUnboxedValuePod(result); +} - SIMPLE_UDF(TConvertToIPv6, char*(TAutoMapString)) { - const auto& ref = args[0].AsStringRef(); +SIMPLE_STRICT_UDF(TIsEmbeddedIPv4, bool(TOptionalString)) { + Y_UNUSED(valueBuilder); + bool result = false; + if (args[0]) { + const auto ref = args[0].AsStringRef(); if (ref.Size() == 16) { - return valueBuilder->NewString(ref); - } else if (ref.Size() == 4) { - TIpv6Address 
addr4 = DeserializeAddress(ref); - auto addr6 = TIpv6Address(ui128(addr4) | ui128(0xFFFF) << 32, TIpv6Address::Ipv6); - return valueBuilder->NewString(SerializeAddress(addr6)); - } else { - ythrow yexception() << "Incorrect size of input, expected " - << "4 or 16, got " << ref.Size(); + result = DeserializeAddress(ref).Isv4MappedTov6(); } } + return TUnboxedValuePod(result); +} - SIMPLE_UDF_WITH_OPTIONAL_ARGS(TGetSubnet, char*(TAutoMapString, TOptionalByte), 1) { - const auto ref = args[0].AsStringRef(); - ui8 subnetSize = args[1].GetOrDefault<ui8>(0); - TIpv6Address addr = DeserializeAddress(ref); - if (ref.Size() == 4) { - if (!subnetSize) { - subnetSize = 24; - } - if (subnetSize > 32) { - subnetSize = 32; - } - } else if (ref.Size() == 16) { - if (!subnetSize) { - subnetSize = 64; - } - if (subnetSize > 128) { - subnetSize = 128; - } - } else { - ythrow yexception() << "Incorrect size of input, expected " - << "4 or 16, got " << ref.Size(); - } - TIpv6Address beg = LowerBoundForPrefix(addr, subnetSize); - return valueBuilder->NewString(SerializeAddress(beg)); +SIMPLE_UDF(TConvertToIPv6, char*(TAutoMapString)) { + const auto& ref = args[0].AsStringRef(); + if (ref.Size() == 16) { + return valueBuilder->NewString(ref); + } else if (ref.Size() == 4) { + TIpv6Address addr4 = DeserializeAddress(ref); + auto addr6 = TIpv6Address(ui128(addr4) | ui128(0xFFFF) << 32, TIpv6Address::Ipv6); + return valueBuilder->NewString(SerializeAddress(addr6)); + } else { + ythrow yexception() << "Incorrect size of input, expected " + << "4 or 16, got " << ref.Size(); } +} - SIMPLE_UDF(TGetSubnetByMask, char*(TAutoMapString, TAutoMapString)) { - const auto refBase = args[0].AsStringRef(); - const auto refMask = args[1].AsStringRef(); - TIpv6Address addrBase = DeserializeAddress(refBase); - TIpv6Address addrMask = DeserializeAddress(refMask); - if (addrBase.Type() != addrMask.Type()) { - ythrow yexception() << "Base and mask differ in length"; +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TGetSubnet, 
char*(TAutoMapString, TOptionalByte), 1) { + const auto ref = args[0].AsStringRef(); + ui8 subnetSize = args[1].GetOrDefault<ui8>(0); + TIpv6Address addr = DeserializeAddress(ref); + if (ref.Size() == 4) { + if (!subnetSize) { + subnetSize = 24; } - return valueBuilder->NewString(SerializeAddress(TIpv6Address(ui128(addrBase) & ui128(addrMask), addrBase.Type()))); + if (subnetSize > 32) { + subnetSize = 32; + } + } else if (ref.Size() == 16) { + if (!subnetSize) { + subnetSize = 64; + } + if (subnetSize > 128) { + subnetSize = 128; + } + } else { + ythrow yexception() << "Incorrect size of input, expected " + << "4 or 16, got " << ref.Size(); + } + TIpv6Address beg = LowerBoundForPrefix(addr, subnetSize); + return valueBuilder->NewString(SerializeAddress(beg)); +} + +SIMPLE_UDF(TGetSubnetByMask, char*(TAutoMapString, TAutoMapString)) { + const auto refBase = args[0].AsStringRef(); + const auto refMask = args[1].AsStringRef(); + TIpv6Address addrBase = DeserializeAddress(refBase); + TIpv6Address addrMask = DeserializeAddress(refMask); + if (addrBase.Type() != addrMask.Type()) { + ythrow yexception() << "Base and mask differ in length"; } + return valueBuilder->NewString(SerializeAddress(TIpv6Address(ui128(addrBase) & ui128(addrMask), addrBase.Type()))); +} #define EXPORTED_IP_BASE_UDF \ - TFromString, \ - TIpv4FromUint32, \ - TSubnetFromString, \ - TToString, \ - TIpv4ToUint32, \ - TSubnetToString, \ - TIsIPv4, \ - TIsIPv6, \ - TIsEmbeddedIPv4, \ - TConvertToIPv6, \ - TGetSubnet, \ - TSubnetMatch, \ - TGetSubnetByMask -} + TFromString, \ + TIpv4FromUint32, \ + TSubnetFromString, \ + TToString, \ + TIpv4ToUint32, \ + TSubnetToString, \ + TIsIPv4, \ + TIsIPv6, \ + TIsEmbeddedIPv4, \ + TConvertToIPv6, \ + TGetSubnet, \ + TSubnetMatch, \ + TGetSubnetByMask +} // namespace diff --git a/yql/essentials/udfs/common/ip_base/lib/ya.make b/yql/essentials/udfs/common/ip_base/lib/ya.make index ab9b2bce8e2..3587d2b6192 100644 --- a/yql/essentials/udfs/common/ip_base/lib/ya.make 
+++ b/yql/essentials/udfs/common/ip_base/lib/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( ip_base_udf.cpp ) diff --git a/yql/essentials/udfs/common/ip_base/ya.make b/yql/essentials/udfs/common/ip_base/ya.make index 0a2859c0af2..86c40dd2698 100644 --- a/yql/essentials/udfs/common/ip_base/ya.make +++ b/yql/essentials/udfs/common/ip_base/ya.make @@ -6,6 +6,8 @@ YQL_UDF_CONTRIB(ip_udf) 0 ) + ENABLE(YQL_STYLE_CPP) + SRCS( ip_base.cpp ) diff --git a/yql/essentials/udfs/common/json/json_udf.cpp b/yql/essentials/udfs/common/json/json_udf.cpp index 3a7916bed74..92316947df6 100644 --- a/yql/essentials/udfs/common/json/json_udf.cpp +++ b/yql/essentials/udfs/common/json/json_udf.cpp @@ -6,113 +6,113 @@ using namespace NKikimr; using namespace NUdf; namespace { - class TGetField: public TBoxedValue { - public: - typedef bool TTypeAwareMarker; - - public: - static TStringRef Name() { - return TStringRef::Of("GetField"); +class TGetField: public TBoxedValue { +public: + typedef bool TTypeAwareMarker; + +public: + static TStringRef Name() { + return TStringRef::Of("GetField"); + } + + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + if (!args[0]) { + return valueBuilder->NewEmptyList(); } - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - if (!args[0]) { - return valueBuilder->NewEmptyList(); - } + const TString json(args[0].AsStringRef()); + const TString field(args[1].AsStringRef()); - const TString json(args[0].AsStringRef()); - const TString field(args[1].AsStringRef()); + if (field.empty()) { + return valueBuilder->NewEmptyList(); + } - if (field.empty()) { - return valueBuilder->NewEmptyList(); - } + NJson::TJsonParser parser; + parser.AddField(field, false); - NJson::TJsonParser parser; - parser.AddField(field, false); + TVector<TString> result; + parser.Parse(json, &result); - TVector<TString> result; - parser.Parse(json, 
&result); + TUnboxedValue* items = nullptr; + const auto list = valueBuilder->NewArray(result.size(), items); + for (const TString& item : result) { + *items++ = valueBuilder->NewString(item); + } - TUnboxedValue* items = nullptr; - const auto list = valueBuilder->NewArray(result.size(), items); - for (const TString& item : result) { - *items++ = valueBuilder->NewString(item); - } + return list; + } + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + if (Name() == name) { + bool useString = true; + bool isOptional = true; + if (userType) { + // support of an overload with Json/Json? input type + auto typeHelper = builder.TypeInfoHelper(); + auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) { + builder.SetError("Missing or invalid user type."); + return true; + } - return list; - } + auto argsTypeTuple = userTypeInspector.GetElementType(0); + auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple); + if (!argsTypeInspector) { + builder.SetError("Invalid user type - expected tuple."); + return true; + } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - if (Name() == name) { - bool useString = true; - bool isOptional = true; - if (userType) { - // support of an overload with Json/Json? 
input type - auto typeHelper = builder.TypeInfoHelper(); - auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); - if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) { - builder.SetError("Missing or invalid user type."); - return true; - } - - auto argsTypeTuple = userTypeInspector.GetElementType(0); - auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple); - if (!argsTypeInspector) { - builder.SetError("Invalid user type - expected tuple."); - return true; - } - - if (argsTypeInspector.GetElementsCount() != 2) { - builder.SetError("Invalid user type - expected two arguments."); - return true; - } - - auto inputType = argsTypeInspector.GetElementType(0); - auto optInspector = TOptionalTypeInspector(*typeHelper, inputType); - auto dataType = inputType; - if (optInspector) { - dataType = optInspector.GetItemType(); - } else { - isOptional = false; - } - - auto dataInspector = TDataTypeInspector(*typeHelper, dataType); - if (dataInspector && dataInspector.GetTypeId() == TDataType<TJson>::Id) { - useString = false; - builder.UserType(userType); - } + if (argsTypeInspector.GetElementsCount() != 2) { + builder.SetError("Invalid user type - expected two arguments."); + return true; } - auto retType = builder.List()->Item<char*>().Build(); - if (useString) { - builder.Args()->Add(builder.Optional()->Item<char*>().Build()).Add<char*>().Done().Returns(retType); + auto inputType = argsTypeInspector.GetElementType(0); + auto optInspector = TOptionalTypeInspector(*typeHelper, inputType); + auto dataType = inputType; + if (optInspector) { + dataType = optInspector.GetItemType(); } else { - auto type = builder.SimpleType<TJson>(); - if (isOptional) { - builder.Args()->Add(builder.Optional()->Item(type).Build()).Add<char*>().Done().Returns(retType); - } else { - builder.Args()->Add(type).Add<char*>().Done().Returns(retType); - } + isOptional = false; } - if (!typesOnly) { - builder.Implementation(new TGetField); + auto 
dataInspector = TDataTypeInspector(*typeHelper, dataType); + if (dataInspector && dataInspector.GetTypeId() == TDataType<TJson>::Id) { + useString = false; + builder.UserType(userType); } + } - builder.IsStrict(); - return true; + auto retType = builder.List()->Item<char*>().Build(); + if (useString) { + builder.Args()->Add(builder.Optional()->Item<char*>().Build()).Add<char*>().Done().Returns(retType); } else { - return false; + auto type = builder.SimpleType<TJson>(); + if (isOptional) { + builder.Args()->Add(builder.Optional()->Item(type).Build()).Add<char*>().Done().Returns(retType); + } else { + builder.Args()->Add(type).Add<char*>().Done().Returns(retType); + } } + + if (!typesOnly) { + builder.Implementation(new TGetField); + } + + builder.IsStrict(); + return true; + } else { + return false; } - }; -} + } +}; +} // namespace SIMPLE_MODULE(TJsonModule, TGetField) diff --git a/yql/essentials/udfs/common/json/ya.make b/yql/essentials/udfs/common/json/ya.make index 689714e306b..d45fe60e5f0 100644 --- a/yql/essentials/udfs/common/json/ya.make +++ b/yql/essentials/udfs/common/json/ya.make @@ -6,6 +6,8 @@ YQL_UDF_CONTRIB(json_udf) 0 ) + ENABLE(YQL_STYLE_CPP) + SRCS( json_udf.cpp ) diff --git a/yql/essentials/udfs/common/json2/as_json_node.h b/yql/essentials/udfs/common/json2/as_json_node.h index 6060f03bea8..82c51802433 100644 --- a/yql/essentials/udfs/common/json2/as_json_node.h +++ b/yql/essentials/udfs/common/json2/as_json_node.h @@ -8,108 +8,107 @@ #include <yql/essentials/minikql/dom/json.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NDom; - - template <typename TSource> - class TAsJsonNode: public TBoxedValue { - public: - TAsJsonNode(TSourcePosition pos) - : Pos_(pos) - { +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; +using namespace NDom; + +template <typename TSource> +class TAsJsonNode: public TBoxedValue { +public: + TAsJsonNode(TSourcePosition pos) + : 
Pos_(pos) + { + } + + static TStringRef Name(); + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; } - static TStringRef Name(); + auto optionalSourceType = builder.Optional()->Item<TSource>().Build(); + auto resourceType = builder.Resource(JSON_NODE_RESOURCE_NAME); + builder.Args() + ->Add(optionalSourceType) + .Done() + .Returns(resourceType); - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } + if (!typesOnly) { + builder.Implementation(new TAsJsonNode<TSource>(builder.GetSourcePosition())); + } - auto optionalSourceType = builder.Optional()->Item<TSource>().Build(); - auto resourceType = builder.Resource(JSON_NODE_RESOURCE_NAME); - builder.Args() - ->Add(optionalSourceType) - .Done() - .Returns(resourceType); + builder.IsStrict(); + return true; + } - if (!typesOnly) { - builder.Implementation(new TAsJsonNode<TSource>(builder.GetSourcePosition())); - } +private: + const size_t MaxParseErrors_ = 10; - builder.IsStrict(); - return true; - } + static TUnboxedValue Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder); - private: - const size_t MaxParseErrors_ = 10; - - static TUnboxedValue Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder); - - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - if (!args[0].HasValue()) { - return MakeEntity(); - } - return Interpret(args[0], valueBuilder); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + if 
(!args[0].HasValue()) { + return MakeEntity(); } + return Interpret(args[0], valueBuilder); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - - TSourcePosition Pos_; - }; - - template <> - TStringRef TAsJsonNode<TUtf8>::Name() { - return TStringRef::Of("Utf8AsJsonNode"); } - template <> - TUnboxedValue TAsJsonNode<TUtf8>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { - return MakeString(sourceValue.AsStringRef(), valueBuilder); - } + TSourcePosition Pos_; +}; - template <> - TStringRef TAsJsonNode<double>::Name() { - return TStringRef::Of("DoubleAsJsonNode"); - } +template <> +TStringRef TAsJsonNode<TUtf8>::Name() { + return TStringRef::Of("Utf8AsJsonNode"); +} - template <> - TUnboxedValue TAsJsonNode<double>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { - Y_UNUSED(valueBuilder); - return MakeDouble(sourceValue.Get<double>()); - } +template <> +TUnboxedValue TAsJsonNode<TUtf8>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { + return MakeString(sourceValue.AsStringRef(), valueBuilder); +} - template <> - TStringRef TAsJsonNode<bool>::Name() { - return TStringRef::Of("BoolAsJsonNode"); - } +template <> +TStringRef TAsJsonNode<double>::Name() { + return TStringRef::Of("DoubleAsJsonNode"); +} - template <> - TUnboxedValue TAsJsonNode<bool>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { - Y_UNUSED(valueBuilder); - return MakeBool(sourceValue.Get<bool>()); - } +template <> +TUnboxedValue TAsJsonNode<double>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { + Y_UNUSED(valueBuilder); + return MakeDouble(sourceValue.Get<double>()); +} - template <> - TStringRef TAsJsonNode<TJson>::Name() { - return TStringRef::Of("JsonAsJsonNode"); - } +template <> +TStringRef TAsJsonNode<bool>::Name() { + return TStringRef::Of("BoolAsJsonNode"); +} - 
template <> - TUnboxedValue TAsJsonNode<TJson>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { - return TryParseJsonDom(sourceValue.AsStringRef(), valueBuilder); - } +template <> +TUnboxedValue TAsJsonNode<bool>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { + Y_UNUSED(valueBuilder); + return MakeBool(sourceValue.Get<bool>()); } +template <> +TStringRef TAsJsonNode<TJson>::Name() { + return TStringRef::Of("JsonAsJsonNode"); +} + +template <> +TUnboxedValue TAsJsonNode<TJson>::Interpret(const TUnboxedValue& sourceValue, const IValueBuilder* valueBuilder) { + return TryParseJsonDom(sourceValue.AsStringRef(), valueBuilder); +} +} // namespace NJson2Udf diff --git a/yql/essentials/udfs/common/json2/compile_path.h b/yql/essentials/udfs/common/json2/compile_path.h index 220bd4fbaf6..f932c6df60c 100644 --- a/yql/essentials/udfs/common/json2/compile_path.h +++ b/yql/essentials/udfs/common/json2/compile_path.h @@ -6,65 +6,64 @@ #include <yql/essentials/public/udf/udf_helpers.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; - class TCompilePath: public TBoxedValue { - public: - TCompilePath(TSourcePosition pos) - : Pos_(pos) - { - } +class TCompilePath: public TBoxedValue { +public: + TCompilePath(TSourcePosition pos) + : Pos_(pos) + { + } - static const TStringRef& Name() { - static auto name = TStringRef::Of("CompilePath"); - return name; - } + static const TStringRef& Name() { + static auto name = TStringRef::Of("CompilePath"); + return name; + } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + 
Y_UNUSED(userType); + if (name != Name()) { + return false; + } - auto resourceType = builder.Resource(JSONPATH_RESOURCE_NAME); - builder.Args() - ->Add<NUdf::TUtf8>() - .Done() - .Returns(resourceType); + auto resourceType = builder.Resource(JSONPATH_RESOURCE_NAME); + builder.Args() + ->Add<NUdf::TUtf8>() + .Done() + .Returns(resourceType); - if (!typesOnly) { - builder.Implementation(new TCompilePath(builder.GetSourcePosition())); - } - return true; + if (!typesOnly) { + builder.Implementation(new TCompilePath(builder.GetSourcePosition())); } + return true; + } - private: - const size_t MaxParseErrors_ = 10; - - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - TIssues issues; - const auto jsonPath = NJsonPath::ParseJsonPath(args[0].AsStringRef(), issues, MaxParseErrors_); - if (!issues.Empty()) { - ythrow yexception() << "Error parsing jsonpath:" << Endl << issues.ToString(); - } +private: + const size_t MaxParseErrors_ = 10; - return TUnboxedValuePod(new TJsonPathResource(jsonPath)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + TIssues issues; + const auto jsonPath = NJsonPath::ParseJsonPath(args[0].AsStringRef(), issues, MaxParseErrors_); + if (!issues.Empty()) { + ythrow yexception() << "Error parsing jsonpath:" << Endl << issues.ToString(); } - } - TSourcePosition Pos_; - }; -} + return TUnboxedValuePod(new TJsonPathResource(jsonPath)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + } + TSourcePosition Pos_; +}; +} // namespace NJson2Udf diff --git a/yql/essentials/udfs/common/json2/json2_udf.cpp b/yql/essentials/udfs/common/json2/json2_udf.cpp index 96ef6ccf00b..76c4ef786db 100644 --- 
a/yql/essentials/udfs/common/json2/json2_udf.cpp +++ b/yql/essentials/udfs/common/json2/json2_udf.cpp @@ -9,35 +9,35 @@ #include <yql/essentials/public/udf/udf_helpers.h> namespace NJson2Udf { - SIMPLE_MODULE(TJson2Module, - TParse, - TSerialize<EDataSlot::Json>, - TSerialize<EDataSlot::JsonDocument>, - TCompilePath, - TSqlValue<EDataSlot::Json, TUtf8>, - TSqlValue<EDataSlot::Json, TUtf8, true>, - TSqlValue<EDataSlot::Json, i64>, - TSqlValue<EDataSlot::Json, double>, - TSqlValue<EDataSlot::Json, bool>, - TSqlValue<EDataSlot::JsonDocument, TUtf8>, - TSqlValue<EDataSlot::JsonDocument, TUtf8, true>, - TSqlValue<EDataSlot::JsonDocument, i64>, - TSqlValue<EDataSlot::JsonDocument, double>, - TSqlValue<EDataSlot::JsonDocument, bool>, - TSqlExists<EDataSlot::Json, false>, - TSqlExists<EDataSlot::Json, true>, - TSqlExists<EDataSlot::JsonDocument, false>, - TSqlExists<EDataSlot::JsonDocument, true>, - TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>, - TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>, - TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>, - TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>, - TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>, - TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>, - TAsJsonNode<TUtf8>, - TAsJsonNode<double>, - TAsJsonNode<bool>, - TAsJsonNode<TJson>) -} +SIMPLE_MODULE(TJson2Module, + TParse, + TSerialize<EDataSlot::Json>, + TSerialize<EDataSlot::JsonDocument>, + TCompilePath, + TSqlValue<EDataSlot::Json, TUtf8>, + TSqlValue<EDataSlot::Json, TUtf8, true>, + TSqlValue<EDataSlot::Json, i64>, + TSqlValue<EDataSlot::Json, double>, + TSqlValue<EDataSlot::Json, bool>, + TSqlValue<EDataSlot::JsonDocument, TUtf8>, + TSqlValue<EDataSlot::JsonDocument, TUtf8, true>, + TSqlValue<EDataSlot::JsonDocument, i64>, + TSqlValue<EDataSlot::JsonDocument, double>, + TSqlValue<EDataSlot::JsonDocument, bool>, + TSqlExists<EDataSlot::Json, false>, + TSqlExists<EDataSlot::Json, true>, + 
TSqlExists<EDataSlot::JsonDocument, false>, + TSqlExists<EDataSlot::JsonDocument, true>, + TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>, + TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>, + TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>, + TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>, + TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>, + TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>, + TAsJsonNode<TUtf8>, + TAsJsonNode<double>, + TAsJsonNode<bool>, + TAsJsonNode<TJson>) +} // namespace NJson2Udf REGISTER_MODULES(NJson2Udf::TJson2Module) diff --git a/yql/essentials/udfs/common/json2/parse.h b/yql/essentials/udfs/common/json2/parse.h index 6df4bce9b0a..72db2106fa5 100644 --- a/yql/essentials/udfs/common/json2/parse.h +++ b/yql/essentials/udfs/common/json2/parse.h @@ -9,58 +9,57 @@ #include <library/cpp/json/json_reader.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NDom; +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; +using namespace NDom; - class TParse: public TBoxedValue { - public: - TParse(TSourcePosition pos) - : Pos_(pos) - { - } +class TParse: public TBoxedValue { +public: + TParse(TSourcePosition pos) + : Pos_(pos) + { + } - static const TStringRef& Name() { - static auto name = TStringRef::Of("Parse"); - return name; - } + static const TStringRef& Name() { + static auto name = TStringRef::Of("Parse"); + return name; + } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } - builder.Args() - ->Add<TAutoMap<TJson>>() - .Done() - .Returns<TJsonNodeResource>(); + 
builder.Args() + ->Add<TAutoMap<TJson>>() + .Done() + .Returns<TJsonNodeResource>(); - if (!typesOnly) { - builder.Implementation(new TParse(builder.GetSourcePosition())); - } - return true; + if (!typesOnly) { + builder.Implementation(new TParse(builder.GetSourcePosition())); } + return true; + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - const auto json = args[0].AsStringRef(); - return TryParseJsonDom(json, valueBuilder); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + const auto json = args[0].AsStringRef(); + return TryParseJsonDom(json, valueBuilder); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - TSourcePosition Pos_; - }; -} - + TSourcePosition Pos_; +}; +} // namespace NJson2Udf diff --git a/yql/essentials/udfs/common/json2/resource.h b/yql/essentials/udfs/common/json2/resource.h index aa65b14818d..5d2cc6e7b0b 100644 --- a/yql/essentials/udfs/common/json2/resource.h +++ b/yql/essentials/udfs/common/json2/resource.h @@ -4,14 +4,13 @@ #include <yql/essentials/minikql/jsonpath/jsonpath.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; - extern const char JSONPATH_RESOURCE_NAME[] = "JsonPath"; - using TJsonPathResource = TBoxedResource<NJsonPath::TJsonPathPtr, JSONPATH_RESOURCE_NAME>; - - extern const char JSON_NODE_RESOURCE_NAME[] = "JsonNode"; - using TJsonNodeResource = TResource<JSON_NODE_RESOURCE_NAME>; -} +extern const char JSONPATH_RESOURCE_NAME[] = "JsonPath"; +using TJsonPathResource = TBoxedResource<NJsonPath::TJsonPathPtr, 
JSONPATH_RESOURCE_NAME>; +extern const char JSON_NODE_RESOURCE_NAME[] = "JsonNode"; +using TJsonNodeResource = TResource<JSON_NODE_RESOURCE_NAME>; +} // namespace NJson2Udf diff --git a/yql/essentials/udfs/common/json2/serialize.h b/yql/essentials/udfs/common/json2/serialize.h index cda95e77f5a..2443259fbee 100644 --- a/yql/essentials/udfs/common/json2/serialize.h +++ b/yql/essentials/udfs/common/json2/serialize.h @@ -9,81 +9,80 @@ #include <yql/essentials/types/binary_json/write.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NDom; - using namespace NBinaryJson; - - template <EDataSlot ResultType> - class TSerialize : public TBoxedValue { - public: - TSerialize(TSourcePosition pos) - : Pos_(pos) - { - } +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; +using namespace NDom; +using namespace NBinaryJson; + +template <EDataSlot ResultType> +class TSerialize: public TBoxedValue { +public: + TSerialize(TSourcePosition pos) + : Pos_(pos) + { + } - static const TStringRef& Name(); + static const TStringRef& Name(); - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; - } + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; + } - TType* resultType = nullptr; - if constexpr (ResultType == EDataSlot::Json) { - resultType = builder.SimpleType<TJson>(); - } else { - resultType = builder.SimpleType<TJsonDocument>(); - } + TType* resultType = nullptr; + if constexpr (ResultType == EDataSlot::Json) { + resultType = builder.SimpleType<TJson>(); + } else { + resultType = builder.SimpleType<TJsonDocument>(); + } - builder.Args() - ->Add<TAutoMap<TJsonNodeResource>>() - .Done() - .Returns(resultType); + 
builder.Args() + ->Add<TAutoMap<TJsonNodeResource>>() + .Done() + .Returns(resultType); - if (!typesOnly) { - builder.Implementation(new TSerialize(builder.GetSourcePosition())); - } - return true; + if (!typesOnly) { + builder.Implementation(new TSerialize(builder.GetSourcePosition())); } + return true; + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - try { - const TUnboxedValue& jsonDom = args[0]; +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + try { + const TUnboxedValue& jsonDom = args[0]; - if constexpr (ResultType == EDataSlot::Json) { - return valueBuilder->NewString(SerializeJsonDom(jsonDom)); - } else { - const auto binaryJson = SerializeToBinaryJson(jsonDom); - return valueBuilder->NewString(TStringBuf(binaryJson.Data(), binaryJson.Size())); - } - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + if constexpr (ResultType == EDataSlot::Json) { + return valueBuilder->NewString(SerializeJsonDom(jsonDom)); + } else { + const auto binaryJson = SerializeToBinaryJson(jsonDom); + return valueBuilder->NewString(TStringBuf(binaryJson.Data(), binaryJson.Size())); } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - - TSourcePosition Pos_; - }; - - template <> - const TStringRef& TSerialize<EDataSlot::Json>::Name() { - static auto name = TStringRef::Of("Serialize"); - return name; } - template <> - const TStringRef& TSerialize<EDataSlot::JsonDocument>::Name() { - static auto name = TStringRef::Of("SerializeToJsonDocument"); - return name; - } + TSourcePosition Pos_; +}; + +template <> +const TStringRef& TSerialize<EDataSlot::Json>::Name() { + static auto name = TStringRef::Of("Serialize"); + return name; } +template <> +const TStringRef& TSerialize<EDataSlot::JsonDocument>::Name() { + static auto name = 
TStringRef::Of("SerializeToJsonDocument"); + return name; +} +} // namespace NJson2Udf diff --git a/yql/essentials/udfs/common/json2/sql_exists.h b/yql/essentials/udfs/common/json2/sql_exists.h index cb89f20ec21..955c1b1ce7f 100644 --- a/yql/essentials/udfs/common/json2/sql_exists.h +++ b/yql/essentials/udfs/common/json2/sql_exists.h @@ -10,126 +10,125 @@ #include <util/generic/yexception.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NJsonPath; - - template <EDataSlot InputType, bool ThrowException> - class TSqlExists: public TBoxedValue { - public: - explicit TSqlExists(TSourcePosition pos) - : Pos_(pos) - { +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; +using namespace NJsonPath; + +template <EDataSlot InputType, bool ThrowException> +class TSqlExists: public TBoxedValue { +public: + explicit TSqlExists(TSourcePosition pos) + : Pos_(pos) + { + } + + static TStringRef Name(); + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; } - static TStringRef Name(); + auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); + TType* inputType = nullptr; + if constexpr (InputType == EDataSlot::JsonDocument) { + inputType = builder.SimpleType<TJsonDocument>(); + } else { + inputType = jsonType; + } + auto inputOptionalType = builder.Optional()->Item(inputType).Build(); + auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); + auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build(); + auto optionalBoolType = builder.Optional()->Item<bool>().Build(); + + if constexpr (ThrowException) { + builder.Args() + ->Add(inputOptionalType) + .Add(jsonPathType) + .Add(dictType) + .Done() + .Returns(optionalBoolType); + } else { + builder.Args() + ->Add(inputOptionalType) + .Add(jsonPathType) + .Add(dictType) + 
.Add(optionalBoolType) + .Done() + .Returns(optionalBoolType); + } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; + if (!typesOnly) { + builder.Implementation(new TSqlExists(builder.GetSourcePosition())); + } + if constexpr (!ThrowException) { + builder.IsStrict(); + } + return true; + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + if (!args[0].HasValue()) { + return TUnboxedValuePod(); } - auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); - TType* inputType = nullptr; + TValue jsonDom; if constexpr (InputType == EDataSlot::JsonDocument) { - inputType = builder.SimpleType<TJsonDocument>(); - } else { - inputType = jsonType; - } - auto inputOptionalType = builder.Optional()->Item(inputType).Build(); - auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); - auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build(); - auto optionalBoolType = builder.Optional()->Item<bool>().Build(); - - if constexpr (ThrowException) { - builder.Args() - ->Add(inputOptionalType) - .Add(jsonPathType) - .Add(dictType) - .Done() - .Returns(optionalBoolType); + jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); } else { - builder.Args() - ->Add(inputOptionalType) - .Add(jsonPathType) - .Add(dictType) - .Add(optionalBoolType) - .Done() - .Returns(optionalBoolType); + jsonDom = TValue(args[0]); } - if (!typesOnly) { - builder.Implementation(new TSqlExists(builder.GetSourcePosition())); - } - if constexpr (!ThrowException) { - builder.IsStrict(); - } - return true; - } + auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); + const auto& jsonPath = *jsonPathResource->Get(); + const auto variables = DictToVariables(args[2]); - private: - 
TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - if (!args[0].HasValue()) { - return TUnboxedValuePod(); - } - - TValue jsonDom; - if constexpr (InputType == EDataSlot::JsonDocument) { - jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); + const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); + if (result.IsError()) { + if constexpr (ThrowException) { + ythrow yexception() << "Error executing jsonpath:" << Endl << result.GetError() << Endl; } else { - jsonDom = TValue(args[0]); - } - - auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); - const auto& jsonPath = *jsonPathResource->Get(); - const auto variables = DictToVariables(args[2]); - - const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); - if (result.IsError()) { - if constexpr (ThrowException) { - ythrow yexception() << "Error executing jsonpath:" << Endl << result.GetError() << Endl; - } else { - return args[3]; - } + return args[3]; } - - return TUnboxedValuePod(!result.GetNodes().empty()); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - } - - TSourcePosition Pos_; - }; - template <> - TStringRef TSqlExists<EDataSlot::Json, false>::Name() { - return "SqlExists"; + return TUnboxedValuePod(!result.GetNodes().empty()); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } } - template <> - TStringRef TSqlExists<EDataSlot::Json, true>::Name() { - return "SqlTryExists"; - } + TSourcePosition Pos_; +}; - template <> - TStringRef TSqlExists<EDataSlot::JsonDocument, false>::Name() { - return "JsonDocumentSqlExists"; - } +template <> +TStringRef TSqlExists<EDataSlot::Json, false>::Name() { + return "SqlExists"; +} - template <> - TStringRef TSqlExists<EDataSlot::JsonDocument, 
true>::Name() { - return "JsonDocumentSqlTryExists"; - } +template <> +TStringRef TSqlExists<EDataSlot::Json, true>::Name() { + return "SqlTryExists"; } +template <> +TStringRef TSqlExists<EDataSlot::JsonDocument, false>::Name() { + return "JsonDocumentSqlExists"; +} + +template <> +TStringRef TSqlExists<EDataSlot::JsonDocument, true>::Name() { + return "JsonDocumentSqlTryExists"; +} +} // namespace NJson2Udf diff --git a/yql/essentials/udfs/common/json2/sql_query.h b/yql/essentials/udfs/common/json2/sql_query.h index 1c2d610f923..00bf6fb2d74 100644 --- a/yql/essentials/udfs/common/json2/sql_query.h +++ b/yql/essentials/udfs/common/json2/sql_query.h @@ -12,173 +12,172 @@ #include <util/generic/yexception.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NDom; - using namespace NJsonPath; - - template <EDataSlot InputType, EJsonQueryWrap Mode> - class TSqlQuery: public TBoxedValue { - public: - explicit TSqlQuery(TSourcePosition pos) - : Pos_(pos) - { +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; +using namespace NDom; +using namespace NJsonPath; + +template <EDataSlot InputType, EJsonQueryWrap Mode> +class TSqlQuery: public TBoxedValue { +public: + explicit TSqlQuery(TSourcePosition pos) + : Pos_(pos) + { + } + + static TStringRef Name(); + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; } - static TStringRef Name(); + auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); + auto optionalJsonType = builder.Optional()->Item(jsonType).Build(); + TType* inputType = nullptr; + if constexpr (InputType == EDataSlot::JsonDocument) { + inputType = builder.SimpleType<TJsonDocument>(); + } else { + inputType = jsonType; + } + auto inputOptionalType = builder.Optional()->Item(inputType).Build(); + auto jsonPathType = 
builder.Resource(JSONPATH_RESOURCE_NAME); + auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build(); + + /* + Arguments: + 0. Resource<JsonNode>? or JsonDocument?. Input json + 1. Resource<JsonPath>. Jsonpath to execute on json + 2. Dict<TUtf8, Resource<JsonNode>>. Variables to pass into jsonpath + 3. Bool. True - throw on empty result, false otherwise + 4. Resource<JsonNode>?. Default value to return on empty result. Ignored if 2d argument is true + 5. Bool. True - throw on error, false - otherwise + 6. Resource<JsonNode>?. Default value to return on error. Ignored if 4th argument is true + */ + // we can't mark TSqlQuery as strict due to runtime throw policy setting + // TODO: optimizer can mark SqlQuery as strict if 3th/5th arguments are literal booleans + builder.Args() + ->Add(inputOptionalType) + .Add(jsonPathType) + .Add(dictType) + .Add<bool>() + .Add(optionalJsonType) + .Add<bool>() + .Add(optionalJsonType) + .Done() + .Returns(optionalJsonType); + + if (!typesOnly) { + builder.Implementation(new TSqlQuery(builder.GetSourcePosition())); + } + return true; + } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + Y_UNUSED(valueBuilder); + try { + if (!args[0].HasValue()) { + return TUnboxedValuePod(); } - auto jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); - auto optionalJsonType = builder.Optional()->Item(jsonType).Build(); - TType* inputType = nullptr; + TValue jsonDom; if constexpr (InputType == EDataSlot::JsonDocument) { - inputType = builder.SimpleType<TJsonDocument>(); + jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); } else { - inputType = jsonType; - } - auto inputOptionalType = builder.Optional()->Item(inputType).Build(); - auto 
jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); - auto dictType = builder.Dict()->Key<TUtf8>().Value(jsonType).Build(); - - /* - Arguments: - 0. Resource<JsonNode>? or JsonDocument?. Input json - 1. Resource<JsonPath>. Jsonpath to execute on json - 2. Dict<TUtf8, Resource<JsonNode>>. Variables to pass into jsonpath - 3. Bool. True - throw on empty result, false otherwise - 4. Resource<JsonNode>?. Default value to return on empty result. Ignored if 2d argument is true - 5. Bool. True - throw on error, false - otherwise - 6. Resource<JsonNode>?. Default value to return on error. Ignored if 4th argument is true - */ - // we can't mark TSqlQuery as strict due to runtime throw policy setting - // TODO: optimizer can mark SqlQuery as strict if 3th/5th arguments are literal booleans - builder.Args() - ->Add(inputOptionalType) - .Add(jsonPathType) - .Add(dictType) - .Add<bool>() - .Add(optionalJsonType) - .Add<bool>() - .Add(optionalJsonType) - .Done() - .Returns(optionalJsonType); - - if (!typesOnly) { - builder.Implementation(new TSqlQuery(builder.GetSourcePosition())); + jsonDom = TValue(args[0]); } - return true; - } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - Y_UNUSED(valueBuilder); - try { - if (!args[0].HasValue()) { - return TUnboxedValuePod(); - } + auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); + const auto& jsonPath = *jsonPathResource->Get(); - TValue jsonDom; - if constexpr (InputType == EDataSlot::JsonDocument) { - jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); - } else { - jsonDom = TValue(args[0]); - } - - auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); - const auto& jsonPath = *jsonPathResource->Get(); - - const bool throwOnEmpty = args[3].Get<bool>(); - const auto emptyDefault = args[4]; - const bool throwOnError = args[5].Get<bool>(); - const auto 
errorDefault = args[6]; - const auto variables = DictToVariables(args[2]); - - auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); + const bool throwOnEmpty = args[3].Get<bool>(); + const auto emptyDefault = args[4]; + const bool throwOnError = args[5].Get<bool>(); + const auto errorDefault = args[6]; + const auto variables = DictToVariables(args[2]); - const auto handleCase = [](TStringBuf message, bool throws, const TUnboxedValuePod& caseDefault) { - if (throws) { - ythrow yexception() << message; - } - return caseDefault; - }; + auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); - if (result.IsError()) { - return handleCase(TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl, throwOnError, errorDefault); + const auto handleCase = [](TStringBuf message, bool throws, const TUnboxedValuePod& caseDefault) { + if (throws) { + ythrow yexception() << message; } + return caseDefault; + }; - auto& nodes = result.GetNodes(); - const bool isSingleStruct = nodes.size() == 1 && (nodes[0].Is(EValueType::Array) || nodes[0].Is(EValueType::Object)); - if (Mode == EJsonQueryWrap::Wrap || (Mode == EJsonQueryWrap::ConditionalWrap && !isSingleStruct)) { - TVector<TUnboxedValue> converted; - converted.reserve(nodes.size()); - for (auto& node : nodes) { - converted.push_back(node.ConvertToUnboxedValue(valueBuilder)); - } - return MakeList(converted.data(), converted.size(), valueBuilder); - } - - if (nodes.empty()) { - return handleCase("Empty result", throwOnEmpty, emptyDefault); - } + if (result.IsError()) { + return handleCase(TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl, throwOnError, errorDefault); + } - // No wrapping is applicable and result is not empty. 
Result must be a single object or array - if (nodes.size() > 1) { - return handleCase("Result consists of multiple items", throwOnError, errorDefault); + auto& nodes = result.GetNodes(); + const bool isSingleStruct = nodes.size() == 1 && (nodes[0].Is(EValueType::Array) || nodes[0].Is(EValueType::Object)); + if (Mode == EJsonQueryWrap::Wrap || (Mode == EJsonQueryWrap::ConditionalWrap && !isSingleStruct)) { + TVector<TUnboxedValue> converted; + converted.reserve(nodes.size()); + for (auto& node : nodes) { + converted.push_back(node.ConvertToUnboxedValue(valueBuilder)); } + return MakeList(converted.data(), converted.size(), valueBuilder); + } - if (!nodes[0].Is(EValueType::Array) && !nodes[0].Is(EValueType::Object)) { - return handleCase("Result is neither object nor array", throwOnError, errorDefault); - } + if (nodes.empty()) { + return handleCase("Empty result", throwOnEmpty, emptyDefault); + } - return nodes[0].ConvertToUnboxedValue(valueBuilder); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + // No wrapping is applicable and result is not empty. 
Result must be a single object or array + if (nodes.size() > 1) { + return handleCase("Result consists of multiple items", throwOnError, errorDefault); } - } - TSourcePosition Pos_; - }; + if (!nodes[0].Is(EValueType::Array) && !nodes[0].Is(EValueType::Object)) { + return handleCase("Result is neither object nor array", throwOnError, errorDefault); + } - template <> - TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>::Name() { - return "SqlQuery"; + return nodes[0].ConvertToUnboxedValue(valueBuilder); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } } - template <> - TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>::Name() { - return "SqlQueryWrap"; - } + TSourcePosition Pos_; +}; - template <> - TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>::Name() { - return "SqlQueryConditionalWrap"; - } +template <> +TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::NoWrap>::Name() { + return "SqlQuery"; +} - template <> - TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>::Name() { - return "JsonDocumentSqlQuery"; - } +template <> +TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::Wrap>::Name() { + return "SqlQueryWrap"; +} - template <> - TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>::Name() { - return "JsonDocumentSqlQueryWrap"; - } +template <> +TStringRef TSqlQuery<EDataSlot::Json, EJsonQueryWrap::ConditionalWrap>::Name() { + return "SqlQueryConditionalWrap"; +} - template <> - TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>::Name() { - return "JsonDocumentSqlQueryConditionalWrap"; - } +template <> +TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::NoWrap>::Name() { + return "JsonDocumentSqlQuery"; +} + +template <> +TStringRef TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::Wrap>::Name() { + return "JsonDocumentSqlQueryWrap"; } +template <> +TStringRef 
TSqlQuery<EDataSlot::JsonDocument, EJsonQueryWrap::ConditionalWrap>::Name() { + return "JsonDocumentSqlQueryConditionalWrap"; +} +} // namespace NJson2Udf diff --git a/yql/essentials/udfs/common/json2/sql_value.h b/yql/essentials/udfs/common/json2/sql_value.h index 53b451c6275..525d1296a6b 100644 --- a/yql/essentials/udfs/common/json2/sql_value.h +++ b/yql/essentials/udfs/common/json2/sql_value.h @@ -15,282 +15,282 @@ #include <util/string/cast.h> namespace NJson2Udf { - using namespace NKikimr; - using namespace NUdf; - using namespace NYql; - using namespace NDom; - using namespace NJsonPath; - - namespace { - template <class TValueType, bool ForceConvert = false> - TUnboxedValue TryConvertJson(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - Y_UNUSED(source); - Y_ABORT("Unsupported type"); - } +using namespace NKikimr; +using namespace NUdf; +using namespace NYql; +using namespace NDom; +using namespace NJsonPath; + +namespace { +template <class TValueType, bool ForceConvert = false> +TUnboxedValue TryConvertJson(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + Y_UNUSED(source); + Y_ABORT("Unsupported type"); +} - template <> - TUnboxedValue TryConvertJson<TUtf8>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - if (IsNodeType(source, ENodeType::String)) { - return source; - } +template <> +TUnboxedValue TryConvertJson<TUtf8>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (IsNodeType(source, ENodeType::String)) { + return source; + } + return {}; +} + +template <> +TUnboxedValue TryConvertJson<TUtf8, true>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + switch (GetNodeType(source)) { + case ENodeType::String: + return source; + case ENodeType::Uint64: + return valueBuilder->NewString(ToString(source.Get<ui64>())).Release(); + case ENodeType::Int64: + return 
valueBuilder->NewString(ToString(source.Get<i64>())).Release(); + case ENodeType::Bool: + return source.Get<bool>() ? TUnboxedValuePod::Embedded("true") : TUnboxedValuePod::Embedded("false"); + case ENodeType::Double: + return valueBuilder->NewString(ToString(source.Get<double>())).Release(); + case ENodeType::Entity: + return TUnboxedValuePod::Embedded("null"); + case ENodeType::List: + case ENodeType::Dict: + case ENodeType::Attr: return {}; - } + } +} - template <> - TUnboxedValue TryConvertJson<TUtf8, true>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - switch (GetNodeType(source)) { - case ENodeType::String: - return source; - case ENodeType::Uint64: - return valueBuilder->NewString(ToString(source.Get<ui64>())).Release(); - case ENodeType::Int64: - return valueBuilder->NewString(ToString(source.Get<i64>())).Release(); - case ENodeType::Bool: - return source.Get<bool>() ? TUnboxedValuePod::Embedded("true") : TUnboxedValuePod::Embedded("false"); - case ENodeType::Double: - return valueBuilder->NewString(ToString(source.Get<double>())).Release(); - case ENodeType::Entity: - return TUnboxedValuePod::Embedded("null"); - case ENodeType::List: - case ENodeType::Dict: - case ENodeType::Attr: - return {}; - } - } +template <> +TUnboxedValue TryConvertJson<i64>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (!source.IsEmbedded()) { + return {}; + } - template <> - TUnboxedValue TryConvertJson<i64>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - if (!source.IsEmbedded()) { - return {}; - } + if (IsNodeType(source, ENodeType::Int64)) { + return TUnboxedValuePod(source.Get<i64>()); + } else if (IsNodeType(source, ENodeType::Uint64) && source.Get<ui64>() < Max<i64>()) { + return TUnboxedValuePod(static_cast<i64>(source.Get<ui64>())); + } else if (IsNodeType(source, ENodeType::Double) && static_cast<i64>(source.Get<double>()) == source.Get<double>()) { + 
return TUnboxedValuePod(static_cast<i64>(source.Get<double>())); + } - if (IsNodeType(source, ENodeType::Int64)) { - return TUnboxedValuePod(source.Get<i64>()); - } else if (IsNodeType(source, ENodeType::Uint64) && source.Get<ui64>() < Max<i64>()) { - return TUnboxedValuePod(static_cast<i64>(source.Get<ui64>())); - } else if (IsNodeType(source, ENodeType::Double) && static_cast<i64>(source.Get<double>()) == source.Get<double>()) { - return TUnboxedValuePod(static_cast<i64>(source.Get<double>())); - } + return {}; +} - return {}; - } +template <> +TUnboxedValue TryConvertJson<double>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (!source.IsEmbedded()) { + return {}; + } - template <> - TUnboxedValue TryConvertJson<double>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - if (!source.IsEmbedded()) { - return {}; - } + if (IsNodeType(source, ENodeType::Double)) { + return TUnboxedValuePod(source.Get<double>()); + } else if (IsNodeType(source, ENodeType::Int64)) { + return TUnboxedValuePod(static_cast<double>(source.Get<i64>())); + } else if (IsNodeType(source, ENodeType::Uint64)) { + return TUnboxedValuePod(static_cast<double>(source.Get<ui64>())); + } - if (IsNodeType(source, ENodeType::Double)) { - return TUnboxedValuePod(source.Get<double>()); - } else if (IsNodeType(source, ENodeType::Int64)) { - return TUnboxedValuePod(static_cast<double>(source.Get<i64>())); - } else if (IsNodeType(source, ENodeType::Uint64)) { - return TUnboxedValuePod(static_cast<double>(source.Get<ui64>())); - } + return {}; +} - return {}; +template <> +TUnboxedValue TryConvertJson<bool>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { + Y_UNUSED(valueBuilder); + if (!source.IsEmbedded() || !IsNodeType(source, ENodeType::Bool)) { + return {}; + } + return {TUnboxedValuePod(source.Get<bool>())}; +} +} // namespace + +template <EDataSlot InputType, class TValueType, bool ForceConvert 
= false> +class TSqlValue: public TBoxedValue { +public: + enum class TErrorCode: ui8 { + Empty = 0, + Error = 1 + }; + + TSqlValue(TSourcePosition pos) + : Pos_(pos) + { + } + + static TStringRef Name(); + + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (name != Name()) { + return false; } - template <> - TUnboxedValue TryConvertJson<bool>(const IValueBuilder* valueBuilder, const TUnboxedValue& source) { - Y_UNUSED(valueBuilder); - if (!source.IsEmbedded() || !IsNodeType(source, ENodeType::Bool)) { - return {}; - } - return {TUnboxedValuePod(source.Get<bool>())}; + auto optionalValueType = builder.Optional()->Item<TValueType>().Build(); + auto errorTupleType = builder.Tuple(2)->Add<ui8>().Add<char*>().Build(); + auto returnTypeTuple = builder.Tuple(2) + ->Add(errorTupleType) + .Add(optionalValueType) + .Build(); + auto returnType = builder.Variant()->Over(returnTypeTuple).Build(); + + TType* jsonType = nullptr; + if constexpr (InputType == EDataSlot::Json) { + jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); + } else { + jsonType = builder.SimpleType<TJsonDocument>(); } - } + auto optionalJsonType = builder.Optional()->Item(jsonType).Build(); + auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); + auto dictType = builder.Dict()->Key<TUtf8>().Value(builder.Resource(JSON_NODE_RESOURCE_NAME)).Build(); + + builder.Args() + ->Add(optionalJsonType) + .Add(jsonPathType) + .Add(dictType) + .Done() + .Returns(returnType); - template <EDataSlot InputType, class TValueType, bool ForceConvert = false> - class TSqlValue: public TBoxedValue { - public: - enum class TErrorCode : ui8 { - Empty = 0, - Error = 1 - }; - - TSqlValue(TSourcePosition pos) - : Pos_(pos) - { + builder.IsStrict(); + + if (!typesOnly) { + builder.Implementation(new TSqlValue(builder.GetSourcePosition())); } + return true; + } - static TStringRef Name(); +private: + TUnboxedValue 
BuildErrorResult(const IValueBuilder* valueBuilder, TErrorCode code, const TStringBuf message) const { + TUnboxedValue* items = nullptr; + auto errorTuple = valueBuilder->NewArray(2, items); + items[0] = TUnboxedValuePod(static_cast<ui8>(code)); + items[1] = valueBuilder->NewString(message); + return valueBuilder->NewVariant(0, std::move(errorTuple)); + } + + TUnboxedValue BuildSuccessfulResult(const IValueBuilder* valueBuilder, TUnboxedValue&& value) const { + return valueBuilder->NewVariant(1, std::move(value)); + } - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (name != Name()) { - return false; + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + try { + if (!args[0].HasValue()) { + return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod()); } - auto optionalValueType = builder.Optional()->Item<TValueType>().Build(); - auto errorTupleType = builder.Tuple(2)->Add<ui8>().Add<char*>().Build(); - auto returnTypeTuple = builder.Tuple(2) - ->Add(errorTupleType) - .Add(optionalValueType) - .Build(); - auto returnType = builder.Variant()->Over(returnTypeTuple).Build(); - - TType* jsonType = nullptr; - if constexpr (InputType == EDataSlot::Json) { - jsonType = builder.Resource(JSON_NODE_RESOURCE_NAME); + TValue jsonDom; + if constexpr (InputType == EDataSlot::JsonDocument) { + jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); } else { - jsonType = builder.SimpleType<TJsonDocument>(); + jsonDom = TValue(args[0]); } - auto optionalJsonType = builder.Optional()->Item(jsonType).Build(); - auto jsonPathType = builder.Resource(JSONPATH_RESOURCE_NAME); - auto dictType = builder.Dict()->Key<TUtf8>().Value(builder.Resource(JSON_NODE_RESOURCE_NAME)).Build(); - builder.Args() - ->Add(optionalJsonType) - .Add(jsonPathType) - .Add(dictType) - .Done() - 
.Returns(returnType); + auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); + const auto& jsonPath = *jsonPathResource->Get(); + const auto variables = DictToVariables(args[2]); - builder.IsStrict(); + const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); - if (!typesOnly) { - builder.Implementation(new TSqlValue(builder.GetSourcePosition())); + if (result.IsError()) { + return BuildErrorResult(valueBuilder, TErrorCode::Error, TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl); } - return true; - } - private: - TUnboxedValue BuildErrorResult(const IValueBuilder* valueBuilder, TErrorCode code, const TStringBuf message) const { - TUnboxedValue* items = nullptr; - auto errorTuple = valueBuilder->NewArray(2, items); - items[0] = TUnboxedValuePod(static_cast<ui8>(code)); - items[1] = valueBuilder->NewString(message); - return valueBuilder->NewVariant(0, std::move(errorTuple)); - } + const auto& nodes = result.GetNodes(); + if (nodes.empty()) { + return BuildErrorResult(valueBuilder, TErrorCode::Empty, "Result is empty"); + } - TUnboxedValue BuildSuccessfulResult(const IValueBuilder* valueBuilder, TUnboxedValue&& value) const { - return valueBuilder->NewVariant(1, std::move(value)); - } + if (nodes.size() > 1) { + return BuildErrorResult(valueBuilder, TErrorCode::Error, "Result consists of multiple items"); + } - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - try { - if (!args[0].HasValue()) { - return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod()); - } - - TValue jsonDom; - if constexpr (InputType == EDataSlot::JsonDocument) { - jsonDom = TValue(NBinaryJson::TBinaryJsonReader::Make(args[0].AsStringRef())->GetRootCursor()); - } else { - jsonDom = TValue(args[0]); - } - - auto* jsonPathResource = static_cast<TJsonPathResource*>(args[1].AsBoxed().Get()); - const auto& jsonPath = *jsonPathResource->Get(); - const auto 
variables = DictToVariables(args[2]); - - const auto result = ExecuteJsonPath(jsonPath, jsonDom, variables, valueBuilder); - - if (result.IsError()) { - return BuildErrorResult(valueBuilder, TErrorCode::Error, TStringBuilder() << "Error executing jsonpath:" << Endl << result.GetError() << Endl); - } - - const auto& nodes = result.GetNodes(); - if (nodes.empty()) { - return BuildErrorResult(valueBuilder, TErrorCode::Empty, "Result is empty"); - } - - if (nodes.size() > 1) { - return BuildErrorResult(valueBuilder, TErrorCode::Error, "Result consists of multiple items"); - } - - const auto& value = nodes[0]; - if (value.Is(EValueType::Array) || value.Is(EValueType::Object)) { - // SqlValue can return only scalar values - return BuildErrorResult(valueBuilder, TErrorCode::Error, "Extracted JSON value is either object or array"); - } - - if (value.Is(EValueType::Null)) { - // JSON nulls must be converted to SQL nulls - return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod()); - } - - const auto source = value.ConvertToUnboxedValue(valueBuilder); - TUnboxedValue convertedValue = TryConvertJson<TValueType, ForceConvert>(valueBuilder, source); - if (!convertedValue) { - // error while converting JSON value type to TValueType - return BuildErrorResult(valueBuilder, TErrorCode::Error, "Cannot convert extracted JSON value to target type"); - } - - return BuildSuccessfulResult(valueBuilder, std::move(convertedValue)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + const auto& value = nodes[0]; + if (value.Is(EValueType::Array) || value.Is(EValueType::Object)) { + // SqlValue can return only scalar values + return BuildErrorResult(valueBuilder, TErrorCode::Error, "Extracted JSON value is either object or array"); } - } - TSourcePosition Pos_; - }; + if (value.Is(EValueType::Null)) { + // JSON nulls must be converted to SQL nulls + return BuildSuccessfulResult(valueBuilder, TUnboxedValuePod()); + } - template 
<EDataSlot InputType, class TValueType, bool ForceConvert> - TStringRef TSqlValue<InputType, TValueType, ForceConvert>::Name() { - Y_ABORT("Unknown name"); - } + const auto source = value.ConvertToUnboxedValue(valueBuilder); + TUnboxedValue convertedValue = TryConvertJson<TValueType, ForceConvert>(valueBuilder, source); + if (!convertedValue) { + // error while converting JSON value type to TValueType + return BuildErrorResult(valueBuilder, TErrorCode::Error, "Cannot convert extracted JSON value to target type"); + } - template<> - TStringRef TSqlValue<EDataSlot::Json, TUtf8, true>::Name() { - return TStringRef::Of("SqlValueConvertToUtf8"); + return BuildSuccessfulResult(valueBuilder, std::move(convertedValue)); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } } - template <> - TStringRef TSqlValue<EDataSlot::Json, TUtf8>::Name() { - return TStringRef::Of("SqlValueUtf8"); - } + TSourcePosition Pos_; +}; - template <> - TStringRef TSqlValue<EDataSlot::Json, i64>::Name() { - return TStringRef::Of("SqlValueInt64"); - } +template <EDataSlot InputType, class TValueType, bool ForceConvert> +TStringRef TSqlValue<InputType, TValueType, ForceConvert>::Name() { + Y_ABORT("Unknown name"); +} - template <> - TStringRef TSqlValue<EDataSlot::Json, double>::Name() { - return TStringRef::Of("SqlValueNumber"); - } +template <> +TStringRef TSqlValue<EDataSlot::Json, TUtf8, true>::Name() { + return TStringRef::Of("SqlValueConvertToUtf8"); +} - template <> - TStringRef TSqlValue<EDataSlot::Json, bool>::Name() { - return TStringRef::Of("SqlValueBool"); - } +template <> +TStringRef TSqlValue<EDataSlot::Json, TUtf8>::Name() { + return TStringRef::Of("SqlValueUtf8"); +} - template<> - TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8, true>::Name() { - return TStringRef::Of("JsonDocumentSqlValueConvertToUtf8"); - } +template <> +TStringRef TSqlValue<EDataSlot::Json, i64>::Name() { + return TStringRef::Of("SqlValueInt64"); 
+} - template <> - TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8>::Name() { - return TStringRef::Of("JsonDocumentSqlValueUtf8"); - } +template <> +TStringRef TSqlValue<EDataSlot::Json, double>::Name() { + return TStringRef::Of("SqlValueNumber"); +} - template <> - TStringRef TSqlValue<EDataSlot::JsonDocument, i64>::Name() { - return TStringRef::Of("JsonDocumentSqlValueInt64"); - } +template <> +TStringRef TSqlValue<EDataSlot::Json, bool>::Name() { + return TStringRef::Of("SqlValueBool"); +} - template <> - TStringRef TSqlValue<EDataSlot::JsonDocument, double>::Name() { - return TStringRef::Of("JsonDocumentSqlValueNumber"); - } +template <> +TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8, true>::Name() { + return TStringRef::Of("JsonDocumentSqlValueConvertToUtf8"); +} - template <> - TStringRef TSqlValue<EDataSlot::JsonDocument, bool>::Name() { - return TStringRef::Of("JsonDocumentSqlValueBool"); - } +template <> +TStringRef TSqlValue<EDataSlot::JsonDocument, TUtf8>::Name() { + return TStringRef::Of("JsonDocumentSqlValueUtf8"); +} +template <> +TStringRef TSqlValue<EDataSlot::JsonDocument, i64>::Name() { + return TStringRef::Of("JsonDocumentSqlValueInt64"); } + +template <> +TStringRef TSqlValue<EDataSlot::JsonDocument, double>::Name() { + return TStringRef::Of("JsonDocumentSqlValueNumber"); +} + +template <> +TStringRef TSqlValue<EDataSlot::JsonDocument, bool>::Name() { + return TStringRef::Of("JsonDocumentSqlValueBool"); +} + +} // namespace NJson2Udf diff --git a/yql/essentials/udfs/common/json2/ya.make b/yql/essentials/udfs/common/json2/ya.make index 52289125941..fa5e47018ce 100644 --- a/yql/essentials/udfs/common/json2/ya.make +++ b/yql/essentials/udfs/common/json2/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(json2_udf) 28 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( json2_udf.cpp diff --git a/yql/essentials/udfs/common/math/lib/erfinv.cpp b/yql/essentials/udfs/common/math/lib/erfinv.cpp index def902860c3..c7626b8faf0 100644 --- 
a/yql/essentials/udfs/common/math/lib/erfinv.cpp +++ b/yql/essentials/udfs/common/math/lib/erfinv.cpp @@ -8,8 +8,8 @@ template <size_t N> static double PolEval(double x, const std::array<double, N>& coef) { static_assert(N > 0, "Array coef[] should not be empty."); return std::accumulate(coef.crbegin() + 1, coef.crend(), coef[N - 1], - [x] (auto init, auto cur) { - return std::move(init) * x + cur; + [x](auto init, auto cur) { + return std::move(init) * x + cur; }); } @@ -111,4 +111,4 @@ double ErfInv(double x) { return ans * sign; } -} +} // namespace NMathUdf diff --git a/yql/essentials/udfs/common/math/lib/erfinv.h b/yql/essentials/udfs/common/math/lib/erfinv.h index 1ced5a07e65..23c129e6572 100644 --- a/yql/essentials/udfs/common/math/lib/erfinv.h +++ b/yql/essentials/udfs/common/math/lib/erfinv.h @@ -4,4 +4,4 @@ namespace NMathUdf { double ErfInv(double x); -} +} // namespace NMathUdf diff --git a/yql/essentials/udfs/common/math/lib/round.h b/yql/essentials/udfs/common/math/lib/round.h index f59700da88f..815c3f6173a 100644 --- a/yql/essentials/udfs/common/math/lib/round.h +++ b/yql/essentials/udfs/common/math/lib/round.h @@ -55,23 +55,23 @@ inline std::optional<i64> NearbyIntImpl(double value, decltype(FE_DOWNWARD) mode if (res < double(std::numeric_limits<i64>::min() + 513) || res > double(std::numeric_limits<i64>::max() - 512)) { return {}; } - + return static_cast<i64>(res); } inline std::optional<i64> NearbyInt(double value, ui32 mode) { switch (mode) { - case 0: - return NearbyIntImpl(value, FE_DOWNWARD); - case 1: - return NearbyIntImpl(value, FE_TONEAREST); - case 2: - return NearbyIntImpl(value, FE_TOWARDZERO); - case 3: - return NearbyIntImpl(value, FE_UPWARD); - default: - return {}; + case 0: + return NearbyIntImpl(value, FE_DOWNWARD); + case 1: + return NearbyIntImpl(value, FE_TONEAREST); + case 2: + return NearbyIntImpl(value, FE_TOWARDZERO); + case 3: + return NearbyIntImpl(value, FE_UPWARD); + default: + return {}; } } -} +} // namespace 
NMathUdf diff --git a/yql/essentials/udfs/common/math/lib/round_ut.cpp b/yql/essentials/udfs/common/math/lib/round_ut.cpp index 4d0e96e4dc3..d791086fa8c 100644 --- a/yql/essentials/udfs/common/math/lib/round_ut.cpp +++ b/yql/essentials/udfs/common/math/lib/round_ut.cpp @@ -7,64 +7,64 @@ using namespace NMathUdf; Y_UNIT_TEST_SUITE(TRound) { - Y_UNIT_TEST(Basic) { - double value = 1930.0 / 3361.0; - double result = RoundToDecimal<long double>(value, -3); - double answer = 0.574; - UNIT_ASSERT_VALUES_EQUAL( - HexEncode(&result, sizeof(double)), - HexEncode(&answer, sizeof(double))); - } +Y_UNIT_TEST(Basic) { + double value = 1930.0 / 3361.0; + double result = RoundToDecimal<long double>(value, -3); + double answer = 0.574; + UNIT_ASSERT_VALUES_EQUAL( + HexEncode(&result, sizeof(double)), + HexEncode(&answer, sizeof(double))); +} - Y_UNIT_TEST(Mod) { - UNIT_ASSERT_VALUES_EQUAL(*Mod(-1, 7), 6); - UNIT_ASSERT_VALUES_EQUAL(*Mod(1, 7), 1); - UNIT_ASSERT_VALUES_EQUAL(*Mod(0, 7), 0); +Y_UNIT_TEST(Mod) { + UNIT_ASSERT_VALUES_EQUAL(*Mod(-1, 7), 6); + UNIT_ASSERT_VALUES_EQUAL(*Mod(1, 7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Mod(0, 7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Mod(-1, -7), -1); - UNIT_ASSERT_VALUES_EQUAL(*Mod(1, -7), -6); - UNIT_ASSERT_VALUES_EQUAL(*Mod(0, -7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Mod(-1, -7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Mod(1, -7), -6); + UNIT_ASSERT_VALUES_EQUAL(*Mod(0, -7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Mod(-15, 7), 6); - UNIT_ASSERT_VALUES_EQUAL(*Mod(15, 7), 1); - UNIT_ASSERT_VALUES_EQUAL(*Mod(14, 7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Mod(-14, 7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Mod(-15, 7), 6); + UNIT_ASSERT_VALUES_EQUAL(*Mod(15, 7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Mod(14, 7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Mod(-14, 7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Mod(-15, -7), -1); - UNIT_ASSERT_VALUES_EQUAL(*Mod(15, -7), -6); - UNIT_ASSERT_VALUES_EQUAL(*Mod(14, -7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Mod(-14, -7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Mod(-15, -7), -1); + 
UNIT_ASSERT_VALUES_EQUAL(*Mod(15, -7), -6); + UNIT_ASSERT_VALUES_EQUAL(*Mod(14, -7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Mod(-14, -7), 0); - UNIT_ASSERT(!Mod(-14, 0)); - } + UNIT_ASSERT(!Mod(-14, 0)); +} - Y_UNIT_TEST(Rem) { - UNIT_ASSERT_VALUES_EQUAL(*Rem(-1, 7), -1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(1, 7), 1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(0, 7), 0); +Y_UNIT_TEST(Rem) { + UNIT_ASSERT_VALUES_EQUAL(*Rem(-1, 7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(1, 7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(0, 7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Rem(-1, -7), -1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(1, -7), 1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(0, -7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Rem(-1, -7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(1, -7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(0, -7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Rem(-15, 7), -1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(15, 7), 1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(14, 7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Rem(-14, 7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Rem(-15, 7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(15, 7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(14, 7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Rem(-14, 7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Rem(-15, -7), -1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(15, -7), 1); - UNIT_ASSERT_VALUES_EQUAL(*Rem(14, -7), 0); - UNIT_ASSERT_VALUES_EQUAL(*Rem(-14, -7), 0); - UNIT_ASSERT(!Rem(-14, 0)); - } + UNIT_ASSERT_VALUES_EQUAL(*Rem(-15, -7), -1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(15, -7), 1); + UNIT_ASSERT_VALUES_EQUAL(*Rem(14, -7), 0); + UNIT_ASSERT_VALUES_EQUAL(*Rem(-14, -7), 0); + UNIT_ASSERT(!Rem(-14, 0)); +} - Y_UNIT_TEST(NearbyInt) { - const i64 maxV = 9223372036854774784ll; - const i64 minV = -9223372036854774784ll; - UNIT_ASSERT_VALUES_EQUAL((i64)(double)(maxV), maxV); - UNIT_ASSERT_VALUES_EQUAL((i64)(double)(minV), minV); +Y_UNIT_TEST(NearbyInt) { + const i64 maxV = 9223372036854774784ll; + const i64 minV = -9223372036854774784ll; + UNIT_ASSERT_VALUES_EQUAL((i64)(double)(maxV), maxV); + UNIT_ASSERT_VALUES_EQUAL((i64)(double)(minV), 
minV); - UNIT_ASSERT_VALUES_UNEQUAL((i64)(double)(maxV + 1), maxV + 1); - } + UNIT_ASSERT_VALUES_UNEQUAL((i64)(double)(maxV + 1), maxV + 1); } +} // Y_UNIT_TEST_SUITE(TRound) diff --git a/yql/essentials/udfs/common/math/lib/ut/ya.make b/yql/essentials/udfs/common/math/lib/ut/ya.make index c1efcde3b47..2461b36ab3d 100644 --- a/yql/essentials/udfs/common/math/lib/ut/ya.make +++ b/yql/essentials/udfs/common/math/lib/ut/ya.make @@ -2,6 +2,8 @@ IF (OS_LINUX) IF (NOT WITH_VALGRIND) UNITTEST_FOR(yql/essentials/udfs/common/math/lib) + ENABLE(YQL_STYLE_CPP) + SRCS( round_ut.cpp ) diff --git a/yql/essentials/udfs/common/math/lib/ya.make b/yql/essentials/udfs/common/math/lib/ya.make index 54b882a8438..3c0faeb996a 100644 --- a/yql/essentials/udfs/common/math/lib/ya.make +++ b/yql/essentials/udfs/common/math/lib/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + SRCS( erfinv.cpp ) diff --git a/yql/essentials/udfs/common/math/math_ir.h b/yql/essentials/udfs/common/math/math_ir.h index ee788ee90ba..965d83424a5 100644 --- a/yql/essentials/udfs/common/math/math_ir.h +++ b/yql/essentials/udfs/common/math/math_ir.h @@ -16,80 +16,75 @@ namespace NUdf { XX(Pi, M_PI) \ XX(E, M_E) \ XX(Eps, std::numeric_limits<double>::epsilon()) \ - XX(RoundDownward, 0) \ - XX(RoundToNearest, 1) \ - XX(RoundTowardZero, 2) \ + XX(RoundDownward, 0) \ + XX(RoundToNearest, 1) \ + XX(RoundTowardZero, 2) \ XX(RoundUpward, 3) -#define SINGLE_ARG_FUNCS(XX) \ - XX(Abs, Abs) \ - XX(Acos, acos) \ - XX(Asin, asin) \ - XX(Asinh, asin) \ - XX(Atan, atan) \ - XX(Cbrt, cbrt) \ - XX(Ceil, ceil) \ - XX(Cos, cos) \ - XX(Cosh, cosh) \ - XX(Erf, Erf) \ - XX(Exp, exp) \ - XX(Exp2, Exp2) \ - XX(Fabs, fabs) \ - XX(Floor, std::floor) \ - XX(Lgamma, LogGamma) \ - XX(Rint, rint) \ - XX(Sin, sin) \ - XX(Sinh, sinh) \ - XX(Sqrt, sqrt) \ - XX(Tan, tan) \ - XX(Tanh, tanh) \ - XX(Tgamma, tgamma) \ - XX(Trunc, trunc) \ - XX(IsFinite, std::isfinite) \ - XX(IsInf, std::isinf) \ +#define SINGLE_ARG_FUNCS(XX) \ + XX(Abs, Abs) \ + 
XX(Acos, acos) \ + XX(Asin, asin) \ + XX(Asinh, asin) \ + XX(Atan, atan) \ + XX(Cbrt, cbrt) \ + XX(Ceil, ceil) \ + XX(Cos, cos) \ + XX(Cosh, cosh) \ + XX(Erf, Erf) \ + XX(Exp, exp) \ + XX(Exp2, Exp2) \ + XX(Fabs, fabs) \ + XX(Floor, std::floor) \ + XX(Lgamma, LogGamma) \ + XX(Rint, rint) \ + XX(Sin, sin) \ + XX(Sinh, sinh) \ + XX(Sqrt, sqrt) \ + XX(Tan, tan) \ + XX(Tanh, tanh) \ + XX(Tgamma, tgamma) \ + XX(Trunc, trunc) \ + XX(IsFinite, std::isfinite) \ + XX(IsInf, std::isinf) \ XX(IsNaN, std::isnan) -#define TWO_ARGS_FUNCS(XX) \ - XX(Atan2, atan2, double) \ - XX(Fmod, fmod, double) \ - XX(Hypot, hypot, double) \ - XX(Remainder, remainder, double) \ - XX(Pow, pow, double) \ +#define TWO_ARGS_FUNCS(XX) \ + XX(Atan2, atan2, double) \ + XX(Fmod, fmod, double) \ + XX(Hypot, hypot, double) \ + XX(Remainder, remainder, double) \ + XX(Pow, pow, double) \ XX(Ldexp, ldexp, int) -#define POSITIVE_SINGLE_ARG_FUNCS(XX) \ - XX(Log, log) \ - XX(Log2, Log2) \ +#define POSITIVE_SINGLE_ARG_FUNCS(XX) \ + XX(Log, log) \ + XX(Log2, Log2) \ XX(Log10, log10) - -#define CONST_IMPL(name, cnst) \ - extern "C" UDF_ALWAYS_INLINE \ - void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* /*args*/) {\ - *result = TUnboxedValuePod(cnst); \ +#define CONST_IMPL(name, cnst) \ + extern "C" UDF_ALWAYS_INLINE void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* /*args*/) { \ + *result = TUnboxedValuePod(cnst); \ } -#define SINGLE_ARG_IMPL(name, func) \ - extern "C" UDF_ALWAYS_INLINE \ - void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \ - *result = TUnboxedValuePod(func(args[0].Get<double>())); \ +#define SINGLE_ARG_IMPL(name, func) \ + extern "C" UDF_ALWAYS_INLINE void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* 
/*valueBuilder*/, const TUnboxedValuePod* args) { \ + *result = TUnboxedValuePod(func(args[0].Get<double>())); \ } -#define TWO_ARGS_IMPL(name, func, secondType) \ - extern "C" UDF_ALWAYS_INLINE \ - void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \ - *result = TUnboxedValuePod(func(args[0].Get<double>(), args[1].Get<secondType>())); \ +#define TWO_ARGS_IMPL(name, func, secondType) \ + extern "C" UDF_ALWAYS_INLINE void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \ + *result = TUnboxedValuePod(func(args[0].Get<double>(), args[1].Get<secondType>())); \ } -#define POSITIVE_SINGLE_ARG_IMPL(name, func) \ - extern "C" UDF_ALWAYS_INLINE \ - void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \ - double input = args[0].Get<double>(); \ - if (input > 0) { \ - *result = TUnboxedValuePod(func(input)); \ - } else { \ - *result = TUnboxedValuePod(static_cast<double>(NAN)); \ - } \ +#define POSITIVE_SINGLE_ARG_IMPL(name, func) \ + extern "C" UDF_ALWAYS_INLINE void name##IR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { \ + double input = args[0].Get<double>(); \ + if (input > 0) { \ + *result = TUnboxedValuePod(func(input)); \ + } else { \ + *result = TUnboxedValuePod(static_cast<double>(NAN)); \ + } \ } CONST_FUNCS(CONST_IMPL) @@ -97,13 +92,11 @@ SINGLE_ARG_FUNCS(SINGLE_ARG_IMPL) TWO_ARGS_FUNCS(TWO_ARGS_IMPL) POSITIVE_SINGLE_ARG_FUNCS(POSITIVE_SINGLE_ARG_IMPL) -extern "C" UDF_ALWAYS_INLINE -void SigmoidIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void SigmoidIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, 
const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { *result = TUnboxedValuePod(1. / (1. + exp(-args[0].Get<double>()))); } -extern "C" UDF_ALWAYS_INLINE -void FuzzyEqualsIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void FuzzyEqualsIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { if (!args[2]) { *result = TUnboxedValuePod(FuzzyEquals(args[0].Get<double>(), args[1].Get<double>())); } else { @@ -112,39 +105,33 @@ void FuzzyEqualsIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const } } -extern "C" UDF_ALWAYS_INLINE -void RoundIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void RoundIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { const double val = NMathUdf::RoundToDecimal<long double>(args[0].Get<double>(), args[1].GetOrDefault<int>(0)); *result = TUnboxedValuePod(val); } -extern "C" UDF_ALWAYS_INLINE -void ErfInvIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void ErfInvIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { *result = TUnboxedValuePod(NMathUdf::ErfInv(args[0].Get<double>())); } -extern "C" UDF_ALWAYS_INLINE -void ErfcInvIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void ErfcInvIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { *result = 
TUnboxedValuePod(NMathUdf::ErfInv(1. - args[0].Get<double>())); } -extern "C" UDF_ALWAYS_INLINE -void ModIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void ModIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { const auto val = NMathUdf::Mod(args[0].Get<i64>(), args[1].Get<i64>()); *result = val ? TUnboxedValuePod(*val) : TUnboxedValuePod(); } -extern "C" UDF_ALWAYS_INLINE -void RemIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void RemIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { const auto val = NMathUdf::Rem(args[0].Get<i64>(), args[1].Get<i64>()); *result = val ? TUnboxedValuePod(*val) : TUnboxedValuePod(); } -extern "C" UDF_ALWAYS_INLINE -void NearbyIntIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { +extern "C" UDF_ALWAYS_INLINE void NearbyIntIR(const IBoxedValue* /*pThis*/, TUnboxedValuePod* result, const IValueBuilder* /*valueBuilder*/, const TUnboxedValuePod* args) { const auto val = NMathUdf::NearbyInt(args[0].Get<double>(), args[1].Get<ui32>()); *result = val ? 
TUnboxedValuePod(*val) : TUnboxedValuePod(); } -} // NUdf -} // NYql +} // namespace NUdf +} // namespace NYql diff --git a/yql/essentials/udfs/common/math/math_udf.cpp b/yql/essentials/udfs/common/math/math_udf.cpp index e769ed3bee9..05b1d38c172 100644 --- a/yql/essentials/udfs/common/math/math_udf.cpp +++ b/yql/essentials/udfs/common/math/math_udf.cpp @@ -7,142 +7,142 @@ extern const char TagRoundingMode[] = "MathRoundingMode"; using TTaggedRoundingMode = NYql::NUdf::TTagged<ui32, TagRoundingMode>; -#define MATH_UDF_MAP(XX, XXL) \ - XX(Pi, double(), 0) \ - XX(E, double(), 0) \ - XX(Eps, double(), 0) \ - XX(RoundDownward, TTaggedRoundingMode(), 0) \ - XX(RoundToNearest, TTaggedRoundingMode(), 0) \ - XX(RoundTowardZero, TTaggedRoundingMode(), 0) \ - XX(RoundUpward, TTaggedRoundingMode(), 0) \ - XX(Abs, double(TAutoMap<double>), 0) \ - XX(Acos, double(TAutoMap<double>), 0) \ - XX(Asin, double(TAutoMap<double>), 0) \ - XX(Asinh, double(TAutoMap<double>), 0) \ - XX(Atan, double(TAutoMap<double>), 0) \ - XX(Cbrt, double(TAutoMap<double>), 0) \ - XX(Ceil, double(TAutoMap<double>), 0) \ - XX(Cos, double(TAutoMap<double>), 0) \ - XX(Cosh, double(TAutoMap<double>), 0) \ - XX(Erf, double(TAutoMap<double>), 0) \ - XX(ErfInv, double(TAutoMap<double>), 0) \ - XX(ErfcInv, double(TAutoMap<double>), 0) \ - XX(Exp, double(TAutoMap<double>), 0) \ - XX(Exp2, double(TAutoMap<double>), 0) \ - XX(Fabs, double(TAutoMap<double>), 0) \ - XX(Floor, double(TAutoMap<double>), 0) \ - XX(Lgamma, double(TAutoMap<double>), 0) \ - XX(Rint, double(TAutoMap<double>), 0) \ - XX(Sin, double(TAutoMap<double>), 0) \ - XX(Sinh, double(TAutoMap<double>), 0) \ - XX(Sqrt, double(TAutoMap<double>), 0) \ - XX(Tan, double(TAutoMap<double>), 0) \ - XX(Tanh, double(TAutoMap<double>), 0) \ - XX(Tgamma, double(TAutoMap<double>), 0) \ - XX(Trunc, double(TAutoMap<double>), 0) \ - XX(Log, double(TAutoMap<double>), 0) \ - XX(Log2, double(TAutoMap<double>), 0) \ - XX(Log10, double(TAutoMap<double>), 0) \ - XX(Atan2, 
double(TAutoMap<double>, TAutoMap<double>), 0) \ - XX(Fmod, double(TAutoMap<double>, TAutoMap<double>), 0) \ - XX(Hypot, double(TAutoMap<double>, TAutoMap<double>), 0) \ - XX(Remainder, double(TAutoMap<double>, TAutoMap<double>), 0) \ - XX(Pow, double(TAutoMap<double>, TAutoMap<double>), 0) \ - XX(Ldexp, double(TAutoMap<double>, TAutoMap<int>), 0) \ - XX(IsFinite, bool(TAutoMap<double>), 0) \ - XX(IsInf, bool(TAutoMap<double>), 0) \ - XX(IsNaN, bool(TAutoMap<double>), 0) \ - XX(Sigmoid, double(TAutoMap<double>), 0) \ - XX(FuzzyEquals, bool(TAutoMap<double>, TAutoMap<double>, TEpsilon), 1) \ - XX(Mod, TOptional<i64>(TAutoMap<i64>, i64), 0) \ - XX(Rem, TOptional<i64>(TAutoMap<i64>, i64), 0) \ +#define MATH_UDF_MAP(XX, XXL) \ + XX(Pi, double(), 0) \ + XX(E, double(), 0) \ + XX(Eps, double(), 0) \ + XX(RoundDownward, TTaggedRoundingMode(), 0) \ + XX(RoundToNearest, TTaggedRoundingMode(), 0) \ + XX(RoundTowardZero, TTaggedRoundingMode(), 0) \ + XX(RoundUpward, TTaggedRoundingMode(), 0) \ + XX(Abs, double(TAutoMap<double>), 0) \ + XX(Acos, double(TAutoMap<double>), 0) \ + XX(Asin, double(TAutoMap<double>), 0) \ + XX(Asinh, double(TAutoMap<double>), 0) \ + XX(Atan, double(TAutoMap<double>), 0) \ + XX(Cbrt, double(TAutoMap<double>), 0) \ + XX(Ceil, double(TAutoMap<double>), 0) \ + XX(Cos, double(TAutoMap<double>), 0) \ + XX(Cosh, double(TAutoMap<double>), 0) \ + XX(Erf, double(TAutoMap<double>), 0) \ + XX(ErfInv, double(TAutoMap<double>), 0) \ + XX(ErfcInv, double(TAutoMap<double>), 0) \ + XX(Exp, double(TAutoMap<double>), 0) \ + XX(Exp2, double(TAutoMap<double>), 0) \ + XX(Fabs, double(TAutoMap<double>), 0) \ + XX(Floor, double(TAutoMap<double>), 0) \ + XX(Lgamma, double(TAutoMap<double>), 0) \ + XX(Rint, double(TAutoMap<double>), 0) \ + XX(Sin, double(TAutoMap<double>), 0) \ + XX(Sinh, double(TAutoMap<double>), 0) \ + XX(Sqrt, double(TAutoMap<double>), 0) \ + XX(Tan, double(TAutoMap<double>), 0) \ + XX(Tanh, double(TAutoMap<double>), 0) \ + XX(Tgamma, 
double(TAutoMap<double>), 0) \ + XX(Trunc, double(TAutoMap<double>), 0) \ + XX(Log, double(TAutoMap<double>), 0) \ + XX(Log2, double(TAutoMap<double>), 0) \ + XX(Log10, double(TAutoMap<double>), 0) \ + XX(Atan2, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Fmod, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Hypot, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Remainder, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Pow, double(TAutoMap<double>, TAutoMap<double>), 0) \ + XX(Ldexp, double(TAutoMap<double>, TAutoMap<int>), 0) \ + XX(IsFinite, bool(TAutoMap<double>), 0) \ + XX(IsInf, bool(TAutoMap<double>), 0) \ + XX(IsNaN, bool(TAutoMap<double>), 0) \ + XX(Sigmoid, double(TAutoMap<double>), 0) \ + XX(FuzzyEquals, bool(TAutoMap<double>, TAutoMap<double>, TEpsilon), 1) \ + XX(Mod, TOptional<i64>(TAutoMap<i64>, i64), 0) \ + XX(Rem, TOptional<i64>(TAutoMap<i64>, i64), 0) \ XXL(Round, double(TAutoMap<double>, TPrecision), 1) -#define MATH_UDF_MAP_WITHOUT_IR(XX) \ +#define MATH_UDF_MAP_WITHOUT_IR(XX) \ XX(NearbyInt, TOptional<i64>(TAutoMap<double>, TTaggedRoundingMode), 0) #ifdef DISABLE_IR -#define MATH_STRICT_UDF(name, signature, optionalArgsCount) \ - SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(T##name, signature, optionalArgsCount) { \ - TUnboxedValuePod res; \ - name##IR(this, &res, valueBuilder, args); \ - return res; \ - } + #define MATH_STRICT_UDF(name, signature, optionalArgsCount) \ + SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(T##name, signature, optionalArgsCount) { \ + TUnboxedValuePod res; \ + name##IR(this, &res, valueBuilder, args); \ + return res; \ + } #else -#define MATH_STRICT_UDF(name, signature, optionalArgsCount) \ - SIMPLE_STRICT_UDF_WITH_IR(T##name, signature, optionalArgsCount, "/llvm_bc/Math", #name "IR") { \ - TUnboxedValuePod res; \ - name##IR(this, &res, valueBuilder, args); \ - return res; \ - } + #define MATH_STRICT_UDF(name, signature, optionalArgsCount) \ + SIMPLE_STRICT_UDF_WITH_IR(T##name, signature, optionalArgsCount, 
"/llvm_bc/Math", #name "IR") { \ + TUnboxedValuePod res; \ + name##IR(this, &res, valueBuilder, args); \ + return res; \ + } #endif -#define MATH_STRICT_UDF_WITHOUT_IR(name, signature, optionalArgsCount) \ - SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(T##name, signature, optionalArgsCount) { \ - TUnboxedValuePod res; \ - name##IR(this, &res, valueBuilder, args); \ - return res; \ +#define MATH_STRICT_UDF_WITHOUT_IR(name, signature, optionalArgsCount) \ + SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(T##name, signature, optionalArgsCount) { \ + TUnboxedValuePod res; \ + name##IR(this, &res, valueBuilder, args); \ + return res; \ } -#define REGISTER_MATH_UDF(udfName, ...) T##udfName, -#define REGISTER_MATH_UDF_LAST(udfName, ...) T##udfName +#define REGISTER_MATH_UDF(udfName, ...) T##udfName, +#define REGISTER_MATH_UDF_LAST(udfName, ...) T##udfName using namespace NKikimr; using namespace NUdf; namespace { - const char SwapBytesUDF[] = "SwapBytes"; - template <class TUserType> - class TSwapBytesFunc: public TBoxedValue { - private: - TSourcePosition Pos_; - - TSwapBytesFunc(TSourcePosition pos) - : Pos_(pos) - { - } +const char SwapBytesUDF[] = "SwapBytes"; +template <class TUserType> +class TSwapBytesFunc: public TBoxedValue { +private: + TSourcePosition Pos_; + + TSwapBytesFunc(TSourcePosition pos) + : Pos_(pos) + { + } - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { - Y_UNUSED(valueBuilder); - if constexpr (sizeof(TUserType) == 1) { - return args[0]; - } - return TUnboxedValuePod(SwapBytes(args[0].Get<TUserType>())); + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { + Y_UNUSED(valueBuilder); + if constexpr (sizeof(TUserType) == 1) { + return args[0]; } + return TUnboxedValuePod(SwapBytes(args[0].Get<TUserType>())); + } - public: - static void DeclareSignature( - TStringRef name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) - { - Y_UNUSED(name); - 
Y_UNUSED(userType); - - builder.SimpleSignature<TUserType(TAutoMap<TUserType>)>() - .IsStrict() - .SetMinLangVer(NYql::MakeLangVersion(2025, 3)); - if (!typesOnly) { - builder.Implementation(new TSwapBytesFunc<TUserType>(builder.GetSourcePosition())); - } +public: + static void DeclareSignature( + TStringRef name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) + { + Y_UNUSED(name); + Y_UNUSED(userType); + + builder.SimpleSignature<TUserType(TAutoMap<TUserType>)>() + .IsStrict() + .SetMinLangVer(NYql::MakeLangVersion(2025, 3)); + if (!typesOnly) { + builder.Implementation(new TSwapBytesFunc<TUserType>(builder.GetSourcePosition())); } - }; + } +}; - extern const char epsilon[] = "Epsilon"; - using TEpsilon = TNamedArg<double, epsilon>; +extern const char epsilon[] = "Epsilon"; +using TEpsilon = TNamedArg<double, epsilon>; - extern const char precision[] = "Precision"; - using TPrecision = TNamedArg<int, precision>; +extern const char precision[] = "Precision"; +using TPrecision = TNamedArg<int, precision>; - MATH_UDF_MAP(MATH_STRICT_UDF, MATH_STRICT_UDF) +MATH_UDF_MAP(MATH_STRICT_UDF, MATH_STRICT_UDF) - MATH_UDF_MAP_WITHOUT_IR(MATH_STRICT_UDF_WITHOUT_IR) +MATH_UDF_MAP_WITHOUT_IR(MATH_STRICT_UDF_WITHOUT_IR) - SIMPLE_MODULE(TMathModule, - MATH_UDF_MAP_WITHOUT_IR(REGISTER_MATH_UDF) - TUserDataTypeFuncFactory<true, false, SwapBytesUDF, TSwapBytesFunc, ui8, ui16, ui32, ui64>, - MATH_UDF_MAP(REGISTER_MATH_UDF, REGISTER_MATH_UDF_LAST)) -} +SIMPLE_MODULE(TMathModule, + MATH_UDF_MAP_WITHOUT_IR(REGISTER_MATH_UDF) + TUserDataTypeFuncFactory<true, false, SwapBytesUDF, TSwapBytesFunc, ui8, ui16, ui32, ui64>, + MATH_UDF_MAP(REGISTER_MATH_UDF, REGISTER_MATH_UDF_LAST)) +} // namespace REGISTER_MODULES(TMathModule) diff --git a/yql/essentials/udfs/common/math/ya.make b/yql/essentials/udfs/common/math/ya.make index dfb5fa7c994..b4dbbc6afd2 100644 --- a/yql/essentials/udfs/common/math/ya.make +++ b/yql/essentials/udfs/common/math/ya.make @@ -6,6 +6,8 @@ 
YQL_UDF_CONTRIB(math_udf) 0 ) + ENABLE(YQL_STYLE_CPP) + SRCS( math_udf.cpp ) diff --git a/yql/essentials/udfs/common/pire/pire_udf.cpp b/yql/essentials/udfs/common/pire/pire_udf.cpp index de2a75955e3..1357107a12b 100644 --- a/yql/essentials/udfs/common/pire/pire_udf.cpp +++ b/yql/essentials/udfs/common/pire/pire_udf.cpp @@ -14,345 +14,350 @@ using namespace NKikimr; using namespace NUdf; namespace { - class TPireUdfBase: public TBoxedValue { - protected: - TPireUdfBase(TSourcePosition pos) - : Pos_(pos) - {} - - void SetCommonOptions(std::string_view& regex, TFsm::TOptions& options) { - if (regex.size() >= 4U && regex.substr(0U, 4U) == "(?i)") { - options.SetCaseInsensitive(true); - regex.remove_prefix(4U); - } - if (UTF8Detect(regex) == UTF8) { - options.SetCharset(CODES_UTF8); - } +class TPireUdfBase: public TBoxedValue { +protected: + TPireUdfBase(TSourcePosition pos) + : Pos_(pos) + { + } + + void SetCommonOptions(std::string_view& regex, TFsm::TOptions& options) { + if (regex.size() >= 4U && regex.substr(0U, 4U) == "(?i)") { + options.SetCaseInsensitive(true); + regex.remove_prefix(4U); } + if (UTF8Detect(regex) == UTF8) { + options.SetCharset(CODES_UTF8); + } + } - TSourcePosition Pos_; - }; + TSourcePosition Pos_; +}; - class TPireMatch: public TPireUdfBase { +class TPireMatch: public TPireUdfBase { +public: + class TFactory: public TPireUdfBase { public: - class TFactory: public TPireUdfBase { - public: - TFactory( - bool surroundMode, - bool multiMode, - TSourcePosition pos, - size_t regexpsCount = 0) - : TPireUdfBase(pos) - , SurroundMode_(surroundMode) - , MultiMode_(multiMode) - , RegexpsCount_(regexpsCount) - { - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final { - return TUnboxedValuePod( - new TPireMatch( - valueBuilder, - args[0], - SurroundMode_, - MultiMode_, - Pos_, - RegexpsCount_)); - } - - bool SurroundMode_; - bool MultiMode_; - size_t RegexpsCount_; - }; - - static const 
TStringRef& Name(bool surroundMode, bool multiMode) { - static auto match = TStringRef::Of("Match"); - static auto grep = TStringRef::Of("Grep"); - static auto multiMatch = TStringRef::Of("MultiMatch"); - static auto multiGrep = TStringRef::Of("MultiGrep"); - if (surroundMode) { - return multiMode ? multiGrep : grep; - } else { - return multiMode ? multiMatch : match; - } - } - - TPireMatch( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod& runConfig, + TFactory( bool surroundMode, bool multiMode, TSourcePosition pos, - size_t regexpsCount) + size_t regexpsCount = 0) : TPireUdfBase(pos) + , SurroundMode_(surroundMode) , MultiMode_(multiMode) , RegexpsCount_(regexpsCount) - , SurroundMode_(surroundMode) { - Y_UNUSED(valueBuilder); - try { - std::string_view regex(runConfig.AsStringRef()); - TFsm::TOptions options; - options.SetSurround(surroundMode); - SetCommonOptions(regex, options); - if (multiMode) { - std::vector<std::string_view> parts; - StringSplitter(regex).Split('\n').AddTo(&parts); - for (const auto& part : parts) { - if (!part.empty()) { - if (Fsm_) try { + } + + private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final { + return TUnboxedValuePod( + new TPireMatch( + valueBuilder, + args[0], + SurroundMode_, + MultiMode_, + Pos_, + RegexpsCount_)); + } + + bool SurroundMode_; + bool MultiMode_; + size_t RegexpsCount_; + }; + + static const TStringRef& Name(bool surroundMode, bool multiMode) { + static auto match = TStringRef::Of("Match"); + static auto grep = TStringRef::Of("Grep"); + static auto multiMatch = TStringRef::Of("MultiMatch"); + static auto multiGrep = TStringRef::Of("MultiGrep"); + if (surroundMode) { + return multiMode ? multiGrep : grep; + } else { + return multiMode ? 
multiMatch : match; + } + } + + TPireMatch( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod& runConfig, + bool surroundMode, + bool multiMode, + TSourcePosition pos, + size_t regexpsCount) + : TPireUdfBase(pos) + , MultiMode_(multiMode) + , RegexpsCount_(regexpsCount) + , SurroundMode_(surroundMode) + { + Y_UNUSED(valueBuilder); + try { + std::string_view regex(runConfig.AsStringRef()); + TFsm::TOptions options; + options.SetSurround(surroundMode); + SetCommonOptions(regex, options); + if (multiMode) { + std::vector<std::string_view> parts; + StringSplitter(regex).Split('\n').AddTo(&parts); + for (const auto& part : parts) { + if (!part.empty()) { + if (Fsm_) { + try { *Fsm_ = *Fsm_ | TFsm(TString(part), options); } catch (const yexception&) { UdfTerminate((TStringBuilder() << Pos_ << " Failed to glue up regexes, probably the finite state machine appeared to be too large").c_str()); - } else { - Fsm_.Reset(new TFsm(TString(part), options)); } + } else { + Fsm_.Reset(new TFsm(TString(part), options)); } } - } else { - Fsm_.Reset(new TFsm(TString(regex), options)); } - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } else { + Fsm_.Reset(new TFsm(TString(regex), options)); } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try { - TUnboxedValue* items = nullptr; - TUnboxedValue tuple; - size_t i = 0; +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + TUnboxedValue* items = nullptr; + TUnboxedValue tuple; + size_t i = 0; - if (MultiMode_) { - tuple = valueBuilder->NewArray(RegexpsCount_, items); + if (MultiMode_) { + tuple = valueBuilder->NewArray(RegexpsCount_, items); - for (i = 0; i < RegexpsCount_; ++i) { - items[i] = TUnboxedValuePod(false); - } 
+ for (i = 0; i < RegexpsCount_; ++i) { + items[i] = TUnboxedValuePod(false); } + } - if (args[0]) { - const auto input = args[0].AsStringRef(); - TMatcher matcher(*Fsm_); - const bool isMatch = matcher.Match(input.Data(), input.Size(), SurroundMode_, SurroundMode_).Final(); - if (MultiMode_) { - if (isMatch) { - const auto& matchedRegexps = matcher.MatchedRegexps(); - size_t matchesCount = matchedRegexps.second - matchedRegexps.first; - - for (i = 0; i < matchesCount; ++i) { - items[matchedRegexps.first[i]] = TUnboxedValuePod(true); - } - } - return tuple; + if (args[0]) { + const auto input = args[0].AsStringRef(); + TMatcher matcher(*Fsm_); + const bool isMatch = matcher.Match(input.Data(), input.Size(), SurroundMode_, SurroundMode_).Final(); + if (MultiMode_) { + if (isMatch) { + const auto& matchedRegexps = matcher.MatchedRegexps(); + size_t matchesCount = matchedRegexps.second - matchedRegexps.first; - } else { - return TUnboxedValuePod(isMatch); + for (i = 0; i < matchesCount; ++i) { + items[matchedRegexps.first[i]] = TUnboxedValuePod(true); + } } + return tuple; } else { - return MultiMode_ ? 
tuple : TUnboxedValue(TUnboxedValuePod(false)); - } - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - - private: - TUniquePtr<TFsm> Fsm_; - bool MultiMode_; - size_t RegexpsCount_; - bool SurroundMode_; - }; - - class TPireCapture: public TPireUdfBase { - public: - class TFactory: public TPireUdfBase { - public: - TFactory(TSourcePosition pos) - : TPireUdfBase(pos) - {} - - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { - return TUnboxedValuePod(new TPireCapture(args[0], Pos_)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + return TUnboxedValuePod(isMatch); } - }; - static const TStringRef& Name() { - static auto name = TStringRef::Of("Capture"); - return name; + } else { + return MultiMode_ ? tuple : TUnboxedValue(TUnboxedValuePod(false)); } - - TPireCapture(const TUnboxedValuePod& runConfig, TSourcePosition pos) + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + +private: + TUniquePtr<TFsm> Fsm_; + bool MultiMode_; + size_t RegexpsCount_; + bool SurroundMode_; +}; + +class TPireCapture: public TPireUdfBase { +public: + class TFactory: public TPireUdfBase { + public: + TFactory(TSourcePosition pos) : TPireUdfBase(pos) { - std::string_view regex(runConfig.AsStringRef()); - TFsm::TOptions options; - SetCommonOptions(regex, options); - Fsm_.Reset(new TSlowCapturingFsm(TString(regex), options)); } private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try { - if (args[0]) { - const std::string_view input = args[0].AsStringRef(); - - TSlowSearcher searcher(*Fsm_); - searcher.Search(input.data(), input.size()); - - if (searcher.Captured()) { - const auto& captured = searcher.GetCaptured(); - return valueBuilder->SubString(args[0], std::distance(input.begin(), 
captured.begin()), captured.length()); - } - } - - return TUnboxedValue(); + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { + return TUnboxedValuePod(new TPireCapture(args[0], Pos_)); } catch (const std::exception& e) { UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - - TUniquePtr<TSlowCapturingFsm> Fsm_; }; - class TPireReplace: public TPireUdfBase { - public: - class TFactory: public TPireUdfBase { - public: - TFactory(TSourcePosition pos) - : TPireUdfBase(pos) - {} - - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { - return TUnboxedValuePod(new TPireReplace(args[0], Pos_)); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + static const TStringRef& Name() { + static auto name = TStringRef::Of("Capture"); + return name; + } + + TPireCapture(const TUnboxedValuePod& runConfig, TSourcePosition pos) + : TPireUdfBase(pos) + { + std::string_view regex(runConfig.AsStringRef()); + TFsm::TOptions options; + SetCommonOptions(regex, options); + Fsm_.Reset(new TSlowCapturingFsm(TString(regex), options)); + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + if (args[0]) { + const std::string_view input = args[0].AsStringRef(); + + TSlowSearcher searcher(*Fsm_); + searcher.Search(input.data(), input.size()); + + if (searcher.Captured()) { + const auto& captured = searcher.GetCaptured(); + return valueBuilder->SubString(args[0], std::distance(input.begin(), captured.begin()), captured.length()); } - }; - - static const TStringRef& Name() { - static auto name = TStringRef::Of("Replace"); - return name; } - TPireReplace(const TUnboxedValuePod& runConfig, TSourcePosition pos) + return TUnboxedValue(); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + + 
TUniquePtr<TSlowCapturingFsm> Fsm_; +}; + +class TPireReplace: public TPireUdfBase { +public: + class TFactory: public TPireUdfBase { + public: + TFactory(TSourcePosition pos) : TPireUdfBase(pos) { - std::string_view regex(runConfig.AsStringRef()); - TFsm::TOptions options; - SetCommonOptions(regex, options); - Fsm_.Reset(new TSlowCapturingFsm(TString(regex), options)); } private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try { - if (args[0]) { - const std::string_view input(args[0].AsStringRef()); - - TSlowSearcher s(*Fsm_); - s.Search(input.data(), input.size()); - if (s.Captured()) { - const auto& captured = s.GetCaptured(); - const TString replacement(args[1].AsStringRef()); - TString replaced(args[0].AsStringRef()); - replaced.replace(std::distance(input.begin(), captured.begin()), captured.length(), replacement); - return valueBuilder->NewString(replaced); - } else { - return TUnboxedValue(args[0]); - } - } else { - return TUnboxedValue(); - } + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const final try { + return TUnboxedValuePod(new TPireReplace(args[0], Pos_)); } catch (const std::exception& e) { UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } - - TUniquePtr<TSlowCapturingFsm> Fsm_; }; - class TPireModule: public IUdfModule { - public: - TStringRef Name() const { - return TStringRef::Of("Pire"); - } - - void CleanupOnTerminate() const final { - } - - void GetAllFunctions(IFunctionsSink& sink) const final { - sink.Add(TPireMatch::Name(true, true))->SetTypeAwareness(); - sink.Add(TPireMatch::Name(false, true))->SetTypeAwareness(); - sink.Add(TPireMatch::Name(true, false)); - sink.Add(TPireMatch::Name(false, false)); - sink.Add(TPireCapture::Name()); - sink.Add(TPireReplace::Name()); + static const TStringRef& Name() { + static auto name = TStringRef::Of("Replace"); + return name; + } + + TPireReplace(const TUnboxedValuePod& runConfig, TSourcePosition 
pos) + : TPireUdfBase(pos) + { + std::string_view regex(runConfig.AsStringRef()); + TFsm::TOptions options; + SetCommonOptions(regex, options); + Fsm_.Reset(new TSlowCapturingFsm(TString(regex), options)); + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + if (args[0]) { + const std::string_view input(args[0].AsStringRef()); + + TSlowSearcher s(*Fsm_); + s.Search(input.data(), input.size()); + if (s.Captured()) { + const auto& captured = s.GetCaptured(); + const TString replacement(args[1].AsStringRef()); + TString replaced(args[0].AsStringRef()); + replaced.replace(std::distance(input.begin(), captured.begin()), captured.length(), replacement); + return valueBuilder->NewString(replaced); + } else { + return TUnboxedValue(args[0]); + } + } else { + return TUnboxedValue(); } + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } + + TUniquePtr<TSlowCapturingFsm> Fsm_; +}; + +class TPireModule: public IUdfModule { +public: + TStringRef Name() const { + return TStringRef::Of("Pire"); + } + + void CleanupOnTerminate() const final { + } + + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(TPireMatch::Name(true, true))->SetTypeAwareness(); + sink.Add(TPireMatch::Name(false, true))->SetTypeAwareness(); + sink.Add(TPireMatch::Name(true, false)); + sink.Add(TPireMatch::Name(false, false)); + sink.Add(TPireCapture::Name()); + sink.Add(TPireReplace::Name()); + } + + void BuildFunctionTypeInfo( + const TStringRef& name, + TType*, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final try { + const bool typesOnly = (flags & TFlags::TypesOnly); + const bool isMatch = (TPireMatch::Name(false, false) == name); + const bool isGrep = (TPireMatch::Name(true, false) == name); + const bool isMultiMatch = (TPireMatch::Name(false, true) == name); + const bool isMultiGrep = (TPireMatch::Name(true, 
true) == name); + + if (isMatch || isGrep) { + builder.SimpleSignature<bool(TOptional<char*>)>() + .RunConfig<const char*>(); + + if (!typesOnly) { + builder.Implementation(new TPireMatch::TFactory(isGrep, false, builder.GetSourcePosition())); + } + } else if (isMultiMatch || isMultiGrep) { + const auto boolType = builder.SimpleType<bool>(); + const auto optionalStringType = builder.Optional()->Item<char*>().Build(); + const std::string_view regexp(typeConfig); + const size_t regexpCount = std::count(regexp.begin(), regexp.end(), '\n') + 1; + const auto tuple = builder.Tuple(); + for (size_t i = 0; i < regexpCount; ++i) { + tuple->Add(boolType); + } + const auto tupleType = tuple->Build(); + builder.Args(1)->Add(optionalStringType).Done().Returns(tupleType).RunConfig<char*>(); - void BuildFunctionTypeInfo( - const TStringRef& name, - TType*, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final try { - const bool typesOnly = (flags & TFlags::TypesOnly); - const bool isMatch = (TPireMatch::Name(false, false) == name); - const bool isGrep = (TPireMatch::Name(true, false) == name); - const bool isMultiMatch = (TPireMatch::Name(false, true) == name); - const bool isMultiGrep = (TPireMatch::Name(true, true) == name); - - if (isMatch || isGrep) { - builder.SimpleSignature<bool(TOptional<char*>)>() - .RunConfig<const char*>(); - - if (!typesOnly) { - builder.Implementation(new TPireMatch::TFactory(isGrep, false, builder.GetSourcePosition())); - } - } else if (isMultiMatch || isMultiGrep) { - const auto boolType = builder.SimpleType<bool>(); - const auto optionalStringType = builder.Optional()->Item<char*>().Build(); - const std::string_view regexp(typeConfig); - const size_t regexpCount = std::count(regexp.begin(), regexp.end(), '\n') + 1; - const auto tuple = builder.Tuple(); - for (size_t i = 0; i < regexpCount; ++i) { - tuple->Add(boolType); - } - const auto tupleType = tuple->Build(); - 
builder.Args(1)->Add(optionalStringType).Done().Returns(tupleType).RunConfig<char*>(); - - if (!typesOnly) { - builder.Implementation(new TPireMatch::TFactory(isMultiGrep, true, builder.GetSourcePosition(), regexpCount)); - } - } else if (TPireCapture::Name() == name) { - builder.SimpleSignature<TOptional<char*>(TOptional<char*>)>() - .RunConfig<char*>(); + if (!typesOnly) { + builder.Implementation(new TPireMatch::TFactory(isMultiGrep, true, builder.GetSourcePosition(), regexpCount)); + } + } else if (TPireCapture::Name() == name) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>)>() + .RunConfig<char*>(); - if (!typesOnly) { - builder.Implementation(new TPireCapture::TFactory(builder.GetSourcePosition())); - } - } else if (TPireReplace::Name() == name) { - builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() - .RunConfig<char*>(); + if (!typesOnly) { + builder.Implementation(new TPireCapture::TFactory(builder.GetSourcePosition())); + } + } else if (TPireReplace::Name() == name) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() + .RunConfig<char*>(); - if (!typesOnly) { - builder.Implementation(new TPireReplace::TFactory(builder.GetSourcePosition())); - } + if (!typesOnly) { + builder.Implementation(new TPireReplace::TFactory(builder.GetSourcePosition())); } - } catch (const std::exception& e) { - builder.SetError(CurrentExceptionMessage()); } - }; + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); + } +}; -} +} // namespace REGISTER_MODULES(TPireModule) diff --git a/yql/essentials/udfs/common/pire/ya.make b/yql/essentials/udfs/common/pire/ya.make index 414b973aa62..21aef728426 100644 --- a/yql/essentials/udfs/common/pire/ya.make +++ b/yql/essentials/udfs/common/pire/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(pire_udf) 27 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( pire_udf.cpp diff --git a/yql/essentials/udfs/common/protobuf/protobuf_udf.cpp 
b/yql/essentials/udfs/common/protobuf/protobuf_udf.cpp index 4b7df61c28e..25c85c37727 100644 --- a/yql/essentials/udfs/common/protobuf/protobuf_udf.cpp +++ b/yql/essentials/udfs/common/protobuf/protobuf_udf.cpp @@ -9,135 +9,138 @@ using namespace NKikimr::NUdf; using namespace NProtoBuf; namespace { - class TDynamicProtoValue: public TProtobufValue { - public: - TDynamicProtoValue(const TProtoInfo& info, TDynamicInfoRef dyn) - : TProtobufValue(info) - , Dynamic_(dyn) - { - Y_ASSERT(Dynamic_ != nullptr); +class TDynamicProtoValue: public TProtobufValue { +public: + TDynamicProtoValue(const TProtoInfo& info, TDynamicInfoRef dyn) + : TProtobufValue(info) + , Dynamic_(dyn) + { + Y_ASSERT(Dynamic_ != nullptr); + } + + TAutoPtr<Message> Parse(const TStringBuf& data) const override { + return Dynamic_->Parse(data); + } + +private: + TDynamicInfoRef Dynamic_; +}; + +class TDynamicProtoSerialize: public TProtobufSerialize { +public: + TDynamicProtoSerialize(const TProtoInfo& info, TDynamicInfoRef dyn) + : TProtobufSerialize(info) + , Dynamic_(dyn) + { + Y_ASSERT(Dynamic_ != nullptr); + } + + TMaybe<TString> Serialize(const Message& proto) const override { + return Dynamic_->Serialize(proto); + } + + TAutoPtr<Message> MakeProto() const override { + return Dynamic_->MakeProto(); + } + +private: + TDynamicInfoRef Dynamic_; +}; + +class TDynamicProtoValueSafe: public TDynamicProtoValue { +public: + TDynamicProtoValueSafe(const TProtoInfo& info, TDynamicInfoRef dyn) + : TDynamicProtoValue(info, dyn) + { + } + + TAutoPtr<Message> Parse(const TStringBuf& data) const override { + try { + return TDynamicProtoValue::Parse(data); + } catch (const std::exception& e) { + return nullptr; } - - TAutoPtr<Message> Parse(const TStringBuf& data) const override { - return Dynamic_->Parse(data); - } - - private: - TDynamicInfoRef Dynamic_; - }; - - class TDynamicProtoSerialize: public TProtobufSerialize { - public: - TDynamicProtoSerialize(const TProtoInfo& info, TDynamicInfoRef dyn) - : 
TProtobufSerialize(info) - , Dynamic_(dyn) - { - Y_ASSERT(Dynamic_ != nullptr); - } - - TMaybe<TString> Serialize(const Message& proto) const override { - return Dynamic_->Serialize(proto); - } - - TAutoPtr<Message> MakeProto() const override { - return Dynamic_->MakeProto(); - } - private: - TDynamicInfoRef Dynamic_; - }; - - class TDynamicProtoValueSafe: public TDynamicProtoValue { - public: - TDynamicProtoValueSafe(const TProtoInfo& info, TDynamicInfoRef dyn) - : TDynamicProtoValue(info, dyn) {} - - TAutoPtr<Message> Parse(const TStringBuf& data) const override { - try { - return TDynamicProtoValue::Parse(data); - } catch (const std::exception& e) { - return nullptr; - } - } - }; - - class TProtobufModule: public IUdfModule { - public: - TStringRef Name() const { - return TStringRef("Protobuf"); - } - - void CleanupOnTerminate() const final { - } - - void GetAllFunctions(IFunctionsSink& sink) const final { - sink.Add(TStringRef::Of("Parse"))->SetTypeAwareness(); - sink.Add(TStringRef::Of("TryParse"))->SetTypeAwareness(); - sink.Add(TStringRef::Of("Serialize"))->SetTypeAwareness(); - } - - void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final { - Y_UNUSED(userType); - - try { - auto dyn = TDynamicInfo::Create(TStringBuf(typeConfig.Data(), typeConfig.Size())); - - TProtoInfo typeInfo; - ProtoTypeBuild(dyn->Descriptor(), - dyn->GetEnumFormat(), - dyn->GetRecursionTraits(), - dyn->GetOptionalLists(), - builder, &typeInfo, - EProtoStringYqlType::Bytes, - dyn->GetSyntaxAware(), - false, - dyn->GetYtMode()); - - auto stringType = builder.SimpleType<char*>(); - auto structType = typeInfo.StructType; - auto optionalStructType = builder.Optional()->Item(structType).Build(); - - if (TStringRef::Of("Serialize") == name) { - // function signature: - // String Serialize(Protobuf value) - builder.Returns(stringType) - .Args() - ->Add(structType) - 
.Flags(ICallablePayload::TArgumentFlags::AutoMap) - .Done(); + } +}; + +class TProtobufModule: public IUdfModule { +public: + TStringRef Name() const { + return TStringRef("Protobuf"); + } + + void CleanupOnTerminate() const final { + } + + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(TStringRef::Of("Parse"))->SetTypeAwareness(); + sink.Add(TStringRef::Of("TryParse"))->SetTypeAwareness(); + sink.Add(TStringRef::Of("Serialize"))->SetTypeAwareness(); + } + + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { + Y_UNUSED(userType); + + try { + auto dyn = TDynamicInfo::Create(TStringBuf(typeConfig.Data(), typeConfig.Size())); + + TProtoInfo typeInfo; + ProtoTypeBuild(dyn->Descriptor(), + dyn->GetEnumFormat(), + dyn->GetRecursionTraits(), + dyn->GetOptionalLists(), + builder, &typeInfo, + EProtoStringYqlType::Bytes, + dyn->GetSyntaxAware(), + false, + dyn->GetYtMode()); + + auto stringType = builder.SimpleType<char*>(); + auto structType = typeInfo.StructType; + auto optionalStructType = builder.Optional()->Item(structType).Build(); + + if (TStringRef::Of("Serialize") == name) { + // function signature: + // String Serialize(Protobuf value) + builder.Returns(stringType) + .Args() + ->Add(structType) + .Flags(ICallablePayload::TArgumentFlags::AutoMap) + .Done(); + if ((flags & TFlags::TypesOnly) == 0) { + builder.Implementation(new TDynamicProtoSerialize(typeInfo, dyn)); + } + } else { + // function signature: + // Protobuf Parse(String value) + builder.Returns((TStringRef::Of("TryParse") == name) ? 
optionalStructType : structType) + .Args() + ->Add(stringType) + .Flags(ICallablePayload::TArgumentFlags::AutoMap) + .Done(); + + if (TStringRef::Of("Parse") == name) { if ((flags & TFlags::TypesOnly) == 0) { - builder.Implementation(new TDynamicProtoSerialize(typeInfo, dyn)); + builder.Implementation(new TDynamicProtoValue(typeInfo, dyn)); } - } else { - // function signature: - // Protobuf Parse(String value) - builder.Returns((TStringRef::Of("TryParse") == name) ? optionalStructType : structType) - .Args() - ->Add(stringType) - .Flags(ICallablePayload::TArgumentFlags::AutoMap) - .Done(); - - if (TStringRef::Of("Parse") == name) { - if ((flags & TFlags::TypesOnly) == 0) { - builder.Implementation(new TDynamicProtoValue(typeInfo, dyn)); - } - } else if (TStringRef::Of("TryParse") == name) { - if ((flags & TFlags::TypesOnly) == 0) { - builder.Implementation(new TDynamicProtoValueSafe(typeInfo, dyn)); - } + } else if (TStringRef::Of("TryParse") == name) { + if ((flags & TFlags::TypesOnly) == 0) { + builder.Implementation(new TDynamicProtoValueSafe(typeInfo, dyn)); } } - - } catch (const std::exception& e) { - builder.SetError(CurrentExceptionMessage()); } + + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); } - }; + } +}; -} +} // namespace REGISTER_MODULES(TProtobufModule); diff --git a/yql/essentials/udfs/common/protobuf/ya.make b/yql/essentials/udfs/common/protobuf/ya.make index 714ad77137f..007a5266671 100644 --- a/yql/essentials/udfs/common/protobuf/ya.make +++ b/yql/essentials/udfs/common/protobuf/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( protobuf_udf.cpp ) diff --git a/yql/essentials/udfs/common/python/bindings/py27_backports.c b/yql/essentials/udfs/common/python/bindings/py27_backports.c index cf21a97cef0..7359bcce4be 100644 --- a/yql/essentials/udfs/common/python/bindings/py27_backports.c +++ b/yql/essentials/udfs/common/python/bindings/py27_backports.c @@ -1,22 +1,21 @@ #include 
"py27_backports.h" - // Provide implementations from python 2.7.15 as backports -int -_PySlice_Unpack(PyObject *_r, - Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step) +int _PySlice_Unpack(PyObject* _r, + Py_ssize_t* start, Py_ssize_t* stop, Py_ssize_t* step) { - PySliceObject *r = (PySliceObject *)_r; + PySliceObject* r = (PySliceObject*)_r; /* this is harder to get right than you might think */ assert(PY_SSIZE_T_MIN + 1 <= -PY_SSIZE_T_MAX); if (r->step == Py_None) { *step = 1; - } - else { - if (!_PyEval_SliceIndex(r->step, step)) return -1; + } else { + if (!_PyEval_SliceIndex(r->step, step)) { + return -1; + } if (*step == 0) { PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); @@ -27,22 +26,25 @@ _PySlice_Unpack(PyObject *_r, * guards against later undefined behaviour resulting from code that * does "step = -step" as part of a slice reversal. */ - if (*step < -PY_SSIZE_T_MAX) + if (*step < -PY_SSIZE_T_MAX) { *step = -PY_SSIZE_T_MAX; + } } if (r->start == Py_None) { *start = *step < 0 ? PY_SSIZE_T_MAX : 0; - } - else { - if (!_PyEval_SliceIndex(r->start, start)) return -1; + } else { + if (!_PyEval_SliceIndex(r->start, start)) { + return -1; + } } if (r->stop == Py_None) { *stop = *step < 0 ? PY_SSIZE_T_MIN : PY_SSIZE_T_MAX; - } - else { - if (!_PyEval_SliceIndex(r->stop, stop)) return -1; + } else { + if (!_PyEval_SliceIndex(r->stop, stop)) { + return -1; + } } return 0; @@ -50,7 +52,7 @@ _PySlice_Unpack(PyObject *_r, Py_ssize_t _PySlice_AdjustIndices(Py_ssize_t length, - Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t step) + Py_ssize_t* start, Py_ssize_t* stop, Py_ssize_t step) { /* this is harder to get right than you might think */ @@ -62,8 +64,7 @@ _PySlice_AdjustIndices(Py_ssize_t length, if (*start < 0) { *start = (step < 0) ? -1 : 0; } - } - else if (*start >= length) { + } else if (*start >= length) { *start = (step < 0) ? 
length - 1 : length; } @@ -72,8 +73,7 @@ _PySlice_AdjustIndices(Py_ssize_t length, if (*stop < 0) { *stop = (step < 0) ? -1 : 0; } - } - else if (*stop >= length) { + } else if (*stop >= length) { *stop = (step < 0) ? length - 1 : length; } @@ -81,8 +81,7 @@ _PySlice_AdjustIndices(Py_ssize_t length, if (*stop < *start) { return (*start - *stop - 1) / (-step) + 1; } - } - else { + } else { if (*start < *stop) { return (*stop - *start - 1) / step + 1; } diff --git a/yql/essentials/udfs/common/python/bindings/py27_backports.h b/yql/essentials/udfs/common/python/bindings/py27_backports.h index 766af6a76fa..f39b0183ba4 100644 --- a/yql/essentials/udfs/common/python/bindings/py27_backports.h +++ b/yql/essentials/udfs/common/python/bindings/py27_backports.h @@ -6,17 +6,17 @@ extern "C" { #endif -// Declare functions which are to be backported -// (see details about need for backports in ya.make) + // Declare functions which are to be backported + // (see details about need for backports in ya.make) -int _PySlice_Unpack(PyObject *slice, - Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step); + int _PySlice_Unpack(PyObject* slice, + Py_ssize_t* start, Py_ssize_t* stop, Py_ssize_t* step); -Py_ssize_t _PySlice_AdjustIndices(Py_ssize_t length, - Py_ssize_t *start, Py_ssize_t *stop, - Py_ssize_t step); + Py_ssize_t _PySlice_AdjustIndices(Py_ssize_t length, + Py_ssize_t* start, Py_ssize_t* stop, + Py_ssize_t step); -// Declare py23 compatible names + // Declare py23 compatible names #define PySlice_Unpack _PySlice_Unpack #define PySlice_AdjustIndices _PySlice_AdjustIndices diff --git a/yql/essentials/udfs/common/python/bindings/py_callable.cpp b/yql/essentials/udfs/common/python/bindings/py_callable.cpp index e9b25606ed0..d3794a5fa9f 100644 --- a/yql/essentials/udfs/common/python/bindings/py_callable.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_callable.cpp @@ -22,8 +22,7 @@ namespace { ////////////////////////////////////////////////////////////////////////////// 
// TPyCallableObject ////////////////////////////////////////////////////////////////////////////// -struct TPyCallableObject -{ +struct TPyCallableObject { PyObject_HEAD; TPyCastContext::TPtr CastCtx; const NUdf::TType* Type; @@ -34,7 +33,8 @@ struct TPyCallableObject : CastCtx(castCtx) , Type(type) , Inspector(*castCtx->PyCtx->TypeInfoHelper, type) - {} + { + } }; inline TPyCallableObject* CastToCallable(PyObject* o) @@ -53,7 +53,7 @@ PyObject* CallableRepr(PyObject*) return PyRepr("<yql.TCallable>").Release(); } -PyObject* CallableCall(PyObject *self, PyObject *args, PyObject *kwargs) +PyObject* CallableCall(PyObject* self, PyObject* args, PyObject* kwargs) { Y_UNUSED(kwargs); @@ -74,88 +74,88 @@ PyObject* CallableCall(PyObject *self, PyObject *args, PyObject *kwargs) } return ToPyObject(callable->CastCtx, inspector.GetReturnType(), result).Release(); - } PY_CATCH(nullptr) + } + PY_CATCH(nullptr) } -} +} // namespace PyTypeObject PyCallableType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TCallable"), - INIT_MEMBER(tp_basicsize , sizeof(TPyCallableObject)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , CallableDealloc), + INIT_MEMBER(tp_name, "yql.TCallable"), + INIT_MEMBER(tp_basicsize, sizeof(TPyCallableObject)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, CallableDealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , CallableRepr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - 
INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , CallableCall), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , 0), - INIT_MEMBER(tp_doc , "yql.TCallable object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , nullptr), - INIT_MEMBER(tp_iternext , nullptr), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, CallableRepr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, CallableCall), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, 0), + INIT_MEMBER(tp_doc, "yql.TCallable object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, nullptr), + INIT_MEMBER(tp_iternext, nullptr), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), 
+ INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; ////////////////////////////////////////////////////////////////////////////// // TPyCallable ////////////////////////////////////////////////////////////////////////////// -class TPyCallable: public NUdf::TBoxedValue -{ +class TPyCallable: public NUdf::TBoxedValue { public: TPyCallable( - PyObject* function, - const NUdf::TType* functionType, - const TPyCastContext::TPtr& castCtx) + PyObject* function, + const NUdf::TType* functionType, + const TPyCastContext::TPtr& castCtx) : Function_(function, TPyObjectPtr::ADD_REF) , FunctionType_(functionType) , CastCtx_(castCtx) @@ -179,16 +179,17 @@ public: private: NUdf::TUnboxedValue Run( - const NUdf::IValueBuilder*, - const NUdf::TUnboxedValuePod* args) const final - { + const NUdf::IValueBuilder*, + const NUdf::TUnboxedValuePod* args) const final { TPyGilLocker lock; try { TPyObjectPtr pyArgs = ToPyArgs(CastCtx_, FunctionType_, args, Inspector_); TPyObjectPtr resultObj = - PyObject_CallObject(Function_.Get(), pyArgs.Get()); + PyObject_CallObject(Function_.Get(), pyArgs.Get()); if 
(!resultObj) { - UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).c_str()); + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" + << GetLastErrorAsString()) + .c_str()); } auto returnType = Inspector_.GetReturnType(); @@ -198,7 +199,9 @@ private: return FromPyObject(CastCtx_, returnType, resultObj.Get()); } catch (const yexception& e) { - UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to cast arguments or result\n" << e.what()).c_str()); + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to cast arguments or result\n" + << e.what()) + .c_str()); } } @@ -209,11 +212,10 @@ private: NUdf::TCallableTypeInspector Inspector_; }; - TPyObjectPtr ToPyCallable( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) { TPyCallableObject* callable = new TPyCallableObject(castCtx, type); PyObject_INIT(callable, &PyCallableType); @@ -224,9 +226,9 @@ TPyObjectPtr ToPyCallable( } NUdf::TUnboxedValue FromPyCallable( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - PyObject* value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + PyObject* value) { return NUdf::TUnboxedValuePod(new TPyCallable(value, type, castCtx)); } @@ -244,13 +246,14 @@ TMaybe<TPyObjectPtr> GetOptionalAttribute(PyObject* value, const char* attrName) } } - -struct TPySecureParam -{ +struct TPySecureParam { PyObject_HEAD; TPyCastContext::TPtr CastCtx; - TPySecureParam(const TPyCastContext::TPtr& castCtx) : CastCtx(castCtx) {} + TPySecureParam(const TPyCastContext::TPtr& castCtx) + : CastCtx(castCtx) + { + } }; inline TPySecureParam* CastToSecureParam(PyObject* o) @@ -273,7 +276,9 @@ PyObject* SecureParamCall(PyObject* self, PyObject* args, PyObject* kwargs) Y_UNUSED(kwargs); struct PyBufDeleter { - 
void operator() (Py_buffer* view) { PyBuffer_Release(view); } + void operator()(Py_buffer* view) { + PyBuffer_Release(view); + } }; Py_buffer input; if (!PyArg_ParseTuple(args, "s*", &input)) { @@ -287,73 +292,74 @@ PyObject* SecureParamCall(PyObject* self, PyObject* args, PyObject* kwargs) throw yexception() << "Cannot get secure parameter for key: " << key; } return PyRepr(TStringBuf(key.Data(), key.Size())).Release(); - } PY_CATCH(nullptr) + } + PY_CATCH(nullptr) } static PyTypeObject PySecureParamType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TSecureParam"), - INIT_MEMBER(tp_basicsize , sizeof(TPySecureParam)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , SecureParamDealloc), + INIT_MEMBER(tp_name, "yql.TSecureParam"), + INIT_MEMBER(tp_basicsize, sizeof(TPySecureParam)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, SecureParamDealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , SecureParamRepr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , SecureParamCall), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , 0), - INIT_MEMBER(tp_doc , "yql.TSecureParam object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - 
INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , nullptr), - INIT_MEMBER(tp_iternext , nullptr), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, SecureParamRepr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, SecureParamCall), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, 0), + INIT_MEMBER(tp_doc, "yql.TSecureParam object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, nullptr), + INIT_MEMBER(tp_iternext, nullptr), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, 
nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; @@ -364,55 +370,60 @@ TPyObjectPtr ToPySecureParam(const TPyCastContext::TPtr& castCtx) return reinterpret_cast<PyObject*>(ret); } - void SetupCallableSettings(const TPyCastContext::TPtr& castCtx, PyObject* value) { - if (const auto lazyInput = GetOptionalAttribute(value, "_yql_lazy_input")) try { - castCtx->LazyInputObjects = PyCast<bool>(lazyInput->Get()); - } catch (const yexception& e) { - throw yexception() << "Cannot parse attribute '_yql_lazy_input', error: " << e.what(); + if (const auto lazyInput = GetOptionalAttribute(value, "_yql_lazy_input")) { + try { + castCtx->LazyInputObjects = PyCast<bool>(lazyInput->Get()); + } catch (const yexception& e) { + throw yexception() << "Cannot parse attribute '_yql_lazy_input', error: " << e.what(); + } } - if (const auto convertYson = GetOptionalAttribute(value, "_yql_convert_yson")) try { - Py_ssize_t itemsCount = PyTuple_GET_SIZE(convertYson->Get()); - if (itemsCount != 2) { - throw yexception() << "Expected tuple of 2 callables"; - } + if (const auto convertYson = GetOptionalAttribute(value, "_yql_convert_yson")) { + try { + Py_ssize_t itemsCount = PyTuple_GET_SIZE(convertYson->Get()); + if (itemsCount != 2) { + throw yexception() << "Expected tuple of 2 callables"; + } - castCtx->YsonConverterIn.ResetAddRef(PyTuple_GET_ITEM(convertYson->Get(), 0)); - castCtx->YsonConverterOut.ResetAddRef(PyTuple_GET_ITEM(convertYson->Get(), 1)); - if 
(!PyCallable_Check(castCtx->YsonConverterIn.Get()) || !PyCallable_Check(castCtx->YsonConverterOut.Get())) { - throw yexception() << "Expected tuple of 2 callables"; + castCtx->YsonConverterIn.ResetAddRef(PyTuple_GET_ITEM(convertYson->Get(), 0)); + castCtx->YsonConverterOut.ResetAddRef(PyTuple_GET_ITEM(convertYson->Get(), 1)); + if (!PyCallable_Check(castCtx->YsonConverterIn.Get()) || !PyCallable_Check(castCtx->YsonConverterOut.Get())) { + throw yexception() << "Expected tuple of 2 callables"; + } + } catch (const yexception& e) { + throw yexception() << "Cannot parse attribute '_yql_convert_yson', error: " << e.what(); } - } catch (const yexception& e) { - throw yexception() << "Cannot parse attribute '_yql_convert_yson', error: " << e.what(); } - if (const auto bytesDecodeMode = GetOptionalAttribute(value, "_yql_bytes_decode_mode")) try { - PyObject* bytesValue = nullptr; - if (PyBytes_Check(bytesDecodeMode->Get())) { - bytesValue = PyObject_Bytes(bytesDecodeMode->Get()); - } else if (PyUnicode_Check(bytesDecodeMode->Get())) { - bytesValue = PyUnicode_AsUTF8String(bytesDecodeMode->Get()); - } else { - throw yexception() << "Expected bytes or unicode"; - } - if (!bytesValue) { - PyErr_Clear(); - throw yexception() << "Failed to convert to bytes"; - } + if (const auto bytesDecodeMode = GetOptionalAttribute(value, "_yql_bytes_decode_mode")) { + try { + PyObject* bytesValue = nullptr; + if (PyBytes_Check(bytesDecodeMode->Get())) { + bytesValue = PyObject_Bytes(bytesDecodeMode->Get()); + } else if (PyUnicode_Check(bytesDecodeMode->Get())) { + bytesValue = PyUnicode_AsUTF8String(bytesDecodeMode->Get()); + } else { + throw yexception() << "Expected bytes or unicode"; + } + if (!bytesValue) { + PyErr_Clear(); + throw yexception() << "Failed to convert to bytes"; + } - TStringBuf view(PyBytes_AS_STRING(bytesValue)); - if (view == "never") { - castCtx->BytesDecodeMode = EBytesDecodeMode::Never; - } else if (view == "strict") { - castCtx->BytesDecodeMode = 
EBytesDecodeMode::Strict; - } else { + TStringBuf view(PyBytes_AS_STRING(bytesValue)); + if (view == "never") { + castCtx->BytesDecodeMode = EBytesDecodeMode::Never; + } else if (view == "strict") { + castCtx->BytesDecodeMode = EBytesDecodeMode::Strict; + } else { + Py_DECREF(bytesValue); + throw yexception() << "Expected values 'never' or 'strict'"; + } Py_DECREF(bytesValue); - throw yexception() << "Expected values 'never' or 'strict'"; + } catch (const yexception& e) { + throw yexception() << "Cannot parse attribute '_yql_bytes_decode_mode', error: " << e.what(); } - Py_DECREF(bytesValue); - } catch (const yexception& e) { - throw yexception() << "Cannot parse attribute '_yql_bytes_decode_mode', error: " << e.what(); } if (PyObject_SetAttrString(value, "_yql_secure_param", ToPySecureParam(castCtx).Get()) != 0) { diff --git a/yql/essentials/udfs/common/python/bindings/py_callable.h b/yql/essentials/udfs/common/python/bindings/py_callable.h index 4ce79e1d7f4..2c2f8bbed2a 100644 --- a/yql/essentials/udfs/common/python/bindings/py_callable.h +++ b/yql/essentials/udfs/common/python/bindings/py_callable.h @@ -8,15 +8,15 @@ namespace NPython { extern PyTypeObject PyCallableType; TPyObjectPtr ToPyCallable( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyCallable( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - PyObject* value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + PyObject* value); void SetupCallableSettings(const TPyCastContext::TPtr& castCtx, PyObject* value); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp index 
36cc13a1da4..c806d401b39 100644 --- a/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_callable_ut.cpp @@ -2,86 +2,85 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyCallableTest) { +struct TTestCallable: public NUdf::TBoxedValue { + NUdf::TUnboxedValue Run( + const NUdf::IValueBuilder* valueBuilder, + const NUdf::TUnboxedValuePod* args) const override { + Y_UNUSED(valueBuilder); + return NUdf::TUnboxedValuePod(args[0].Get<ui32>() + 42); + } +}; + +Y_UNIT_TEST(FromPyFunction) { + TPythonTestEngine engine; + const NUdf::IValueBuilder* vb = &engine.GetValueBuilder(); + + engine.ToMiniKQL<char* (*)(char*, ui32)>( + "def Test():\n" + " def test(str, count):\n" + " return str * count\n" + " return test", + [vb](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + NUdf::TUnboxedValue args[2]; + args[0] = vb->NewString("j"); + args[1] = NUdf::TUnboxedValuePod((ui32)5); + auto result = value.Run(vb, args); + + UNIT_ASSERT(result); + UNIT_ASSERT(5 == result.AsStringRef().Size()); + UNIT_ASSERT_STRINGS_EQUAL(result.AsStringRef(), "jjjjj"); + }); +} + +Y_UNIT_TEST(ToPython) { + TPythonTestEngine engine; + engine.ToPython<i32 (*)(i32)>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new TTestCallable); + }, + "def Test(value):\n" + " assert type(value).__name__ == 'TCallable'\n" + " assert value.__call__ != None\n" + " assert value(-2) == 40\n" + " assert value(-1) == 41\n" + " assert value(0) == 42\n" + " assert value(1) == 43\n" + " assert value(2) == 44\n"); +} + +Y_UNIT_TEST(ToPythonAndBack) { struct TTestCallable: public NUdf::TBoxedValue { NUdf::TUnboxedValue Run( - const NUdf::IValueBuilder* valueBuilder, - const NUdf::TUnboxedValuePod* args) const override - { + const NUdf::IValueBuilder* valueBuilder, + const NUdf::TUnboxedValuePod* 
args) const override { Y_UNUSED(valueBuilder); return NUdf::TUnboxedValuePod(args[0].Get<ui32>() + 42); } }; - Y_UNIT_TEST(FromPyFunction) { - TPythonTestEngine engine; - const NUdf::IValueBuilder* vb = &engine.GetValueBuilder(); + TPythonTestEngine engine; + engine.ToPythonAndBack<i32 (*)(i32)>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new TTestCallable); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + NUdf::TUnboxedValue arg = NUdf::TUnboxedValuePod((ui32)5); + const auto result = value.Run(nullptr, &arg); - engine.ToMiniKQL<char* (*)(char*, ui32)>( - "def Test():\n" - " def test(str, count):\n" - " return str * count\n" - " return test", - [vb](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - NUdf::TUnboxedValue args[2]; - args[0] = vb->NewString("j"); - args[1] = NUdf::TUnboxedValuePod((ui32) 5); - auto result = value.Run(vb, args); - - UNIT_ASSERT(result); - UNIT_ASSERT(5 == result.AsStringRef().Size()); - UNIT_ASSERT_STRINGS_EQUAL(result.AsStringRef(), "jjjjj"); - }); - } - - Y_UNIT_TEST(ToPython) { - TPythonTestEngine engine; - engine.ToPython<i32 (*)(i32)>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new TTestCallable); - }, - "def Test(value):\n" - " assert type(value).__name__ == 'TCallable'\n" - " assert value.__call__ != None\n" - " assert value(-2) == 40\n" - " assert value(-1) == 41\n" - " assert value(0) == 42\n" - " assert value(1) == 43\n" - " assert value(2) == 44\n"); - } - - Y_UNIT_TEST(ToPythonAndBack) { - struct TTestCallable: public NUdf::TBoxedValue { - NUdf::TUnboxedValue Run( - const NUdf::IValueBuilder* valueBuilder, - const NUdf::TUnboxedValuePod* args) const override - { - Y_UNUSED(valueBuilder); - return 
NUdf::TUnboxedValuePod(args[0].Get<ui32>() + 42); - } - }; - - TPythonTestEngine engine; - engine.ToPythonAndBack<i32 (*)(i32)>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new TTestCallable); - }, - "def Test(value): return value", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - NUdf::TUnboxedValue arg = NUdf::TUnboxedValuePod((ui32) 5); - const auto result = value.Run(nullptr, &arg); - - UNIT_ASSERT(result); - UNIT_ASSERT_VALUES_EQUAL(47, result.Get<ui32>()); - }); - } + UNIT_ASSERT(result); + UNIT_ASSERT_VALUES_EQUAL(47, result.Get<ui32>()); + }); } +} // Y_UNIT_TEST_SUITE(TPyCallableTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.cpp b/yql/essentials/udfs/common/python/bindings/py_cast.cpp index 3048f803a72..42237428bb3 100644 --- a/yql/essentials/udfs/common/python/bindings/py_cast.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_cast.cpp @@ -27,253 +27,252 @@ #include <util/string/builder.h> #ifdef HAVE_LONG_LONG -# define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongLongMask // NOLINT(readability-identifier-naming) -# define YQL_PyLong_Asi64 PyLong_AsLongLong -# define YQL_PyLong_Asui64 PyLong_AsUnsignedLongLong + #define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongLongMask // NOLINT(readability-identifier-naming) + #define YQL_PyLong_Asi64 PyLong_AsLongLong + #define YQL_PyLong_Asui64 PyLong_AsUnsignedLongLong #else -# define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongMask // NOLINT(readability-identifier-naming) -# define YQL_PyLong_Asi64 PyLong_AsLong -# define YQL_PyLong_Asui64 PyLong_AsUnsignedLong + #define YQL_PyLong_AsUnsignedMask PyLong_AsUnsignedLongMask // NOLINT(readability-identifier-naming) + #define YQL_PyLong_Asi64 PyLong_AsLong + #define YQL_PyLong_Asui64 PyLong_AsUnsignedLong #endif -#define TO_PYTHON(Format, Type) \ - template <> \ +#define TO_PYTHON(Format, Type) \ + 
template <> \ ::NPython::TPyObjectPtr PyCast<Type>(Type value) { \ - return Py_BuildValue(Format, value); \ + return Py_BuildValue(Format, value); \ } -#define TO_PYTHON_BYTES(Type) \ - template <> \ - ::NPython::TPyObjectPtr PyCast<Type>(const Type& val) { \ - TStringBuf value = val; \ - if (value.data() == nullptr) \ - Py_RETURN_NONE; \ +#define TO_PYTHON_BYTES(Type) \ + template <> \ + ::NPython::TPyObjectPtr PyCast<Type>(const Type& val) { \ + TStringBuf value = val; \ + if (value.data() == nullptr) \ + Py_RETURN_NONE; \ const Py_ssize_t size = static_cast<Py_ssize_t>(value.size()); \ - return PyBytes_FromStringAndSize(value.data(), size); \ + return PyBytes_FromStringAndSize(value.data(), size); \ } -#define TO_PYTHON_UNICODE(Type) \ - template <> \ +#define TO_PYTHON_UNICODE(Type) \ + template <> \ ::NPython::TPyObjectPtr ToPyUnicode<Type>(const Type& val) { \ - TStringBuf value = val; \ - if (value.data() == nullptr) \ - Py_RETURN_NONE; \ + TStringBuf value = val; \ + if (value.data() == nullptr) \ + Py_RETURN_NONE; \ Py_ssize_t size = static_cast<Py_ssize_t>(value.size()); \ - return PyUnicode_FromStringAndSize(value.data(), size); \ + return PyUnicode_FromStringAndSize(value.data(), size); \ } -#define PY_ENSURE_TYPE(Type, Value, Message) \ - do { \ - if (!Py##Type##_Check(Value)) { \ +#define PY_ENSURE_TYPE(Type, Value, Message) \ + do { \ + if (!Py##Type##_Check(Value)) { \ throw yexception() << Message << " " #Type "; Object repr: " \ - << PyObjectRepr(Value); \ - } \ + << PyObjectRepr(Value); \ + } \ } while (0) -#define FROM_PYTHON_FLOAT(Type) \ - template <> \ - Type PyCast<Type>(PyObject* value) { \ - double result = PyFloat_AsDouble(value); \ +#define FROM_PYTHON_FLOAT(Type) \ + template <> \ + Type PyCast<Type>(PyObject * value) { \ + double result = PyFloat_AsDouble(value); \ if (result == -1.0 && PyErr_Occurred()) { \ - PyErr_Clear(); \ - ThrowCastException(value, "Float"); \ - } \ - return static_cast<Type>(result); \ + PyErr_Clear(); \ + 
ThrowCastException(value, "Float"); \ + } \ + return static_cast<Type>(result); \ } -#define FROM_PYTHON_LONG(Type, BigType) \ - template <> \ - Type PyCast<Type>(PyObject* value) { \ - if (PyLong_Check(value)) { \ - auto result = YQL_PyLong_As##BigType(value); \ - if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ - PyErr_Clear(); \ - ThrowCastException(value, "Long"); \ - } \ - if (result < Min<Type>() || result > Max<Type>()) { \ +#define FROM_PYTHON_LONG(Type, BigType) \ + template <> \ + Type PyCast<Type>(PyObject * value) { \ + if (PyLong_Check(value)) { \ + auto result = YQL_PyLong_As##BigType(value); \ + if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ + PyErr_Clear(); \ + ThrowCastException(value, "Long"); \ + } \ + if (result < Min<Type>() || result > Max<Type>()) { \ throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is out of range for " << #Type; \ - } \ - return static_cast<Type>(result); \ - } \ - ThrowCastTypeException(value, "Long"); \ + << " is out of range for " << #Type; \ + } \ + return static_cast<Type>(result); \ + } \ + ThrowCastTypeException(value, "Long"); \ } -#define FROM_PYTHON_INT_OR_LONG(Type, BigType) \ - template <> \ - Type PyCast<Type>(PyObject* value) { \ - if (PyInt_Check(value)) { \ - long result = PyInt_AsLong(value); \ - if (result == -1L && PyErr_Occurred()) { \ - PyErr_Clear(); \ - ThrowCastException(value, "Long"); \ - } \ - if ( \ - static_cast<i64>(Min<long>()) < static_cast<i64>(Min<Type>()) && result < static_cast<long>(Min<Type>()) || \ - static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) && result > static_cast<long>(Max<Type>()) \ - ) { \ - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is out of range for " << #Type; \ - } \ - return static_cast<Type>(result); \ - } else if (PyLong_Check(value)) { \ - auto result = YQL_PyLong_As##BigType(value); \ - if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ - PyErr_Clear(); \ - 
ThrowCastException(value, "Long"); \ - } \ - if (result < Min<Type>() || result > Max<Type>()) { \ - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is out of range for " << #Type; \ - } \ - return static_cast<Type>(result); \ - } \ - ThrowCastTypeException(value, "Long"); \ +#define FROM_PYTHON_INT_OR_LONG(Type, BigType) \ + template <> \ + Type PyCast<Type>(PyObject * value) { \ + if (PyInt_Check(value)) { \ + long result = PyInt_AsLong(value); \ + if (result == -1L && PyErr_Occurred()) { \ + PyErr_Clear(); \ + ThrowCastException(value, "Long"); \ + } \ + if ( \ + static_cast<i64>(Min<long>()) < static_cast<i64>(Min<Type>()) && result < static_cast<long>(Min<Type>()) || \ + static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) && result > static_cast<long>(Max<Type>())) { \ + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for " << #Type; \ + } \ + return static_cast<Type>(result); \ + } else if (PyLong_Check(value)) { \ + auto result = YQL_PyLong_As##BigType(value); \ + if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ + PyErr_Clear(); \ + ThrowCastException(value, "Long"); \ + } \ + if (result < Min<Type>() || result > Max<Type>()) { \ + throw yexception() << "Python object " << PyObjectRepr(value) \ + << " is out of range for " << #Type; \ + } \ + return static_cast<Type>(result); \ + } \ + ThrowCastTypeException(value, "Long"); \ } -#define FROM_PYTHON_BYTES_OR_UTF(Type) \ - template <> \ - Type PyCast<Type>(PyObject* value) { \ - if (PyUnicode_Check(value)) { \ - Py_ssize_t size = 0U; \ - const auto str = PyUnicode_AsUTF8AndSize(value, &size); \ - if (!str || size < 0) { \ - ThrowCastTypeException(value, "String"); \ - } \ - return Type(str, size_t(size)); \ - } else if (PyBytes_Check(value)) { \ - Py_ssize_t size = 0U; \ - char *str = nullptr; \ +#define FROM_PYTHON_BYTES_OR_UTF(Type) \ + template <> \ + Type PyCast<Type>(PyObject * value) { \ + if (PyUnicode_Check(value)) { 
\ + Py_ssize_t size = 0U; \ + const auto str = PyUnicode_AsUTF8AndSize(value, &size); \ + if (!str || size < 0) { \ + ThrowCastTypeException(value, "String"); \ + } \ + return Type(str, size_t(size)); \ + } else if (PyBytes_Check(value)) { \ + Py_ssize_t size = 0U; \ + char* str = nullptr; \ const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \ - if (rc == -1 || size < 0) { \ - ThrowCastTypeException(value, "String"); \ - } \ - return Type(str, size_t(size)); \ - } \ - ThrowCastTypeException(value, "String"); \ + if (rc == -1 || size < 0) { \ + ThrowCastTypeException(value, "String"); \ + } \ + return Type(str, size_t(size)); \ + } \ + ThrowCastTypeException(value, "String"); \ } -#define FROM_PYTHON_BYTES(Type) \ - template <> \ - Type PyCast<Type>(PyObject* value) { \ - PY_ENSURE_TYPE(Bytes, value, "Expected"); \ - char* str = nullptr; \ - Py_ssize_t size = 0; \ +#define FROM_PYTHON_BYTES(Type) \ + template <> \ + Type PyCast<Type>(PyObject * value) { \ + PY_ENSURE_TYPE(Bytes, value, "Expected"); \ + char* str = nullptr; \ + Py_ssize_t size = 0; \ const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \ - if (rc == -1 || size < 0) { \ - ThrowCastTypeException(value, "String"); \ - } \ - return Type(str, size_t(size)); \ + if (rc == -1 || size < 0) { \ + ThrowCastTypeException(value, "String"); \ + } \ + return Type(str, size_t(size)); \ } -#define TRY_FROM_PYTHON_FLOAT(Type) \ - template <> \ - bool TryPyCast<Type>(PyObject* value, Type& result) { \ - double v = PyFloat_AsDouble(value); \ - if (v == -1.0 && PyErr_Occurred()) { \ - PyErr_Clear(); \ - return false; \ - } \ - result = static_cast<Type>(v); \ - return true; \ +#define TRY_FROM_PYTHON_FLOAT(Type) \ + template <> \ + bool TryPyCast<Type>(PyObject * value, Type & result) { \ + double v = PyFloat_AsDouble(value); \ + if (v == -1.0 && PyErr_Occurred()) { \ + PyErr_Clear(); \ + return false; \ + } \ + result = static_cast<Type>(v); \ + return true; \ } -#define TRY_FROM_PYTHON_LONG(Type, 
BigType) \ - template <> \ - bool TryPyCast<Type>(PyObject* value, Type& res) { \ - if (PyLong_Check(value)) { \ - auto result = YQL_PyLong_As##BigType(value); \ +#define TRY_FROM_PYTHON_LONG(Type, BigType) \ + template <> \ + bool TryPyCast<Type>(PyObject * value, Type & res) { \ + if (PyLong_Check(value)) { \ + auto result = YQL_PyLong_As##BigType(value); \ if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ - PyErr_Clear(); \ - return false; \ - } \ - if (result < Min<Type>() || result > Max<Type>()) { \ - return false; \ - } \ - res = static_cast<Type>(result); \ - return true; \ - } \ - return false; \ + PyErr_Clear(); \ + return false; \ + } \ + if (result < Min<Type>() || result > Max<Type>()) { \ + return false; \ + } \ + res = static_cast<Type>(result); \ + return true; \ + } \ + return false; \ } -#define TRY_FROM_PYTHON_INT_OR_LONG(Type, BigType) \ - template <> \ - bool TryPyCast<Type>(PyObject* value, Type& res) { \ - if (PyInt_Check(value)) { \ - long result = PyInt_AsLong(value); \ - if (result == -1L && PyErr_Occurred()) { \ - PyErr_Clear(); \ - return false; \ - } \ - res = static_cast<Type>(result); \ +#define TRY_FROM_PYTHON_INT_OR_LONG(Type, BigType) \ + template <> \ + bool TryPyCast<Type>(PyObject * value, Type & res) { \ + if (PyInt_Check(value)) { \ + long result = PyInt_AsLong(value); \ + if (result == -1L && PyErr_Occurred()) { \ + PyErr_Clear(); \ + return false; \ + } \ + res = static_cast<Type>(result); \ if (result < static_cast<long>(Min<Type>()) || (static_cast<ui64>(Max<long>()) > static_cast<ui64>(Max<Type>()) && result > static_cast<long>(Max<Type>()))) { \ - return false; \ - } \ - return true; \ - } else if (PyLong_Check(value)) { \ - auto result = YQL_PyLong_As##BigType(value); \ - if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ - PyErr_Clear(); \ - return false; \ - } \ - if (result < Min<Type>() || result > Max<Type>()) { \ - return false; \ - } \ - res = static_cast<Type>(result); \ - return true; \ - 
} \ - return false; \ + return false; \ + } \ + return true; \ + } else if (PyLong_Check(value)) { \ + auto result = YQL_PyLong_As##BigType(value); \ + if (result == static_cast<Type>(-1L) && PyErr_Occurred()) { \ + PyErr_Clear(); \ + return false; \ + } \ + if (result < Min<Type>() || result > Max<Type>()) { \ + return false; \ + } \ + res = static_cast<Type>(result); \ + return true; \ + } \ + return false; \ } -#define TRY_FROM_PYTHON_BYTES_OR_UTF(Type) \ - template <> \ - bool TryPyCast(PyObject* value, Type& result) { \ - if (PyUnicode_Check(value)) { \ - Py_ssize_t size = 0U; \ - const auto str = PyUnicode_AsUTF8AndSize(value, &size); \ - if (!str || size < 0) { \ - return false; \ - } \ - result = Type(str, size_t(size)); \ - return true; \ - } else if (PyBytes_Check(value)) { \ - Py_ssize_t size = 0U; \ - char *str = nullptr; \ +#define TRY_FROM_PYTHON_BYTES_OR_UTF(Type) \ + template <> \ + bool TryPyCast(PyObject* value, Type& result) { \ + if (PyUnicode_Check(value)) { \ + Py_ssize_t size = 0U; \ + const auto str = PyUnicode_AsUTF8AndSize(value, &size); \ + if (!str || size < 0) { \ + return false; \ + } \ + result = Type(str, size_t(size)); \ + return true; \ + } else if (PyBytes_Check(value)) { \ + Py_ssize_t size = 0U; \ + char* str = nullptr; \ const auto rc = PyBytes_AsStringAndSize(value, &str, &size); \ - if (rc == -1 || size < 0) { \ - ThrowCastTypeException(value, "String"); \ - } \ - result = Type(str, size_t(size)); \ - return true; \ - } \ - return false; \ + if (rc == -1 || size < 0) { \ + ThrowCastTypeException(value, "String"); \ + } \ + result = Type(str, size_t(size)); \ + return true; \ + } \ + return false; \ } -#define TRY_FROM_PYTHON_STR_OR_UTF(Type) \ - template <> \ - bool TryPyCast(PyObject* value, Type& result) { \ - if (PyUnicode_Check(value)) { \ - const TPyObjectPtr utf8(AsUtf8StringOrThrow(value)); \ - char* str = nullptr; \ - Py_ssize_t size = 0; \ +#define TRY_FROM_PYTHON_STR_OR_UTF(Type) \ + template <> \ + bool 
TryPyCast(PyObject* value, Type& result) { \ + if (PyUnicode_Check(value)) { \ + const TPyObjectPtr utf8(AsUtf8StringOrThrow(value)); \ + char* str = nullptr; \ + Py_ssize_t size = 0; \ int rc = PyBytes_AsStringAndSize(utf8.Get(), &str, &size); \ - if (rc == -1 || size < 0) { \ - return false; \ - } \ - result = Type(str, size_t(size)); \ - return true; \ - } else if (PyBytes_Check(value)) { \ - char* str = nullptr; \ - Py_ssize_t size = 0; \ - int rc = PyBytes_AsStringAndSize(value, &str, &size); \ - if (rc == -1 || size < 0) { \ - return false; \ - } \ - result = Type(str, size_t(size)); \ - return true; \ - } else { \ - return false; \ - } \ + if (rc == -1 || size < 0) { \ + return false; \ + } \ + result = Type(str, size_t(size)); \ + return true; \ + } else if (PyBytes_Check(value)) { \ + char* str = nullptr; \ + Py_ssize_t size = 0; \ + int rc = PyBytes_AsStringAndSize(value, &str, &size); \ + if (rc == -1 || size < 0) { \ + return false; \ + } \ + result = Type(str, size_t(size)); \ + return true; \ + } else { \ + return false; \ + } \ } namespace NPython { @@ -289,7 +288,8 @@ NPython::TPyObjectPtr AsUtf8StringOrThrow(PyObject* obj) { Y_DEFER { PyErr_Clear(); }; - throw yexception() << "Failed to convert the string to UTF-8 format. Original message is:\n" << GetLastErrorAsString() << "\n"; + throw yexception() << "Failed to convert the string to UTF-8 format. 
Original message is:\n" + << GetLastErrorAsString() << "\n"; } return NPython::TPyObjectPtr(utf8String); } @@ -306,7 +306,6 @@ inline void ThrowCastException(PyObject* value, TStringBuf toType) { << GetLastErrorAsString(); } - template <> bool TryPyCast<bool>(PyObject* value, bool& result) { @@ -416,19 +415,19 @@ TO_PYTHON_UNICODE(NUdf::TStringRef) template <typename T> NUdf::TUnboxedValuePod FromPyTz(PyObject* value, T limit, TStringBuf typeName, const TPyCastContext::TPtr& ctx) { PY_ENSURE(PyTuple_Check(value), - "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name); + "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name); Py_ssize_t tupleSize = PyTuple_GET_SIZE(value); PY_ENSURE(tupleSize == 2, - "Expected to get Tuple with 2 elements, but got " - << tupleSize << " elements"); + "Expected to get Tuple with 2 elements, but got " + << tupleSize << " elements"); PyObject* el0 = PyTuple_GET_ITEM(value, 0); PyObject* el1 = PyTuple_GET_ITEM(value, 1); auto num = PyCast<T>(el0); if (num >= limit) { - throw yexception() << "Python object " << PyObjectRepr(el0) \ - << " is out of range for " << typeName; + throw yexception() << "Python object " << PyObjectRepr(el0) + << " is out of range for " << typeName; } auto name = PyCast<NUdf::TStringRef>(el1); @@ -448,217 +447,248 @@ TO_PYTHON("d", double) namespace { TPyObjectPtr ToPyData(const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) + const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) { const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); const auto typeId = inspector.GetTypeId(); switch (typeId) { - case NUdf::TDataType<i8>::Id: return PyCast<i8>(value.Get<i8>()); - case NUdf::TDataType<ui8>::Id: return PyCast<ui8>(value.Get<ui8>()); - case NUdf::TDataType<i16>::Id: return PyCast<i16>(value.Get<i16>()); - case NUdf::TDataType<ui16>::Id: return PyCast<ui16>(value.Get<ui16>()); - case NUdf::TDataType<i32>::Id: return 
PyCast<i32>(value.Get<i32>()); - case NUdf::TDataType<ui32>::Id: return PyCast<ui32>(value.Get<ui32>()); - case NUdf::TDataType<i64>::Id: return PyCast<i64>(value.Get<i64>()); - case NUdf::TDataType<ui64>::Id: return PyCast<ui64>(value.Get<ui64>()); - case NUdf::TDataType<bool>::Id: return PyCast<bool>(value.Get<bool>()); - case NUdf::TDataType<float>::Id: return PyCast<float>(value.Get<float>()); - case NUdf::TDataType<double>::Id: return PyCast<double>(value.Get<double>()); - case NUdf::TDataType<NUdf::TDecimal>::Id: return ToPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale()); - case NUdf::TDataType<const char*>::Id: { - if (ctx->BytesDecodeMode == EBytesDecodeMode::Never) { - return PyCast<NUdf::TStringRef>(value.AsStringRef()); - } else { - auto pyObj = ToPyUnicode<NUdf::TStringRef>(value.AsStringRef()); - if (!pyObj) { - UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << - "Failed to convert to unicode with _yql_bytes_decode_mode='strict':\n" << - GetLastErrorAsString()).c_str() - ); + case NUdf::TDataType<i8>::Id: + return PyCast<i8>(value.Get<i8>()); + case NUdf::TDataType<ui8>::Id: + return PyCast<ui8>(value.Get<ui8>()); + case NUdf::TDataType<i16>::Id: + return PyCast<i16>(value.Get<i16>()); + case NUdf::TDataType<ui16>::Id: + return PyCast<ui16>(value.Get<ui16>()); + case NUdf::TDataType<i32>::Id: + return PyCast<i32>(value.Get<i32>()); + case NUdf::TDataType<ui32>::Id: + return PyCast<ui32>(value.Get<ui32>()); + case NUdf::TDataType<i64>::Id: + return PyCast<i64>(value.Get<i64>()); + case NUdf::TDataType<ui64>::Id: + return PyCast<ui64>(value.Get<ui64>()); + case NUdf::TDataType<bool>::Id: + return PyCast<bool>(value.Get<bool>()); + case NUdf::TDataType<float>::Id: + return PyCast<float>(value.Get<float>()); + case NUdf::TDataType<double>::Id: + return PyCast<double>(value.Get<double>()); + case NUdf::TDataType<NUdf::TDecimal>::Id: + return ToPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale()); + case 
NUdf::TDataType<const char*>::Id: { + if (ctx->BytesDecodeMode == EBytesDecodeMode::Never) { + return PyCast<NUdf::TStringRef>(value.AsStringRef()); + } else { + auto pyObj = ToPyUnicode<NUdf::TStringRef>(value.AsStringRef()); + if (!pyObj) { + UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to convert to unicode with _yql_bytes_decode_mode='strict':\n" + << GetLastErrorAsString()) + .c_str()); + } + return pyObj; } - return pyObj; } - } - case NUdf::TDataType<NUdf::TYson>::Id: { - auto pyObj = PyCast<NUdf::TStringRef>(value.AsStringRef()); - if (ctx->YsonConverterIn) { - TPyObjectPtr pyArgs(PyTuple_New(1)); - PyTuple_SET_ITEM(pyArgs.Get(), 0, pyObj.Release()); - pyObj = PyObject_CallObject(ctx->YsonConverterIn.Get(), pyArgs.Get()); - if (!pyObj) { - UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).c_str()); + case NUdf::TDataType<NUdf::TYson>::Id: { + auto pyObj = PyCast<NUdf::TStringRef>(value.AsStringRef()); + if (ctx->YsonConverterIn) { + TPyObjectPtr pyArgs(PyTuple_New(1)); + PyTuple_SET_ITEM(pyArgs.Get(), 0, pyObj.Release()); + pyObj = PyObject_CallObject(ctx->YsonConverterIn.Get(), pyArgs.Get()); + if (!pyObj) { + UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" + << GetLastErrorAsString()) + .c_str()); + } } - } - return pyObj; - } - case NUdf::TDataType<NUdf::TUuid>::Id: - return PyCast<NUdf::TStringRef>(value.AsStringRef()); - case NUdf::TDataType<NUdf::TJson>::Id: - case NUdf::TDataType<NUdf::TUtf8>::Id: - return ToPyUnicode<NUdf::TStringRef>(value.AsStringRef()); - case NUdf::TDataType<NUdf::TDate>::Id: return PyCast<ui16>(value.Get<ui16>()); - case NUdf::TDataType<NUdf::TDatetime>::Id: return PyCast<ui32>(value.Get<ui32>()); - case NUdf::TDataType<NUdf::TTimestamp>::Id: return PyCast<ui64>(value.Get<ui64>()); - case NUdf::TDataType<NUdf::TInterval>::Id: return PyCast<i64>(value.Get<i64>()); - case NUdf::TDataType<NUdf::TTzDate>::Id: { - TPyObjectPtr pyValue = 
PyCast<ui16>(value.Get<ui16>()); - auto tzId = value.GetTimezoneId(); - auto tzName = ctx->GetTimezoneName(tzId); - return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); - } - case NUdf::TDataType<NUdf::TTzDatetime>::Id: { - TPyObjectPtr pyValue = PyCast<ui32>(value.Get<ui32>()); - auto tzId = value.GetTimezoneId(); - auto tzName = ctx->GetTimezoneName(tzId); - return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); - } - case NUdf::TDataType<NUdf::TTzTimestamp>::Id: { - TPyObjectPtr pyValue = PyCast<ui64>(value.Get<ui64>()); - auto tzId = value.GetTimezoneId(); - auto tzName = ctx->GetTimezoneName(tzId); - return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); - } + return pyObj; + } + case NUdf::TDataType<NUdf::TUuid>::Id: + return PyCast<NUdf::TStringRef>(value.AsStringRef()); + case NUdf::TDataType<NUdf::TJson>::Id: + case NUdf::TDataType<NUdf::TUtf8>::Id: + return ToPyUnicode<NUdf::TStringRef>(value.AsStringRef()); + case NUdf::TDataType<NUdf::TDate>::Id: + return PyCast<ui16>(value.Get<ui16>()); + case NUdf::TDataType<NUdf::TDatetime>::Id: + return PyCast<ui32>(value.Get<ui32>()); + case NUdf::TDataType<NUdf::TTimestamp>::Id: + return PyCast<ui64>(value.Get<ui64>()); + case NUdf::TDataType<NUdf::TInterval>::Id: + return PyCast<i64>(value.Get<i64>()); + case NUdf::TDataType<NUdf::TTzDate>::Id: { + TPyObjectPtr pyValue = PyCast<ui16>(value.Get<ui16>()); + auto tzId = value.GetTimezoneId(); + auto tzName = ctx->GetTimezoneName(tzId); + return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); + } + case NUdf::TDataType<NUdf::TTzDatetime>::Id: { + TPyObjectPtr pyValue = PyCast<ui32>(value.Get<ui32>()); + auto tzId = value.GetTimezoneId(); + auto tzName = ctx->GetTimezoneName(tzId); + return PyTuple_Pack(2, pyValue.Get(), tzName.Get()); + } + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: { + TPyObjectPtr pyValue = PyCast<ui64>(value.Get<ui64>()); + auto tzId = value.GetTimezoneId(); + auto tzName = ctx->GetTimezoneName(tzId); + return PyTuple_Pack(2, pyValue.Get(), 
tzName.Get()); + } } throw yexception() - << "Unsupported type " << typeId; + << "Unsupported type " << typeId; } NUdf::TUnboxedValue FromPyData( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { const NUdf::TDataAndDecimalTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); const auto typeId = inspector.GetTypeId(); switch (typeId) { - case NUdf::TDataType<i8>::Id: return NUdf::TUnboxedValuePod(PyCast<i8>(value)); - case NUdf::TDataType<ui8>::Id: return NUdf::TUnboxedValuePod(PyCast<ui8>(value)); - case NUdf::TDataType<i16>::Id: return NUdf::TUnboxedValuePod(PyCast<i16>(value)); - case NUdf::TDataType<ui16>::Id: return NUdf::TUnboxedValuePod(PyCast<ui16>(value)); - case NUdf::TDataType<i32>::Id: return NUdf::TUnboxedValuePod(PyCast<i32>(value)); - case NUdf::TDataType<ui32>::Id: return NUdf::TUnboxedValuePod(PyCast<ui32>(value)); - case NUdf::TDataType<i64>::Id: return NUdf::TUnboxedValuePod(PyCast<i64>(value)); - case NUdf::TDataType<ui64>::Id: return NUdf::TUnboxedValuePod(PyCast<ui64>(value)); - case NUdf::TDataType<bool>::Id: return NUdf::TUnboxedValuePod(PyCast<bool>(value)); - case NUdf::TDataType<float>::Id: return NUdf::TUnboxedValuePod(PyCast<float>(value)); - case NUdf::TDataType<double>::Id: return NUdf::TUnboxedValuePod(PyCast<double>(value)); - case NUdf::TDataType<NUdf::TDecimal>::Id: return FromPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale()); - case NUdf::TDataType<NUdf::TYson>::Id: { - if (ctx->YsonConverterOut) { - TPyObjectPtr input(value, TPyObjectPtr::ADD_REF); - TPyObjectPtr pyArgs(PyTuple_New(1)); - // PyTuple_SET_ITEM steals reference, so pass ownership to it - PyTuple_SET_ITEM(pyArgs.Get(), 0, input.Release()); - input.ResetSteal(PyObject_CallObject(ctx->YsonConverterOut.Get(), pyArgs.Get())); - if (!input) { - UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" << 
GetLastErrorAsString()).c_str()); + case NUdf::TDataType<i8>::Id: + return NUdf::TUnboxedValuePod(PyCast<i8>(value)); + case NUdf::TDataType<ui8>::Id: + return NUdf::TUnboxedValuePod(PyCast<ui8>(value)); + case NUdf::TDataType<i16>::Id: + return NUdf::TUnboxedValuePod(PyCast<i16>(value)); + case NUdf::TDataType<ui16>::Id: + return NUdf::TUnboxedValuePod(PyCast<ui16>(value)); + case NUdf::TDataType<i32>::Id: + return NUdf::TUnboxedValuePod(PyCast<i32>(value)); + case NUdf::TDataType<ui32>::Id: + return NUdf::TUnboxedValuePod(PyCast<ui32>(value)); + case NUdf::TDataType<i64>::Id: + return NUdf::TUnboxedValuePod(PyCast<i64>(value)); + case NUdf::TDataType<ui64>::Id: + return NUdf::TUnboxedValuePod(PyCast<ui64>(value)); + case NUdf::TDataType<bool>::Id: + return NUdf::TUnboxedValuePod(PyCast<bool>(value)); + case NUdf::TDataType<float>::Id: + return NUdf::TUnboxedValuePod(PyCast<float>(value)); + case NUdf::TDataType<double>::Id: + return NUdf::TUnboxedValuePod(PyCast<double>(value)); + case NUdf::TDataType<NUdf::TDecimal>::Id: + return FromPyDecimal(ctx, value, inspector.GetPrecision(), inspector.GetScale()); + case NUdf::TDataType<NUdf::TYson>::Id: { + if (ctx->YsonConverterOut) { + TPyObjectPtr input(value, TPyObjectPtr::ADD_REF); + TPyObjectPtr pyArgs(PyTuple_New(1)); + // PyTuple_SET_ITEM steals reference, so pass ownership to it + PyTuple_SET_ITEM(pyArgs.Get(), 0, input.Release()); + input.ResetSteal(PyObject_CallObject(ctx->YsonConverterOut.Get(), pyArgs.Get())); + if (!input) { + UdfTerminate((TStringBuilder() << ctx->PyCtx->Pos << "Failed to execute:\n" + << GetLastErrorAsString()) + .c_str()); + } + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(input.Get())); } - return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(input.Get())); } - } #if PY_MAJOR_VERSION >= 3 - case NUdf::TDataType<const char*>::Id: - return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); - case NUdf::TDataType<NUdf::TUtf8>::Id: - case 
NUdf::TDataType<NUdf::TJson>::Id: - if (PyUnicode_Check(value)) { - const TPyObjectPtr uif8(AsUtf8StringOrThrow(value)); - return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(uif8.Get())); - } - throw yexception() << "Python object " << PyObjectRepr(value) << " has invalid value for unicode"; + case NUdf::TDataType<const char*>::Id: + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); + case NUdf::TDataType<NUdf::TUtf8>::Id: + case NUdf::TDataType<NUdf::TJson>::Id: + if (PyUnicode_Check(value)) { + const TPyObjectPtr uif8(AsUtf8StringOrThrow(value)); + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(uif8.Get())); + } + throw yexception() << "Python object " << PyObjectRepr(value) << " has invalid value for unicode"; #else - case NUdf::TDataType<const char*>::Id: - case NUdf::TDataType<NUdf::TJson>::Id: - case NUdf::TDataType<NUdf::TUtf8>::Id: { - if (PyUnicode_Check(value)) { - const TPyObjectPtr utf8(AsUtf8StringOrThrow(value)); - return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(utf8.Get())); - } + case NUdf::TDataType<const char*>::Id: + case NUdf::TDataType<NUdf::TJson>::Id: + case NUdf::TDataType<NUdf::TUtf8>::Id: { + if (PyUnicode_Check(value)) { + const TPyObjectPtr utf8(AsUtf8StringOrThrow(value)); + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(utf8.Get())); + } - if ((typeId == NUdf::TDataType<NUdf::TUtf8>::Id || typeId == NUdf::TDataType<NUdf::TJson>::Id) && - PyBytes_Check(value) && !NYql::IsUtf8(std::string_view(PyBytes_AS_STRING(value), static_cast<size_t>(PyBytes_GET_SIZE(value))))) { - throw yexception() << "Python string " << PyObjectRepr(value) << " is invalid for Utf8/Json"; - } + if ((typeId == NUdf::TDataType<NUdf::TUtf8>::Id || typeId == NUdf::TDataType<NUdf::TJson>::Id) && + PyBytes_Check(value) && !NYql::IsUtf8(std::string_view(PyBytes_AS_STRING(value), static_cast<size_t>(PyBytes_GET_SIZE(value))))) { + throw yexception() << "Python string " << PyObjectRepr(value) << " is 
invalid for Utf8/Json"; + } - return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); - } + return ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); + } #endif - case NUdf::TDataType<NUdf::TUuid>::Id: { - const auto& ret = ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); - if (ret.AsStringRef().Size() != 16) { - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " has invalid value for Uuid"; + case NUdf::TDataType<NUdf::TUuid>::Id: { + const auto& ret = ctx->ValueBuilder->NewString(PyCast<NUdf::TStringRef>(value)); + if (ret.AsStringRef().Size() != 16) { + throw yexception() << "Python object " << PyObjectRepr(value) + << " has invalid value for Uuid"; + } + + return ret; } + case NUdf::TDataType<NUdf::TDate>::Id: { + auto num = PyCast<ui16>(value); + if (num >= NUdf::MAX_DATE) { + throw yexception() << "Python object " << PyObjectRepr(value) + << " is out of range for Date"; + } - return ret; - } - case NUdf::TDataType<NUdf::TDate>::Id: { - auto num = PyCast<ui16>(value); - if (num >= NUdf::MAX_DATE) { - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is out of range for Date"; + return NUdf::TUnboxedValuePod(num); } - return NUdf::TUnboxedValuePod(num); - } + case NUdf::TDataType<NUdf::TDatetime>::Id: { + auto num = PyCast<ui32>(value); + if (num >= NUdf::MAX_DATETIME) { + throw yexception() << "Python object " << PyObjectRepr(value) + << " is out of range for Datetime"; + } - case NUdf::TDataType<NUdf::TDatetime>::Id: { - auto num = PyCast<ui32>(value); - if (num >= NUdf::MAX_DATETIME) { - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is out of range for Datetime"; + return NUdf::TUnboxedValuePod(num); } - return NUdf::TUnboxedValuePod(num); - } + case NUdf::TDataType<NUdf::TTimestamp>::Id: { + auto num = PyCast<ui64>(value); + if (num >= NUdf::MAX_TIMESTAMP) { + throw yexception() << "Python object " << PyObjectRepr(value) + << " is out of range for 
Timestamp"; + } - case NUdf::TDataType<NUdf::TTimestamp>::Id: { - auto num = PyCast<ui64>(value); - if (num >= NUdf::MAX_TIMESTAMP) { - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is out of range for Timestamp"; + return NUdf::TUnboxedValuePod(num); } - return NUdf::TUnboxedValuePod(num); - } + case NUdf::TDataType<NUdf::TInterval>::Id: { + auto num = PyCast<i64>(value); + if (num <= -(i64)NUdf::MAX_TIMESTAMP || num >= (i64)NUdf::MAX_TIMESTAMP) { + throw yexception() << "Python object " << PyObjectRepr(value) + << " is out of range for Interval"; + } - case NUdf::TDataType<NUdf::TInterval>::Id: { - auto num = PyCast<i64>(value); - if (num <= -(i64)NUdf::MAX_TIMESTAMP || num >= (i64)NUdf::MAX_TIMESTAMP) { - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is out of range for Interval"; + return NUdf::TUnboxedValuePod(num); } - return NUdf::TUnboxedValuePod(num); - } - - case NUdf::TDataType<NUdf::TTzDate>::Id: - return FromPyTz<ui16>(value, NUdf::MAX_DATE, TStringBuf("TzDate"), ctx); - case NUdf::TDataType<NUdf::TTzDatetime>::Id: - return FromPyTz<ui32>(value, NUdf::MAX_DATETIME, TStringBuf("TzDatetime"), ctx); - case NUdf::TDataType<NUdf::TTzTimestamp>::Id: - return FromPyTz<ui64>(value, NUdf::MAX_TIMESTAMP, TStringBuf("TzTimestamp"), ctx); + case NUdf::TDataType<NUdf::TTzDate>::Id: + return FromPyTz<ui16>(value, NUdf::MAX_DATE, TStringBuf("TzDate"), ctx); + case NUdf::TDataType<NUdf::TTzDatetime>::Id: + return FromPyTz<ui32>(value, NUdf::MAX_DATETIME, TStringBuf("TzDatetime"), ctx); + case NUdf::TDataType<NUdf::TTzTimestamp>::Id: + return FromPyTz<ui64>(value, NUdf::MAX_TIMESTAMP, TStringBuf("TzTimestamp"), ctx); } throw yexception() - << "Unsupported type " << typeId; + << "Unsupported type " << typeId; } TPyObjectPtr ToPyTagged( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const 
NUdf::TUnboxedValuePod& value) { const NUdf::TTaggedTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); return ToPyObject(ctx, inspector.GetBaseType(), value); } NUdf::TUnboxedValue FromPyTagged( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { const NUdf::TTaggedTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); return FromPyObject(ctx, inspector.GetBaseType(), value).Release(); @@ -682,7 +712,7 @@ TPyObjectPtr ToPyList( auto pyItem = ToPyObject(ctx, itemType, item); if (PyList_Append(list.Get(), pyItem.Get()) < 0) { throw yexception() << "Can't append item to list" - << GetLastErrorAsString(); + << GetLastErrorAsString(); } } @@ -690,8 +720,8 @@ TPyObjectPtr ToPyList( } NUdf::TUnboxedValue FromPyList( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { const NUdf::TListTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); @@ -699,10 +729,10 @@ NUdf::TUnboxedValue FromPyList( // eager list to list conversion auto itemType = inspector.GetItemType(); Py_ssize_t cnt = PyList_GET_SIZE(value); - NUdf::TUnboxedValue *items = nullptr; + NUdf::TUnboxedValue* items = nullptr; const auto list = ctx->ValueBuilder->NewArray(cnt, items); for (Py_ssize_t i = 0; i < cnt; ++i) { - PyObject *item = PyList_GET_ITEM(value, i); + PyObject* item = PyList_GET_ITEM(value, i); *items++ = FromPyObject(ctx, itemType, item); } return list; @@ -712,10 +742,10 @@ NUdf::TUnboxedValue FromPyList( // eager tuple to list conversion auto itemType = inspector.GetItemType(); Py_ssize_t cnt = PyTuple_GET_SIZE(value); - NUdf::TUnboxedValue *items = nullptr; + NUdf::TUnboxedValue* items = nullptr; const auto list = ctx->ValueBuilder->NewArray(cnt, items); for (Py_ssize_t i = 0; i < cnt; ++i) { - PyObject *item = PyTuple_GET_ITEM(value, i); + PyObject* item = 
PyTuple_GET_ITEM(value, i); *items++ = FromPyObject(ctx, itemType, item); } return list; @@ -748,13 +778,14 @@ NUdf::TUnboxedValue FromPyList( } throw yexception() << "Expected list, tuple, generator, generator factory, " - "iterator or iterable object, but got: " << PyObjectRepr(value); + "iterator or iterable object, but got: " + << PyObjectRepr(value); } TPyObjectPtr ToPyOptional( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) { if (!value) { return TPyObjectPtr(Py_None, TPyObjectPtr::ADD_REF); @@ -765,8 +796,8 @@ TPyObjectPtr ToPyOptional( } NUdf::TUnboxedValue FromPyOptional( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { if (value == Py_None) { return NUdf::TUnboxedValue(); @@ -786,7 +817,7 @@ TPyObjectPtr ToPyDict( const auto valueType = inspector.GetValueType(); if (NUdf::ETypeKind::Void == ctx->PyCtx->TypeInfoHelper->GetTypeKind(valueType)) { - if (ctx->LazyInputObjects) { // TODO + if (ctx->LazyInputObjects) { // TODO return ToPyLazySet(ctx, keyType, value); } @@ -820,15 +851,14 @@ TPyObjectPtr ToPyDict( } NUdf::TUnboxedValue FromPyDict( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { const NUdf::TDictTypeInspector inspector(*ctx->PyCtx->TypeInfoHelper, type); const auto keyType = inspector.GetKeyType(); const auto valueType = inspector.GetValueType(); - if ((PyList_Check(value) || PyTuple_Check(value) || value->ob_type == &PyThinListType || value->ob_type == &PyLazyListType) - && ctx->PyCtx->TypeInfoHelper->GetTypeKind(keyType) == NUdf::ETypeKind::Data) { + if ((PyList_Check(value) || PyTuple_Check(value) || value->ob_type == &PyThinListType || value->ob_type == &PyLazyListType) && 
ctx->PyCtx->TypeInfoHelper->GetTypeKind(keyType) == NUdf::ETypeKind::Data) { const NUdf::TDataTypeInspector keiIns(*ctx->PyCtx->TypeInfoHelper, keyType); if (NUdf::GetDataTypeInfo(NUdf::GetDataSlot(keiIns.GetTypeId())).Features & NUdf::EDataTypeFeatures::IntegralType) { return FromPySequence(ctx, valueType, keiIns.GetTypeId(), value); @@ -845,7 +875,7 @@ NUdf::TUnboxedValue FromPyDict( return FromPyMapping(ctx, keyType, valueType, value); } - throw yexception() << "Can't cast "<< PyObjectRepr(value) << " to dict."; + throw yexception() << "Can't cast " << PyObjectRepr(value) << " to dict."; } TPyObjectPtr ToPyNull( @@ -860,8 +890,8 @@ TPyObjectPtr ToPyNull( } NUdf::TUnboxedValue FromPyNull( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { if (value == Py_None) { return NYql::NUdf::TUnboxedValuePod(); @@ -872,23 +902,36 @@ NUdf::TUnboxedValue FromPyNull( } // namespace TPyObjectPtr ToPyObject( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) { switch (ctx->PyCtx->TypeInfoHelper->GetTypeKind(type)) { - case NUdf::ETypeKind::Data: return ToPyData(ctx, type, value); - case NUdf::ETypeKind::Tagged: return ToPyTagged(ctx, type, value); - case NUdf::ETypeKind::Tuple: return ToPyTuple(ctx, type, value); - case NUdf::ETypeKind::Struct: return ToPyStruct(ctx, type, value); - case NUdf::ETypeKind::List: return ToPyList(ctx, type, value); - case NUdf::ETypeKind::Optional: return ToPyOptional(ctx, type, value); - case NUdf::ETypeKind::Dict: return ToPyDict(ctx, type, value); - case NUdf::ETypeKind::Callable: return ToPyCallable(ctx, type, value); - case NUdf::ETypeKind::Resource: return ToPyResource(ctx, type, value); - case NUdf::ETypeKind::Void: return ToPyVoid(ctx, type, value); - case NUdf::ETypeKind::Stream: return 
ToPyStream(ctx, type, value); - case NUdf::ETypeKind::Variant: return ToPyVariant(ctx, type, value); - case NUdf::ETypeKind::Null: return ToPyNull(ctx, type, value); + case NUdf::ETypeKind::Data: + return ToPyData(ctx, type, value); + case NUdf::ETypeKind::Tagged: + return ToPyTagged(ctx, type, value); + case NUdf::ETypeKind::Tuple: + return ToPyTuple(ctx, type, value); + case NUdf::ETypeKind::Struct: + return ToPyStruct(ctx, type, value); + case NUdf::ETypeKind::List: + return ToPyList(ctx, type, value); + case NUdf::ETypeKind::Optional: + return ToPyOptional(ctx, type, value); + case NUdf::ETypeKind::Dict: + return ToPyDict(ctx, type, value); + case NUdf::ETypeKind::Callable: + return ToPyCallable(ctx, type, value); + case NUdf::ETypeKind::Resource: + return ToPyResource(ctx, type, value); + case NUdf::ETypeKind::Void: + return ToPyVoid(ctx, type, value); + case NUdf::ETypeKind::Stream: + return ToPyStream(ctx, type, value); + case NUdf::ETypeKind::Variant: + return ToPyVariant(ctx, type, value); + case NUdf::ETypeKind::Null: + return ToPyNull(ctx, type, value); default: { ::TStringBuilder sb; sb << "Failed to export: "; @@ -899,23 +942,36 @@ TPyObjectPtr ToPyObject( } NUdf::TUnboxedValue FromPyObject( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { switch (ctx->PyCtx->TypeInfoHelper->GetTypeKind(type)) { - case NUdf::ETypeKind::Data: return FromPyData(ctx, type, value); - case NUdf::ETypeKind::Tagged: return FromPyTagged(ctx, type, value); - case NUdf::ETypeKind::Tuple: return FromPyTuple(ctx, type, value); - case NUdf::ETypeKind::Struct: return FromPyStruct(ctx, type, value); - case NUdf::ETypeKind::List: return FromPyList(ctx, type, value); - case NUdf::ETypeKind::Optional: return FromPyOptional(ctx, type, value); - case NUdf::ETypeKind::Dict: return FromPyDict(ctx, type, value); - case NUdf::ETypeKind::Callable: return FromPyCallable(ctx, type, 
value); - case NUdf::ETypeKind::Resource: return FromPyResource(ctx, type, value); - case NUdf::ETypeKind::Void: return FromPyVoid(ctx, type, value); - case NUdf::ETypeKind::Stream: return FromPyStream(ctx, type, TPyObjectPtr(value, TPyObjectPtr::ADD_REF), nullptr, nullptr, nullptr); - case NUdf::ETypeKind::Variant: return FromPyVariant(ctx, type, value); - case NUdf::ETypeKind::Null: return FromPyNull(ctx, type, value); + case NUdf::ETypeKind::Data: + return FromPyData(ctx, type, value); + case NUdf::ETypeKind::Tagged: + return FromPyTagged(ctx, type, value); + case NUdf::ETypeKind::Tuple: + return FromPyTuple(ctx, type, value); + case NUdf::ETypeKind::Struct: + return FromPyStruct(ctx, type, value); + case NUdf::ETypeKind::List: + return FromPyList(ctx, type, value); + case NUdf::ETypeKind::Optional: + return FromPyOptional(ctx, type, value); + case NUdf::ETypeKind::Dict: + return FromPyDict(ctx, type, value); + case NUdf::ETypeKind::Callable: + return FromPyCallable(ctx, type, value); + case NUdf::ETypeKind::Resource: + return FromPyResource(ctx, type, value); + case NUdf::ETypeKind::Void: + return FromPyVoid(ctx, type, value); + case NUdf::ETypeKind::Stream: + return FromPyStream(ctx, type, TPyObjectPtr(value, TPyObjectPtr::ADD_REF), nullptr, nullptr, nullptr); + case NUdf::ETypeKind::Variant: + return FromPyVariant(ctx, type, value); + case NUdf::ETypeKind::Null: + return FromPyNull(ctx, type, value); default: { ::TStringBuilder sb; sb << "Failed to import: "; @@ -926,10 +982,10 @@ NUdf::TUnboxedValue FromPyObject( } TPyObjectPtr ToPyArgs( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, - const NUdf::TUnboxedValuePod* args, - const NUdf::TCallableTypeInspector& inspector) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod* args, + const NUdf::TCallableTypeInspector& inspector) { const auto argsCount = inspector.GetArgsCount(); TPyObjectPtr tuple(PyTuple_New(argsCount)); @@ -955,11 +1011,11 @@ TPyObjectPtr 
ToPyArgs( } void FromPyArgs( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, - PyObject* pyArgs, - NUdf::TUnboxedValue* cArgs, - const NUdf::TCallableTypeInspector& inspector) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + PyObject* pyArgs, + NUdf::TUnboxedValue* cArgs, + const NUdf::TCallableTypeInspector& inspector) { PY_ENSURE_TYPE(Tuple, pyArgs, "Expected"); @@ -968,9 +1024,10 @@ void FromPyArgs( ui32 pyArgsCount = static_cast<ui32>(PyTuple_GET_SIZE(pyArgs)); PY_ENSURE(argsCount - optArgsCount <= pyArgsCount && pyArgsCount <= argsCount, - "arguments count missmatch: " - "min " << (argsCount - optArgsCount) << ", max " << argsCount - << ", got " << pyArgsCount); + "arguments count missmatch: " + "min " + << (argsCount - optArgsCount) << ", max " << argsCount + << ", got " << pyArgsCount); for (ui32 i = 0; i < pyArgsCount; i++) { PyObject* item = PyTuple_GET_ITEM(pyArgs, i); @@ -982,10 +1039,12 @@ void FromPyArgs( } } -class TDummyMemoryLock : public IMemoryLock { +class TDummyMemoryLock: public IMemoryLock { public: - void Acquire() override {} - void Release() override {} + void Acquire() override { + } + void Release() override { + } }; TPyCastContext::TPyCastContext( @@ -1023,4 +1082,4 @@ const TPyObjectPtr& TPyCastContext::GetTimezoneName(ui32 id) { return x; } -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_cast.h b/yql/essentials/udfs/common/python/bindings/py_cast.h index e6850c74040..5dc4182ae91 100644 --- a/yql/essentials/udfs/common/python/bindings/py_cast.h +++ b/yql/essentials/udfs/common/python/bindings/py_cast.h @@ -20,26 +20,26 @@ template <typename T> TPyObjectPtr ToPyUnicode(const T& value); TPyObjectPtr ToPyObject( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); 
NKikimr::NUdf::TUnboxedValue FromPyObject( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - PyObject* value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* value); TPyObjectPtr ToPyArgs( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod* args, - const NKikimr::NUdf::TCallableTypeInspector& inspector); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod* args, + const NKikimr::NUdf::TCallableTypeInspector& inspector); void FromPyArgs( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - PyObject* pyArgs, - NKikimr::NUdf::TUnboxedValue* cArgs, - const NKikimr::NUdf::TCallableTypeInspector& inspector); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* pyArgs, + NKikimr::NUdf::TUnboxedValue* cArgs, + const NKikimr::NUdf::TCallableTypeInspector& inspector); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp index 3c6514aea02..dcd7cb8da40 100644 --- a/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_cast_ut.cpp @@ -25,128 +25,127 @@ UnicodeEncodeError: 'utf-8' codec can't encode character '\udc00' in position 0: )"; UNIT_ASSERT_EXCEPTION_CONTAINS( - engine.ToMiniKQL<TType>( - StripString(TString(programToRun)), - [](const NUdf::TUnboxedValuePod& value) { - Y_UNUSED(value); - }), - yexception, StripString(TString(expectedError))); + engine.ToMiniKQL<TType>( + StripString(TString(programToRun)), + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, StripString(TString(expectedError))); } } // namespace Y_UNIT_TEST_SUITE(TPyCastTest) { - Y_UNIT_TEST(FromPyStrToInt) { - TPythonTestEngine engine; - UNIT_ASSERT_EXCEPTION_CONTAINS( - 
engine.ToMiniKQL<i32>( - "def Test():\n" - " return '123a'", - [](const NUdf::TUnboxedValuePod& value) { - Y_UNUSED(value); - }), - yexception, "str"); - } - - Y_UNIT_TEST(FromPyTupleToLong) { - TPythonTestEngine engine; - UNIT_ASSERT_EXCEPTION_CONTAINS( - engine.ToMiniKQL<ui64>( - "def Test():\n" - " return 1, 1", - [](const NUdf::TUnboxedValuePod& value) { - Y_UNUSED(value); - }), - yexception, "tuple"); - } - - Y_UNIT_TEST(FromPyFuncToString) { - TPythonTestEngine engine; - UNIT_ASSERT_EXCEPTION_CONTAINS( - engine.ToMiniKQL<char*>( - "def f():\n" - " return 42\n" - "def Test():\n" - " return f", - [](const NUdf::TUnboxedValuePod& value) { - Y_UNUSED(value); - }), - yexception, "function"); - } - - Y_UNIT_TEST(FromPyNoneToString) { - TPythonTestEngine engine; - UNIT_ASSERT_EXCEPTION_CONTAINS( - engine.ToMiniKQL<char*>( - "def Test():\n" - " return None", - [](const NUdf::TUnboxedValuePod& value) { - Y_UNUSED(value); - }), - yexception, "None"); - } - - Y_UNIT_TEST(BadFromPythonFloat) { - TPythonTestEngine engine; - UNIT_ASSERT_EXCEPTION_CONTAINS( - engine.ToMiniKQL<float>( - "def Test():\n" - " return '3 <dot> 1415926'", - [](const NUdf::TUnboxedValuePod& value) { - Y_UNUSED(value); - Y_UNREACHABLE(); - }), - yexception, "Cast error object '3 <dot> 1415926' to Float"); - } +Y_UNIT_TEST(FromPyStrToInt) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<i32>( + "def Test():\n" + " return '123a'", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "str"); +} + +Y_UNIT_TEST(FromPyTupleToLong) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<ui64>( + "def Test():\n" + " return 1, 1", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "tuple"); +} + +Y_UNIT_TEST(FromPyFuncToString) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<char*>( + "def f():\n" + " return 42\n" + "def Test():\n" + " return f", + 
[](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "function"); +} + +Y_UNIT_TEST(FromPyNoneToString) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<char*>( + "def Test():\n" + " return None", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + }), + yexception, "None"); +} + +Y_UNIT_TEST(BadFromPythonFloat) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<float>( + "def Test():\n" + " return '3 <dot> 1415926'", + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + Y_UNREACHABLE(); + }), + yexception, "Cast error object '3 <dot> 1415926' to Float"); +} #if PY_MAJOR_VERSION >= 3 -# define RETVAL "-1" + #define RETVAL "-1" #else -# define RETVAL "-18446744073709551616L" + #define RETVAL "-18446744073709551616L" #endif - Y_UNIT_TEST(BadFromPythonLong) { - TPythonTestEngine engine; - UNIT_ASSERT_EXCEPTION_CONTAINS( - engine.ToMiniKQL<ui64>( - "def Test():\n" - " return " RETVAL, - [](const NUdf::TUnboxedValuePod& value) { - Y_UNUSED(value); - Y_UNREACHABLE(); - }), - yexception, "Cast error object " RETVAL " to Long"); - } - - Y_UNIT_TEST(BadFromPythonUtf8) { - TestBadUtf8Encode<NUdf::TUtf8>(); - } - - Y_UNIT_TEST(BadFromPythonJson) { - TestBadUtf8Encode<NUdf::TJson>(); - } - - Y_UNIT_TEST(BadToPythonJson) { - TPythonTestEngine engine; - UNIT_ASSERT_EXCEPTION_CONTAINS( - engine.UnsafeCall<void(NUdf::TJson)>( - [](const TType*, const NUdf::IValueBuilder& builder) { - // XXX: The value below is built with the - // following expression: - // $query = "a=1&t%EDb=2"; - // $qdict = Url::QueryStringToDict($query); - // $qyson = Yson::From($qdict); - // $badJson = Yson::SerializeJson($qyson); - // - // For more info, see YQL-20231 and YQL-20220. 
- constexpr TStringBuf badJson = "\x7b\x22\x61\x22\x3a\x5b\x22\x31\x22\x5d\x2c\x22\x74\xed\x62\x22\x3a\x5b\x22\x32\x22\x5d\x7d"; - return builder.NewString(badJson); - }, - "def Test(arg):\n" - " pass", - [](const NUdf::TUnboxedValuePod&) { - Y_UNREACHABLE(); - } - ), - yexception, "Failed to export Json given as args[0]"); - } +Y_UNIT_TEST(BadFromPythonLong) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.ToMiniKQL<ui64>( + "def Test():\n" + " return " RETVAL, + [](const NUdf::TUnboxedValuePod& value) { + Y_UNUSED(value); + Y_UNREACHABLE(); + }), + yexception, "Cast error object " RETVAL " to Long"); +} + +Y_UNIT_TEST(BadFromPythonUtf8) { + TestBadUtf8Encode<NUdf::TUtf8>(); +} + +Y_UNIT_TEST(BadFromPythonJson) { + TestBadUtf8Encode<NUdf::TJson>(); +} + +Y_UNIT_TEST(BadToPythonJson) { + TPythonTestEngine engine; + UNIT_ASSERT_EXCEPTION_CONTAINS( + engine.UnsafeCall<void(NUdf::TJson)>( + [](const TType*, const NUdf::IValueBuilder& builder) { + // XXX: The value below is built with the + // following expression: + // $query = "a=1&t%EDb=2"; + // $qdict = Url::QueryStringToDict($query); + // $qyson = Yson::From($qdict); + // $badJson = Yson::SerializeJson($qyson); + // + // For more info, see YQL-20231 and YQL-20220. 
+ constexpr TStringBuf badJson = "\x7b\x22\x61\x22\x3a\x5b\x22\x31\x22\x5d\x2c\x22\x74\xed\x62\x22\x3a\x5b\x22\x32\x22\x5d\x7d"; + return builder.NewString(badJson); + }, + "def Test(arg):\n" + " pass", + [](const NUdf::TUnboxedValuePod&) { + Y_UNREACHABLE(); + }), + yexception, "Failed to export Json given as args[0]"); +} } // Y_UNIT_TEST_SUITE(TPyCastTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_ctx.h b/yql/essentials/udfs/common/python/bindings/py_ctx.h index 7958fc1f815..d832d0b2def 100644 --- a/yql/essentials/udfs/common/python/bindings/py_ctx.h +++ b/yql/essentials/udfs/common/python/bindings/py_ctx.h @@ -79,7 +79,7 @@ struct TPyContext: public TSimpleRefCount<TPyContext> { } void Cleanup() { - for (auto& o: CleanupList) { + for (auto& o : CleanupList) { o.Cleanup(); } CleanupList.Clear(); @@ -91,7 +91,7 @@ struct TPyContext: public TSimpleRefCount<TPyContext> { }; struct TPyCastContext: public TSimpleRefCount<TPyCastContext> { - const NKikimr::NUdf::IValueBuilder *const ValueBuilder; + const NKikimr::NUdf::IValueBuilder* const ValueBuilder; const TPyContext::TPtr PyCtx; std::unordered_map<const NKikimr::NUdf::TType*, TPyObjectPtr> StructTypes; bool LazyInputObjects = true; @@ -103,9 +103,9 @@ struct TPyCastContext: public TSimpleRefCount<TPyCastContext> { THolder<IMemoryLock> MemoryLock; TPyCastContext( - const NKikimr::NUdf::IValueBuilder* builder, - TPyContext::TPtr pyCtx, - THolder<IMemoryLock> memoryLock = {}); + const NKikimr::NUdf::IValueBuilder* builder, + TPyContext::TPtr pyCtx, + THolder<IMemoryLock> memoryLock = {}); ~TPyCastContext(); @@ -117,4 +117,4 @@ struct TPyCastContext: public TSimpleRefCount<TPyCastContext> { using TPyCastContextPtr = TPyCastContext::TPtr; -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp index 3f0298013a0..2440f4c281a 100644 --- 
a/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_decimal_ut.cpp @@ -5,118 +5,113 @@ using namespace NPython; Y_UNIT_TEST_SUITE(TPyDecimalTest) { - Y_UNIT_TEST(FromPyZero) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDecimalDataType<12,5>>( - R"( +Y_UNIT_TEST(FromPyZero) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<12, 5>>( + R"( from decimal import Decimal def Test(): return Decimal() )", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(!value.GetInt128()); - }); - } + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(!value.GetInt128()); + }); +} - Y_UNIT_TEST(FromPyPi) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDecimalDataType<28,18>>( - R"( +Y_UNIT_TEST(FromPyPi) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<28, 18>>( + R"( from decimal import Decimal def Test(): return Decimal('3.141592653589793238') )", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.GetInt128() == 3141592653589793238LL); - }); - } + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.GetInt128() == 3141592653589793238LL); + }); +} - Y_UNIT_TEST(FromPyTini) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDecimalDataType<35,35>>( - R"( +Y_UNIT_TEST(FromPyTini) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<35, 35>>( + R"( from decimal import Decimal def Test(): return Decimal('-.00000000000000000000000000000000001') )", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.GetInt128() == -1); - }); - } + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.GetInt128() == -1); + }); +} - Y_UNIT_TEST(FromPyNan) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDecimalDataType<35,34>>( - R"( +Y_UNIT_TEST(FromPyNan) { + 
TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<35, 34>>( + R"( from decimal import Decimal def Test(): return Decimal('NaN') )", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.GetInt128() == NYql::NDecimal::Nan()); - }); - } + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.GetInt128() == NYql::NDecimal::Nan()); + }); +} - Y_UNIT_TEST(FromPyInf) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDecimalDataType<35,34>>( - R"( +Y_UNIT_TEST(FromPyInf) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDecimalDataType<35, 34>>( + R"( from decimal import Decimal def Test(): return Decimal('-inf') )", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.GetInt128() == -NYql::NDecimal::Inf()); - }); - } + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.GetInt128() == -NYql::NDecimal::Inf()); + }); +} - Y_UNIT_TEST(ToPyZero) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDecimalDataType<7,7>>( - [](const TType*, const NUdf::IValueBuilder&) { - return NUdf::TUnboxedValuePod::Zero(); - }, - "def Test(value): assert value.is_zero()" - ); - } +Y_UNIT_TEST(ToPyZero) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<7, 7>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod::Zero(); + }, + "def Test(value): assert value.is_zero()"); +} - Y_UNIT_TEST(ToPyPi) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDecimalDataType<20,18>>( - [](const TType*, const NUdf::IValueBuilder&) { - return NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(3141592653589793238LL)); - }, - "def Test(value): assert str(value) == '3.141592653589793238'" - ); - } +Y_UNIT_TEST(ToPyPi) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<20, 18>>( + [](const TType*, const NUdf::IValueBuilder&) { + return 
NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(3141592653589793238LL)); + }, + "def Test(value): assert str(value) == '3.141592653589793238'"); +} - Y_UNIT_TEST(ToPyTini) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDecimalDataType<35,35>>( - [](const TType*, const NUdf::IValueBuilder&) { - return NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(-1)); - }, - "def Test(value): assert format(value, '.35f') == '-0.00000000000000000000000000000000001'" - ); - } +Y_UNIT_TEST(ToPyTini) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<35, 35>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod(NYql::NDecimal::TInt128(-1)); + }, + "def Test(value): assert format(value, '.35f') == '-0.00000000000000000000000000000000001'"); +} - Y_UNIT_TEST(ToPyNan) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDecimalDataType<2,2>>( - [](const TType*, const NUdf::IValueBuilder&) { - return NUdf::TUnboxedValuePod(NYql::NDecimal::Nan()); - }, - "def Test(value): assert value.is_nan()" - ); - } +Y_UNIT_TEST(ToPyNan) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<2, 2>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod(NYql::NDecimal::Nan()); + }, + "def Test(value): assert value.is_nan()"); +} - Y_UNIT_TEST(ToPyInf) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDecimalDataType<30,0>>( - [](const TType*, const NUdf::IValueBuilder&) { - return NUdf::TUnboxedValuePod(-NYql::NDecimal::Inf()); - }, - "def Test(value): assert value.is_infinite() and value.is_signed()" - ); - } +Y_UNIT_TEST(ToPyInf) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDecimalDataType<30, 0>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod(-NYql::NDecimal::Inf()); + }, + "def Test(value): assert value.is_infinite() and value.is_signed()"); } +} // Y_UNIT_TEST_SUITE(TPyDecimalTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_dict.cpp 
b/yql/essentials/udfs/common/python/bindings/py_dict.cpp index 2df6eb4e99f..e8fe52d1d4d 100644 --- a/yql/essentials/udfs/common/python/bindings/py_dict.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_dict.cpp @@ -8,7 +8,6 @@ #include <yql/essentials/public/udf/udf_value_builder.h> #include <yql/essentials/public/udf/udf_type_inspection.h> - using namespace NKikimr; namespace NPython { @@ -16,8 +15,7 @@ namespace NPython { ////////////////////////////////////////////////////////////////////////////// // TPyLazyDict interface ////////////////////////////////////////////////////////////////////////////// -struct TPyLazyDict -{ +struct TPyLazyDict { using TPtr = NUdf::TRefCountedPtr<TPyLazyDict, TPyPtrOps<TPyLazyDict>>; PyObject_HEAD; @@ -35,10 +33,10 @@ struct TPyLazyDict } static PyObject* New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* keyType, - const NUdf::TType* payloadType, - NUdf::IBoxedValuePtr&& value); + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + NUdf::IBoxedValuePtr&& value); static int Bool(PyObject* self); static PyObject* Repr(PyObject* self); @@ -47,7 +45,9 @@ struct TPyLazyDict static int Contains(PyObject* self, PyObject* key); static PyObject* Get(PyObject* self, PyObject* args); - static PyObject* Iter(PyObject* self) { return Keys(self, nullptr); } + static PyObject* Iter(PyObject* self) { + return Keys(self, nullptr); + } static PyObject* Keys(PyObject* self, PyObject* /* args */); static PyObject* Items(PyObject* self, PyObject* /* args */); static PyObject* Values(PyObject* self, PyObject* /* args */); @@ -60,196 +60,194 @@ PyMappingMethods LazyDictMapping = { }; PySequenceMethods LazyDictSequence = { - INIT_MEMBER(sq_length , TPyLazyDict::Len), - INIT_MEMBER(sq_concat , nullptr), - INIT_MEMBER(sq_repeat , nullptr), - INIT_MEMBER(sq_item , nullptr), + INIT_MEMBER(sq_length, TPyLazyDict::Len), + INIT_MEMBER(sq_concat, nullptr), + INIT_MEMBER(sq_repeat, nullptr), 
+ INIT_MEMBER(sq_item, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(was_sq_slice , nullptr), + INIT_MEMBER(was_sq_slice, nullptr), #else - INIT_MEMBER(sq_slice , nullptr), + INIT_MEMBER(sq_slice, nullptr), #endif - INIT_MEMBER(sq_ass_item , nullptr), + INIT_MEMBER(sq_ass_item, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(was_sq_ass_slice , nullptr), + INIT_MEMBER(was_sq_ass_slice, nullptr), #else - INIT_MEMBER(sq_ass_slice , nullptr), + INIT_MEMBER(sq_ass_slice, nullptr), #endif - INIT_MEMBER(sq_contains , TPyLazyDict::Contains), - INIT_MEMBER(sq_inplace_concat , nullptr), - INIT_MEMBER(sq_inplace_repeat , nullptr), + INIT_MEMBER(sq_contains, TPyLazyDict::Contains), + INIT_MEMBER(sq_inplace_concat, nullptr), + INIT_MEMBER(sq_inplace_repeat, nullptr), }; PyNumberMethods LazyDictNumbering = { - INIT_MEMBER(nb_add, nullptr), - INIT_MEMBER(nb_subtract, nullptr), - INIT_MEMBER(nb_multiply, nullptr), + INIT_MEMBER(nb_add, nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_divide, nullptr), + INIT_MEMBER(nb_divide, nullptr), #endif - INIT_MEMBER(nb_remainder, nullptr), - INIT_MEMBER(nb_divmod, nullptr), - INIT_MEMBER(nb_power, nullptr), - INIT_MEMBER(nb_negative, nullptr), - INIT_MEMBER(nb_positive, nullptr), - INIT_MEMBER(nb_absolute, nullptr), + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_bool, TPyLazyDict::Bool), + INIT_MEMBER(nb_bool, TPyLazyDict::Bool), #else - INIT_MEMBER(nb_nonzero, TPyLazyDict::Bool), + INIT_MEMBER(nb_nonzero, TPyLazyDict::Bool), #endif - INIT_MEMBER(nb_invert, nullptr), - INIT_MEMBER(nb_lshift, nullptr), - INIT_MEMBER(nb_rshift, nullptr), - INIT_MEMBER(nb_and, nullptr), - INIT_MEMBER(nb_xor, nullptr), - INIT_MEMBER(nb_or, nullptr), + 
INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_coerce, nullptr), + INIT_MEMBER(nb_coerce, nullptr), #endif - INIT_MEMBER(nb_int, nullptr), + INIT_MEMBER(nb_int, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_reserved, nullptr), + INIT_MEMBER(nb_reserved, nullptr), #else - INIT_MEMBER(nb_long, nullptr), + INIT_MEMBER(nb_long, nullptr), #endif - INIT_MEMBER(nb_float, nullptr), + INIT_MEMBER(nb_float, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_oct, nullptr), - INIT_MEMBER(nb_hex, nullptr), + INIT_MEMBER(nb_oct, nullptr), + INIT_MEMBER(nb_hex, nullptr), #endif - INIT_MEMBER(nb_inplace_add, nullptr), - INIT_MEMBER(nb_inplace_subtract, nullptr), - INIT_MEMBER(nb_inplace_multiply, nullptr), - INIT_MEMBER(nb_inplace_remainder, nullptr), - INIT_MEMBER(nb_inplace_power, nullptr), - INIT_MEMBER(nb_inplace_lshift, nullptr), - INIT_MEMBER(nb_inplace_rshift, nullptr), - INIT_MEMBER(nb_inplace_and, nullptr), - INIT_MEMBER(nb_inplace_xor, nullptr), - INIT_MEMBER(nb_inplace_or, nullptr), - - INIT_MEMBER(nb_floor_divide, nullptr), - INIT_MEMBER(nb_true_divide, nullptr), - INIT_MEMBER(nb_inplace_floor_divide, nullptr), - INIT_MEMBER(nb_inplace_true_divide, nullptr), - - INIT_MEMBER(nb_index, nullptr), + INIT_MEMBER(nb_inplace_add, nullptr), + INIT_MEMBER(nb_inplace_subtract, nullptr), + INIT_MEMBER(nb_inplace_multiply, nullptr), + INIT_MEMBER(nb_inplace_remainder, nullptr), + INIT_MEMBER(nb_inplace_power, nullptr), + INIT_MEMBER(nb_inplace_lshift, nullptr), + INIT_MEMBER(nb_inplace_rshift, nullptr), + INIT_MEMBER(nb_inplace_and, nullptr), + INIT_MEMBER(nb_inplace_xor, nullptr), + INIT_MEMBER(nb_inplace_or, nullptr), + + INIT_MEMBER(nb_floor_divide, nullptr), + INIT_MEMBER(nb_true_divide, nullptr), + INIT_MEMBER(nb_inplace_floor_divide, nullptr), + 
INIT_MEMBER(nb_inplace_true_divide, nullptr), + + INIT_MEMBER(nb_index, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_matrix_multiply, nullptr), - INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), + INIT_MEMBER(nb_matrix_multiply, nullptr), + INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), #endif }; - #if PY_MAJOR_VERSION >= 3 -#define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) -#define Py_TPFLAGS_HAVE_SEQUENCE_IN 0 // NOLINT(readability-identifier-naming) + #define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) + #define Py_TPFLAGS_HAVE_SEQUENCE_IN 0 // NOLINT(readability-identifier-naming) #endif PyDoc_STRVAR(get__doc__, - "D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."); + "D.get(k[,d]) -> D[k] if k in D, else d. d defaults to None."); PyDoc_STRVAR(keys__doc__, - "D.keys() -> an iterator over the keys of D"); + "D.keys() -> an iterator over the keys of D"); PyDoc_STRVAR(values__doc__, - "D.values() -> an iterator over the values of D"); + "D.values() -> an iterator over the values of D"); PyDoc_STRVAR(items__doc__, - "D.items() -> an iterator over the (key, value) items of D"); + "D.items() -> an iterator over the (key, value) items of D"); #if PY_MAJOR_VERSION < 3 PyDoc_STRVAR(iterkeys__doc__, - "D.iterkeys() -> an iterator over the keys of D"); + "D.iterkeys() -> an iterator over the keys of D"); PyDoc_STRVAR(itervalues__doc__, - "D.itervalues() -> an iterator over the values of D"); + "D.itervalues() -> an iterator over the values of D"); PyDoc_STRVAR(iteritems__doc__, - "D.iteritems() -> an iterator over the (key, value) items of D"); + "D.iteritems() -> an iterator over the (key, value) items of D"); #endif static PyMethodDef LazyDictMethods[] = { - { "get", TPyLazyDict::Get, METH_VARARGS, get__doc__ }, - { "keys", TPyLazyDict::Keys, METH_NOARGS, keys__doc__ }, - { "items", TPyLazyDict::Items, METH_NOARGS, items__doc__ }, - { "values", TPyLazyDict::Values, METH_NOARGS, values__doc__ }, + {"get", 
TPyLazyDict::Get, METH_VARARGS, get__doc__}, + {"keys", TPyLazyDict::Keys, METH_NOARGS, keys__doc__}, + {"items", TPyLazyDict::Items, METH_NOARGS, items__doc__}, + {"values", TPyLazyDict::Values, METH_NOARGS, values__doc__}, #if PY_MAJOR_VERSION < 3 - { "iterkeys", TPyLazyDict::Keys, METH_NOARGS, iterkeys__doc__ }, - { "iteritems", TPyLazyDict::Items, METH_NOARGS, iteritems__doc__ }, - { "itervalues", TPyLazyDict::Values, METH_NOARGS, itervalues__doc__ }, + {"iterkeys", TPyLazyDict::Keys, METH_NOARGS, iterkeys__doc__}, + {"iteritems", TPyLazyDict::Items, METH_NOARGS, iteritems__doc__}, + {"itervalues", TPyLazyDict::Values, METH_NOARGS, itervalues__doc__}, #endif - { nullptr, nullptr, 0, nullptr } /* sentinel */ + {nullptr, nullptr, 0, nullptr} /* sentinel */ }; PyTypeObject PyLazyDictType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TDict"), - INIT_MEMBER(tp_basicsize , sizeof(TPyLazyDict)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyLazyDict::Dealloc), + INIT_MEMBER(tp_name, "yql.TDict"), + INIT_MEMBER(tp_basicsize, sizeof(TPyLazyDict)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyLazyDict::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyLazyDict::Repr), - INIT_MEMBER(tp_as_number , &LazyDictNumbering), - INIT_MEMBER(tp_as_sequence , &LazyDictSequence), - INIT_MEMBER(tp_as_mapping , &LazyDictMapping), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), 
- INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN), - INIT_MEMBER(tp_doc , "yql.TDict object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , &TPyLazyDict::Iter), - INIT_MEMBER(tp_iternext , nullptr), - INIT_MEMBER(tp_methods , LazyDictMethods), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyLazyDict::Repr), + INIT_MEMBER(tp_as_number, &LazyDictNumbering), + INIT_MEMBER(tp_as_sequence, &LazyDictSequence), + INIT_MEMBER(tp_as_mapping, &LazyDictMapping), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN), + INIT_MEMBER(tp_doc, "yql.TDict object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, &TPyLazyDict::Iter), + INIT_MEMBER(tp_iternext, nullptr), + INIT_MEMBER(tp_methods, LazyDictMethods), + 
INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; ////////////////////////////////////////////////////////////////////////////// // TPyLazySet interface ////////////////////////////////////////////////////////////////////////////// -struct TPyLazySet -{ +struct TPyLazySet { using TPtr = NUdf::TRefCountedPtr<TPyLazySet, TPyPtrOps<TPyLazySet>>; PyObject_HEAD; @@ -266,9 +264,9 @@ struct TPyLazySet } static PyObject* New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - NUdf::IBoxedValuePtr&& value); + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr&& value); static int Bool(PyObject* self); static PyObject* Repr(PyObject* self); @@ -280,152 +278,152 @@ struct TPyLazySet }; PySequenceMethods LazySetSequence = { - INIT_MEMBER(sq_length , TPyLazySet::Len), - INIT_MEMBER(sq_concat , nullptr), - INIT_MEMBER(sq_repeat , nullptr), - INIT_MEMBER(sq_item , nullptr), + INIT_MEMBER(sq_length, TPyLazySet::Len), + INIT_MEMBER(sq_concat, nullptr), + 
INIT_MEMBER(sq_repeat, nullptr), + INIT_MEMBER(sq_item, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(was_sq_slice , nullptr), + INIT_MEMBER(was_sq_slice, nullptr), #else - INIT_MEMBER(sq_slice , nullptr), + INIT_MEMBER(sq_slice, nullptr), #endif - INIT_MEMBER(sq_ass_item , nullptr), + INIT_MEMBER(sq_ass_item, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(was_sq_ass_slice , nullptr), + INIT_MEMBER(was_sq_ass_slice, nullptr), #else - INIT_MEMBER(sq_ass_slice , nullptr), + INIT_MEMBER(sq_ass_slice, nullptr), #endif - INIT_MEMBER(sq_contains , TPyLazySet::Contains), - INIT_MEMBER(sq_inplace_concat , nullptr), - INIT_MEMBER(sq_inplace_repeat , nullptr), + INIT_MEMBER(sq_contains, TPyLazySet::Contains), + INIT_MEMBER(sq_inplace_concat, nullptr), + INIT_MEMBER(sq_inplace_repeat, nullptr), }; PyNumberMethods LazySetNumbering = { - INIT_MEMBER(nb_add, nullptr), - INIT_MEMBER(nb_subtract, nullptr), - INIT_MEMBER(nb_multiply, nullptr), + INIT_MEMBER(nb_add, nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_divide, nullptr), + INIT_MEMBER(nb_divide, nullptr), #endif - INIT_MEMBER(nb_remainder, nullptr), - INIT_MEMBER(nb_divmod, nullptr), - INIT_MEMBER(nb_power, nullptr), - INIT_MEMBER(nb_negative, nullptr), - INIT_MEMBER(nb_positive, nullptr), - INIT_MEMBER(nb_absolute, nullptr), + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_bool, TPyLazySet::Bool), + INIT_MEMBER(nb_bool, TPyLazySet::Bool), #else - INIT_MEMBER(nb_nonzero, TPyLazySet::Bool), + INIT_MEMBER(nb_nonzero, TPyLazySet::Bool), #endif - INIT_MEMBER(nb_invert, nullptr), - INIT_MEMBER(nb_lshift, nullptr), - INIT_MEMBER(nb_rshift, nullptr), - INIT_MEMBER(nb_and, nullptr), - INIT_MEMBER(nb_xor, nullptr), - INIT_MEMBER(nb_or, 
nullptr), + INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_coerce, nullptr), + INIT_MEMBER(nb_coerce, nullptr), #endif - INIT_MEMBER(nb_int, nullptr), + INIT_MEMBER(nb_int, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_reserved, nullptr), + INIT_MEMBER(nb_reserved, nullptr), #else - INIT_MEMBER(nb_long, nullptr), + INIT_MEMBER(nb_long, nullptr), #endif - INIT_MEMBER(nb_float, nullptr), + INIT_MEMBER(nb_float, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_oct, nullptr), - INIT_MEMBER(nb_hex, nullptr), + INIT_MEMBER(nb_oct, nullptr), + INIT_MEMBER(nb_hex, nullptr), #endif - INIT_MEMBER(nb_inplace_add, nullptr), - INIT_MEMBER(nb_inplace_subtract, nullptr), - INIT_MEMBER(nb_inplace_multiply, nullptr), - INIT_MEMBER(nb_inplace_remainder, nullptr), - INIT_MEMBER(nb_inplace_power, nullptr), - INIT_MEMBER(nb_inplace_lshift, nullptr), - INIT_MEMBER(nb_inplace_rshift, nullptr), - INIT_MEMBER(nb_inplace_and, nullptr), - INIT_MEMBER(nb_inplace_xor, nullptr), - INIT_MEMBER(nb_inplace_or, nullptr), - - INIT_MEMBER(nb_floor_divide, nullptr), - INIT_MEMBER(nb_true_divide, nullptr), - INIT_MEMBER(nb_inplace_floor_divide, nullptr), - INIT_MEMBER(nb_inplace_true_divide, nullptr), - - INIT_MEMBER(nb_index, nullptr), + INIT_MEMBER(nb_inplace_add, nullptr), + INIT_MEMBER(nb_inplace_subtract, nullptr), + INIT_MEMBER(nb_inplace_multiply, nullptr), + INIT_MEMBER(nb_inplace_remainder, nullptr), + INIT_MEMBER(nb_inplace_power, nullptr), + INIT_MEMBER(nb_inplace_lshift, nullptr), + INIT_MEMBER(nb_inplace_rshift, nullptr), + INIT_MEMBER(nb_inplace_and, nullptr), + INIT_MEMBER(nb_inplace_xor, nullptr), + INIT_MEMBER(nb_inplace_or, nullptr), + + INIT_MEMBER(nb_floor_divide, nullptr), + INIT_MEMBER(nb_true_divide, nullptr), + INIT_MEMBER(nb_inplace_floor_divide, nullptr), + 
INIT_MEMBER(nb_inplace_true_divide, nullptr), + + INIT_MEMBER(nb_index, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_matrix_multiply, nullptr), - INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), + INIT_MEMBER(nb_matrix_multiply, nullptr), + INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), #endif }; PyTypeObject PyLazySetType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TSet"), - INIT_MEMBER(tp_basicsize , sizeof(TPyLazySet)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyLazySet::Dealloc), + INIT_MEMBER(tp_name, "yql.TSet"), + INIT_MEMBER(tp_basicsize, sizeof(TPyLazySet)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyLazySet::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyLazySet::Repr), - INIT_MEMBER(tp_as_number , &LazySetNumbering), - INIT_MEMBER(tp_as_sequence , &LazySetSequence), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN), - INIT_MEMBER(tp_doc , "yql.TSet object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , &TPyLazySet::Iter), - INIT_MEMBER(tp_iternext , nullptr), - INIT_MEMBER(tp_methods , 
nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyLazySet::Repr), + INIT_MEMBER(tp_as_number, &LazySetNumbering), + INIT_MEMBER(tp_as_sequence, &LazySetSequence), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_ITER | Py_TPFLAGS_HAVE_SEQUENCE_IN), + INIT_MEMBER(tp_doc, "yql.TSet object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, &TPyLazySet::Iter), + INIT_MEMBER(tp_iternext, nullptr), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + 
INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; @@ -433,10 +431,11 @@ PyTypeObject PyLazySetType = { // TPyLazyDict implementation ////////////////////////////////////////////////////////////////////////////// int TPyLazyDict::Bool(PyObject* self) -{ - PY_TRY { - return NUdf::TBoxedValueAccessor::HasDictItems(*Cast(self)->Value.Get()) ? 1 : 0; - } PY_CATCH(-1) + { + PY_TRY{ + return NUdf::TBoxedValueAccessor::HasDictItems(*Cast(self) -> Value.Get()) ? 1 : 0; +} // namespace NPython +PY_CATCH(-1) } PyObject* TPyLazyDict::Repr(PyObject*) @@ -445,10 +444,11 @@ PyObject* TPyLazyDict::Repr(PyObject*) } Py_ssize_t TPyLazyDict::Len(PyObject* self) -{ - PY_TRY { - return static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*Cast(self)->Value.Get())); - } PY_CATCH(-1) + { + PY_TRY{ + return static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*Cast(self) -> Value.Get())); +} +PY_CATCH(-1) } PyObject* TPyLazyDict::Subscript(PyObject* self, PyObject* key) @@ -487,112 +487,121 @@ PyObject* TPyLazyDict::Subscript(PyObject* self, PyObject* key) PyErr_SetObject(PyExc_IndexError, repr.Get()); return nullptr; } - - } PY_CATCH(nullptr) + } + PY_CATCH(nullptr) } // -1 error // 0 not found // 1 found int TPyLazyDict::Contains(PyObject* self, PyObject* key) -{ - PY_TRY { - TPyLazyDict* dict = Cast(self); - NUdf::TUnboxedValue mkqlKey; - - if (dict->KeyType) { - mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key); - } else { - if (!PyIndex_Check(key)) { - const TPyObjectPtr type = PyObject_Type(key); - 
const TPyObjectPtr repr = PyObject_Repr(type.Get()); - const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get()); - PyErr_SetObject(PyExc_TypeError, error.Get()); - return -1; - } + { + PY_TRY{ + TPyLazyDict* dict = Cast(self); +NUdf::TUnboxedValue mkqlKey; + +if (dict->KeyType) { + mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key); +} else { + if (!PyIndex_Check(key)) { + const TPyObjectPtr type = PyObject_Type(key); + const TPyObjectPtr repr = PyObject_Repr(type.Get()); + const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get()); + PyErr_SetObject(PyExc_TypeError, error.Get()); + return -1; + } - const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError); - if (index < 0) { - return 0; - } - mkqlKey = NUdf::TUnboxedValuePod(ui64(index)); - } + const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError); + if (index < 0) { + return 0; + } + mkqlKey = NUdf::TUnboxedValuePod(ui64(index)); +} - return NUdf::TBoxedValueAccessor::Contains(*dict->Value.Get(), mkqlKey) ? 1 : 0; - } PY_CATCH(-1) +return NUdf::TBoxedValueAccessor::Contains(*dict->Value.Get(), mkqlKey) ? 
1 : 0; +} +PY_CATCH(-1) } PyObject* TPyLazyDict::Get(PyObject* self, PyObject* args) -{ - PY_TRY { - PyObject* key = nullptr; - PyObject* failobj = Py_None; + { + PY_TRY{ + PyObject* key = nullptr; +PyObject* failobj = Py_None; - if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj)) - return nullptr; +if (!PyArg_UnpackTuple(args, "get", 1, 2, &key, &failobj)) { + return nullptr; +} - TPyLazyDict* dict = Cast(self); - if (dict->KeyType) { - const auto mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key); - if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), mkqlKey)) { - return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); - } - } else { - if (!PyIndex_Check(key)) { - const TPyObjectPtr type = PyObject_Type(key); - const TPyObjectPtr repr = PyObject_Repr(type.Get()); - const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get()); - PyErr_SetObject(PyExc_TypeError, error.Get()); - return nullptr; - } +TPyLazyDict* dict = Cast(self); +if (dict->KeyType) { + const auto mkqlKey = FromPyObject(dict->CastCtx, dict->KeyType, key); + if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), mkqlKey)) { + return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); + } +} else { + if (!PyIndex_Check(key)) { + const TPyObjectPtr type = PyObject_Type(key); + const TPyObjectPtr repr = PyObject_Repr(type.Get()); + const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported index object type: %R", repr.Get()); + PyErr_SetObject(PyExc_TypeError, error.Get()); + return nullptr; + } - const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError); - if (index < 0) { - return nullptr; - } + const Py_ssize_t index = PyNumber_AsSsize_t(key, PyExc_IndexError); + if (index < 0) { + return nullptr; + } - if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), NUdf::TUnboxedValuePod(ui64(index)))) { - return 
ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); - } - } + if (auto value = NUdf::TBoxedValueAccessor::Lookup(*dict->Value.Get(), NUdf::TUnboxedValuePod(ui64(index)))) { + return ToPyObject(dict->CastCtx, dict->PayloadType, value.Release().GetOptionalValue()).Release(); + } +} - Py_INCREF(failobj); - return failobj; - } PY_CATCH(nullptr) +Py_INCREF(failobj); +return failobj; +} +PY_CATCH(nullptr) } PyObject* TPyLazyDict::Keys(PyObject* self, PyObject* /* args */) -{ - PY_TRY { - const auto dict = Cast(self); - return ToPyIterator(dict->CastCtx, dict->KeyType, - NUdf::TBoxedValueAccessor::GetKeysIterator(*dict->Value.Get())).Release(); - } PY_CATCH(nullptr) + { + PY_TRY{ + const auto dict = Cast(self); +return ToPyIterator(dict->CastCtx, dict->KeyType, + NUdf::TBoxedValueAccessor::GetKeysIterator(*dict->Value.Get())) + .Release(); +} +PY_CATCH(nullptr) } PyObject* TPyLazyDict::Items(PyObject* self, PyObject* /* args */) -{ - PY_TRY { - const auto dict = Cast(self); - return ToPyIterator(dict->CastCtx, dict->KeyType, dict->PayloadType, - NUdf::TBoxedValueAccessor::GetDictIterator(*dict->Value.Get())).Release(); - } PY_CATCH(nullptr) + { + PY_TRY{ + const auto dict = Cast(self); +return ToPyIterator(dict->CastCtx, dict->KeyType, dict->PayloadType, + NUdf::TBoxedValueAccessor::GetDictIterator(*dict->Value.Get())) + .Release(); +} +PY_CATCH(nullptr) } PyObject* TPyLazyDict::Values(PyObject* self, PyObject* /* args */) -{ - PY_TRY { - const auto dict = Cast(self); - return ToPyIterator(dict->CastCtx, dict->PayloadType, - NUdf::TBoxedValueAccessor::GetPayloadsIterator(*dict->Value.Get())).Release(); - } PY_CATCH(nullptr) + { + PY_TRY{ + const auto dict = Cast(self); +return ToPyIterator(dict->CastCtx, dict->PayloadType, + NUdf::TBoxedValueAccessor::GetPayloadsIterator(*dict->Value.Get())) + .Release(); +} +PY_CATCH(nullptr) } PyObject* TPyLazyDict::New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* keyType, - const 
NUdf::TType* payloadType, - NUdf::IBoxedValuePtr&& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + NUdf::IBoxedValuePtr&& value) { TPyLazyDict* dict = new TPyLazyDict; PyObject_INIT(dict, &PyLazyDictType); @@ -608,10 +617,11 @@ PyObject* TPyLazyDict::New( // TPyLazySet implementation ////////////////////////////////////////////////////////////////////////////// int TPyLazySet::Bool(PyObject* self) -{ - PY_TRY { - return NUdf::TBoxedValueAccessor::HasDictItems(*Cast(self)->Value.Get()) ? 1 : 0; - } PY_CATCH(-1) + { + PY_TRY{ + return NUdf::TBoxedValueAccessor::HasDictItems(*Cast(self) -> Value.Get()) ? 1 : 0; +} +PY_CATCH(-1) } PyObject* TPyLazySet::Repr(PyObject*) @@ -623,34 +633,38 @@ Py_ssize_t TPyLazySet::Len(PyObject* self) { PY_TRY { return static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*Cast(self)->Value.Get())); - } PY_CATCH(-1) + } + PY_CATCH(-1) } // -1 error // 0 not found // 1 found int TPyLazySet::Contains(PyObject* self, PyObject* key) -{ - PY_TRY { - const auto set = Cast(self); - const auto mkqlKey = FromPyObject(set->CastCtx, set->ItemType, key); - return NUdf::TBoxedValueAccessor::Contains(*set->Value.Get(), mkqlKey) ? 1 : 0; - } PY_CATCH(-1) + { + PY_TRY{ + const auto set = Cast(self); +const auto mkqlKey = FromPyObject(set->CastCtx, set->ItemType, key); +return NUdf::TBoxedValueAccessor::Contains(*set->Value.Get(), mkqlKey) ? 
1 : 0; +} +PY_CATCH(-1) } PyObject* TPyLazySet::Iter(PyObject* self) -{ - PY_TRY { - const auto set = Cast(self); - return ToPyIterator(set->CastCtx, set->ItemType, - NUdf::TBoxedValueAccessor::GetKeysIterator(*set->Value.Get())).Release(); - } PY_CATCH(nullptr) + { + PY_TRY{ + const auto set = Cast(self); +return ToPyIterator(set->CastCtx, set->ItemType, + NUdf::TBoxedValueAccessor::GetKeysIterator(*set->Value.Get())) + .Release(); +} +PY_CATCH(nullptr) } PyObject* TPyLazySet::New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - NUdf::IBoxedValuePtr&& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr&& value) { TPyLazySet* dict = new TPyLazySet; PyObject_INIT(dict, &PyLazySetType); @@ -664,18 +678,18 @@ PyObject* TPyLazySet::New( ////////////////////////////////////////////////////////////////////////////// TPyObjectPtr ToPyLazyDict( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* keyType, - const NUdf::TType* payloadType, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + const NUdf::TUnboxedValuePod& value) { return TPyLazyDict::New(castCtx, keyType, payloadType, value.AsBoxed()); } TPyObjectPtr ToPyLazySet( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TUnboxedValuePod& value) { return TPyLazySet::New(castCtx, itemType, value.AsBoxed()); } diff --git a/yql/essentials/udfs/common/python/bindings/py_dict.h b/yql/essentials/udfs/common/python/bindings/py_dict.h index 538ca69a127..c1337cc5ec4 100644 --- a/yql/essentials/udfs/common/python/bindings/py_dict.h +++ b/yql/essentials/udfs/common/python/bindings/py_dict.h @@ -9,42 +9,42 @@ extern PyTypeObject PyLazyDictType; extern PyTypeObject PyLazySetType; TPyObjectPtr ToPyLazyDict( - const 
TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* keyType, - const NKikimr::NUdf::TType* payloadType, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const NKikimr::NUdf::TType* payloadType, + const NKikimr::NUdf::TUnboxedValuePod& value); TPyObjectPtr ToPyLazySet( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* itemType, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyMapping( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* keyType, - const NKikimr::NUdf::TType* payType, - PyObject* map); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const NKikimr::NUdf::TType* payType, + PyObject* map); NKikimr::NUdf::TUnboxedValue FromPyDict( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* keyType, - const NKikimr::NUdf::TType* payType, - PyObject* dict); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const NKikimr::NUdf::TType* payType, + PyObject* dict); NKikimr::NUdf::TUnboxedValue FromPySet( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* itemType, - PyObject* set); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + PyObject* set); NKikimr::NUdf::TUnboxedValue FromPySequence( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* keyType, - PyObject* sequence); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + PyObject* sequence); NKikimr::NUdf::TUnboxedValue FromPySequence( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* itemType, - const NKikimr::NUdf::TDataTypeId keyType, - PyObject* sequence); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const 
NKikimr::NUdf::TDataTypeId keyType, + PyObject* sequence); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp index edb3d36e8c3..454ff363862 100644 --- a/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_dict_ut.cpp @@ -7,716 +7,673 @@ using namespace NPython; Y_UNIT_TEST_SUITE(TPyDictTest) { - Y_UNIT_TEST(FromPyEmptyDict) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "def Test(): return {}", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(!value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 0); - }); - } - - Y_UNIT_TEST(FromPyDict_Length) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT(!value.IsSortedDict()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - }); - } - - Y_UNIT_TEST(FromPyDict_Lookup) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", - [](const NUdf::TUnboxedValuePod& value) { - const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(ui32(1))); - UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one"); - const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(ui32(2))); - UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two"); - const auto v3 = value.Lookup(NUdf::TUnboxedValuePod(ui32(3))); - UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three"); - - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(0)))); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(4)))); - }); - } - - Y_UNIT_TEST(FromPyDict_Contains) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, 
char*>>( - "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(2)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(3)))); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(4)))); - }); - } - - Y_UNIT_TEST(FromPyDict_Items) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", - [](const NUdf::TUnboxedValuePod& value) { - std::map<ui32, TString> items; - const auto it = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { - items.emplace(key.Get<ui32>(), payload.AsStringRef()); - } - - UNIT_ASSERT_EQUAL(items.size(), 3); - UNIT_ASSERT_EQUAL(items[1], "one"); - UNIT_ASSERT_EQUAL(items[2], "two"); - UNIT_ASSERT_EQUAL(items[3], "three"); - }); - } - - Y_UNIT_TEST(FromPyDict_Keys) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", - [](const NUdf::TUnboxedValuePod& value) { - std::vector<ui32> items; - const auto it = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; it.Next(key);) { - items.emplace_back(key.Get<ui32>()); - } - - UNIT_ASSERT_EQUAL(items.size(), 3); - - std::sort(items.begin(), items.end()); - UNIT_ASSERT_EQUAL(items[0], 1U); - UNIT_ASSERT_EQUAL(items[1], 2U); - UNIT_ASSERT_EQUAL(items[2], 3U); - }); - } - - Y_UNIT_TEST(FromPyDict_Values) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", - [](const NUdf::TUnboxedValuePod& value) { - std::vector<TString> items; - const auto it = value.GetPayloadsIterator(); - for (NUdf::TUnboxedValue payload; it.Next(payload);) { - items.emplace_back(payload.AsStringRef()); - } - - 
UNIT_ASSERT_EQUAL(items.size(), 3); - - std::sort(items.begin(), items.end()); - UNIT_ASSERT_EQUAL(items[0], "one"); - UNIT_ASSERT_EQUAL(items[1], "three"); - UNIT_ASSERT_EQUAL(items[2], "two"); - }); - } - - Y_UNIT_TEST(FromPyList_Length) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "def Test(): return ['one', 'two', 'three']", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT(value.IsSortedDict()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - }); - } - - Y_UNIT_TEST(FromPyTuple_Lookup) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<i32, char*>>( - "def Test(): return ('one', 'two', 'three')", - [](const NUdf::TUnboxedValuePod& value) { - const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(i32(0))); - UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one"); - const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(i32(1))); - UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two"); - const auto v3 = value.Lookup(NUdf::TUnboxedValuePod(i32(2))); - UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three"); - const auto v4 = value.Lookup(NUdf::TUnboxedValuePod(i32(-1))); - UNIT_ASSERT_EQUAL(v4.AsStringRef(), "three"); - const auto v5 = value.Lookup(NUdf::TUnboxedValuePod(i32(-2))); - UNIT_ASSERT_EQUAL(v5.AsStringRef(), "two"); - const auto v6 = value.Lookup(NUdf::TUnboxedValuePod(i32(-3))); - UNIT_ASSERT_EQUAL(v6.AsStringRef(), "one"); - - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(3)))); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(-4)))); - }); - } - - Y_UNIT_TEST(FromPyList_Contains) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<i16, char*>>( - "def Test(): return ['one', 'two', 'three']", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(0)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(1)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(2)))); - 
UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(3)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-1)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-2)))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-3)))); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(-4)))); - }); - } - - Y_UNIT_TEST(FromPyTuple_Items) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui16, char*>>( - "def Test(): return ('one', 'two', 'three')", - [](const NUdf::TUnboxedValuePod& value) { - std::vector<std::pair<ui16, TString>> items; - const auto it = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { - items.emplace_back(key.Get<ui16>(), payload.AsStringRef()); - } - - UNIT_ASSERT_EQUAL(items.size(), 3U); - UNIT_ASSERT_EQUAL(items[0].first, 0); - UNIT_ASSERT_EQUAL(items[1].first, 1); - UNIT_ASSERT_EQUAL(items[2].first, 2); - UNIT_ASSERT_EQUAL(items[0].second, "one"); - UNIT_ASSERT_EQUAL(items[1].second, "two"); - UNIT_ASSERT_EQUAL(items[2].second, "three"); - }); - } - - Y_UNIT_TEST(FromPyList_Keys) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<i64, char*>>( - "def Test(): return ['one', 'two', 'three']", - [](const NUdf::TUnboxedValuePod& value) { - std::vector<i64> items; - const auto it = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; it.Next(key);) { - items.emplace_back(key.Get<i64>()); - } - - UNIT_ASSERT_EQUAL(items.size(), 3); - UNIT_ASSERT_EQUAL(items[0], 0); - UNIT_ASSERT_EQUAL(items[1], 1); - UNIT_ASSERT_EQUAL(items[2], 2); - }); - } - - Y_UNIT_TEST(FromPyTuple_Values) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui64, char*>>( - "def Test(): return ('one', 'two', 'three')", - [](const NUdf::TUnboxedValuePod& value) { - std::vector<TString> items; - const auto it = value.GetPayloadsIterator(); - for (NUdf::TUnboxedValue payload; it.Next(payload);) { - items.emplace_back(payload.AsStringRef()); - } - - 
UNIT_ASSERT_EQUAL(items.size(), 3); - UNIT_ASSERT_EQUAL(items[0], "one"); - UNIT_ASSERT_EQUAL(items[1], "two"); - UNIT_ASSERT_EQUAL(items[2], "three"); - }); - } - - Y_UNIT_TEST(ToPyEmptyDict) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDict<ui8, ui32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build(); - }, - "def Test(value):\n" - " assert not value\n" - " assert len(value) == 0\n" - ); - } - - Y_UNIT_TEST(ToPyDict) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDict<int, double>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> - Add(NUdf::TUnboxedValuePod((int) 1), NUdf::TUnboxedValuePod((double) 0.1)) - .Add(NUdf::TUnboxedValuePod((int) 2), NUdf::TUnboxedValuePod((double) 0.2)) - .Add(NUdf::TUnboxedValuePod((int) 3), NUdf::TUnboxedValuePod((double) 0.3)) - .Build(); - }, - "def Test(value):\n" - " assert value\n" - " assert len(value) == 3\n" - " assert iter(value) is not None\n" - " assert 2 in value\n" - " assert 0 not in value\n" - " assert set(iter(value)) == set([1, 2, 3])\n" - " assert value[2] == 0.2\n" - " assert value.get(0, 0.7) == 0.7\n" - " assert value.get(3, 0.7) == 0.3\n" - " assert sorted(value.keys()) == [1, 2, 3]\n" - " assert sorted(value.items()) == [(1, 0.1), (2, 0.2), (3, 0.3)]\n" - " assert sorted(value.values()) == [0.1, 0.2, 0.3]\n" +Y_UNIT_TEST(FromPyEmptyDict) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 0); + }); +} + +Y_UNIT_TEST(FromPyDict_Length) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + 
UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT(!value.IsSortedDict()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + }); +} + +Y_UNIT_TEST(FromPyDict_Lookup) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(ui32(1))); + UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one"); + const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(ui32(2))); + UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two"); + const auto v3 = value.Lookup(NUdf::TUnboxedValuePod(ui32(3))); + UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three"); + + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(0)))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui32(4)))); + }); +} + +Y_UNIT_TEST(FromPyDict_Contains) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(2)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(3)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(4)))); + }); +} + +Y_UNIT_TEST(FromPyDict_Items) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + std::map<ui32, TString> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace(key.Get<ui32>(), payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[1], "one"); + UNIT_ASSERT_EQUAL(items[2], "two"); + UNIT_ASSERT_EQUAL(items[3], "three"); + }); +} + 
+Y_UNIT_TEST(FromPyDict_Keys) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<ui32> items; + const auto it = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; it.Next(key);) { + items.emplace_back(key.Get<ui32>()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + + std::sort(items.begin(), items.end()); + UNIT_ASSERT_EQUAL(items[0], 1U); + UNIT_ASSERT_EQUAL(items[1], 2U); + UNIT_ASSERT_EQUAL(items[2], 3U); + }); +} + +Y_UNIT_TEST(FromPyDict_Values) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return {1: 'one', 3: 'three', 2: 'two'}", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<TString> items; + const auto it = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; it.Next(payload);) { + items.emplace_back(payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + + std::sort(items.begin(), items.end()); + UNIT_ASSERT_EQUAL(items[0], "one"); + UNIT_ASSERT_EQUAL(items[1], "three"); + UNIT_ASSERT_EQUAL(items[2], "two"); + }); +} + +Y_UNIT_TEST(FromPyList_Length) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "def Test(): return ['one', 'two', 'three']", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT(value.IsSortedDict()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + }); +} + +Y_UNIT_TEST(FromPyTuple_Lookup) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<i32, char*>>( + "def Test(): return ('one', 'two', 'three')", + [](const NUdf::TUnboxedValuePod& value) { + const auto v1 = value.Lookup(NUdf::TUnboxedValuePod(i32(0))); + UNIT_ASSERT_EQUAL(v1.AsStringRef(), "one"); + const auto v2 = value.Lookup(NUdf::TUnboxedValuePod(i32(1))); + UNIT_ASSERT_EQUAL(v2.AsStringRef(), "two"); + const auto v3 = 
value.Lookup(NUdf::TUnboxedValuePod(i32(2))); + UNIT_ASSERT_EQUAL(v3.AsStringRef(), "three"); + const auto v4 = value.Lookup(NUdf::TUnboxedValuePod(i32(-1))); + UNIT_ASSERT_EQUAL(v4.AsStringRef(), "three"); + const auto v5 = value.Lookup(NUdf::TUnboxedValuePod(i32(-2))); + UNIT_ASSERT_EQUAL(v5.AsStringRef(), "two"); + const auto v6 = value.Lookup(NUdf::TUnboxedValuePod(i32(-3))); + UNIT_ASSERT_EQUAL(v6.AsStringRef(), "one"); + + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(3)))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i32(-4)))); + }); +} + +Y_UNIT_TEST(FromPyList_Contains) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<i16, char*>>( + "def Test(): return ['one', 'two', 'three']", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(0)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(1)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(2)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(3)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-1)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-2)))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i16(-3)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i16(-4)))); + }); +} + +Y_UNIT_TEST(FromPyTuple_Items) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui16, char*>>( + "def Test(): return ('one', 'two', 'three')", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<std::pair<ui16, TString>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<ui16>(), payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3U); + UNIT_ASSERT_EQUAL(items[0].first, 0); + UNIT_ASSERT_EQUAL(items[1].first, 1); + UNIT_ASSERT_EQUAL(items[2].first, 2); + UNIT_ASSERT_EQUAL(items[0].second, "one"); + UNIT_ASSERT_EQUAL(items[1].second, "two"); + 
UNIT_ASSERT_EQUAL(items[2].second, "three"); + }); +} + +Y_UNIT_TEST(FromPyList_Keys) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<i64, char*>>( + "def Test(): return ['one', 'two', 'three']", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<i64> items; + const auto it = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; it.Next(key);) { + items.emplace_back(key.Get<i64>()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0], 0); + UNIT_ASSERT_EQUAL(items[1], 1); + UNIT_ASSERT_EQUAL(items[2], 2); + }); +} + +Y_UNIT_TEST(FromPyTuple_Values) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui64, char*>>( + "def Test(): return ('one', 'two', 'three')", + [](const NUdf::TUnboxedValuePod& value) { + std::vector<TString> items; + const auto it = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; it.Next(payload);) { + items.emplace_back(payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0], "one"); + UNIT_ASSERT_EQUAL(items[1], "two"); + UNIT_ASSERT_EQUAL(items[2], "three"); + }); +} + +Y_UNIT_TEST(ToPyEmptyDict) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDict<ui8, ui32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build(); + }, + "def Test(value):\n" + " assert not value\n" + " assert len(value) == 0\n"); +} + +Y_UNIT_TEST(ToPyDict) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDict<int, double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Add(NUdf::TUnboxedValuePod((int)1), NUdf::TUnboxedValuePod((double)0.1)).Add(NUdf::TUnboxedValuePod((int)2), NUdf::TUnboxedValuePod((double)0.2)).Add(NUdf::TUnboxedValuePod((int)3), NUdf::TUnboxedValuePod((double)0.3)).Build(); + }, + "def Test(value):\n" + " assert value\n" + " assert len(value) == 3\n" + " assert iter(value) is not None\n" + " 
assert 2 in value\n" + " assert 0 not in value\n" + " assert set(iter(value)) == set([1, 2, 3])\n" + " assert value[2] == 0.2\n" + " assert value.get(0, 0.7) == 0.7\n" + " assert value.get(3, 0.7) == 0.3\n" + " assert sorted(value.keys()) == [1, 2, 3]\n" + " assert sorted(value.items()) == [(1, 0.1), (2, 0.2), (3, 0.3)]\n" + " assert sorted(value.values()) == [0.1, 0.2, 0.3]\n" #if PY_MAJOR_VERSION < 3 - " assert all(isinstance(k, int) for k in value.iterkeys())\n" - " assert all(isinstance(v, float) for v in value.itervalues())\n" - " assert all(isinstance(k, int) and isinstance(v, float) for k,v in value.iteritems())\n" + " assert all(isinstance(k, int) for k in value.iterkeys())\n" + " assert all(isinstance(v, float) for v in value.itervalues())\n" + " assert all(isinstance(k, int) and isinstance(v, float) for k,v in value.iteritems())\n" #endif - ); - } - - Y_UNIT_TEST(ToPyDictWrongKey) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TDict<int, double>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> - Add(NUdf::TUnboxedValuePod((int) 1), NUdf::TUnboxedValuePod((double) 0.1)) - .Add(NUdf::TUnboxedValuePod((int) 2), NUdf::TUnboxedValuePod((double) 0.2)) - .Add(NUdf::TUnboxedValuePod((int) 3), NUdf::TUnboxedValuePod((double) 0.3)) - .Build(); - }, - "def Test(value):\n" - " try:\n" - " print(value[0])\n" - " except KeyError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } - - Y_UNIT_TEST(FromPyEmptySet) { - TPythonTestEngine engine; - - engine.ToMiniKQL<NUdf::TDict<ui32, void>>( - "def Test(): return set([])", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(!value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 0); - }); - - } - - Y_UNIT_TEST(FromPySet) { - TPythonTestEngine engine; - - engine.ToMiniKQL<NUdf::TDict<char*, void>>( - "def Test(): return set(['one', 'two', 'three'])", - [](const 
NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT(!value.IsSortedDict()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - - std::set<TString> set; - const auto it = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; it.Next(key);) { - set.emplace(key.AsStringRef()); - } - - UNIT_ASSERT_EQUAL(set.size(), 3); - UNIT_ASSERT(set.count("one")); - UNIT_ASSERT(set.count("two")); - UNIT_ASSERT(set.count("three")); - }); - - } - - Y_UNIT_TEST(FromPySet_Contains) { - TPythonTestEngine engine; - - engine.ToMiniKQL<NUdf::TDict<char*, void>>( - "def Test(): return {b'one', b'two', b'three'}", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("one"))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("two"))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("three"))); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod::Embedded("zero"))); - }); - - } - - Y_UNIT_TEST(ToPyEmptySet) { - TPythonTestEngine engine; - - engine.ToPython<NUdf::TDict<ui8, void>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build(); - }, - "def Test(value):\n" - " assert not value\n" - " assert len(value) == 0\n" - ); - - } - - Y_UNIT_TEST(ToPySet) { - TPythonTestEngine engine; - - engine.ToPython<NUdf::TDict<ui8, void>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> - Add(NUdf::TUnboxedValuePod((ui8) 1), NUdf::TUnboxedValuePod::Void()) - .Add(NUdf::TUnboxedValuePod((ui8) 2), NUdf::TUnboxedValuePod::Void()) - .Add(NUdf::TUnboxedValuePod((ui8) 3), NUdf::TUnboxedValuePod::Void()) - .Build(); - - }, - "def Test(value):\n" - " assert len(value) == 3\n" - " assert all(isinstance(k, int) for k in iter(value))\n" - " assert all(i in value for i in [1, 2, 3])\n"); - } - - 
Y_UNIT_TEST(FromPyMultiDict) { - TPythonTestEngine engine; - - engine.ToMiniKQL<NUdf::TDict<ui32, NUdf::TListType<char*>>>( - "def Test(): return {1: ['one', 'two'], 3: ['three']}", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 2); - - std::unordered_map<ui32, std::vector<TString>> map; - const auto dictIt = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; dictIt.NextPair(key, payload);) { - auto& val = map[key.Get<ui32>()]; - const auto listIt = payload.GetListIterator(); - for (NUdf::TUnboxedValue listItem; listIt.Next(listItem);) { - val.emplace_back(listItem.AsStringRef()); - } - } - - UNIT_ASSERT_EQUAL(map.size(), 2); - auto it = map.find(1); - UNIT_ASSERT(it != map.end()); - UNIT_ASSERT_EQUAL(it->second.size(), 2); - UNIT_ASSERT_EQUAL(it->second[0], "one"); - UNIT_ASSERT_EQUAL(it->second[1], "two"); - it = map.find(3); - UNIT_ASSERT(it != map.end()); - UNIT_ASSERT_EQUAL(it->second.size(), 1); - UNIT_ASSERT_EQUAL(it->second[0], "three"); - }); - - } - - Y_UNIT_TEST(ToPyMultiDict) { - TPythonTestEngine engine; - - engine.ToPython<NUdf::TDict<ui8, NUdf::TListType<NUdf::TUtf8>>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - ui32 flags = NUdf::TDictFlags::Hashed | NUdf::TDictFlags::Multi; - return vb.NewDict(type, flags)-> - Add(NUdf::TUnboxedValuePod((ui8) 1), vb.NewString("one")) - .Add(NUdf::TUnboxedValuePod((ui8) 1), vb.NewString("two")) - .Add(NUdf::TUnboxedValuePod((ui8) 3), vb.NewString("three")) - .Build(); - - }, - "def Test(value):\n" - " assert len(value) == 2\n" - " assert 1 in value\n" - " assert 3 in value\n" - " assert len(value[1]) == 2\n" - " assert 'one' in value[1]\n" - " assert 'two' in value[1]\n" - " assert list(value[3]) == ['three']\n"); - } - - Y_UNIT_TEST(ToPyAndBackDictAsIs) { - TPythonTestEngine engine; - engine.ToPythonAndBack<NUdf::TDict<i32, double>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - 
return vb.NewDict(type, NUdf::TDictFlags::Sorted)-> - Add(NUdf::TUnboxedValuePod((i32) 1), NUdf::TUnboxedValuePod((double) 0.1)) - .Add(NUdf::TUnboxedValuePod((i32) 2), NUdf::TUnboxedValuePod((double) 0.2)) - .Add(NUdf::TUnboxedValuePod((i32) 3), NUdf::TUnboxedValuePod((double) 0.3)) - .Build(); - }, - "def Test(value): return value", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((i32) 0))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((i32) 3))); - UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((i32) 2)).Get<double>(), 0.2); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((i32) 4))); - - std::vector<std::pair<i32, double>> items; - const auto it = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { - items.emplace_back(key.Get<i32>(), payload.Get<double>()); - } - UNIT_ASSERT_EQUAL(items.size(), 3); - UNIT_ASSERT_EQUAL(items[0].first, 1); - UNIT_ASSERT_EQUAL(items[1].first, 2); - UNIT_ASSERT_EQUAL(items[2].first, 3); - UNIT_ASSERT_EQUAL(items[0].second, 0.1); - UNIT_ASSERT_EQUAL(items[1].second, 0.2); - UNIT_ASSERT_EQUAL(items[2].second, 0.3); - - std::vector<i32> keys; - const auto kit = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; kit.Next(key);) { - keys.emplace_back(key.Get<i32>()); - } - - UNIT_ASSERT_EQUAL(keys.size(), 3); - UNIT_ASSERT_EQUAL(keys[0], 1); - UNIT_ASSERT_EQUAL(keys[1], 2); - UNIT_ASSERT_EQUAL(keys[2], 3); - - std::vector<double> values; - const auto pit = value.GetPayloadsIterator(); - for (NUdf::TUnboxedValue payload; pit.Next(payload);) { - values.emplace_back(payload.Get<double>()); - } - - UNIT_ASSERT_EQUAL(values.size(), 3); - UNIT_ASSERT_EQUAL(values[0], 0.1); - UNIT_ASSERT_EQUAL(values[1], 0.2); - UNIT_ASSERT_EQUAL(values[2], 0.3); - } - ); - } - - Y_UNIT_TEST(PyInvertDict) { - TPythonTestEngine engine; - 
engine.ToPythonAndBack<NUdf::TDict<i32, double>, NUdf::TDict<double, i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - return vb.NewDict(type, NUdf::TDictFlags::Hashed)-> - Add(NUdf::TUnboxedValuePod((i32) 1), NUdf::TUnboxedValuePod((double) 0.1)) - .Add(NUdf::TUnboxedValuePod((i32) 2), NUdf::TUnboxedValuePod((double) 0.2)) - .Add(NUdf::TUnboxedValuePod((i32) 3), NUdf::TUnboxedValuePod((double) 0.3)) - .Build(); - }, - "def Test(value): return { v: k for k, v in value.items() }", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((double) 0.1))); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((double) 0.0))); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((double) 0.4))); - UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((double) 0.2)).Get<i32>(), 2); - - std::map<double, i32> items; - const auto it = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { - items.emplace(key.Get<double>(), payload.Get<i32>()); - } - UNIT_ASSERT_EQUAL(items.size(), 3); - UNIT_ASSERT_EQUAL(items[0.1], 1); - UNIT_ASSERT_EQUAL(items[0.2], 2); - UNIT_ASSERT_EQUAL(items[0.3], 3); + ); +} + +Y_UNIT_TEST(ToPyDictWrongKey) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TDict<int, double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Add(NUdf::TUnboxedValuePod((int)1), NUdf::TUnboxedValuePod((double)0.1)).Add(NUdf::TUnboxedValuePod((int)2), NUdf::TUnboxedValuePod((double)0.2)).Add(NUdf::TUnboxedValuePod((int)3), NUdf::TUnboxedValuePod((double)0.3)).Build(); + }, + "def Test(value):\n" + " try:\n" + " print(value[0])\n" + " except KeyError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} + +Y_UNIT_TEST(FromPyEmptySet) { + TPythonTestEngine engine; + + engine.ToMiniKQL<NUdf::TDict<ui32, void>>( + "def Test(): return 
set([])", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 0); + }); +} + +Y_UNIT_TEST(FromPySet) { + TPythonTestEngine engine; + + engine.ToMiniKQL<NUdf::TDict<char*, void>>( + "def Test(): return set(['one', 'two', 'three'])", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT(!value.IsSortedDict()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + + std::set<TString> set; + const auto it = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; it.Next(key);) { + set.emplace(key.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(set.size(), 3); + UNIT_ASSERT(set.count("one")); + UNIT_ASSERT(set.count("two")); + UNIT_ASSERT(set.count("three")); + }); +} + +Y_UNIT_TEST(FromPySet_Contains) { + TPythonTestEngine engine; + + engine.ToMiniKQL<NUdf::TDict<char*, void>>( + "def Test(): return {b'one', b'two', b'three'}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("one"))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("two"))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod::Embedded("three"))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod::Embedded("zero"))); + }); +} + +Y_UNIT_TEST(ToPyEmptySet) { + TPythonTestEngine engine; + + engine.ToPython<NUdf::TDict<ui8, void>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Build(); + }, + "def Test(value):\n" + " assert not value\n" + " assert len(value) == 0\n"); +} + +Y_UNIT_TEST(ToPySet) { + TPythonTestEngine engine; + + engine.ToPython<NUdf::TDict<ui8, void>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Add(NUdf::TUnboxedValuePod((ui8)1), 
NUdf::TUnboxedValuePod::Void()).Add(NUdf::TUnboxedValuePod((ui8)2), NUdf::TUnboxedValuePod::Void()).Add(NUdf::TUnboxedValuePod((ui8)3), NUdf::TUnboxedValuePod::Void()).Build(); + }, + "def Test(value):\n" + " assert len(value) == 3\n" + " assert all(isinstance(k, int) for k in iter(value))\n" + " assert all(i in value for i in [1, 2, 3])\n"); +} + +Y_UNIT_TEST(FromPyMultiDict) { + TPythonTestEngine engine; + + engine.ToMiniKQL<NUdf::TDict<ui32, NUdf::TListType<char*>>>( + "def Test(): return {1: ['one', 'two'], 3: ['three']}", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 2); + + std::unordered_map<ui32, std::vector<TString>> map; + const auto dictIt = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; dictIt.NextPair(key, payload);) { + auto& val = map[key.Get<ui32>()]; + const auto listIt = payload.GetListIterator(); + for (NUdf::TUnboxedValue listItem; listIt.Next(listItem);) { + val.emplace_back(listItem.AsStringRef()); } - ); - } - - Y_UNIT_TEST(FromPyOrderedDict) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( - "from collections import OrderedDict\n" - "def Test(): return OrderedDict([(2, 'two'), (1, 'one'), (3, 'three')])\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1)))); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0)))); - const auto v = value.Lookup(NUdf::TUnboxedValuePod(ui32(1))); - UNIT_ASSERT_EQUAL(v.AsStringRef(), "one"); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((ui32(4))))); + } + + UNIT_ASSERT_EQUAL(map.size(), 2); + auto it = map.find(1); + UNIT_ASSERT(it != map.end()); + UNIT_ASSERT_EQUAL(it->second.size(), 2); + UNIT_ASSERT_EQUAL(it->second[0], "one"); + UNIT_ASSERT_EQUAL(it->second[1], 
"two"); + it = map.find(3); + UNIT_ASSERT(it != map.end()); + UNIT_ASSERT_EQUAL(it->second.size(), 1); + UNIT_ASSERT_EQUAL(it->second[0], "three"); + }); +} + +Y_UNIT_TEST(ToPyMultiDict) { + TPythonTestEngine engine; + + engine.ToPython<NUdf::TDict<ui8, NUdf::TListType<NUdf::TUtf8>>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + ui32 flags = NUdf::TDictFlags::Hashed | NUdf::TDictFlags::Multi; + return vb.NewDict(type, flags)->Add(NUdf::TUnboxedValuePod((ui8)1), vb.NewString("one")).Add(NUdf::TUnboxedValuePod((ui8)1), vb.NewString("two")).Add(NUdf::TUnboxedValuePod((ui8)3), vb.NewString("three")).Build(); + }, + "def Test(value):\n" + " assert len(value) == 2\n" + " assert 1 in value\n" + " assert 3 in value\n" + " assert len(value[1]) == 2\n" + " assert 'one' in value[1]\n" + " assert 'two' in value[1]\n" + " assert list(value[3]) == ['three']\n"); +} + +Y_UNIT_TEST(ToPyAndBackDictAsIs) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TDict<i32, double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Sorted)->Add(NUdf::TUnboxedValuePod((i32)1), NUdf::TUnboxedValuePod((double)0.1)).Add(NUdf::TUnboxedValuePod((i32)2), NUdf::TUnboxedValuePod((double)0.2)).Add(NUdf::TUnboxedValuePod((i32)3), NUdf::TUnboxedValuePod((double)0.3)).Build(); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((i32)0))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((i32)3))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((i32)2)).Get<double>(), 0.2); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((i32)4))); + + std::vector<std::pair<i32, double>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<i32>(), payload.Get<double>()); 
+ } + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0].first, 1); + UNIT_ASSERT_EQUAL(items[1].first, 2); + UNIT_ASSERT_EQUAL(items[2].first, 3); + UNIT_ASSERT_EQUAL(items[0].second, 0.1); + UNIT_ASSERT_EQUAL(items[1].second, 0.2); + UNIT_ASSERT_EQUAL(items[2].second, 0.3); + + std::vector<i32> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<i32>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 3); + UNIT_ASSERT_EQUAL(keys[0], 1); + UNIT_ASSERT_EQUAL(keys[1], 2); + UNIT_ASSERT_EQUAL(keys[2], 3); + + std::vector<double> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; pit.Next(payload);) { + values.emplace_back(payload.Get<double>()); + } + + UNIT_ASSERT_EQUAL(values.size(), 3); + UNIT_ASSERT_EQUAL(values[0], 0.1); + UNIT_ASSERT_EQUAL(values[1], 0.2); + UNIT_ASSERT_EQUAL(values[2], 0.3); + }); +} + +Y_UNIT_TEST(PyInvertDict) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TDict<i32, double>, NUdf::TDict<double, i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Hashed)->Add(NUdf::TUnboxedValuePod((i32)1), NUdf::TUnboxedValuePod((double)0.1)).Add(NUdf::TUnboxedValuePod((i32)2), NUdf::TUnboxedValuePod((double)0.2)).Add(NUdf::TUnboxedValuePod((i32)3), NUdf::TUnboxedValuePod((double)0.3)).Build(); + }, + "def Test(value): return { v: k for k, v in value.items() }", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod((double)0.1))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod((double)0.0))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((double)0.4))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod((double)0.2)).Get<i32>(), 2); + + std::map<double, i32> items; + const auto it = value.GetDictIterator(); + for 
(NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace(key.Get<double>(), payload.Get<i32>()); + } + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0.1], 1); + UNIT_ASSERT_EQUAL(items[0.2], 2); + UNIT_ASSERT_EQUAL(items[0.3], 3); + }); +} + +Y_UNIT_TEST(FromPyOrderedDict) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TDict<ui32, char*>>( + "from collections import OrderedDict\n" + "def Test(): return OrderedDict([(2, 'two'), (1, 'one'), (3, 'three')])\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui32(1)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui32(0)))); + const auto v = value.Lookup(NUdf::TUnboxedValuePod(ui32(1))); + UNIT_ASSERT_EQUAL(v.AsStringRef(), "one"); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod((ui32(4))))); #if PY_MAJOR_VERSION >= 3 - std::vector<std::pair<ui32, TString>> items; - const auto it = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { - items.emplace_back(key.Get<ui32>(), payload.AsStringRef()); - } - - UNIT_ASSERT_EQUAL(items.size(), 3); - UNIT_ASSERT_EQUAL(items[0].first, 2); - UNIT_ASSERT_EQUAL(items[1].first, 1); - UNIT_ASSERT_EQUAL(items[2].first, 3); - UNIT_ASSERT_EQUAL(items[0].second, "two"); - UNIT_ASSERT_EQUAL(items[1].second, "one"); - UNIT_ASSERT_EQUAL(items[2].second, "three"); - - std::vector<ui32> keys; - const auto kit = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; kit.Next(key);) { - keys.emplace_back(key.Get<ui32>()); - } - - UNIT_ASSERT_EQUAL(keys.size(), 3); - UNIT_ASSERT_EQUAL(keys[0], 2); - UNIT_ASSERT_EQUAL(keys[1], 1); - UNIT_ASSERT_EQUAL(keys[2], 3); - - std::vector<TString> values; - const auto pit = value.GetPayloadsIterator(); - for (NUdf::TUnboxedValue payload; pit.Next(payload);) { - 
values.emplace_back(payload.AsStringRef()); - } - - UNIT_ASSERT_EQUAL(values.size(), 3); - UNIT_ASSERT_EQUAL(values[0], "two"); - UNIT_ASSERT_EQUAL(values[1], "one"); - UNIT_ASSERT_EQUAL(values[2], "three"); + std::vector<std::pair<ui32, TString>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<ui32>(), payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(items.size(), 3); + UNIT_ASSERT_EQUAL(items[0].first, 2); + UNIT_ASSERT_EQUAL(items[1].first, 1); + UNIT_ASSERT_EQUAL(items[2].first, 3); + UNIT_ASSERT_EQUAL(items[0].second, "two"); + UNIT_ASSERT_EQUAL(items[1].second, "one"); + UNIT_ASSERT_EQUAL(items[2].second, "three"); + + std::vector<ui32> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<ui32>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 3); + UNIT_ASSERT_EQUAL(keys[0], 2); + UNIT_ASSERT_EQUAL(keys[1], 1); + UNIT_ASSERT_EQUAL(keys[2], 3); + + std::vector<TString> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; pit.Next(payload);) { + values.emplace_back(payload.AsStringRef()); + } + + UNIT_ASSERT_EQUAL(values.size(), 3); + UNIT_ASSERT_EQUAL(values[0], "two"); + UNIT_ASSERT_EQUAL(values[1], "one"); + UNIT_ASSERT_EQUAL(values[2], "three"); #endif - }); - } - - Y_UNIT_TEST(ToPyAndBackSetAsIs) { - TPythonTestEngine engine; - engine.ToPythonAndBack<NUdf::TDict<float, void>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - return vb.NewDict(type, NUdf::TDictFlags::Sorted)-> - Add(NUdf::TUnboxedValuePod(0.1f), NUdf::TUnboxedValuePod::Void()) - .Add(NUdf::TUnboxedValuePod(0.2f), NUdf::TUnboxedValuePod::Void()) - .Add(NUdf::TUnboxedValuePod(0.3f), NUdf::TUnboxedValuePod::Void()) - .Build(); - }, - "def Test(value): return value", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value.HasDictItems()); - 
UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(0.0f))); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(0.3f))); - UNIT_ASSERT(value.Lookup(NUdf::TUnboxedValuePod(0.2f))); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(0.4f))); - - std::vector<float> keys; - const auto kit = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; kit.Next(key);) { - keys.emplace_back(key.Get<float>()); - } - - UNIT_ASSERT_EQUAL(keys.size(), 3); - UNIT_ASSERT_EQUAL(keys[0], 0.1f); - UNIT_ASSERT_EQUAL(keys[1], 0.2f); - UNIT_ASSERT_EQUAL(keys[2], 0.3f); - } - ); - } - - Y_UNIT_TEST(ToPyAsThinList_FromPyAsDict) { - TPythonTestEngine engine; - engine.ToPythonAndBack<NUdf::TListType<float>, NUdf::TDict<i8, float>>( - [](const TType*, const NUdf::IValueBuilder& vb) { - NUdf::TUnboxedValue *items = nullptr; - const auto a = vb.NewArray(9U, items); - const float f[] = { 0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f }; - std::transform(f, f + 9U, items, [](float v){ return NUdf::TUnboxedValuePod(v); }); - return a; - }, - "def Test(value): return value", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i8(0)))); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i8(10)))); - UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(i8(5))).Get<float>(), 0.6f); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i8(13)))); - - std::vector<std::pair<i8, float>> items; - const auto it = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { - items.emplace_back(key.Get<i8>(), payload.Get<float>()); - } - - UNIT_ASSERT_EQUAL(items.size(), 9U); - UNIT_ASSERT_EQUAL(items.front().first, 0); - UNIT_ASSERT_EQUAL(items.back().first, 8); - UNIT_ASSERT_EQUAL(items.front().second, 0.1f); - UNIT_ASSERT_EQUAL(items.back().second, 0.9f); - - std::vector<i8> keys; - const 
auto kit = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; kit.Next(key);) { - keys.emplace_back(key.Get<i8>()); - } - - UNIT_ASSERT_EQUAL(keys.size(), 9U); - UNIT_ASSERT_EQUAL(keys.front(), 0); - UNIT_ASSERT_EQUAL(keys.back(), 8); - - std::vector<float> values; - const auto pit = value.GetPayloadsIterator(); - for (NUdf::TUnboxedValue payload; pit.Next(payload);) { - values.emplace_back(payload.Get<float>()); - } - - UNIT_ASSERT_EQUAL(values.size(), 9U); - UNIT_ASSERT_EQUAL(values.front(), 0.1f); - UNIT_ASSERT_EQUAL(values.back(), 0.9f); - } - ); - } - - Y_UNIT_TEST(ToPyAsLazyList_FromPyAsDict) { - TPythonTestEngine engine; - engine.ToPythonAndBack<NUdf::TListType<i32>, NUdf::TDict<ui8, i32>>( - [](const TType*, const NUdf::IValueBuilder&) { - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(1, 10)); - }, - "def Test(value): return value", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value.HasDictItems()); - UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U); - UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui8(0)))); - UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui8(10)))); - UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(ui8(5))).Get<i32>(), 6); - UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui8(13)))); - - std::vector<std::pair<ui8, i32>> items; - const auto it = value.GetDictIterator(); - for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { - items.emplace_back(key.Get<ui8>(), payload.Get<i32>()); - } - - UNIT_ASSERT_EQUAL(items.size(), 9U); - UNIT_ASSERT_EQUAL(items.front().first, 0); - UNIT_ASSERT_EQUAL(items.back().first, 8); - UNIT_ASSERT_EQUAL(items.front().second, 1); - UNIT_ASSERT_EQUAL(items.back().second, 9); - - std::vector<ui8> keys; - const auto kit = value.GetKeysIterator(); - for (NUdf::TUnboxedValue key; kit.Next(key);) { - keys.emplace_back(key.Get<ui8>()); - } - - UNIT_ASSERT_EQUAL(keys.size(), 9U); - UNIT_ASSERT_EQUAL(keys.front(), 0); - UNIT_ASSERT_EQUAL(keys.back(), 8); - - 
std::vector<i32> values; - const auto pit = value.GetPayloadsIterator(); - for (NUdf::TUnboxedValue payload; pit.Next(payload);) { - values.emplace_back(payload.Get<i32>()); - } - - UNIT_ASSERT_EQUAL(values.size(), 9U); - UNIT_ASSERT_EQUAL(values.front(), 1); - UNIT_ASSERT_EQUAL(values.back(), 9); - } - ); - } + }); +} + +Y_UNIT_TEST(ToPyAndBackSetAsIs) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TDict<float, void>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + return vb.NewDict(type, NUdf::TDictFlags::Sorted)->Add(NUdf::TUnboxedValuePod(0.1f), NUdf::TUnboxedValuePod::Void()).Add(NUdf::TUnboxedValuePod(0.2f), NUdf::TUnboxedValuePod::Void()).Add(NUdf::TUnboxedValuePod(0.3f), NUdf::TUnboxedValuePod::Void()).Build(); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 3); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(0.0f))); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(0.3f))); + UNIT_ASSERT(value.Lookup(NUdf::TUnboxedValuePod(0.2f))); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(0.4f))); + + std::vector<float> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<float>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 3); + UNIT_ASSERT_EQUAL(keys[0], 0.1f); + UNIT_ASSERT_EQUAL(keys[1], 0.2f); + UNIT_ASSERT_EQUAL(keys[2], 0.3f); + }); +} + +Y_UNIT_TEST(ToPyAsThinList_FromPyAsDict) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TListType<float>, NUdf::TDict<i8, float>>( + [](const TType*, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + const auto a = vb.NewArray(9U, items); + const float f[] = {0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f, 0.9f}; + std::transform(f, f + 9U, items, [](float v) { return NUdf::TUnboxedValuePod(v); }); + return a; + }, + "def Test(value): return value", + [](const 
NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(i8(0)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(i8(10)))); + UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(i8(5))).Get<float>(), 0.6f); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(i8(13)))); + + std::vector<std::pair<i8, float>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<i8>(), payload.Get<float>()); + } + + UNIT_ASSERT_EQUAL(items.size(), 9U); + UNIT_ASSERT_EQUAL(items.front().first, 0); + UNIT_ASSERT_EQUAL(items.back().first, 8); + UNIT_ASSERT_EQUAL(items.front().second, 0.1f); + UNIT_ASSERT_EQUAL(items.back().second, 0.9f); + + std::vector<i8> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<i8>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 9U); + UNIT_ASSERT_EQUAL(keys.front(), 0); + UNIT_ASSERT_EQUAL(keys.back(), 8); + + std::vector<float> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; pit.Next(payload);) { + values.emplace_back(payload.Get<float>()); + } + + UNIT_ASSERT_EQUAL(values.size(), 9U); + UNIT_ASSERT_EQUAL(values.front(), 0.1f); + UNIT_ASSERT_EQUAL(values.back(), 0.9f); + }); +} + +Y_UNIT_TEST(ToPyAsLazyList_FromPyAsDict) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TListType<i32>, NUdf::TDict<ui8, i32>>( + [](const TType*, const NUdf::IValueBuilder&) { + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(1, 10)); + }, + "def Test(value): return value", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value.HasDictItems()); + UNIT_ASSERT_EQUAL(value.GetDictLength(), 9U); + UNIT_ASSERT(value.Contains(NUdf::TUnboxedValuePod(ui8(0)))); + UNIT_ASSERT(!value.Contains(NUdf::TUnboxedValuePod(ui8(10)))); + 
UNIT_ASSERT_EQUAL(value.Lookup(NUdf::TUnboxedValuePod(ui8(5))).Get<i32>(), 6); + UNIT_ASSERT(!value.Lookup(NUdf::TUnboxedValuePod(ui8(13)))); + + std::vector<std::pair<ui8, i32>> items; + const auto it = value.GetDictIterator(); + for (NUdf::TUnboxedValue key, payload; it.NextPair(key, payload);) { + items.emplace_back(key.Get<ui8>(), payload.Get<i32>()); + } + + UNIT_ASSERT_EQUAL(items.size(), 9U); + UNIT_ASSERT_EQUAL(items.front().first, 0); + UNIT_ASSERT_EQUAL(items.back().first, 8); + UNIT_ASSERT_EQUAL(items.front().second, 1); + UNIT_ASSERT_EQUAL(items.back().second, 9); + + std::vector<ui8> keys; + const auto kit = value.GetKeysIterator(); + for (NUdf::TUnboxedValue key; kit.Next(key);) { + keys.emplace_back(key.Get<ui8>()); + } + + UNIT_ASSERT_EQUAL(keys.size(), 9U); + UNIT_ASSERT_EQUAL(keys.front(), 0); + UNIT_ASSERT_EQUAL(keys.back(), 8); + + std::vector<i32> values; + const auto pit = value.GetPayloadsIterator(); + for (NUdf::TUnboxedValue payload; pit.Next(payload);) { + values.emplace_back(payload.Get<i32>()); + } + + UNIT_ASSERT_EQUAL(values.size(), 9U); + UNIT_ASSERT_EQUAL(values.front(), 1); + UNIT_ASSERT_EQUAL(values.back(), 9); + }); } +} // Y_UNIT_TEST_SUITE(TPyDictTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_errors.cpp b/yql/essentials/udfs/common/python/bindings/py_errors.cpp index 5741978d543..0118bee003f 100644 --- a/yql/essentials/udfs/common/python/bindings/py_errors.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_errors.cpp @@ -38,11 +38,11 @@ TString GetLastErrorAsString() return {}; } - TPyObjectPtr etypePtr {etype, TPyObjectPtr::ADD_REF}; - TPyObjectPtr evaluePtr {evalue, TPyObjectPtr::ADD_REF}; - TPyObjectPtr etracebackPtr {etraceback, TPyObjectPtr::ADD_REF}; + TPyObjectPtr etypePtr{etype, TPyObjectPtr::ADD_REF}; + TPyObjectPtr evaluePtr{evalue, TPyObjectPtr::ADD_REF}; + TPyObjectPtr etracebackPtr{etraceback, TPyObjectPtr::ADD_REF}; - TPyObjectPtr stderrObject {PySys_GetObject("stderr"), 
TPyObjectPtr::ADD_REF}; + TPyObjectPtr stderrObject{PySys_GetObject("stderr"), TPyObjectPtr::ADD_REF}; if (!stderrObject) { return {}; } @@ -59,8 +59,7 @@ TString GetLastErrorAsString() return {}; } unused.ResetSteal( - PyObject_CallMethod(stderrObject.Get(), "_toggle_real_mode", nullptr) - ); + PyObject_CallMethod(stderrObject.Get(), "_toggle_real_mode", nullptr)); TString errorValue; if (!TryPyCast(error.Get(), errorValue)) { @@ -69,4 +68,4 @@ TString GetLastErrorAsString() return errorValue; } -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_errors.h b/yql/essentials/udfs/common/python/bindings/py_errors.h index 2306b47bb95..1aa6ecf6eb0 100644 --- a/yql/essentials/udfs/common/python/bindings/py_errors.h +++ b/yql/essentials/udfs/common/python/bindings/py_errors.h @@ -8,17 +8,17 @@ TString GetLastErrorAsString(); #define PY_TRY try -#define PY_CATCH(ErrorValue) \ - catch (const yexception& e) { \ +#define PY_CATCH(ErrorValue) \ + catch (const yexception& e) { \ PyErr_SetString(PyExc_RuntimeError, e.what()); \ - return ErrorValue; \ + return ErrorValue; \ } -#define PY_ENSURE(condition, message) \ - do { \ - if (Y_UNLIKELY(!(condition))) { \ +#define PY_ENSURE(condition, message) \ + do { \ + if (Y_UNLIKELY(!(condition))) { \ throw yexception() << message; \ - } \ + } \ } while (0) -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_gil.h b/yql/essentials/udfs/common/python/bindings/py_gil.h index 6d629e7b237..7d402773186 100644 --- a/yql/essentials/udfs/common/python/bindings/py_gil.h +++ b/yql/essentials/udfs/common/python/bindings/py_gil.h @@ -2,11 +2,9 @@ #include <Python.h> - namespace NPython { -struct TPyGilLocker -{ +struct TPyGilLocker { TPyGilLocker() : Gil_(PyGILState_Ensure()) { diff --git a/yql/essentials/udfs/common/python/bindings/py_iterator.cpp b/yql/essentials/udfs/common/python/bindings/py_iterator.cpp index c6f21cecb16..e53a8f550df 
100644 --- a/yql/essentials/udfs/common/python/bindings/py_iterator.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_iterator.cpp @@ -13,8 +13,7 @@ namespace NPython { ////////////////////////////////////////////////////////////////////////////// // TPyIterator interface ////////////////////////////////////////////////////////////////////////////// -struct TPyIterator -{ +struct TPyIterator { PyObject_HEAD; TPyCastContext::TPtr CastCtx; const NUdf::TType* ItemType; @@ -38,81 +37,80 @@ struct TPyIterator }; #if PY_MAJOR_VERSION >= 3 -#define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) + #define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) #endif PyTypeObject PyIteratorType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TIterator"), - INIT_MEMBER(tp_basicsize , sizeof(TPyIterator)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyIterator::Dealloc), + INIT_MEMBER(tp_name, "yql.TIterator"), + INIT_MEMBER(tp_basicsize, sizeof(TPyIterator)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyIterator::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyIterator::Repr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - 
INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), - INIT_MEMBER(tp_doc , "yql.TDictKeysIterator object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , PyObject_SelfIter), - INIT_MEMBER(tp_iternext , TPyIterator::Next), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyIterator::Repr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc, "yql.TDictKeysIterator object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, PyObject_SelfIter), + INIT_MEMBER(tp_iternext, TPyIterator::Next), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + 
INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; ////////////////////////////////////////////////////////////////////////////// // TPyPairIterator interface ////////////////////////////////////////////////////////////////////////////// -struct TPyPairIterator -{ +struct TPyPairIterator { PyObject_HEAD; TPyCastContext::TPtr CastCtx; const NUdf::TType* KeyType; @@ -138,68 +136,68 @@ struct TPyPairIterator PyTypeObject PyPairIteratorType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TDictIterator"), - INIT_MEMBER(tp_basicsize , sizeof(TPyPairIterator)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyPairIterator::Dealloc), + INIT_MEMBER(tp_name, "yql.TDictIterator"), + INIT_MEMBER(tp_basicsize, sizeof(TPyPairIterator)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyPairIterator::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async 
, nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyPairIterator::Repr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), - INIT_MEMBER(tp_doc , "yql.TPairIterator object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , PyObject_SelfIter), - INIT_MEMBER(tp_iternext , TPyPairIterator::Next), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyPairIterator::Repr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_HAVE_ITER), + 
INIT_MEMBER(tp_doc, "yql.TPairIterator object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, PyObject_SelfIter), + INIT_MEMBER(tp_iternext, TPyPairIterator::Next), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; @@ -217,15 +215,16 @@ PyObject* TPyIterator::New(const TPyCastContext::TPtr& ctx, const NUdf::TType* i } PyObject* TPyIterator::Next(PyObject* self) -{ - PY_TRY { - const auto iter = Cast(self); - NUdf::TUnboxedValue item; - if (NUdf::TBoxedValueAccessor::Next(*iter->Iterator.Get(), item)) { - return (iter->ItemType ? ToPyObject(iter->CastCtx, iter->ItemType, item) : PyCast<ui64>(item.Get<ui64>())).Release(); - } - return nullptr; - } PY_CATCH(nullptr) + { + PY_TRY{ + const auto iter = Cast(self); +NUdf::TUnboxedValue item; +if (NUdf::TBoxedValueAccessor::Next(*iter->Iterator.Get(), item)) { + return (iter->ItemType ? 
ToPyObject(iter->CastCtx, iter->ItemType, item) : PyCast<ui64>(item.Get<ui64>())).Release(); +} +return nullptr; +} // namespace NPython +PY_CATCH(nullptr) } ////////////////////////////////////////////////////////////////////////////// @@ -243,36 +242,35 @@ PyObject* TPyPairIterator::New(const TPyCastContext::TPtr& ctx, const NUdf::TTyp } PyObject* TPyPairIterator::Next(PyObject* self) -{ - PY_TRY { - const auto iter = Cast(self); - NUdf::TUnboxedValue k, v; - if (NUdf::TBoxedValueAccessor::NextPair(*iter->Iterator.Get(), k, v)) { - const TPyObjectPtr key = iter->KeyType ? - ToPyObject(iter->CastCtx, iter->KeyType, k): - PyCast<ui64>(k.Get<ui64>()); - const TPyObjectPtr value = ToPyObject(iter->CastCtx, iter->PayloadType, v); - return PyTuple_Pack(2, key.Get(), value.Get()); - } - return nullptr; - } PY_CATCH(nullptr) + { + PY_TRY{ + const auto iter = Cast(self); +NUdf::TUnboxedValue k, v; +if (NUdf::TBoxedValueAccessor::NextPair(*iter->Iterator.Get(), k, v)) { + const TPyObjectPtr key = iter->KeyType ? 
ToPyObject(iter->CastCtx, iter->KeyType, k) : PyCast<ui64>(k.Get<ui64>()); + const TPyObjectPtr value = ToPyObject(iter->CastCtx, iter->PayloadType, v); + return PyTuple_Pack(2, key.Get(), value.Get()); +} +return nullptr; +} +PY_CATCH(nullptr) } ////////////////////////////////////////////////////////////////////////////// TPyObjectPtr ToPyIterator( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TUnboxedValuePod& value) { return TPyIterator::New(castCtx, itemType, value.AsBoxed()); } TPyObjectPtr ToPyIterator( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* keyType, - const NUdf::TType* payloadType, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payloadType, + const NUdf::TUnboxedValuePod& value) { return TPyPairIterator::New(castCtx, keyType, payloadType, value.AsBoxed()); } diff --git a/yql/essentials/udfs/common/python/bindings/py_iterator.h b/yql/essentials/udfs/common/python/bindings/py_iterator.h index 5c5de27b0bc..29a9534a1d0 100644 --- a/yql/essentials/udfs/common/python/bindings/py_iterator.h +++ b/yql/essentials/udfs/common/python/bindings/py_iterator.h @@ -9,15 +9,14 @@ extern PyTypeObject PyIteratorType; extern PyTypeObject PyPairIteratorType; TPyObjectPtr ToPyIterator( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* itemType, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const NKikimr::NUdf::TUnboxedValuePod& value); TPyObjectPtr ToPyIterator( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* keyType, - const NKikimr::NUdf::TType* payloadType, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* keyType, + const 
NKikimr::NUdf::TType* payloadType, + const NKikimr::NUdf::TUnboxedValuePod& value); - -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp index 1408f185a75..4ffaee2b66c 100644 --- a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_dict.cpp @@ -18,14 +18,16 @@ namespace { ////////////////////////////////////////////////////////////////////////////// // TLazyDictBase ////////////////////////////////////////////////////////////////////////////// -class TLazyDictBase: public NUdf::TBoxedValue -{ +class TLazyDictBase: public NUdf::TBoxedValue { protected: class TIterator: public NUdf::TBoxedValue { public: TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, TPyObjectPtr&& pyIter) - : CastCtx_(ctx), ItemType_(type), PyIter_(std::move(pyIter)) - {} + : CastCtx_(ctx) + , ItemType_(type) + , PyIter_(std::move(pyIter)) + { + } ~TIterator() { const TPyGilLocker lock; @@ -80,8 +82,12 @@ protected: class TPairIterator: public NUdf::TBoxedValue { public: TPairIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, TPyObjectPtr&& pyIter) - : CastCtx_(ctx), KeyType_(keyType), PayType_(payType), PyIter_(std::move(pyIter)) - {} + : CastCtx_(ctx) + , KeyType_(keyType) + , PayType_(payType) + , PyIter_(std::move(pyIter)) + { + } ~TPairIterator() { const TPyGilLocker lock; @@ -131,8 +137,11 @@ protected: }; TLazyDictBase(const TPyCastContext::TPtr& castCtx, const NUdf::TType* itemType, PyObject* pyObject) - : CastCtx_(castCtx), ItemType_(itemType), PyObject_(pyObject, TPyObjectPtr::AddRef()) - {} + : CastCtx_(castCtx) + , ItemType_(itemType) + , PyObject_(pyObject, TPyObjectPtr::AddRef()) + { + } ~TLazyDictBase() { const TPyGilLocker lock; @@ -146,8 +155,7 @@ protected: UdfTerminate((TStringBuilder() << 
CastCtx_->PyCtx->Pos << GetLastErrorAsString()).c_str()); } return bool(has); - } - catch (const yexception& e) { + } catch (const yexception& e) { UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } @@ -159,15 +167,18 @@ protected: ////////////////////////////////////////////////////////////////////////////// // TLazyMapping ////////////////////////////////////////////////////////////////////////////// -class TLazyMapping: public TLazyDictBase -{ +class TLazyMapping: public TLazyDictBase { public: TLazyMapping(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, PyObject* dict) - : TLazyDictBase(ctx, keyType, dict), PayType_(payType) - {} + : TLazyDictBase(ctx, keyType, dict) + , PayType_(payType) + { + } private: - bool IsSortedDict() const override { return false; } + bool IsSortedDict() const override { + return false; + } ui64 GetDictLength() const override try { const TPyGilLocker lock; @@ -238,9 +249,7 @@ private: const TPyGilLocker lock; if (const TPyObjectPtr pyKey = ToPyObject(CastCtx_, ItemType_, key)) { const auto map = PyObject_.Get(); - const auto has = map->ob_type->tp_as_sequence && map->ob_type->tp_as_sequence->sq_contains ? - (map->ob_type->tp_as_sequence->sq_contains)(map, pyKey.Get()) : - PyMapping_HasKey(map, pyKey.Get()); + const auto has = map->ob_type->tp_as_sequence && map->ob_type->tp_as_sequence->sq_contains ? 
(map->ob_type->tp_as_sequence->sq_contains)(map, pyKey.Get()) : PyMapping_HasKey(map, pyKey.Get()); if (has >= 0) { return bool(has); @@ -258,15 +267,18 @@ private: ////////////////////////////////////////////////////////////////////////////// // TLazyDict ////////////////////////////////////////////////////////////////////////////// -class TLazyDict: public TLazyDictBase -{ +class TLazyDict: public TLazyDictBase { public: TLazyDict(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, const NUdf::TType* payType, PyObject* dict) - : TLazyDictBase(ctx, keyType, dict), PayType_(payType) - {} + : TLazyDictBase(ctx, keyType, dict) + , PayType_(payType) + { + } private: - bool IsSortedDict() const override { return false; } + bool IsSortedDict() const override { + return false; + } ui64 GetDictLength() const override try { const TPyGilLocker lock; @@ -349,15 +361,17 @@ private: ////////////////////////////////////////////////////////////////////////////// // TLazySet ////////////////////////////////////////////////////////////////////////////// -class TLazySet: public TLazyDictBase -{ +class TLazySet: public TLazyDictBase { public: TLazySet(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, PyObject* set) : TLazyDictBase(ctx, itemType, set) - {} + { + } private: - bool IsSortedDict() const override { return false; } + bool IsSortedDict() const override { + return false; + } ui64 GetDictLength() const override try { const TPyGilLocker lock; @@ -425,15 +439,17 @@ private: ////////////////////////////////////////////////////////////////////////////// // TLazySequenceAsSet ////////////////////////////////////////////////////////////////////////////// -class TLazySequenceAsSet: public TLazyDictBase -{ +class TLazySequenceAsSet: public TLazyDictBase { public: TLazySequenceAsSet(const TPyCastContext::TPtr& ctx, const NUdf::TType* keyType, PyObject* sequence) : TLazyDictBase(ctx, keyType, sequence) - {} + { + } private: - bool IsSortedDict() const override { 
return false; } + bool IsSortedDict() const override { + return false; + } ui64 GetDictLength() const override try { const TPyGilLocker lock; @@ -501,28 +517,31 @@ private: ////////////////////////////////////////////////////////////////////////////// // TLazySequenceAsDict ////////////////////////////////////////////////////////////////////////////// -template<typename KeyType> -class TLazySequenceAsDict: public NUdf::TBoxedValue -{ +template <typename KeyType> +class TLazySequenceAsDict: public NUdf::TBoxedValue { private: class TKeyIterator: public NUdf::TBoxedValue { public: TKeyIterator(Py_ssize_t size) - : Size_(size), Index_(0) - {} + : Size_(size) + , Index_(0) + { + } private: bool Skip() override { - if (Index_ >= Size_) + if (Index_ >= Size_) { return false; + } ++Index_; return true; } bool Next(NUdf::TUnboxedValue& value) override { - if (Index_ >= Size_) + if (Index_ >= Size_) { return false; + } value = NUdf::TUnboxedValuePod(KeyType(Index_++)); return true; @@ -536,8 +555,13 @@ private: class TIterator: public NUdf::TBoxedValue { public: TIterator(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, Py_ssize_t size, const TPyObjectPtr& pySeq) - : CastCtx_(ctx), ItemType_(itemType), PySeq_(pySeq), Size_(size), Index_(0) - {} + : CastCtx_(ctx) + , ItemType_(itemType) + , PySeq_(pySeq) + , Size_(size) + , Index_(0) + { + } ~TIterator() { const TPyGilLocker lock; @@ -546,16 +570,18 @@ private: private: bool Skip() override { - if (Index_ >= Size_) + if (Index_ >= Size_) { return false; + } ++Index_; return true; } bool Next(NUdf::TUnboxedValue& value) override try { - if (Index_ >= Size_) + if (Index_ >= Size_) { return false; + } const TPyGilLocker lock; value = FromPyObject(CastCtx_, ItemType_, PySequence_Fast_GET_ITEM(PySeq_.Get(), Index_++)); @@ -565,8 +591,9 @@ private: } bool NextPair(NUdf::TUnboxedValue& key, NUdf::TUnboxedValue& pay) override try { - if (Index_ >= Size_) + if (Index_ >= Size_) { return false; + } const TPyGilLocker 
lock; key = NUdf::TUnboxedValuePod(KeyType(Index_)); @@ -586,8 +613,12 @@ private: public: TLazySequenceAsDict(const TPyCastContext::TPtr& ctx, const NUdf::TType* itemType, TPyObjectPtr&& sequence, Py_ssize_t size) - : CastCtx_(ctx), ItemType_(itemType), Size_(size), PySeq_(std::move(sequence)) - {} + : CastCtx_(ctx) + , ItemType_(itemType) + , Size_(size) + , PySeq_(std::move(sequence)) + { + } ~TLazySequenceAsDict() { @@ -596,7 +627,9 @@ public: } private: - bool IsSortedDict() const override { return true; } + bool IsSortedDict() const override { + return true; + } bool HasDictItems() const override { return Size_ > 0; @@ -608,15 +641,17 @@ private: NUdf::TUnboxedValue Lookup(const NUdf::TUnboxedValuePod& key) const override { const Py_ssize_t index = key.Get<KeyType>(); - if (index >= -Size_ && index < Size_) try { - const TPyGilLocker lock; - if (const auto item = PySequence_Fast_GET_ITEM(PySeq_.Get(), index >= 0 ? index : Size_ + index)) { - return FromPyObject(CastCtx_, ItemType_, item).Release().MakeOptional(); - } else if (PyErr_Occurred()) { - UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).c_str()); + if (index >= -Size_ && index < Size_) { + try { + const TPyGilLocker lock; + if (const auto item = PySequence_Fast_GET_ITEM(PySeq_.Get(), index >= 0 ? 
index : Size_ + index)) { + return FromPyObject(CastCtx_, ItemType_, item).Release().MakeOptional(); + } else if (PyErr_Occurred()) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << GetLastErrorAsString()).c_str()); + } + } catch (const yexception& e) { + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } - } catch (const yexception& e) { - UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } return NUdf::TUnboxedValue(); } @@ -644,60 +679,60 @@ private: TPyObjectPtr PySeq_; }; -} // namspace +} // namespace NUdf::TUnboxedValue FromPyDict( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* keyType, - const NUdf::TType* payType, - PyObject* dict) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payType, + PyObject* dict) { return NUdf::TUnboxedValuePod(new TLazyDict(castCtx, keyType, payType, dict)); } NUdf::TUnboxedValue FromPyMapping( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* keyType, - const NUdf::TType* payType, - PyObject* map) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + const NUdf::TType* payType, + PyObject* map) { return NUdf::TUnboxedValuePod(new TLazyMapping(castCtx, keyType, payType, map)); } NUdf::TUnboxedValue FromPySet( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - PyObject* set) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + PyObject* set) { return NUdf::TUnboxedValuePod(new TLazySet(castCtx, itemType, set)); } NUdf::TUnboxedValue FromPySequence( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* keyType, - PyObject* set) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* keyType, + PyObject* set) { return NUdf::TUnboxedValuePod(new TLazySequenceAsSet(castCtx, keyType, set)); } NUdf::TUnboxedValue FromPySequence( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - const NUdf::TDataTypeId keyType, - 
PyObject* sequence) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TDataTypeId keyType, + PyObject* sequence) { if (TPyObjectPtr fast = PySequence_Fast(sequence, "Can't get fast sequence.")) { - const auto size = PySequence_Fast_GET_SIZE(fast.Get()); - if (size >= 0) { - switch (keyType) { + const auto size = PySequence_Fast_GET_SIZE(fast.Get()); + if (size >= 0) { + switch (keyType) { #define MAKE_PRIMITIVE_TYPE_SIZE(type) \ - case NUdf::TDataType<type>::Id: \ - return NUdf::TUnboxedValuePod(new TLazySequenceAsDict<type>(castCtx, itemType, std::move(fast), size)); - INTEGRAL_VALUE_TYPES(MAKE_PRIMITIVE_TYPE_SIZE) + case NUdf::TDataType<type>::Id: \ + return NUdf::TUnboxedValuePod(new TLazySequenceAsDict<type>(castCtx, itemType, std::move(fast), size)); + INTEGRAL_VALUE_TYPES(MAKE_PRIMITIVE_TYPE_SIZE) #undef MAKE_PRIMITIVE_TYPE_SIZE + } + Y_ABORT("Invalid key type."); } - Y_ABORT("Invalid key type."); - } } UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).c_str()); } diff --git a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp index ef135f3ba20..ca6e83d1aee 100644 --- a/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_lazy_mkql_list.cpp @@ -11,7 +11,6 @@ #include <util/generic/maybe.h> #include <util/string/builder.h> - using namespace NKikimr; namespace NPython { @@ -50,9 +49,8 @@ static bool IsIteratorHasItems(PyObject* iter, const TPyCastContext::TPtr& castC ////////////////////////////////////////////////////////////////////////////// // TBaseLazyList ////////////////////////////////////////////////////////////////////////////// -template<typename TDerived> -class TBaseLazyList: public NUdf::TBoxedValue -{ +template <typename TDerived> +class TBaseLazyList: public NUdf::TBoxedValue { using TListSelf = TBaseLazyList<TDerived>; class TIterator: 
public NUdf::TBoxedValue { @@ -61,7 +59,8 @@ class TBaseLazyList: public NUdf::TBoxedValue : CastCtx_(ctx) , PyIter_(std::move(pyIter)) , ItemType_(type) - {} + { + } ~TIterator() { const TPyGilLocker lock; @@ -110,9 +109,9 @@ class TBaseLazyList: public NUdf::TBoxedValue public: TBaseLazyList( - const TPyCastContext::TPtr& castCtx, - TPyObjectPtr&& pyObject, - const NUdf::TType* type) + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) : CastCtx_(castCtx) , PyObject_(std::move(pyObject)) , ItemType_(NUdf::TListTypeInspector(*CastCtx_->PyCtx->TypeInfoHelper, type).GetItemType()) @@ -127,8 +126,7 @@ public: private: TPyObjectPtr GetIterator() const try { return static_cast<const TDerived*>(this)->GetIteratorImpl(); - } - catch (const yexception& e) { + } catch (const yexception& e) { UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } @@ -153,8 +151,9 @@ private: } bool HasListItems() const override try { - if (Length_.Defined()) + if (Length_.Defined()) { return *Length_ > 0; + } const TPyGilLocker lock; TPyObjectPtr iter = GetIterator(); @@ -163,8 +162,7 @@ private: Length_ = 0; } return hasItems; - } - catch (const yexception& e) { + } catch (const yexception& e) { UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } @@ -182,31 +180,27 @@ private: } NUdf::IBoxedValuePtr ReverseListImpl( - const NUdf::IValueBuilder& builder) const override - { + const NUdf::IValueBuilder& builder) const override { Y_UNUSED(builder); return nullptr; } NUdf::IBoxedValuePtr SkipListImpl( - const NUdf::IValueBuilder& builder, ui64 count) const override - { + const NUdf::IValueBuilder& builder, ui64 count) const override { Y_UNUSED(builder); Y_UNUSED(count); return nullptr; } NUdf::IBoxedValuePtr TakeListImpl( - const NUdf::IValueBuilder& builder, ui64 count) const override - { + const NUdf::IValueBuilder& builder, ui64 count) const override { Y_UNUSED(builder); Y_UNUSED(count); return 
nullptr; } NUdf::IBoxedValuePtr ToIndexDictImpl( - const NUdf::IValueBuilder& builder) const override - { + const NUdf::IValueBuilder& builder) const override { Y_UNUSED(builder); return nullptr; } @@ -221,26 +215,28 @@ protected: ////////////////////////////////////////////////////////////////////////////// // TLazyIterable ////////////////////////////////////////////////////////////////////////////// -class TLazyIterable: public TBaseLazyList<TLazyIterable> -{ +class TLazyIterable: public TBaseLazyList<TLazyIterable> { using TBase = TBaseLazyList<TLazyIterable>; + public: TLazyIterable( - const TPyCastContext::TPtr& castCtx, - TPyObjectPtr&& pyObject, - const NUdf::TType* type) + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) : TBase(castCtx, std::move(pyObject), type) - {} + { + } TPyObjectPtr GetIteratorImpl() const { - if (const TPyObjectPtr ret = PyObject_GetIter(PyObject_.Get())) { + if (const TPyObjectPtr ret = PyObject_GetIter(PyObject_.Get())) { return ret; } UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos - << "Cannot get iterator from object: " - << PyObjectRepr(PyObject_.Get()) << ", error: " - << GetLastErrorAsString()).c_str()); + << "Cannot get iterator from object: " + << PyObjectRepr(PyObject_.Get()) << ", error: " + << GetLastErrorAsString()) + .c_str()); } private: @@ -259,8 +255,7 @@ private: } } return *Length_; - } - catch (const yexception& e) { + } catch (const yexception& e) { UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } @@ -278,8 +273,7 @@ private: Length_ = 0; } return hasItems; - } - catch (const yexception& e) { + } catch (const yexception& e) { UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } }; @@ -287,23 +281,24 @@ private: ////////////////////////////////////////////////////////////////////////////// // TLazyIterator ////////////////////////////////////////////////////////////////////////////// -class TLazyIterator: 
public TBaseLazyList<TLazyIterator> -{ +class TLazyIterator: public TBaseLazyList<TLazyIterator> { using TBase = TBaseLazyList<TLazyIterator>; + public: TLazyIterator( - const TPyCastContext::TPtr& castCtx, - TPyObjectPtr&& pyObject, - const NUdf::TType* type) + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) : TBase(castCtx, std::move(pyObject), type) , IteratorDrained_(false) - {} + { + } TPyObjectPtr GetIteratorImpl() const { if (IteratorDrained_) { - UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << - "Lazy list was build under python iterator. " - "Iterator was already used.").c_str()); + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Lazy list was build under python iterator. " + "Iterator was already used.") + .c_str()); } IteratorDrained_ = true; return PyObject_; @@ -316,14 +311,14 @@ private: ////////////////////////////////////////////////////////////////////////////// // TLazyGenerator ////////////////////////////////////////////////////////////////////////////// -class TLazyGenerator: public TBaseLazyList<TLazyGenerator> -{ +class TLazyGenerator: public TBaseLazyList<TLazyGenerator> { using TBase = TBaseLazyList<TLazyGenerator>; + public: TLazyGenerator( - const TPyCastContext::TPtr& castCtx, - TPyObjectPtr&& pyObject, - const NUdf::TType* type) + const TPyCastContext::TPtr& castCtx, + TPyObjectPtr&& pyObject, + const NUdf::TType* type) : TBase(castCtx, std::move(pyObject), type) { // keep ownership of function closure if any @@ -352,29 +347,28 @@ private: TPyObjectPtr Closure_; }; -} // namspace - +} // namespace NUdf::TUnboxedValue FromPyLazyGenerator( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - TPyObjectPtr callableObj) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + TPyObjectPtr callableObj) { return NUdf::TUnboxedValuePod(new TLazyGenerator(castCtx, std::move(callableObj), type)); } NUdf::TUnboxedValue FromPyLazyIterable( - const 
TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - TPyObjectPtr iterableObj) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + TPyObjectPtr iterableObj) { return NUdf::TUnboxedValuePod(new TLazyIterable(castCtx, std::move(iterableObj), type)); } NUdf::TUnboxedValue FromPyLazyIterator( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - TPyObjectPtr iteratorObj) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + TPyObjectPtr iteratorObj) { return NUdf::TUnboxedValuePod(new TLazyIterator(castCtx, std::move(iteratorObj), type)); } diff --git a/yql/essentials/udfs/common/python/bindings/py_list.cpp b/yql/essentials/udfs/common/python/bindings/py_list.cpp index bbae59865f2..d854a2eac6b 100644 --- a/yql/essentials/udfs/common/python/bindings/py_list.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_list.cpp @@ -10,11 +10,11 @@ using namespace NKikimr; #if PY_MAJOR_VERSION >= 3 -#define SLICEOBJ(obj) obj + #define SLICEOBJ(obj) obj #else -#define SLICEOBJ(obj) (reinterpret_cast<PySliceObject*>(obj)) -// See details about need for backports in ya.make -#include "py27_backports.h" + #define SLICEOBJ(obj) (reinterpret_cast<PySliceObject*>(obj)) + // See details about need for backports in ya.make + #include "py27_backports.h" #endif namespace NPython { @@ -33,13 +33,12 @@ inline Py_ssize_t CastIndex(PyObject* key, const char* name) return index; } -} +} // namespace ////////////////////////////////////////////////////////////////////////////// // TPyLazyList interface ////////////////////////////////////////////////////////////////////////////// -struct TPyLazyList -{ +struct TPyLazyList { using TPtr = NUdf::TRefCountedPtr<TPyLazyList, TPyPtrOps<TPyLazyList>>; PyObject_HEAD; @@ -59,11 +58,11 @@ struct TPyLazyList } static PyObject* New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - NUdf::IBoxedValuePtr value, - Py_ssize_t step = 1, - Py_ssize_t size = -1); + const 
TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr value, + Py_ssize_t step = 1, + Py_ssize_t size = -1); static int Bool(PyObject* self); static PyObject* Repr(PyObject* self); @@ -85,64 +84,64 @@ PyMappingMethods LazyListMapping = { }; PyNumberMethods LazyListNumbering = { - INIT_MEMBER(nb_add, nullptr), - INIT_MEMBER(nb_subtract, nullptr), - INIT_MEMBER(nb_multiply, nullptr), + INIT_MEMBER(nb_add, nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_divide, nullptr), + INIT_MEMBER(nb_divide, nullptr), #endif - INIT_MEMBER(nb_remainder, nullptr), - INIT_MEMBER(nb_divmod, nullptr), - INIT_MEMBER(nb_power, nullptr), - INIT_MEMBER(nb_negative, nullptr), - INIT_MEMBER(nb_positive, nullptr), - INIT_MEMBER(nb_absolute, nullptr), + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_bool, TPyLazyList::Bool), + INIT_MEMBER(nb_bool, TPyLazyList::Bool), #else - INIT_MEMBER(nb_nonzero, TPyLazyList::Bool), + INIT_MEMBER(nb_nonzero, TPyLazyList::Bool), #endif - INIT_MEMBER(nb_invert, nullptr), - INIT_MEMBER(nb_lshift, nullptr), - INIT_MEMBER(nb_rshift, nullptr), - INIT_MEMBER(nb_and, nullptr), - INIT_MEMBER(nb_xor, nullptr), - INIT_MEMBER(nb_or, nullptr), + INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_coerce, nullptr), + INIT_MEMBER(nb_coerce, nullptr), #endif - INIT_MEMBER(nb_int, nullptr), + INIT_MEMBER(nb_int, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_reserved, nullptr), + INIT_MEMBER(nb_reserved, nullptr), #else - INIT_MEMBER(nb_long, nullptr), + 
INIT_MEMBER(nb_long, nullptr), #endif - INIT_MEMBER(nb_float, nullptr), + INIT_MEMBER(nb_float, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_oct, nullptr), - INIT_MEMBER(nb_hex, nullptr), + INIT_MEMBER(nb_oct, nullptr), + INIT_MEMBER(nb_hex, nullptr), #endif - INIT_MEMBER(nb_inplace_add, nullptr), - INIT_MEMBER(nb_inplace_subtract, nullptr), - INIT_MEMBER(nb_inplace_multiply, nullptr), - INIT_MEMBER(nb_inplace_remainder, nullptr), - INIT_MEMBER(nb_inplace_power, nullptr), - INIT_MEMBER(nb_inplace_lshift, nullptr), - INIT_MEMBER(nb_inplace_rshift, nullptr), - INIT_MEMBER(nb_inplace_and, nullptr), - INIT_MEMBER(nb_inplace_xor, nullptr), - INIT_MEMBER(nb_inplace_or, nullptr), - - INIT_MEMBER(nb_floor_divide, nullptr), - INIT_MEMBER(nb_true_divide, nullptr), - INIT_MEMBER(nb_inplace_floor_divide, nullptr), - INIT_MEMBER(nb_inplace_true_divide, nullptr), - - INIT_MEMBER(nb_index, nullptr), + INIT_MEMBER(nb_inplace_add, nullptr), + INIT_MEMBER(nb_inplace_subtract, nullptr), + INIT_MEMBER(nb_inplace_multiply, nullptr), + INIT_MEMBER(nb_inplace_remainder, nullptr), + INIT_MEMBER(nb_inplace_power, nullptr), + INIT_MEMBER(nb_inplace_lshift, nullptr), + INIT_MEMBER(nb_inplace_rshift, nullptr), + INIT_MEMBER(nb_inplace_and, nullptr), + INIT_MEMBER(nb_inplace_xor, nullptr), + INIT_MEMBER(nb_inplace_or, nullptr), + + INIT_MEMBER(nb_floor_divide, nullptr), + INIT_MEMBER(nb_true_divide, nullptr), + INIT_MEMBER(nb_inplace_floor_divide, nullptr), + INIT_MEMBER(nb_inplace_true_divide, nullptr), + + INIT_MEMBER(nb_index, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_matrix_multiply, nullptr), - INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), + INIT_MEMBER(nb_matrix_multiply, nullptr), + INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), #endif }; @@ -154,92 +153,91 @@ PyDoc_STRVAR(has_fast_len__doc__, "DEPRECATED: do not use."); PyDoc_STRVAR(has_items__doc__, "DEPRECATED: test list as bool instead."); static PyMethodDef TPyLazyListMethods[] = { - { "__reversed__", 
TPyLazyList::Reversed, METH_NOARGS, nullptr }, - { "to_index_dict", TPyLazyList::ToIndexDict, METH_NOARGS, to_index_dict__doc__ }, - { "reversed", TPyLazyList::Reversed, METH_NOARGS, reversed__doc__ }, - { "take", TPyLazyList::Take, METH_O, take__doc__ }, - { "skip", TPyLazyList::Skip, METH_O, skip__doc__ }, - { "has_fast_len", TPyLazyList::HasFastLen, METH_NOARGS, has_fast_len__doc__ }, - { "has_items", TPyLazyList::HasItems, METH_NOARGS, has_items__doc__ }, - { nullptr, nullptr, 0, nullptr } /* sentinel */ + {"__reversed__", TPyLazyList::Reversed, METH_NOARGS, nullptr}, + {"to_index_dict", TPyLazyList::ToIndexDict, METH_NOARGS, to_index_dict__doc__}, + {"reversed", TPyLazyList::Reversed, METH_NOARGS, reversed__doc__}, + {"take", TPyLazyList::Take, METH_O, take__doc__}, + {"skip", TPyLazyList::Skip, METH_O, skip__doc__}, + {"has_fast_len", TPyLazyList::HasFastLen, METH_NOARGS, has_fast_len__doc__}, + {"has_items", TPyLazyList::HasItems, METH_NOARGS, has_items__doc__}, + {nullptr, nullptr, 0, nullptr} /* sentinel */ }; #if PY_MAJOR_VERSION >= 3 -#define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) + #define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) #endif PyTypeObject PyLazyListType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TList"), - INIT_MEMBER(tp_basicsize , sizeof(TPyLazyList)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyLazyList::Dealloc), + INIT_MEMBER(tp_name, "yql.TList"), + INIT_MEMBER(tp_basicsize, sizeof(TPyLazyList)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyLazyList::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + 
INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyLazyList::Repr), - INIT_MEMBER(tp_as_number , &LazyListNumbering), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , &LazyListMapping), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), - INIT_MEMBER(tp_doc , "yql.TList object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , TPyLazyList::Iter), - INIT_MEMBER(tp_iternext , nullptr), - INIT_MEMBER(tp_methods , TPyLazyListMethods), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyLazyList::Repr), + INIT_MEMBER(tp_as_number, &LazyListNumbering), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, &LazyListMapping), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, 
Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc, "yql.TList object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, TPyLazyList::Iter), + INIT_MEMBER(tp_iternext, nullptr), + INIT_MEMBER(tp_methods, TPyLazyListMethods), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; ////////////////////////////////////////////////////////////////////////////// // TPyLazyListIterator interface ////////////////////////////////////////////////////////////////////////////// -struct TPyLazyListIterator -{ +struct TPyLazyListIterator { PyObject_HEAD; TPyLazyList::TPtr List; TPyCleanupListItem<NUdf::TUnboxedValue> Iterator; @@ -269,68 +267,68 @@ struct TPyLazyListIterator PyTypeObject PyLazyListIteratorType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TListIterator"), - INIT_MEMBER(tp_basicsize , sizeof(TPyLazyListIterator)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , 
TPyLazyListIterator::Dealloc), + INIT_MEMBER(tp_name, "yql.TListIterator"), + INIT_MEMBER(tp_basicsize, sizeof(TPyLazyListIterator)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyLazyListIterator::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyLazyListIterator::Repr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), - INIT_MEMBER(tp_doc , "yql.ListIterator object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , PyObject_SelfIter), - INIT_MEMBER(tp_iternext , TPyLazyListIterator::Next), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , 
nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyLazyListIterator::Repr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc, "yql.ListIterator object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, PyObject_SelfIter), + INIT_MEMBER(tp_iternext, TPyLazyListIterator::Next), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; @@ -338,11 +336,11 @@ PyTypeObject PyLazyListIteratorType = { 
// TPyLazyList implementation ////////////////////////////////////////////////////////////////////////////// PyObject* TPyLazyList::New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - NUdf::IBoxedValuePtr value, - Py_ssize_t step, - Py_ssize_t size) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr value, + Py_ssize_t step, + Py_ssize_t size) { TPyLazyList* list = new TPyLazyList; PyObject_INIT(list, &PyLazyListType); @@ -362,192 +360,198 @@ PyObject* TPyLazyList::Repr(PyObject*) } PyObject* TPyLazyList::Iter(PyObject* self) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - return TPyLazyListIterator::New(list); - } PY_CATCH(nullptr) + { + PY_TRY{ + TPyLazyList* list = Cast(self); +return TPyLazyListIterator::New(list); +} // namespace NPython +PY_CATCH(nullptr) } Py_ssize_t TPyLazyList::Len(PyObject* self) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - if (list->CachedLength == -1) { - list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get())); - } - return (list->CachedLength + list->Step - 1) / list->Step; - } PY_CATCH(-1) + { + PY_TRY{ + TPyLazyList* list = Cast(self); +if (list->CachedLength == -1) { + list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get())); +} +return (list->CachedLength + list->Step - 1) / list->Step; +} +PY_CATCH(-1) } PyObject* TPyLazyList::Subscript(PyObject* self, PyObject* slice) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - const auto vb = list->CastCtx->ValueBuilder; + { + PY_TRY{ + TPyLazyList* list = Cast(self); +const auto vb = list->CastCtx->ValueBuilder; - if (PyIndex_Check(slice)) { - Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError); +if (PyIndex_Check(slice)) { + Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError); - if (!list->Dict.IsSet()) { - list->Dict.Set(list->CastCtx->PyCtx, 
vb->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed()); - } + if (!list->Dict.IsSet()) { + list->Dict.Set(list->CastCtx->PyCtx, vb->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed()); + } - if (index < 0) { - if (list->CachedLength == -1) { - list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*list->Dict.Get())); - } + if (index < 0) { + if (list->CachedLength == -1) { + list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetDictLength(*list->Dict.Get())); + } - ++index *= list->Step; - --index += list->CachedLength; - } else { - index *= list->Step; - } + ++index *= list->Step; + --index += list->CachedLength; + } else { + index *= list->Step; + } - if (index < 0 || (list->CachedLength != -1 && index >= list->CachedLength)) { - const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->CachedLength); - PyErr_SetObject(PyExc_IndexError, error.Get()); - return nullptr; - } + if (index < 0 || (list->CachedLength != -1 && index >= list->CachedLength)) { + const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->CachedLength); + PyErr_SetObject(PyExc_IndexError, error.Get()); + return nullptr; + } - if (const auto item = NUdf::TBoxedValueAccessor::Lookup(*list->Dict.Get(), NUdf::TUnboxedValuePod(ui64(index)))) { - return ToPyObject(list->CastCtx, list->ItemType, item.GetOptionalValue()).Release(); - } + if (const auto item = NUdf::TBoxedValueAccessor::Lookup(*list->Dict.Get(), NUdf::TUnboxedValuePod(ui64(index)))) { + return ToPyObject(list->CastCtx, list->ItemType, item.GetOptionalValue()).Release(); + } + + const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds", index); + PyErr_SetObject(PyExc_IndexError, error.Get()); + return nullptr; +} + +if (PySlice_Check(slice)) { + Py_ssize_t start, stop, step, size; - const TPyObjectPtr error = PyUnicode_FromFormat("index 
%zd out of bounds", index); - PyErr_SetObject(PyExc_IndexError, error.Get()); + if (list->CachedLength >= 0) { + if (PySlice_GetIndicesEx(SLICEOBJ(slice), (list->CachedLength + list->Step - 1) / list->Step, &start, &stop, &step, &size) < 0) { + return nullptr; + } + } else { + if (PySlice_Unpack(slice, &start, &stop, &step) < 0) { return nullptr; } - if (PySlice_Check(slice)) { - Py_ssize_t start, stop, step, size; - - if (list->CachedLength >= 0) { - if (PySlice_GetIndicesEx(SLICEOBJ(slice), (list->CachedLength + list->Step - 1) / list->Step, &start, &stop, &step, &size) < 0) { - return nullptr; - } - } else { - if (PySlice_Unpack(slice, &start, &stop, &step) < 0) { - return nullptr; - } - - if (step < -1 || step > 1 || (start < 0 && start > PY_SSIZE_T_MIN) || (stop < 0 && stop > PY_SSIZE_T_MIN)) { - list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get())); - size = PySlice_AdjustIndices((list->CachedLength + list->Step - 1) / list->Step, &start, &stop, step); - } else { - size = PySlice_AdjustIndices(PY_SSIZE_T_MAX, &start, &stop, step); - } - } + if (step < -1 || step > 1 || (start < 0 && start > PY_SSIZE_T_MIN) || (stop < 0 && stop > PY_SSIZE_T_MIN)) { + list->CachedLength = static_cast<Py_ssize_t>(NUdf::TBoxedValueAccessor::GetListLength(*list->Value.Get())); + size = PySlice_AdjustIndices((list->CachedLength + list->Step - 1) / list->Step, &start, &stop, step); + } else { + size = PySlice_AdjustIndices(PY_SSIZE_T_MAX, &start, &stop, step); + } + } - if (!step) { - PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); - return nullptr; - } + if (!step) { + PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); + return nullptr; + } - const Py_ssize_t hi = PY_SSIZE_T_MAX / list->Step; - const Py_ssize_t lo = PY_SSIZE_T_MIN / list->Step; - step = step > lo && step < hi ? step * list->Step : (step > 0 ? 
PY_SSIZE_T_MAX : PY_SSIZE_T_MIN); - - NUdf::TUnboxedValue newList; - if (size > 0) { - size = step > 0 ? - (size < PY_SSIZE_T_MAX / step ? --size * step + 1 : PY_SSIZE_T_MAX): - (size < PY_SSIZE_T_MAX / -step ? --size * -step + 1 : PY_SSIZE_T_MAX); - - start = start < hi ? start * list->Step : PY_SSIZE_T_MAX; - const Py_ssize_t skip = step > 0 ? start : start - size + 1; - - newList = NUdf::TUnboxedValuePod(list->Value.Get().Get()); - if (skip > 0) { - newList = vb->SkipList(newList, skip); - } - - if (size < PY_SSIZE_T_MAX && (list->CachedLength == -1 || list->CachedLength - skip > size)) { - newList = vb->TakeList(newList, size); - } - - if (step < 0) { - step = -step; - newList = vb->ReverseList(newList); - } - } else { - newList = vb->NewEmptyList(); - } + const Py_ssize_t hi = PY_SSIZE_T_MAX / list->Step; + const Py_ssize_t lo = PY_SSIZE_T_MIN / list->Step; + step = step > lo && step < hi ? step * list->Step : (step > 0 ? PY_SSIZE_T_MAX : PY_SSIZE_T_MIN); - return New(list->CastCtx, list->ItemType, newList.AsBoxed(), step, size); + NUdf::TUnboxedValue newList; + if (size > 0) { + size = step > 0 ? (size < PY_SSIZE_T_MAX / step ? --size * step + 1 : PY_SSIZE_T_MAX) : (size < PY_SSIZE_T_MAX / -step ? --size * -step + 1 : PY_SSIZE_T_MAX); + + start = start < hi ? start * list->Step : PY_SSIZE_T_MAX; + const Py_ssize_t skip = step > 0 ? 
start : start - size + 1; + + newList = NUdf::TUnboxedValuePod(list->Value.Get().Get()); + if (skip > 0) { + newList = vb->SkipList(newList, skip); } - const TPyObjectPtr type = PyObject_Type(slice); - const TPyObjectPtr repr = PyObject_Repr(type.Get()); - const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get()); - PyErr_SetObject(PyExc_TypeError, error.Get()); - return nullptr; - } PY_CATCH(nullptr) + if (size < PY_SSIZE_T_MAX && (list->CachedLength == -1 || list->CachedLength - skip > size)) { + newList = vb->TakeList(newList, size); + } + + if (step < 0) { + step = -step; + newList = vb->ReverseList(newList); + } + } else { + newList = vb->NewEmptyList(); + } + + return New(list->CastCtx, list->ItemType, newList.AsBoxed(), step, size); +} + +const TPyObjectPtr type = PyObject_Type(slice); +const TPyObjectPtr repr = PyObject_Repr(type.Get()); +const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get()); +PyErr_SetObject(PyExc_TypeError, error.Get()); +return nullptr; +} +PY_CATCH(nullptr) } PyObject* TPyLazyList::ToIndexDict(PyObject* self, PyObject* /* arg */) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - if (!list->Dict.IsSet()) { - list->Dict.Set(list->CastCtx->PyCtx, list->CastCtx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed()); - } + { + PY_TRY{ + TPyLazyList* list = Cast(self); +if (!list->Dict.IsSet()) { + list->Dict.Set(list->CastCtx->PyCtx, list->CastCtx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())).AsBoxed()); +} - return ToPyLazyDict(list->CastCtx, nullptr, list->ItemType, NUdf::TUnboxedValuePod(list->Dict.Get().Get())).Release(); - } PY_CATCH(nullptr) +return ToPyLazyDict(list->CastCtx, nullptr, list->ItemType, NUdf::TUnboxedValuePod(list->Dict.Get().Get())).Release(); +} +PY_CATCH(nullptr) } PyObject* TPyLazyList::Reversed(PyObject* self, PyObject* /* arg */) -{ - PY_TRY { - TPyLazyList* list = 
Cast(self); - const auto newList = list->CastCtx->ValueBuilder->ReverseList(NUdf::TUnboxedValuePod(list->Value.Get().Get())); - return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); - } PY_CATCH(nullptr) + { + PY_TRY{ + TPyLazyList* list = Cast(self); +const auto newList = list->CastCtx->ValueBuilder->ReverseList(NUdf::TUnboxedValuePod(list->Value.Get().Get())); +return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); +} +PY_CATCH(nullptr) } PyObject* TPyLazyList::Take(PyObject* self, PyObject* arg) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - Py_ssize_t count = CastIndex(arg, "take"); - if (count < 0) { - return nullptr; - } - count *= list->Step; + { + PY_TRY{ + TPyLazyList* list = Cast(self); +Py_ssize_t count = CastIndex(arg, "take"); +if (count < 0) { + return nullptr; +} +count *= list->Step; - auto vb = list->CastCtx->ValueBuilder; - NUdf::TUnboxedValue value(NUdf::TUnboxedValuePod(list->Value.Get().Get())); - auto newList = vb->TakeList(value, static_cast<ui64>(count)); - return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); - } PY_CATCH(nullptr) +auto vb = list->CastCtx->ValueBuilder; +NUdf::TUnboxedValue value(NUdf::TUnboxedValuePod(list->Value.Get().Get())); +auto newList = vb->TakeList(value, static_cast<ui64>(count)); +return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); +} +PY_CATCH(nullptr) } PyObject* TPyLazyList::Skip(PyObject* self, PyObject* arg) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - Py_ssize_t count = CastIndex(arg, "skip"); - if (count < 0) { - return nullptr; - } - count *= list->Step; + { + PY_TRY{ + TPyLazyList* list = Cast(self); +Py_ssize_t count = CastIndex(arg, "skip"); +if (count < 0) { + return nullptr; +} +count *= list->Step; - NUdf::TUnboxedValue value(NUdf::TUnboxedValuePod(list->Value.Get().Get())); - const auto newList = list->CastCtx->ValueBuilder->SkipList(value, static_cast<ui64>(count)); - return New(list->CastCtx, list->ItemType, 
newList.AsBoxed(), list->Step); - } PY_CATCH(nullptr) +NUdf::TUnboxedValue value(NUdf::TUnboxedValuePod(list->Value.Get().Get())); +const auto newList = list->CastCtx->ValueBuilder->SkipList(value, static_cast<ui64>(count)); +return New(list->CastCtx, list->ItemType, newList.AsBoxed(), list->Step); +} +PY_CATCH(nullptr) } PyObject* TPyLazyList::HasFastLen(PyObject* self, PyObject* /* arg */) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - if (NUdf::TBoxedValueAccessor::HasFastListLength(*list->Value.Get())) { - Py_RETURN_TRUE; - } - Py_RETURN_FALSE; - } PY_CATCH(nullptr) + { + PY_TRY{ + TPyLazyList* list = Cast(self); +if (NUdf::TBoxedValueAccessor::HasFastListLength(*list->Value.Get())) { + Py_RETURN_TRUE; +} +Py_RETURN_FALSE; +} +PY_CATCH(nullptr) } PyObject* TPyLazyList::HasItems(PyObject* self, PyObject* /* arg */) @@ -558,19 +562,21 @@ PyObject* TPyLazyList::HasItems(PyObject* self, PyObject* /* arg */) Py_RETURN_TRUE; } Py_RETURN_FALSE; - } PY_CATCH(nullptr) + } + PY_CATCH(nullptr) } int TPyLazyList::Bool(PyObject* self) -{ - PY_TRY { - TPyLazyList* list = Cast(self); - if (list->CachedLength == -1) { - return NUdf::TBoxedValueAccessor::HasListItems(*list->Value.Get()) ? 1 : 0; - } else { - return list->CachedLength > 0 ? 1 : 0; - } - } PY_CATCH(-1) + { + PY_TRY{ + TPyLazyList* list = Cast(self); +if (list->CachedLength == -1) { + return NUdf::TBoxedValueAccessor::HasListItems(*list->Value.Get()) ? 1 : 0; +} else { + return list->CachedLength > 0 ? 
1 : 0; +} +} +PY_CATCH(-1) } ////////////////////////////////////////////////////////////////////////////// @@ -597,8 +603,9 @@ PyObject* TPyLazyListIterator::Next(PyObject* self) if (iter->Iterator.Get().Next(item)) { ++iter->Length; - for (auto skip = list->Step; --skip && iter->Iterator.Get().Skip(); ++iter->Length) + for (auto skip = list->Step; --skip && iter->Iterator.Get().Skip(); ++iter->Length) { continue; + } return ToPyObject(list->CastCtx, list->ItemType, item).Release(); } @@ -609,14 +616,14 @@ PyObject* TPyLazyListIterator::Next(PyObject* self) } return nullptr; - } PY_CATCH(nullptr) + } + PY_CATCH(nullptr) } ////////////////////////////////////////////////////////////////////////////// // TPyThinList interface ////////////////////////////////////////////////////////////////////////////// -struct TPyThinList -{ +struct TPyThinList { using TPtr = NUdf::TRefCountedPtr<TPyThinList, TPyPtrOps<TPyThinList>>; PyObject_HEAD; @@ -636,12 +643,12 @@ struct TPyThinList } static PyObject* New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - NUdf::IBoxedValuePtr value = NUdf::IBoxedValuePtr(), - const NUdf::TUnboxedValue* elements = nullptr, - Py_ssize_t length = 0, - Py_ssize_t step = 1); + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr value = NUdf::IBoxedValuePtr(), + const NUdf::TUnboxedValue* elements = nullptr, + Py_ssize_t length = 0, + Py_ssize_t step = 1); static int Bool(PyObject* self); static PyObject* Repr(PyObject* self); @@ -663,154 +670,153 @@ PyMappingMethods ThinListMapping = { }; PyNumberMethods ThinListNumbering = { - INIT_MEMBER(nb_add, nullptr), - INIT_MEMBER(nb_subtract, nullptr), - INIT_MEMBER(nb_multiply, nullptr), + INIT_MEMBER(nb_add, nullptr), + INIT_MEMBER(nb_subtract, nullptr), + INIT_MEMBER(nb_multiply, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_divide, nullptr), + INIT_MEMBER(nb_divide, nullptr), #endif - INIT_MEMBER(nb_remainder, nullptr), - 
INIT_MEMBER(nb_divmod, nullptr), - INIT_MEMBER(nb_power, nullptr), - INIT_MEMBER(nb_negative, nullptr), - INIT_MEMBER(nb_positive, nullptr), - INIT_MEMBER(nb_absolute, nullptr), + INIT_MEMBER(nb_remainder, nullptr), + INIT_MEMBER(nb_divmod, nullptr), + INIT_MEMBER(nb_power, nullptr), + INIT_MEMBER(nb_negative, nullptr), + INIT_MEMBER(nb_positive, nullptr), + INIT_MEMBER(nb_absolute, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_bool, TPyThinList::Bool), + INIT_MEMBER(nb_bool, TPyThinList::Bool), #else - INIT_MEMBER(nb_nonzero, TPyThinList::Bool), + INIT_MEMBER(nb_nonzero, TPyThinList::Bool), #endif - INIT_MEMBER(nb_invert, nullptr), - INIT_MEMBER(nb_lshift, nullptr), - INIT_MEMBER(nb_rshift, nullptr), - INIT_MEMBER(nb_and, nullptr), - INIT_MEMBER(nb_xor, nullptr), - INIT_MEMBER(nb_or, nullptr), + INIT_MEMBER(nb_invert, nullptr), + INIT_MEMBER(nb_lshift, nullptr), + INIT_MEMBER(nb_rshift, nullptr), + INIT_MEMBER(nb_and, nullptr), + INIT_MEMBER(nb_xor, nullptr), + INIT_MEMBER(nb_or, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_coerce, nullptr), + INIT_MEMBER(nb_coerce, nullptr), #endif - INIT_MEMBER(nb_int, nullptr), + INIT_MEMBER(nb_int, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_reserved, nullptr), + INIT_MEMBER(nb_reserved, nullptr), #else - INIT_MEMBER(nb_long, nullptr), + INIT_MEMBER(nb_long, nullptr), #endif - INIT_MEMBER(nb_float, nullptr), + INIT_MEMBER(nb_float, nullptr), #if PY_MAJOR_VERSION < 3 - INIT_MEMBER(nb_oct, nullptr), - INIT_MEMBER(nb_hex, nullptr), + INIT_MEMBER(nb_oct, nullptr), + INIT_MEMBER(nb_hex, nullptr), #endif - INIT_MEMBER(nb_inplace_add, nullptr), - INIT_MEMBER(nb_inplace_subtract, nullptr), - INIT_MEMBER(nb_inplace_multiply, nullptr), - INIT_MEMBER(nb_inplace_remainder, nullptr), - INIT_MEMBER(nb_inplace_power, nullptr), - INIT_MEMBER(nb_inplace_lshift, nullptr), - INIT_MEMBER(nb_inplace_rshift, nullptr), - INIT_MEMBER(nb_inplace_and, nullptr), - INIT_MEMBER(nb_inplace_xor, nullptr), - INIT_MEMBER(nb_inplace_or, 
nullptr), - - INIT_MEMBER(nb_floor_divide, nullptr), - INIT_MEMBER(nb_true_divide, nullptr), - INIT_MEMBER(nb_inplace_floor_divide, nullptr), - INIT_MEMBER(nb_inplace_true_divide, nullptr), - - INIT_MEMBER(nb_index, nullptr), + INIT_MEMBER(nb_inplace_add, nullptr), + INIT_MEMBER(nb_inplace_subtract, nullptr), + INIT_MEMBER(nb_inplace_multiply, nullptr), + INIT_MEMBER(nb_inplace_remainder, nullptr), + INIT_MEMBER(nb_inplace_power, nullptr), + INIT_MEMBER(nb_inplace_lshift, nullptr), + INIT_MEMBER(nb_inplace_rshift, nullptr), + INIT_MEMBER(nb_inplace_and, nullptr), + INIT_MEMBER(nb_inplace_xor, nullptr), + INIT_MEMBER(nb_inplace_or, nullptr), + + INIT_MEMBER(nb_floor_divide, nullptr), + INIT_MEMBER(nb_true_divide, nullptr), + INIT_MEMBER(nb_inplace_floor_divide, nullptr), + INIT_MEMBER(nb_inplace_true_divide, nullptr), + + INIT_MEMBER(nb_index, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(nb_matrix_multiply, nullptr), - INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), + INIT_MEMBER(nb_matrix_multiply, nullptr), + INIT_MEMBER(nb_inplace_matrix_multiply, nullptr), #endif }; static PyMethodDef TPyThinListMethods[] = { - { "__reversed__", TPyThinList::Reversed, METH_NOARGS, nullptr }, - { "to_index_dict", TPyThinList::ToIndexDict, METH_NOARGS, to_index_dict__doc__ }, - { "reversed", TPyThinList::Reversed, METH_NOARGS, reversed__doc__ }, - { "take", TPyThinList::Take, METH_O, take__doc__ }, - { "skip", TPyThinList::Skip, METH_O, skip__doc__ }, - { "has_fast_len", TPyThinList::HasFastLen, METH_NOARGS, has_fast_len__doc__ }, - { "has_items", TPyThinList::HasItems, METH_NOARGS, has_items__doc__ }, - { nullptr, nullptr, 0, nullptr } /* sentinel */ + {"__reversed__", TPyThinList::Reversed, METH_NOARGS, nullptr}, + {"to_index_dict", TPyThinList::ToIndexDict, METH_NOARGS, to_index_dict__doc__}, + {"reversed", TPyThinList::Reversed, METH_NOARGS, reversed__doc__}, + {"take", TPyThinList::Take, METH_O, take__doc__}, + {"skip", TPyThinList::Skip, METH_O, skip__doc__}, + 
{"has_fast_len", TPyThinList::HasFastLen, METH_NOARGS, has_fast_len__doc__}, + {"has_items", TPyThinList::HasItems, METH_NOARGS, has_items__doc__}, + {nullptr, nullptr, 0, nullptr} /* sentinel */ }; #if PY_MAJOR_VERSION >= 3 -#define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) + #define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) #endif PyTypeObject PyThinListType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TList"), - INIT_MEMBER(tp_basicsize , sizeof(TPyThinList)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyThinList::Dealloc), + INIT_MEMBER(tp_name, "yql.TList"), + INIT_MEMBER(tp_basicsize, sizeof(TPyThinList)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyThinList::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyThinList::Repr), - INIT_MEMBER(tp_as_number , &ThinListNumbering), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , &ThinListMapping), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), - INIT_MEMBER(tp_doc , "yql.TList object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , TPyThinList::Iter), - INIT_MEMBER(tp_iternext , nullptr), 
- INIT_MEMBER(tp_methods , TPyThinListMethods), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyThinList::Repr), + INIT_MEMBER(tp_as_number, &ThinListNumbering), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, &ThinListMapping), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc, "yql.TList object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, TPyThinList::Iter), + INIT_MEMBER(tp_iternext, nullptr), + INIT_MEMBER(tp_methods, TPyThinListMethods), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + 
INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; ////////////////////////////////////////////////////////////////////////////// // TPyThinListIterator interface ////////////////////////////////////////////////////////////////////////////// -struct TPyThinListIterator -{ +struct TPyThinListIterator { PyObject_HEAD; TPyThinList::TPtr List; const NUdf::TUnboxedValue* Elements; @@ -835,68 +841,68 @@ struct TPyThinListIterator PyTypeObject PyThinListIteratorType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TListIterator"), - INIT_MEMBER(tp_basicsize , sizeof(TPyThinListIterator)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyThinListIterator::Dealloc), + INIT_MEMBER(tp_name, "yql.TListIterator"), + INIT_MEMBER(tp_basicsize, sizeof(TPyThinListIterator)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyThinListIterator::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyThinListIterator::Repr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , 
nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), - INIT_MEMBER(tp_doc , "yql.ListIterator object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , PyObject_SelfIter), - INIT_MEMBER(tp_iternext , TPyThinListIterator::Next), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyThinListIterator::Repr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc, "yql.ListIterator object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, PyObject_SelfIter), + INIT_MEMBER(tp_iternext, 
TPyThinListIterator::Next), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; @@ -904,12 +910,12 @@ PyTypeObject PyThinListIteratorType = { // TPyThinList implementation ////////////////////////////////////////////////////////////////////////////// PyObject* TPyThinList::New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - NUdf::IBoxedValuePtr value, - const NUdf::TUnboxedValue* elements, - Py_ssize_t length, - Py_ssize_t step) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + NUdf::IBoxedValuePtr value, + const NUdf::TUnboxedValue* elements, + Py_ssize_t length, + Py_ssize_t step) { TPyThinList* list = new TPyThinList; PyObject_INIT(list, &PyThinListType); @@ -930,11 +936,12 @@ PyObject* TPyThinList::Repr(PyObject*) } PyObject* TPyThinList::Iter(PyObject* self) -{ - PY_TRY { - TPyThinList* list = Cast(self); - return TPyThinListIterator::New(list); - } PY_CATCH(nullptr) + { + PY_TRY{ + TPyThinList* list = Cast(self); +return 
TPyThinListIterator::New(list); +} +PY_CATCH(nullptr) } Py_ssize_t TPyThinList::Len(PyObject* self) @@ -943,116 +950,119 @@ Py_ssize_t TPyThinList::Len(PyObject* self) } PyObject* TPyThinList::Subscript(PyObject* self, PyObject* slice) -{ - PY_TRY { - TPyThinList* list = Cast(self); - const auto vb = list->CastCtx->ValueBuilder; + { + PY_TRY{ + TPyThinList* list = Cast(self); +const auto vb = list->CastCtx->ValueBuilder; - if (PyIndex_Check(slice)) { - Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError); +if (PyIndex_Check(slice)) { + Py_ssize_t index = PyNumber_AsSsize_t(slice, PyExc_IndexError); - if (index < 0) { - index += list->Length; - } + if (index < 0) { + index += list->Length; + } - if (index < 0 || index >= list->Length) { - const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->Length); - PyErr_SetObject(PyExc_IndexError, error.Get()); - return nullptr; - } + if (index < 0 || index >= list->Length) { + const TPyObjectPtr error = PyUnicode_FromFormat("index %zd out of bounds, list size: %zd", index, list->Length); + PyErr_SetObject(PyExc_IndexError, error.Get()); + return nullptr; + } - if (list->Step > 0) { - index *= list->Step; - } else { - index = list->Length - ++index; - index *= -list->Step; - } + if (list->Step > 0) { + index *= list->Step; + } else { + index = list->Length - ++index; + index *= -list->Step; + } - return ToPyObject(list->CastCtx, list->ItemType, list->Elements[index]).Release(); - } + return ToPyObject(list->CastCtx, list->ItemType, list->Elements[index]).Release(); +} - if (PySlice_Check(slice)) { - Py_ssize_t start, stop, step, size; +if (PySlice_Check(slice)) { + Py_ssize_t start, stop, step, size; - if (PySlice_GetIndicesEx(SLICEOBJ(slice), list->Length, &start, &stop, &step, &size) < 0) { - return nullptr; - } + if (PySlice_GetIndicesEx(SLICEOBJ(slice), list->Length, &start, &stop, &step, &size) < 0) { + return nullptr; + } - if (!step) { - 
PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); - return nullptr; - } + if (!step) { + PyErr_SetString(PyExc_ValueError, "slice step cannot be zero"); + return nullptr; + } - if (size > 0) { - const Py_ssize_t skip = list->Step * (list->Step > 0 ? - (step > 0 ? start : start + step * (size - 1)): - (step > 0 ? stop : start + 1) - list->Length); + if (size > 0) { + const Py_ssize_t skip = list->Step * (list->Step > 0 ? (step > 0 ? start : start + step * (size - 1)) : (step > 0 ? stop : start + 1) - list->Length); - return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Elements + skip, size, step * list->Step); - } else { - return New(list->CastCtx, list->ItemType, list->Value.Get()); - } - } + return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Elements + skip, size, step * list->Step); + } else { + return New(list->CastCtx, list->ItemType, list->Value.Get()); + } +} - const TPyObjectPtr type = PyObject_Type(slice); - const TPyObjectPtr repr = PyObject_Repr(type.Get()); - const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get()); - PyErr_SetObject(PyExc_TypeError, error.Get()); - return nullptr; - } PY_CATCH(nullptr) +const TPyObjectPtr type = PyObject_Type(slice); +const TPyObjectPtr repr = PyObject_Repr(type.Get()); +const TPyObjectPtr error = PyUnicode_FromFormat("Unsupported slice object type: %R", repr.Get()); +PyErr_SetObject(PyExc_TypeError, error.Get()); +return nullptr; +} +PY_CATCH(nullptr) } #undef SLICEOBJ PyObject* TPyThinList::ToIndexDict(PyObject* self, PyObject* /* arg */) -{ - PY_TRY { - TPyThinList* list = Cast(self); - const auto dict = list->CastCtx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())); - return ToPyLazyDict(list->CastCtx, nullptr, list->ItemType, dict).Release(); - } PY_CATCH(nullptr) + { + PY_TRY{ + TPyThinList* list = Cast(self); +const auto dict = 
list->CastCtx->ValueBuilder->ToIndexDict(NUdf::TUnboxedValuePod(list->Value.Get().Get())); +return ToPyLazyDict(list->CastCtx, nullptr, list->ItemType, dict).Release(); +} +PY_CATCH(nullptr) } PyObject* TPyThinList::Reversed(PyObject* self, PyObject* /* arg */) -{ - PY_TRY { - TPyThinList* list = Cast(self); - return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Elements, list->Length, -list->Step); - } PY_CATCH(nullptr) + { + PY_TRY{ + TPyThinList* list = Cast(self); +return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Elements, list->Length, -list->Step); +} +PY_CATCH(nullptr) } PyObject* TPyThinList::Take(PyObject* self, PyObject* arg) -{ - PY_TRY { - TPyThinList* list = Cast(self); - const Py_ssize_t count = CastIndex(arg, "take"); - if (count < 0) { - return nullptr; - } + { + PY_TRY{ + TPyThinList* list = Cast(self); +const Py_ssize_t count = CastIndex(arg, "take"); +if (count < 0) { + return nullptr; +} - if (const auto size = std::min(count, list->Length)) { - return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Step > 0 ? list->Elements : list->Elements + list->Length + size * list->Step, size, list->Step); - } else { - return New(list->CastCtx, list->ItemType, list->Value.Get()); - } - } PY_CATCH(nullptr) +if (const auto size = std::min(count, list->Length)) { + return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Step > 0 ? 
list->Elements : list->Elements + list->Length + size * list->Step, size, list->Step); +} else { + return New(list->CastCtx, list->ItemType, list->Value.Get()); +} +} +PY_CATCH(nullptr) } PyObject* TPyThinList::Skip(PyObject* self, PyObject* arg) -{ - PY_TRY { - TPyThinList* list = Cast(self); - const Py_ssize_t count = CastIndex(arg, "skip"); - if (count < 0) { - return nullptr; - } + { + PY_TRY{ + TPyThinList* list = Cast(self); +const Py_ssize_t count = CastIndex(arg, "skip"); +if (count < 0) { + return nullptr; +} - if (const auto size = std::max(list->Length - count, Py_ssize_t(0))) { - return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Step > 0 ? list->Elements + count * list->Step : list->Elements, size, list->Step); - } else { - return New(list->CastCtx, list->ItemType); - } - } PY_CATCH(nullptr) +if (const auto size = std::max(list->Length - count, Py_ssize_t(0))) { + return New(list->CastCtx, list->ItemType, list->Value.Get(), list->Step > 0 ? list->Elements + count * list->Step : list->Elements, size, list->Step); +} else { + return New(list->CastCtx, list->ItemType); +} +} +PY_CATCH(nullptr) } PyObject* TPyThinList::HasFastLen(PyObject* self, PyObject* /* arg */) @@ -1062,10 +1072,11 @@ PyObject* TPyThinList::HasFastLen(PyObject* self, PyObject* /* arg */) PyObject* TPyThinList::HasItems(PyObject* self, PyObject* /* arg */) { - if (Cast(self)->Length > 0) + if (Cast(self)->Length > 0) { Py_RETURN_TRUE; - else + } else { Py_RETURN_FALSE; + } } int TPyThinList::Bool(PyObject* self) @@ -1087,24 +1098,25 @@ PyObject* TPyThinListIterator::New(TPyThinList* list) } PyObject* TPyThinListIterator::Next(PyObject* self) -{ - PY_TRY { - TPyThinListIterator* iter = Cast(self); - - if (iter->Count) { - --iter->Count; - TPyThinList* list = iter->List.Get(); - return ToPyObject(list->CastCtx, list->ItemType, *(iter->Elements += list->Step)).Release(); - } + { + PY_TRY{ + TPyThinListIterator* iter = Cast(self); + +if (iter->Count) { + --iter->Count; + 
TPyThinList* list = iter->List.Get(); + return ToPyObject(list->CastCtx, list->ItemType, *(iter->Elements += list->Step)).Release(); +} - return nullptr; - } PY_CATCH(nullptr) +return nullptr; +} +PY_CATCH(nullptr) } TPyObjectPtr ToPyLazyList( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* itemType, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* itemType, + const NUdf::TUnboxedValuePod& value) { if (const auto elements = value.GetElements()) { return TPyThinList::New(castCtx, itemType, value.AsBoxed(), elements, value.GetListLength()); diff --git a/yql/essentials/udfs/common/python/bindings/py_list.h b/yql/essentials/udfs/common/python/bindings/py_list.h index 9db170a7954..9266ff918f5 100644 --- a/yql/essentials/udfs/common/python/bindings/py_list.h +++ b/yql/essentials/udfs/common/python/bindings/py_list.h @@ -11,23 +11,23 @@ extern PyTypeObject PyThinListIteratorType; extern PyTypeObject PyThinListType; TPyObjectPtr ToPyLazyList( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* itemType, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* itemType, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyLazyGenerator( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - TPyObjectPtr callableObj); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + TPyObjectPtr callableObj); NKikimr::NUdf::TUnboxedValue FromPyLazyIterable( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - TPyObjectPtr iterableObj); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + TPyObjectPtr iterableObj); NKikimr::NUdf::TUnboxedValue FromPyLazyIterator( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - TPyObjectPtr iteratorObj); + const TPyCastContext::TPtr& castCtx, + const 
NKikimr::NUdf::TType* type, + TPyObjectPtr iteratorObj); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp index b2e9a640d4f..bceb75e6637 100644 --- a/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_list_ut.cpp @@ -4,1022 +4,990 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyListTest) { - Y_UNIT_TEST(FromPyEmptyList) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "def Test(): return []", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetListLength(), 0); - }); - } - - Y_UNIT_TEST(FromPyList) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "def Test(): return [1, 2, 3, 4]", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetListLength(), 4); - const auto it = value.GetListIterator(); - NUdf::TUnboxedValue item; - - UNIT_ASSERT(it.Next(item)); - UNIT_ASSERT_EQUAL(item.Get<ui32>(), 1); - UNIT_ASSERT(it.Next(item)); - UNIT_ASSERT_EQUAL(item.Get<ui32>(), 2); - UNIT_ASSERT(it.Next(item)); - UNIT_ASSERT_EQUAL(item.Get<ui32>(), 3); - UNIT_ASSERT(it.Next(item)); - UNIT_ASSERT_EQUAL(item.Get<ui32>(), 4); - UNIT_ASSERT(false == it.Next(item)); - }); - } - - Y_UNIT_TEST(ToPyEmptyList) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<char*>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - return vb.NewEmptyList(); - }, - "def Test(value):\n" - " assert value.has_fast_len()\n" - " assert len(value) == 0\n"); - } - - Y_UNIT_TEST(ToPyList) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<double>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - 
std::array<NUdf::TUnboxedValue, 3U> list = {{ - NUdf::TUnboxedValuePod(0.1), - NUdf::TUnboxedValuePod(0.2), - NUdf::TUnboxedValuePod(0.3) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(value):\n" - " assert value.has_fast_len()\n" - " assert len(value) == 3\n" - " assert all(isinstance(v, float) for v in value)\n" - " assert list(value) == [0.1, 0.2, 0.3]\n"); - } +Y_UNIT_TEST(FromPyEmptyList) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test(): return []", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 0); + }); +} - Y_UNIT_TEST(FromPyTuple) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "def Test(): return (1, 2, 3)", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetListLength(), 3); +Y_UNIT_TEST(FromPyList) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test(): return [1, 2, 3, 4]", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 4); + const auto it = value.GetListIterator(); + NUdf::TUnboxedValue item; + + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 1); + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 2); + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 3); + UNIT_ASSERT(it.Next(item)); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 4); + UNIT_ASSERT(false == it.Next(item)); + }); +} - ui32 expected = 1; - auto it = value.GetListIterator(); - for (NUdf::TUnboxedValue item; it.Next(item);) { - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, expected); - expected++; - } - }); - } +Y_UNIT_TEST(ToPyEmptyList) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<char*>>( + [](const TType* type, const 
NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewEmptyList(); + }, + "def Test(value):\n" + " assert value.has_fast_len()\n" + " assert len(value) == 0\n"); +} - Y_UNIT_TEST(ThinListIteration) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<double>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 3U> list = {{ - NUdf::TUnboxedValuePod(0.1), - NUdf::TUnboxedValuePod(0.2), - NUdf::TUnboxedValuePod(0.3) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(value):\n" - " assert '__iter__' in dir(value)\n" - " it = iter(value)\n" - " assert next(it) == 0.1\n" - " assert next(it) == 0.2\n" - " assert next(it) == 0.3\n" - " try:\n" - " next(it)\n" - " except StopIteration:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(ToPyList) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(value):\n" + " assert value.has_fast_len()\n" + " assert len(value) == 3\n" + " assert all(isinstance(v, float) for v in value)\n" + " assert list(value) == [0.1, 0.2, 0.3]\n"); +} - Y_UNIT_TEST(ThinListReversed) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 10U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U), - NUdf::TUnboxedValuePod(3U), - NUdf::TUnboxedValuePod(4U), - NUdf::TUnboxedValuePod(5U), - NUdf::TUnboxedValuePod(6U), - NUdf::TUnboxedValuePod(7U), - NUdf::TUnboxedValuePod(8U), - NUdf::TUnboxedValuePod(9U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - 
" e = list(range(0, 10))\n" - " assert '__reversed__' in dir(v)\n" - " assert all(one == two for one, two in zip(reversed(v), reversed(e)))\n" - ); - } +Y_UNIT_TEST(FromPyTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test(): return (1, 2, 3)", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 3); + + ui32 expected = 1; + auto it = value.GetListIterator(); + for (NUdf::TUnboxedValue item; it.Next(item);) { + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; + } + }); +} - Y_UNIT_TEST(LazyListReversed) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); - }, - "def Test(v):\n" - " assert '__reversed__' in dir(v)\n" - " it = iter(reversed(v))\n" - " assert next(it) == 2\n" - " assert next(it) == 1\n" - " assert next(it) == 0\n" - " try:\n" - " next(it)\n" - " except StopIteration:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(ThinListIteration) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(value):\n" + " assert '__iter__' in dir(value)\n" + " it = iter(value)\n" + " assert next(it) == 0.1\n" + " assert next(it) == 0.2\n" + " assert next(it) == 0.3\n" + " try:\n" + " next(it)\n" + " except StopIteration:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(LazyListIteration) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& 
vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); - }, - "def Test(value):\n" - " assert '__iter__' in dir(value)\n" - " it = iter(value)\n" - " assert next(it) == 0\n" - " assert next(it) == 1\n" - " assert next(it) == 2\n" - " try:\n" - " next(it)\n" - " except StopIteration:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(ThinListReversed) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__reversed__' in dir(v)\n" + " assert all(one == two for one, two in zip(reversed(v), reversed(e)))\n"); +} - Y_UNIT_TEST(LazyListInvalidIndexType) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); - }, - "def Test(v):\n" - " try:\n" - " print(v[{}])\n" - " except TypeError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(LazyListReversed) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " assert '__reversed__' in dir(v)\n" + " it = iter(reversed(v))\n" + " assert next(it) == 2\n" + " assert next(it) == 1\n" + " assert next(it) == 0\n" + " try:\n" + " 
next(it)\n" + " except StopIteration:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(ThinListInvalidIndexType) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<double>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 3U> list = {{ - NUdf::TUnboxedValuePod(0.1), - NUdf::TUnboxedValuePod(0.2), - NUdf::TUnboxedValuePod(0.3) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " try:\n" - " print(v[{}])\n" - " except TypeError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(LazyListIteration) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(value):\n" + " assert '__iter__' in dir(value)\n" + " it = iter(value)\n" + " assert next(it) == 0\n" + " assert next(it) == 1\n" + " assert next(it) == 2\n" + " try:\n" + " next(it)\n" + " except StopIteration:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(LazyListZeroSliceStep) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); - }, - "def Test(v):\n" - " try:\n" - " print(v[::0])\n" - " except ValueError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(LazyListInvalidIndexType) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " try:\n" + " print(v[{}])\n" + " except TypeError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(ThinListZeroSliceStep) { - 
TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<double>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 3U> list = {{ - NUdf::TUnboxedValuePod(0.1), - NUdf::TUnboxedValuePod(0.2), - NUdf::TUnboxedValuePod(0.3) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " try:\n" - " print(v[::0])\n" - " except ValueError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(ThinListInvalidIndexType) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " try:\n" + " print(v[{}])\n" + " except TypeError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(ThinListSlice) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 10U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U), - NUdf::TUnboxedValuePod(3U), - NUdf::TUnboxedValuePod(4U), - NUdf::TUnboxedValuePod(5U), - NUdf::TUnboxedValuePod(6U), - NUdf::TUnboxedValuePod(7U), - NUdf::TUnboxedValuePod(8U), - NUdf::TUnboxedValuePod(9U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " e = list(range(0, 10))\n" - " assert '__len__' in dir(v)\n" - " assert list(v[::1]) == e[::1]\n" - " assert list(v[::-1]) == e[::-1]\n" - " assert list(v[1::1]) == e[1::1]\n" - " assert list(v[2::1]) == e[2::1]\n" - " assert list(v[3::1]) == e[3::1]\n" - " assert list(v[:-1:1]) == e[:-1:1]\n" - " assert list(v[:-2:1]) == e[:-2:1]\n" - " assert list(v[:-3:1]) == e[:-3:1]\n" - " assert list(v[1::-1]) == 
e[1::-1]\n" - " assert list(v[2::-1]) == e[2::-1]\n" - " assert list(v[3::-1]) == e[3::-1]\n" - " assert list(v[:-1:-1]) == e[:-1:-1]\n" - " assert list(v[:-2:-1]) == e[:-2:-1]\n" - " assert list(v[:-3:-1]) == e[:-3:-1]\n" - " assert list(v[:-2:-1]) == e[:-2:-1]\n" - " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" - " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" - " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" - " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" - " assert list(v[:7:1]) == e[:7:1]\n" - " assert list(v[-1:4]) == e[-1:4]\n" - " assert list(v[5:11]) == e[5:11]\n" - " assert list(v[4:1]) == e[4:1]\n" - " assert list(v[5:-2]) == e[5:-2]\n" - ); - } +Y_UNIT_TEST(LazyListZeroSliceStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " try:\n" + " print(v[::0])\n" + " except ValueError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(ThinListSliceOverReversed) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 10U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U), - NUdf::TUnboxedValuePod(3U), - NUdf::TUnboxedValuePod(4U), - NUdf::TUnboxedValuePod(5U), - NUdf::TUnboxedValuePod(6U), - NUdf::TUnboxedValuePod(7U), - NUdf::TUnboxedValuePod(8U), - NUdf::TUnboxedValuePod(9U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(x):\n" - " e = list(reversed(range(0, 10)))\n" - " v = reversed(x)\n" - " assert list(v[::1]) == e[::1]\n" - " assert list(v[::-1]) == e[::-1]\n" - " assert list(v[1::1]) == e[1::1]\n" - " assert list(v[2::1]) == e[2::1]\n" - " assert list(v[3::1]) == e[3::1]\n" - " assert list(v[:-1:1]) == e[:-1:1]\n" - " assert list(v[:-2:1]) == 
e[:-2:1]\n" - " assert list(v[:-3:1]) == e[:-3:1]\n" - " assert list(v[1::-1]) == e[1::-1]\n" - " assert list(v[2::-1]) == e[2::-1]\n" - " assert list(v[3::-1]) == e[3::-1]\n" - " assert list(v[:-1:-1]) == e[:-1:-1]\n" - " assert list(v[:-2:-1]) == e[:-2:-1]\n" - " assert list(v[:-3:-1]) == e[:-3:-1]\n" - " assert list(v[:-2:-1]) == e[:-2:-1]\n" - " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" - " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" - " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" - " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" - " assert list(v[:7:1]) == e[:7:1]\n" - " assert list(v[-1:4]) == e[-1:4]\n" - " assert list(v[5:11]) == e[5:11]\n" - " assert list(v[4:1]) == e[4:1]\n" - " assert list(v[5:-2]) == e[5:-2]\n" - ); - } +Y_UNIT_TEST(ThinListZeroSliceStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<double>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{NUdf::TUnboxedValuePod(0.1), + NUdf::TUnboxedValuePod(0.2), + NUdf::TUnboxedValuePod(0.3)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " try:\n" + " print(v[::0])\n" + " except ValueError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(LazyListSlice) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10)); - }, - "def Test(v):\n" - " e = list(range(0, 10))\n" - " assert '__len__' in dir(v)\n" - " assert len(v) == len(e)\n" - " assert list(v[::1]) == e[::1]\n" - " assert list(v[::-1]) == e[::-1]\n" - " assert list(v[3:]) == e[3:]\n" - " assert list(v[-2:]) == e[-2:]\n" - " assert list(v[2::-1]) == e[2::-1]\n" - " assert list(v[:-2:-1]) == e[:-2:-1]\n" - " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" - " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" - " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" - " 
assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" - " assert list(v[:7:1]) == e[:7:1]\n" - " assert list(v[-1:4]) == e[-1:4]\n" - " assert list(v[5:11]) == e[5:11]\n" - " assert list(v[4:1]) == e[4:1]\n" - " assert list(v[5:-2]) == e[5:-2]\n" - ); - } +Y_UNIT_TEST(ThinListSlice) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__len__' in dir(v)\n" + " assert list(v[::1]) == e[::1]\n" + " assert list(v[::-1]) == e[::-1]\n" + " assert list(v[1::1]) == e[1::1]\n" + " assert list(v[2::1]) == e[2::1]\n" + " assert list(v[3::1]) == e[3::1]\n" + " assert list(v[:-1:1]) == e[:-1:1]\n" + " assert list(v[:-2:1]) == e[:-2:1]\n" + " assert list(v[:-3:1]) == e[:-3:1]\n" + " assert list(v[1::-1]) == e[1::-1]\n" + " assert list(v[2::-1]) == e[2::-1]\n" + " assert list(v[3::-1]) == e[3::-1]\n" + " assert list(v[:-1:-1]) == e[:-1:-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[:-3:-1]) == e[:-3:-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" + " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" + " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" + " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" + " assert list(v[:7:1]) == e[:7:1]\n" + " assert list(v[-1:4]) == e[-1:4]\n" + " assert list(v[5:11]) == e[5:11]\n" + " assert list(v[4:1]) == e[4:1]\n" + " assert list(v[5:-2]) == e[5:-2]\n"); +} - Y_UNIT_TEST(ThinListIterateSliceWithStep) { - TPythonTestEngine engine; - 
engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 20U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U), - NUdf::TUnboxedValuePod(3U), - NUdf::TUnboxedValuePod(4U), - NUdf::TUnboxedValuePod(5U), - NUdf::TUnboxedValuePod(6U), - NUdf::TUnboxedValuePod(7U), - NUdf::TUnboxedValuePod(8U), - NUdf::TUnboxedValuePod(9U), - NUdf::TUnboxedValuePod(10U), - NUdf::TUnboxedValuePod(11U), - NUdf::TUnboxedValuePod(12U), - NUdf::TUnboxedValuePod(13U), - NUdf::TUnboxedValuePod(14U), - NUdf::TUnboxedValuePod(15U), - NUdf::TUnboxedValuePod(16U), - NUdf::TUnboxedValuePod(17U), - NUdf::TUnboxedValuePod(18U), - NUdf::TUnboxedValuePod(19U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " e = list(range(0, 20))\n" - " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n" - " assert all(one == two for one, two in zip(iter(v[3:8:2]), e[3:8:2]))\n" - " assert all(one == two for one, two in zip(iter(v[::-2]), e[::-2]))\n" - " assert all(one == two for one, two in zip(iter(v[::-3]), e[::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n" - " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n" - " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n" - " assert all(one == two for one, two in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n" - " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), 
e[:-2:-2][:2:-3]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n" - ); - } +Y_UNIT_TEST(ThinListSliceOverReversed) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(x):\n" + " e = list(reversed(range(0, 10)))\n" + " v = reversed(x)\n" + " assert list(v[::1]) == e[::1]\n" + " assert list(v[::-1]) == e[::-1]\n" + " assert list(v[1::1]) == e[1::1]\n" + " assert list(v[2::1]) == e[2::1]\n" + " assert list(v[3::1]) == e[3::1]\n" + " assert list(v[:-1:1]) == e[:-1:1]\n" + " assert list(v[:-2:1]) == e[:-2:1]\n" + " assert list(v[:-3:1]) == e[:-3:1]\n" + " assert list(v[1::-1]) == e[1::-1]\n" + " assert list(v[2::-1]) == e[2::-1]\n" + " assert list(v[3::-1]) == e[3::-1]\n" + " assert list(v[:-1:-1]) == e[:-1:-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[:-3:-1]) == e[:-3:-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" + " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" + " assert list(v[-5:-3:1]) == e[-5:-3:1]\n" + " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" + " assert list(v[:7:1]) == e[:7:1]\n" + " assert list(v[-1:4]) == e[-1:4]\n" + " assert list(v[5:11]) == e[5:11]\n" + " assert list(v[4:1]) == e[4:1]\n" + " assert list(v[5:-2]) == e[5:-2]\n"); +} - Y_UNIT_TEST(LazyListIterateSliceWithStep) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return 
NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20)); - }, - "def Test(v):\n" - " e = list(range(0, 20))\n" - " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n" - " assert all(one == two for one, two in zip(iter(v[::-3]), e[::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n" - " assert all(one == two for one, two in zip(iter(v[3:4:2]), e[3:4:2]))\n" - " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n" - " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n" - " assert all(one == two for one, two in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n" - " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n" - " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), e[:-2:-2][:2:-3]))\n" - ); - } +Y_UNIT_TEST(LazyListSlice) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10)); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__len__' in dir(v)\n" + " assert len(v) == len(e)\n" + " assert list(v[::1]) == e[::1]\n" + " assert list(v[::-1]) == e[::-1]\n" + " assert list(v[3:]) == e[3:]\n" + " assert list(v[-2:]) == e[-2:]\n" + " assert list(v[2::-1]) == e[2::-1]\n" + " assert list(v[:-2:-1]) == e[:-2:-1]\n" + " assert list(v[-12:-1:1]) == e[-12:-1:1]\n" + " assert list(v[-12:-1:-1]) == e[-12:-1:-1]\n" + " assert list(v[-5:-3:1]) 
== e[-5:-3:1]\n" + " assert list(v[-7:-2:-1]) == e[-7:-2:-1]\n" + " assert list(v[:7:1]) == e[:7:1]\n" + " assert list(v[-1:4]) == e[-1:4]\n" + " assert list(v[5:11]) == e[5:11]\n" + " assert list(v[4:1]) == e[4:1]\n" + " assert list(v[5:-2]) == e[5:-2]\n"); +} - Y_UNIT_TEST(ThinListGetByIndexSliceWithStep) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 20U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U), - NUdf::TUnboxedValuePod(3U), - NUdf::TUnboxedValuePod(4U), - NUdf::TUnboxedValuePod(5U), - NUdf::TUnboxedValuePod(6U), - NUdf::TUnboxedValuePod(7U), - NUdf::TUnboxedValuePod(8U), - NUdf::TUnboxedValuePod(9U), - NUdf::TUnboxedValuePod(10U), - NUdf::TUnboxedValuePod(11U), - NUdf::TUnboxedValuePod(12U), - NUdf::TUnboxedValuePod(13U), - NUdf::TUnboxedValuePod(14U), - NUdf::TUnboxedValuePod(15U), - NUdf::TUnboxedValuePod(16U), - NUdf::TUnboxedValuePod(17U), - NUdf::TUnboxedValuePod(18U), - NUdf::TUnboxedValuePod(19U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " e = list(range(0, 20))\n" - " assert v[::2][3] == e[::2][3]\n" - " assert v[::2][5] == e[::2][5]\n" - " assert v[::2][-3] == e[::2][-3]\n" - " assert v[::2][-7] == e[::2][-7]\n" - " assert v[2::2][4] == e[2::2][4]\n" - " assert v[2::2][5] == e[2::2][5]\n" - " assert v[2::2][-7] == e[2::2][-7]\n" - " assert v[2::2][-2] == e[2::2][-2]\n" - " assert v[:-3:2][2] == e[:-3:2][2]\n" - " assert v[:-3:2][4] == e[:-3:2][4]\n" - " assert v[:-3:2][-1] == e[:-3:2][-1]\n" - " assert v[:-3:2][-2] == e[:-3:2][-2]\n" - " assert v[:-4:3][2] == e[:-4:3][2]\n" - " assert v[:-4:3][4] == e[:-4:3][4]\n" - " assert v[:-4:3][-3] == e[:-4:3][-3]\n" - " assert v[:-4:3][-2] == e[:-4:3][-2]\n" - " assert v[-6::-3][1] == e[-6::-3][1]\n" - " assert v[-6::-3][3] == e[-6::-3][3]\n" - " assert v[-6::-3][-4] == e[-6::-3][-4]\n" 
- " assert v[-6::-3][-1] == e[-6::-3][-1]\n" - ); - } +Y_UNIT_TEST(ThinListIterateSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 20U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U), + NUdf::TUnboxedValuePod(10U), + NUdf::TUnboxedValuePod(11U), + NUdf::TUnboxedValuePod(12U), + NUdf::TUnboxedValuePod(13U), + NUdf::TUnboxedValuePod(14U), + NUdf::TUnboxedValuePod(15U), + NUdf::TUnboxedValuePod(16U), + NUdf::TUnboxedValuePod(17U), + NUdf::TUnboxedValuePod(18U), + NUdf::TUnboxedValuePod(19U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n" + " assert all(one == two for one, two in zip(iter(v[3:8:2]), e[3:8:2]))\n" + " assert all(one == two for one, two in zip(iter(v[::-2]), e[::-2]))\n" + " assert all(one == two for one, two in zip(iter(v[::-3]), e[::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n" + " assert all(one == two for one, two in 
zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), e[:-2:-2][:2:-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n"); +} - Y_UNIT_TEST(LazyListGetByIndexSliceWithStep) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20)); - }, - "def Test(v):\n" - " e = list(range(0, 20))\n" - " assert v[::2][3] == e[::2][3]\n" - " assert v[::2][5] == e[::2][5]\n" - " assert v[::2][-3] == e[::2][-3]\n" - " assert v[::2][-7] == e[::2][-7]\n" - " assert v[2::2][4] == e[2::2][4]\n" - " assert v[2::2][5] == e[2::2][5]\n" - " assert v[2::2][-7] == e[2::2][-7]\n" - " assert v[2::2][-2] == e[2::2][-2]\n" - " assert v[:-3:2][2] == e[:-3:2][2]\n" - " assert v[:-3:2][4] == e[:-3:2][4]\n" - " assert v[:-3:2][-1] == e[:-3:2][-1]\n" - " assert v[:-3:2][-2] == e[:-3:2][-2]\n" - " assert v[:-4:3][2] == e[:-4:3][2]\n" - " assert v[:-4:3][4] == e[:-4:3][4]\n" - " assert v[:-4:3][-3] == e[:-4:3][-3]\n" - " assert v[:-4:3][-2] == e[:-4:3][-2]\n" - " assert v[-6::-3][1] == e[-6::-3][1]\n" - " assert v[-6::-3][3] == e[-6::-3][3]\n" - " assert v[-6::-3][-4] == e[-6::-3][-4]\n" - " assert v[-6::-3][-1] == e[-6::-3][-1]\n" - ); - } +Y_UNIT_TEST(LazyListIterateSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20)); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert all(one == two for one, two in zip(iter(v[::2]), e[::2]))\n" + " assert all(one == two for one, two in zip(iter(v[::-3]), e[::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:3:-3]), e[:3:-3]))\n" + " assert all(one == two for one, two in 
zip(iter(v[3:4:2]), e[3:4:2]))\n" + " assert all(one == two for one, two in zip(iter(v[-7::-3]), e[-7::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-6::-3]), e[-6::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[-5::-3]), e[-5::-3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2]), e[:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[-2:-6:-2]), e[-2:-6:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:-6:-2][::2]), e[2:-6:-2][::2]))\n" + " assert all(one == two for one, two in zip(iter(v[2:6:-2][:-2:-2]), e[2:6:-2][:-2:-2]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:2][:2:3]), e[:-2:2][:2:3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:3]), e[:-2:-2][:2:3]))\n" + " assert all(one == two for one, two in zip(iter(v[:-2:-2][:2:-3]), e[:-2:-2][:2:-3]))\n"); +} - Y_UNIT_TEST(ThinListByIndex) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 10U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U), - NUdf::TUnboxedValuePod(3U), - NUdf::TUnboxedValuePod(4U), - NUdf::TUnboxedValuePod(5U), - NUdf::TUnboxedValuePod(6U), - NUdf::TUnboxedValuePod(7U), - NUdf::TUnboxedValuePod(8U), - NUdf::TUnboxedValuePod(9U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " e = list(range(0, 10))\n" - " assert '__getitem__' in dir(v)\n" - " assert v[0] == e[0]\n" - " assert v[3] == e[3]\n" - " assert v[5] == e[5]\n" - " assert v[9] == e[9]\n" - " assert v[-1] == e[-1]\n" - " assert v[-4] == e[-4]\n" - " assert v[-9] == e[-9]\n" - " assert v[-10] == e[-10]\n" - ); - } +Y_UNIT_TEST(ThinListGetByIndexSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + 
std::array<NUdf::TUnboxedValue, 20U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U), + NUdf::TUnboxedValuePod(10U), + NUdf::TUnboxedValuePod(11U), + NUdf::TUnboxedValuePod(12U), + NUdf::TUnboxedValuePod(13U), + NUdf::TUnboxedValuePod(14U), + NUdf::TUnboxedValuePod(15U), + NUdf::TUnboxedValuePod(16U), + NUdf::TUnboxedValuePod(17U), + NUdf::TUnboxedValuePod(18U), + NUdf::TUnboxedValuePod(19U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert v[::2][3] == e[::2][3]\n" + " assert v[::2][5] == e[::2][5]\n" + " assert v[::2][-3] == e[::2][-3]\n" + " assert v[::2][-7] == e[::2][-7]\n" + " assert v[2::2][4] == e[2::2][4]\n" + " assert v[2::2][5] == e[2::2][5]\n" + " assert v[2::2][-7] == e[2::2][-7]\n" + " assert v[2::2][-2] == e[2::2][-2]\n" + " assert v[:-3:2][2] == e[:-3:2][2]\n" + " assert v[:-3:2][4] == e[:-3:2][4]\n" + " assert v[:-3:2][-1] == e[:-3:2][-1]\n" + " assert v[:-3:2][-2] == e[:-3:2][-2]\n" + " assert v[:-4:3][2] == e[:-4:3][2]\n" + " assert v[:-4:3][4] == e[:-4:3][4]\n" + " assert v[:-4:3][-3] == e[:-4:3][-3]\n" + " assert v[:-4:3][-2] == e[:-4:3][-2]\n" + " assert v[-6::-3][1] == e[-6::-3][1]\n" + " assert v[-6::-3][3] == e[-6::-3][3]\n" + " assert v[-6::-3][-4] == e[-6::-3][-4]\n" + " assert v[-6::-3][-1] == e[-6::-3][-1]\n"); +} - Y_UNIT_TEST(LazyListByIndex) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10)); - }, - "def Test(v):\n" - " e = list(range(0, 10))\n" - " assert '__getitem__' in dir(v)\n" - " assert v[0] == e[0]\n" - " assert v[3] == e[3]\n" - " 
assert v[5] == e[5]\n" - " assert v[9] == e[9]\n" - " assert v[-1] == e[-1]\n" - " assert v[-4] == e[-4]\n" - " assert v[-9] == e[-9]\n" - " assert v[-10] == e[-10]\n" - ); - } +Y_UNIT_TEST(LazyListGetByIndexSliceWithStep) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 20)); + }, + "def Test(v):\n" + " e = list(range(0, 20))\n" + " assert v[::2][3] == e[::2][3]\n" + " assert v[::2][5] == e[::2][5]\n" + " assert v[::2][-3] == e[::2][-3]\n" + " assert v[::2][-7] == e[::2][-7]\n" + " assert v[2::2][4] == e[2::2][4]\n" + " assert v[2::2][5] == e[2::2][5]\n" + " assert v[2::2][-7] == e[2::2][-7]\n" + " assert v[2::2][-2] == e[2::2][-2]\n" + " assert v[:-3:2][2] == e[:-3:2][2]\n" + " assert v[:-3:2][4] == e[:-3:2][4]\n" + " assert v[:-3:2][-1] == e[:-3:2][-1]\n" + " assert v[:-3:2][-2] == e[:-3:2][-2]\n" + " assert v[:-4:3][2] == e[:-4:3][2]\n" + " assert v[:-4:3][4] == e[:-4:3][4]\n" + " assert v[:-4:3][-3] == e[:-4:3][-3]\n" + " assert v[:-4:3][-2] == e[:-4:3][-2]\n" + " assert v[-6::-3][1] == e[-6::-3][1]\n" + " assert v[-6::-3][3] == e[-6::-3][3]\n" + " assert v[-6::-3][-4] == e[-6::-3][-4]\n" + " assert v[-6::-3][-1] == e[-6::-3][-1]\n"); +} - Y_UNIT_TEST(ThinListIndexOutOfBounds) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 3U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " try:\n" - " print(v[3])\n" - " except IndexError:\n" - " pass\n" - " else:\n" - " assert False\n" - " try:\n" - " print(v[-4])\n" - " except IndexError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(ThinListByIndex) { + 
TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__getitem__' in dir(v)\n" + " assert v[0] == e[0]\n" + " assert v[3] == e[3]\n" + " assert v[5] == e[5]\n" + " assert v[9] == e[9]\n" + " assert v[-1] == e[-1]\n" + " assert v[-4] == e[-4]\n" + " assert v[-9] == e[-9]\n" + " assert v[-10] == e[-10]\n"); +} - Y_UNIT_TEST(LazyListIndexOutOfBounds) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); - }, - "def Test(v):\n" - " try:\n" - " print(v[3])\n" - " except IndexError:\n" - " pass\n" - " else:\n" - " assert False\n" - " try:\n" - " print(v[-4])\n" - " except IndexError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(LazyListByIndex) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10)); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert '__getitem__' in dir(v)\n" + " assert v[0] == e[0]\n" + " assert v[3] == e[3]\n" + " assert v[5] == e[5]\n" + " assert v[9] == e[9]\n" + " assert v[-1] == e[-1]\n" + " assert v[-4] == e[-4]\n" + " assert v[-9] == e[-9]\n" + " assert v[-10] == e[-10]\n"); +} - Y_UNIT_TEST(LazyListWithoutLenghNormalSlice) { - 
TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10)); - }, - "def Test(v):\n" - " e = range(0, 10)\n" - " assert '__len__' in dir(v)\n" - " assert all(one == two for one, two in zip(iter(v[::1]), e[::1]))\n" - " assert all(one == two for one, two in zip(iter(v[::-1]), e[::-1]))\n" - " assert all(one == two for one, two in zip(iter(v[4:]), e[4:]))\n" - " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n" - " assert all(one == two for one, two in zip(iter(v[:6:1]), e[:6:1]))\n" - " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n" - " assert all(one == two for one, two in zip(iter(v[4:11]), e[4:11]))\n" - " assert all(one == two for one, two in zip(iter(v[5:1]), e[5:1]))\n" - ); - } +Y_UNIT_TEST(ThinListIndexOutOfBounds) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 3U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " try:\n" + " print(v[3])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + " try:\n" + " print(v[-4])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(ThinListTakeSkip) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - std::array<NUdf::TUnboxedValue, 10U> list = {{ - NUdf::TUnboxedValuePod(0U), - NUdf::TUnboxedValuePod(1U), - NUdf::TUnboxedValuePod(2U), - NUdf::TUnboxedValuePod(3U), - NUdf::TUnboxedValuePod(4U), - NUdf::TUnboxedValuePod(5U), - NUdf::TUnboxedValuePod(6U), - NUdf::TUnboxedValuePod(7U), - 
NUdf::TUnboxedValuePod(8U), - NUdf::TUnboxedValuePod(9U) - }}; - return vb.NewList(list.data(), list.size()); - }, - "def Test(v):\n" - " e = list(range(0, 10))\n" - " assert len(v) == len(e)\n" - " assert list(v.skip(5)) == e[5:]\n" - " assert list(v.take(5)) == e[0:5]\n" - " assert list(v.skip(4).take(5)) == e[4:][:5]\n" - " try:\n" - " print(list(v.skip(-1)))\n" - " except IndexError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(LazyListIndexOutOfBounds) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 3)); + }, + "def Test(v):\n" + " try:\n" + " print(v[3])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n" + " try:\n" + " print(v[-4])\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(LazyListTakeSkip) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10)); - }, - "def Test(v):\n" - " e = list(range(0, 10))\n" - " assert list(v.skip(5)) == e[5:]\n" - " assert list(v.take(5)) == e[0:5]\n" - " assert list(v.skip(4).take(5)) == e[4:][:5]\n" - " try:\n" - " print(list(v.skip(-1)))\n" - " except IndexError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(LazyListWithoutLenghNormalSlice) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 10)); + }, + "def Test(v):\n" + " e = range(0, 10)\n" + " assert '__len__' in dir(v)\n" + " assert all(one == two for one, two in zip(iter(v[::1]), e[::1]))\n" + " assert all(one == two for one, two in 
zip(iter(v[::-1]), e[::-1]))\n" + " assert all(one == two for one, two in zip(iter(v[4:]), e[4:]))\n" + " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n" + " assert all(one == two for one, two in zip(iter(v[:6:1]), e[:6:1]))\n" + " assert all(one == two for one, two in zip(iter(v[1::-1]), e[1::-1]))\n" + " assert all(one == two for one, two in zip(iter(v[4:11]), e[4:11]))\n" + " assert all(one == two for one, two in zip(iter(v[5:1]), e[5:1]))\n"); +} - Y_UNIT_TEST(LazyListToIndexDict) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); - }, - "def Test(value):\n" - " d = value.to_index_dict()\n" - " assert len(d) == 3\n" - " assert d[0] == 3\n" - " assert d[1] == 4\n" - " assert d[2] == 5\n" - " assert 3 not in d"); - } +Y_UNIT_TEST(ThinListTakeSkip) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + std::array<NUdf::TUnboxedValue, 10U> list = {{NUdf::TUnboxedValuePod(0U), + NUdf::TUnboxedValuePod(1U), + NUdf::TUnboxedValuePod(2U), + NUdf::TUnboxedValuePod(3U), + NUdf::TUnboxedValuePod(4U), + NUdf::TUnboxedValuePod(5U), + NUdf::TUnboxedValuePod(6U), + NUdf::TUnboxedValuePod(7U), + NUdf::TUnboxedValuePod(8U), + NUdf::TUnboxedValuePod(9U)}}; + return vb.NewList(list.data(), list.size()); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert len(v) == len(e)\n" + " assert list(v.skip(5)) == e[5:]\n" + " assert list(v.take(5)) == e[0:5]\n" + " assert list(v.skip(4).take(5)) == e[4:][:5]\n" + " try:\n" + " print(list(v.skip(-1)))\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(LazyListTrue) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - 
Y_UNUSED(type); - NUdf::TUnboxedValue *items = nullptr; - return vb.NewArray(1U, items); - }, - "def Test(value):\n" - " assert value\n" - ); - } +Y_UNIT_TEST(LazyListTakeSkip) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<true>(0, 10)); + }, + "def Test(v):\n" + " e = list(range(0, 10))\n" + " assert list(v.skip(5)) == e[5:]\n" + " assert list(v.take(5)) == e[0:5]\n" + " assert list(v.skip(4).take(5)) == e[4:][:5]\n" + " try:\n" + " print(list(v.skip(-1)))\n" + " except IndexError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} - Y_UNIT_TEST(LazyListFalse) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0)); - }, - "def Test(value):\n" - " assert not value\n" - ); - } +Y_UNIT_TEST(LazyListToIndexDict) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " assert len(d) == 3\n" + " assert d[0] == 3\n" + " assert d[1] == 4\n" + " assert d[2] == 5\n" + " assert 3 not in d"); +} - Y_UNIT_TEST(ThinListTrue) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); - }, - "def Test(value):\n" - " assert value\n" - ); - } +Y_UNIT_TEST(LazyListTrue) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + NUdf::TUnboxedValue* items = 
nullptr; + return vb.NewArray(1U, items); + }, + "def Test(value):\n" + " assert value\n"); +} - Y_UNIT_TEST(ThinListFalse) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); - return vb.NewEmptyList(); - }, - "def Test(value):\n" - " assert not value\n" - ); - } +Y_UNIT_TEST(LazyListFalse) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0)); + }, + "def Test(value):\n" + " assert not value\n"); +} - Y_UNIT_TEST(LazyListHasItems) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); - }, - "def Test(value):\n" - " b = value.has_items()\n" - " assert b\n"); - } +Y_UNIT_TEST(ThinListTrue) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " assert value\n"); +} - Y_UNIT_TEST(LazyListEmptyHasItems) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0)); - }, - "def Test(value):\n" - " b = value.has_items()\n" - " assert not b\n"); - } +Y_UNIT_TEST(ThinListFalse) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + return vb.NewEmptyList(); + }, + "def Test(value):\n" + " assert not value\n"); +} - Y_UNIT_TEST(LazyIndexDictContains) { - TPythonTestEngine engine; - 
engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); - }, - "def Test(value):\n" - " d = value.to_index_dict()\n" - " assert 0 in d\n" - " assert 1 in d\n" - " assert 2 in d\n" - " assert 3 not in d\n" - " assert -1 not in d"); - } +Y_UNIT_TEST(LazyListHasItems) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " b = value.has_items()\n" + " assert b\n"); +} - Y_UNIT_TEST(LazyIndexDictIter) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); - }, - "def Test(value):\n" - " d = value.to_index_dict()\n" - " i, j = 0, 3\n" - " for k, v in d.items():\n" - " assert i == k\n" - " assert j == v\n" - " i, j = i+1, j+1"); - } +Y_UNIT_TEST(LazyListEmptyHasItems) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(0, 0)); + }, + "def Test(value):\n" + " b = value.has_items()\n" + " assert not b\n"); +} - Y_UNIT_TEST(LazyIndexDictGet) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TListType<i32>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 5)); - }, - "def Test(value):\n" - " d = value.to_index_dict()\n" - " assert d.get(1) == 4\n" - " assert d.get(5) == None\n" - " assert d.get(5, 10) == 10\n"); - } +Y_UNIT_TEST(LazyIndexDictContains) { + TPythonTestEngine engine; + 
engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " assert 0 in d\n" + " assert 1 in d\n" + " assert 2 in d\n" + " assert 3 not in d\n" + " assert -1 not in d"); +} - Y_UNIT_TEST(FromPyGeneratorFactory) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "def first_10():\n" - " num = 0\n" - " while num < 10:\n" - " yield num\n" - " num += 1\n" - "def Test():\n" - " return first_10\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(!value.HasFastListLength()); - UNIT_ASSERT(value.HasListItems()); +Y_UNIT_TEST(LazyIndexDictIter) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 6)); + }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " i, j = 0, 3\n" + " for k, v in d.items():\n" + " assert i == k\n" + " assert j == v\n" + " i, j = i+1, j+1"); +} - const auto it = value.GetListIterator(); - ui32 expected = 0; - for (NUdf::TUnboxedValue item; it.Next(item);) { - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, expected); - expected++; - } +Y_UNIT_TEST(LazyIndexDictGet) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TListType<i32>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new NUdf::TLazyList<false>(3, 5)); + }, + "def Test(value):\n" + " d = value.to_index_dict()\n" + " assert d.get(1) == 4\n" + " assert d.get(5) == None\n" + " assert d.get(5, 10) == 10\n"); +} - UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10); - UNIT_ASSERT_EQUAL(value.GetListLength(), 10); - }); - } 
+Y_UNIT_TEST(FromPyGeneratorFactory) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def first_10():\n" + " num = 0\n" + " while num < 10:\n" + " yield num\n" + " num += 1\n" + "def Test():\n" + " return first_10\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasFastListLength()); + UNIT_ASSERT(value.HasListItems()); + + const auto it = value.GetListIterator(); + ui32 expected = 0; + for (NUdf::TUnboxedValue item; it.Next(item);) { + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; + } + + UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10); + UNIT_ASSERT_EQUAL(value.GetListLength(), 10); + }); +} - Y_UNIT_TEST(FromPyIterable) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "def Test():\n" +Y_UNIT_TEST(FromPyIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test():\n" #if PY_MAJOR_VERSION >= 3 - " return range(10)\n", + " return range(10)\n", #else - " return xrange(10)\n", + " return xrange(10)\n", #endif - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(!value.HasFastListLength()); - UNIT_ASSERT(value.HasListItems()); - - const auto it = value.GetListIterator(); - ui32 expected = 0U; - for (NUdf::TUnboxedValue item; it.Next(item);) { - UNIT_ASSERT_EQUAL(item.Get<ui32>(), expected++); - } - - UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10); - UNIT_ASSERT_EQUAL(value.GetListLength(), 10); - UNIT_ASSERT(value.HasFastListLength()); - }); - } - - Y_UNIT_TEST(FromPyCustomIterable) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "class T:\n" - " def __init__(self, l):\n" - " self.l = l\n" - " def __len__(self):\n" - " return len(self.l)\n" - " def __nonzero__(self):\n" - " return bool(self.l)\n" - " def __iter__(self):\n" - " return iter(self.l)\n" - "\n" - "def 
Test():\n" - " return T([1, 2])\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(value.HasListItems()); - UNIT_ASSERT_EQUAL(value.GetListLength(), 2); - - auto it = value.GetListIterator(); - { - NUdf::TUnboxedValue item; - it.Next(item); - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, 1); - } - { - NUdf::TUnboxedValue item; - it.Next(item); - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, 2); - } - - UNIT_ASSERT(false == it.Skip()); - }); - } - - Y_UNIT_TEST(FromPyIterator) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "def Test():\n" - " return iter(range(2))\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(false == value.HasFastListLength()); - - auto it = value.GetListIterator(); - { - NUdf::TUnboxedValue item; - it.Next(item); - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, 0); - } - { - NUdf::TUnboxedValue item; - it.Next(item); - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, 1); - } - - UNIT_ASSERT(false == it.Skip()); - }); - } + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(!value.HasFastListLength()); + UNIT_ASSERT(value.HasListItems()); + + const auto it = value.GetListIterator(); + ui32 expected = 0U; + for (NUdf::TUnboxedValue item; it.Next(item);) { + UNIT_ASSERT_EQUAL(item.Get<ui32>(), expected++); + } + + UNIT_ASSERT_EQUAL(value.GetEstimatedListLength(), 10); + UNIT_ASSERT_EQUAL(value.GetListLength(), 10); + UNIT_ASSERT(value.HasFastListLength()); + }); +} - Y_UNIT_TEST(FromPyGenerator) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TListType<ui32>>( - "def Test():\n" - " yield 0\n" - " yield 1\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT(false == value.HasFastListLength()); 
+Y_UNIT_TEST(FromPyCustomIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "class T:\n" + " def __init__(self, l):\n" + " self.l = l\n" + " def __len__(self):\n" + " return len(self.l)\n" + " def __nonzero__(self):\n" + " return bool(self.l)\n" + " def __iter__(self):\n" + " return iter(self.l)\n" + "\n" + "def Test():\n" + " return T([1, 2])\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(value.HasListItems()); + UNIT_ASSERT_EQUAL(value.GetListLength(), 2); + + auto it = value.GetListIterator(); + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 1); + } + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 2); + } + + UNIT_ASSERT(false == it.Skip()); + }); +} - auto it = value.GetListIterator(); - { - NUdf::TUnboxedValue item; - it.Next(item); - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, 0); - } - { - NUdf::TUnboxedValue item; - it.Next(item); - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, 1); - } +Y_UNIT_TEST(FromPyIterator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test():\n" + " return iter(range(2))\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(false == value.HasFastListLength()); + + auto it = value.GetListIterator(); + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 0); + } + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 1); + } + + UNIT_ASSERT(false == it.Skip()); + }); +} - UNIT_ASSERT(false == it.Skip()); - }); - } +Y_UNIT_TEST(FromPyGenerator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TListType<ui32>>( + "def Test():\n" + " yield 0\n" + " yield 1\n", + [](const 
NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT(false == value.HasFastListLength()); + + auto it = value.GetListIterator(); + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 0); + } + { + NUdf::TUnboxedValue item; + it.Next(item); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, 1); + } + + UNIT_ASSERT(false == it.Skip()); + }); } +} // Y_UNIT_TEST_SUITE(TPyListTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp index 35c94d5e8ed..19f7929b6da 100644 --- a/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_number_ut.cpp @@ -2,14 +2,14 @@ #include <library/cpp/testing/unittest/registar.h> -#define PY_CHECKER(Name, PyType, AsType, Type) \ - struct TPy##Name##Checker { \ - void operator()(PyObject* pyVal, Type expected) { \ - UNIT_ASSERT(Py##PyType##_Check(pyVal)); \ - Type val = Py##PyType##_As##AsType(pyVal); \ +#define PY_CHECKER(Name, PyType, AsType, Type) \ + struct TPy##Name##Checker { \ + void operator()(PyObject* pyVal, Type expected) { \ + UNIT_ASSERT(Py##PyType##_Check(pyVal)); \ + Type val = Py##PyType##_As##AsType(pyVal); \ UNIT_ASSERT(val != static_cast<Type>(-1) || !PyErr_Occurred()); \ - UNIT_ASSERT_EQUAL(val, expected); \ - } \ + UNIT_ASSERT_EQUAL(val, expected); \ + } \ }; #if PY_MAJOR_VERSION >= 3 @@ -33,327 +33,326 @@ PY_CHECKER(Float, Float, Double, long) using namespace NPython; Y_UNIT_TEST_SUITE(TPyNumberTest) { - template <typename T, typename TPyChecker> - void TestCastsInRange(T begin, T end) { - for (T i = begin; i < end; i++) { - TPyObjectPtr pyVal = PyCast<T>(i); - UNIT_ASSERT(pyVal.Get() != nullptr); - - TPyChecker c; - c(pyVal.Get(), i); - - T cppVal = PyCast<T>(pyVal.Get()); - UNIT_ASSERT_EQUAL(cppVal, i); - } - } +template <typename T, typename TPyChecker> +void 
TestCastsInRange(T begin, T end) { + for (T i = begin; i < end; i++) { + TPyObjectPtr pyVal = PyCast<T>(i); + UNIT_ASSERT(pyVal.Get() != nullptr); - template <typename T, typename TPyChecker, int range = 10> - void TestSignedCasts() { - TPythonTestEngine engine; - TestCastsInRange<T, TPyChecker>(Min<T>(), Min<T>() + range); - TestCastsInRange<T, TPyChecker>(-range, range); - TestCastsInRange<T, TPyChecker>(Max<T>() - range, Max<T>()); - } + TPyChecker c; + c(pyVal.Get(), i); - template <typename T, typename TPyDownChecker, - typename TPyUpChecker = TPyDownChecker, int range = 10> - void TestUnsignedCasts() { - TPythonTestEngine engine; - TestCastsInRange<T, TPyDownChecker>(Min<T>(), Min<T>() + range); - TestCastsInRange<T, TPyUpChecker>(Max<T>() - range, Max<T>()); + T cppVal = PyCast<T>(pyVal.Get()); + UNIT_ASSERT_EQUAL(cppVal, i); } +} - Y_UNIT_TEST(Bool) { - TPythonTestEngine engine; - UNIT_ASSERT_EQUAL(PyCast<bool>(Py_True), true); - UNIT_ASSERT_EQUAL(PyCast<bool>(Py_False), false); - - TPyObjectPtr list = PyList_New(0); - UNIT_ASSERT_EQUAL(PyCast<bool>(list.Get()), false); - bool res1; - UNIT_ASSERT(TryPyCast<bool>(list.Get(), res1)); - UNIT_ASSERT_EQUAL(res1, false); - - PyList_Append(list.Get(), Py_None); - UNIT_ASSERT_EQUAL(PyCast<bool>(list.Get()), true); - bool res2; - UNIT_ASSERT(TryPyCast<bool>(list.Get(), res2)); - UNIT_ASSERT_EQUAL(res2, true); - } +template <typename T, typename TPyChecker, int range = 10> +void TestSignedCasts() { + TPythonTestEngine engine; + TestCastsInRange<T, TPyChecker>(Min<T>(), Min<T>() + range); + TestCastsInRange<T, TPyChecker>(-range, range); + TestCastsInRange<T, TPyChecker>(Max<T>() - range, Max<T>()); +} - Y_UNIT_TEST(Float) { - TestSignedCasts<float, TPyFloatChecker>(); - } +template <typename T, typename TPyDownChecker, + typename TPyUpChecker = TPyDownChecker, int range = 10> +void TestUnsignedCasts() { + TPythonTestEngine engine; + TestCastsInRange<T, TPyDownChecker>(Min<T>(), Min<T>() + range); + 
TestCastsInRange<T, TPyUpChecker>(Max<T>() - range, Max<T>()); +} - Y_UNIT_TEST(Double) { - TestUnsignedCasts<double, TPyFloatChecker>(); - } +Y_UNIT_TEST(Bool) { + TPythonTestEngine engine; + UNIT_ASSERT_EQUAL(PyCast<bool>(Py_True), true); + UNIT_ASSERT_EQUAL(PyCast<bool>(Py_False), false); + + TPyObjectPtr list = PyList_New(0); + UNIT_ASSERT_EQUAL(PyCast<bool>(list.Get()), false); + bool res1; + UNIT_ASSERT(TryPyCast<bool>(list.Get(), res1)); + UNIT_ASSERT_EQUAL(res1, false); + + PyList_Append(list.Get(), Py_None); + UNIT_ASSERT_EQUAL(PyCast<bool>(list.Get()), true); + bool res2; + UNIT_ASSERT(TryPyCast<bool>(list.Get(), res2)); + UNIT_ASSERT_EQUAL(res2, true); +} - Y_UNIT_TEST(I64) { - TestSignedCasts<i64, TPyLLongChecker>(); - } +Y_UNIT_TEST(Float) { + TestSignedCasts<float, TPyFloatChecker>(); +} - Y_UNIT_TEST(Ui64) { - TestUnsignedCasts<ui64, TPyUlongChecker>(); - } +Y_UNIT_TEST(Double) { + TestUnsignedCasts<double, TPyFloatChecker>(); +} + +Y_UNIT_TEST(I64) { + TestSignedCasts<i64, TPyLLongChecker>(); +} + +Y_UNIT_TEST(Ui64) { + TestUnsignedCasts<ui64, TPyUlongChecker>(); +} #if PY_MAJOR_VERSION >= 3 - Y_UNIT_TEST(I8) { - TestSignedCasts<i8, TPyLongChecker>(); - } +Y_UNIT_TEST(I8) { + TestSignedCasts<i8, TPyLongChecker>(); +} - Y_UNIT_TEST(Ui8) { - TestUnsignedCasts<ui8, TPyLongChecker>(); - } +Y_UNIT_TEST(Ui8) { + TestUnsignedCasts<ui8, TPyLongChecker>(); +} - Y_UNIT_TEST(I16) { - TestSignedCasts<i16, TPyLongChecker>(); - } +Y_UNIT_TEST(I16) { + TestSignedCasts<i16, TPyLongChecker>(); +} + +Y_UNIT_TEST(Ui16) { + TestUnsignedCasts<ui16, TPyLongChecker>(); +} - Y_UNIT_TEST(Ui16) { - TestUnsignedCasts<ui16, TPyLongChecker>(); +Y_UNIT_TEST(I32) { + TestSignedCasts<i32, TPyLongChecker>(); +} + +Y_UNIT_TEST(Ui32) { + TestUnsignedCasts<ui32, TPyLongChecker>(); +} +Y_UNIT_TEST(ImplicitIntCasts) { + TPythonTestEngine engine; + const ui64 longMask = sizeof(long) == 4 ? 
Max<ui32>() : Max<ui64>(); + i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10); + TPyObjectPtr pyInt = PyLong_FromLong(expected); + + { // signed + i64 actual = PyCast<i64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, expected); + + bool isOk = TryPyCast<i64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, expected); } - Y_UNIT_TEST(I32) { - TestSignedCasts<i32, TPyLongChecker>(); + { // unsigned + ui64 actual = PyCast<ui64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + + bool isOk = TryPyCast<ui64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); } - Y_UNIT_TEST(Ui32) { - TestUnsignedCasts<ui32, TPyLongChecker>(); + { // to float + float f = PyCast<float>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + + bool isOk = TryPyCast<float>(pyInt.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); } - Y_UNIT_TEST(ImplicitIntCasts) { - TPythonTestEngine engine; - const ui64 longMask = sizeof(long) == 4 ? 
Max<ui32>() : Max<ui64>(); - i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10); - TPyObjectPtr pyInt = PyLong_FromLong(expected); - - { // signed - i64 actual = PyCast<i64>(pyInt.Get()); - UNIT_ASSERT_EQUAL(actual, expected); - - bool isOk = TryPyCast<i64>(pyInt.Get(), actual); - UNIT_ASSERT(isOk); - UNIT_ASSERT_EQUAL(actual, expected); - } - - { // unsigned - ui64 actual = PyCast<ui64>(pyInt.Get()); - UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); - - bool isOk = TryPyCast<ui64>(pyInt.Get(), actual); - UNIT_ASSERT(isOk); - UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); - } - - { // to float - float f = PyCast<float>(pyInt.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - - bool isOk = TryPyCast<float>(pyInt.Get(), f); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - } - - { // to double - double d = PyCast<double>(pyInt.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); - - bool isOk = TryPyCast<double>(pyInt.Get(), d); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); - } - - // expected overflow - i32 tmp; - UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp)); - ui32 tmpu; - UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu)); + + { // to double + double d = PyCast<double>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + + bool isOk = TryPyCast<double>(pyInt.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); } + // expected overflow + i32 tmp; + UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp)); + ui32 tmpu; + UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu)); +} + #else - Y_UNIT_TEST(I8) { - TestSignedCasts<i8, TPyIntChecker>(); +Y_UNIT_TEST(I8) { + TestSignedCasts<i8, TPyIntChecker>(); +} + +Y_UNIT_TEST(Ui8) { + TestUnsignedCasts<ui8, TPyIntChecker>(); +} + +Y_UNIT_TEST(I16) { + TestSignedCasts<i16, TPyIntChecker>(); +} + +Y_UNIT_TEST(Ui16) { + TestUnsignedCasts<ui16, TPyIntChecker>(); +} + +Y_UNIT_TEST(I32) { + 
TestSignedCasts<i32, TPyIntChecker>(); +} + +Y_UNIT_TEST(Ui32) { + if (sizeof(long) == 4) { + TestUnsignedCasts<ui32, TPyIntChecker, TPyLLongChecker>(); + } else { + TestUnsignedCasts<ui32, TPyIntChecker>(); } +} + +Y_UNIT_TEST(ImplicitIntCasts) { + TPythonTestEngine engine; + const ui64 longMask = sizeof(long) == 4 ? Max<ui32>() : Max<ui64>(); + i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10); + TPyObjectPtr pyInt = PyInt_FromLong(expected); + + { // signed + i64 actual = PyCast<i64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, expected); - Y_UNIT_TEST(Ui8) { - TestUnsignedCasts<ui8, TPyIntChecker>(); + bool isOk = TryPyCast<i64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, expected); } - Y_UNIT_TEST(I16) { - TestSignedCasts<i16, TPyIntChecker>(); + { // unsigned + ui64 actual = PyCast<ui64>(pyInt.Get()); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + + bool isOk = TryPyCast<ui64>(pyInt.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); } - Y_UNIT_TEST(Ui16) { - TestUnsignedCasts<ui16, TPyIntChecker>(); + { // to float + float f = PyCast<float>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + + bool isOk = TryPyCast<float>(pyInt.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); } - Y_UNIT_TEST(I32) { - TestSignedCasts<i32, TPyIntChecker>(); + { // to double + double d = PyCast<double>(pyInt.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + + bool isOk = TryPyCast<double>(pyInt.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); } - Y_UNIT_TEST(Ui32) { - if (sizeof(long) == 4) { - TestUnsignedCasts<ui32, TPyIntChecker, TPyLLongChecker>(); - } else { - TestUnsignedCasts<ui32, TPyIntChecker>(); - } + // expected overflow + i32 tmp; + UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp)); + ui32 tmpu; + UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu)); +} +#endif + 
+Y_UNIT_TEST(ImplicitLongCasts) { + TPythonTestEngine engine; + i64 expected = static_cast<i64>(Max<ui32>()) + 10; + TPyObjectPtr pyLong; +#ifdef HAVE_LONG_LONG + pyLong = PyLong_FromLongLong(expected); +#else + pyLong = PyLong_FromLong(expected) +#endif + + { // signed + i64 actual = PyCast<i64>(pyLong.Get()); + UNIT_ASSERT_EQUAL(actual, expected); + + bool isOk = TryPyCast<i64>(pyLong.Get(), actual); + UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, expected); } - Y_UNIT_TEST(ImplicitIntCasts) { - TPythonTestEngine engine; - const ui64 longMask = sizeof(long) == 4 ? Max<ui32>() : Max<ui64>(); - i64 expected = longMask & (static_cast<i64>(Max<ui32>()) + 10); - TPyObjectPtr pyInt = PyInt_FromLong(expected); - - { // signed - i64 actual = PyCast<i64>(pyInt.Get()); - UNIT_ASSERT_EQUAL(actual, expected); - - bool isOk = TryPyCast<i64>(pyInt.Get(), actual); - UNIT_ASSERT(isOk); - UNIT_ASSERT_EQUAL(actual, expected); - } - - { // unsigned - ui64 actual = PyCast<ui64>(pyInt.Get()); - UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); - - bool isOk = TryPyCast<ui64>(pyInt.Get(), actual); - UNIT_ASSERT(isOk); - UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); - } - - { // to float - float f = PyCast<float>(pyInt.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - - bool isOk = TryPyCast<float>(pyInt.Get(), f); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - } - - { // to double - double d = PyCast<double>(pyInt.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); - - bool isOk = TryPyCast<double>(pyInt.Get(), d); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); - } - - // expected overflow - i32 tmp; - UNIT_ASSERT(!TryPyCast<i32>(pyInt.Get(), tmp)); - ui32 tmpu; - UNIT_ASSERT(!TryPyCast<ui32>(pyInt.Get(), tmpu)); + { // unsigned + ui64 actual = PyCast<ui64>(pyLong.Get()); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); + + bool isOk = TryPyCast<ui64>(pyLong.Get(), actual); + 
UNIT_ASSERT(isOk); + UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); } -#endif + { // to float + float f = PyCast<float>(pyLong.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - Y_UNIT_TEST(ImplicitLongCasts) { - TPythonTestEngine engine; - i64 expected = static_cast<i64>(Max<ui32>()) + 10; - TPyObjectPtr pyLong; - #ifdef HAVE_LONG_LONG - pyLong = PyLong_FromLongLong(expected); - #else - pyLong = PyLong_FromLong(expected) - #endif - - { // signed - i64 actual = PyCast<i64>(pyLong.Get()); - UNIT_ASSERT_EQUAL(actual, expected); - - bool isOk = TryPyCast<i64>(pyLong.Get(), actual); - UNIT_ASSERT(isOk); - UNIT_ASSERT_EQUAL(actual, expected); - } - - { // unsigned - ui64 actual = PyCast<ui64>(pyLong.Get()); - UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); - - bool isOk = TryPyCast<ui64>(pyLong.Get(), actual); - UNIT_ASSERT(isOk); - UNIT_ASSERT_EQUAL(actual, static_cast<ui64>(expected)); - } - - { // to float - float f = PyCast<float>(pyLong.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - - bool isOk = TryPyCast<float>(pyLong.Get(), f); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - } - - { // to double - double d = PyCast<double>(pyLong.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); - - bool isOk = TryPyCast<double>(pyLong.Get(), d); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); - } - - // expected overflow - i8 tmp; - UNIT_ASSERT(!TryPyCast<i8>(pyLong.Get(), tmp)); + bool isOk = TryPyCast<float>(pyLong.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); } - Y_UNIT_TEST(HugeLongOverflow) { - TPythonTestEngine engine; - TPyObjectPtr pyLong = PyLong_FromString((char*)"0xfffffffffffffffff", nullptr, 0); - TPyObjectPtr bitLength = PyObject_CallMethod(pyLong.Get(), (char*)"bit_length", (char*)"()"); - UNIT_ASSERT_EQUAL(PyCast<ui32>(bitLength.Get()), 68); // 68 bits number + { // to double + double d = PyCast<double>(pyLong.Get()); + 
UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); - ui64 resUI64; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI64)); + bool isOk = TryPyCast<double>(pyLong.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + } + + // expected overflow + i8 tmp; + UNIT_ASSERT(!TryPyCast<i8>(pyLong.Get(), tmp)); +} - i64 resI64; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI64)); +Y_UNIT_TEST(HugeLongOverflow) { + TPythonTestEngine engine; + TPyObjectPtr pyLong = PyLong_FromString((char*)"0xfffffffffffffffff", nullptr, 0); + TPyObjectPtr bitLength = PyObject_CallMethod(pyLong.Get(), (char*)"bit_length", (char*)"()"); + UNIT_ASSERT_EQUAL(PyCast<ui32>(bitLength.Get()), 68); // 68 bits number - ui32 resUI32; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI32)); + ui64 resUI64; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI64)); - i32 resI32; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI32)); + i64 resI64; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI64)); - ui16 resUI16; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI16)); + ui32 resUI32; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI32)); - i16 resI16; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI16)); + i32 resI32; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI32)); - ui8 resUI8; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI8)); + ui16 resUI16; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI16)); - i8 resI8; - UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI8)); - } + i16 resI16; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI16)); - Y_UNIT_TEST(ImplicitFloatCasts) { - TPythonTestEngine engine; - double expected = 3.14159; - TPyObjectPtr pyFloat = PyFloat_FromDouble(expected); + ui8 resUI8; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resUI8)); - { // to float - float f = PyCast<float>(pyFloat.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); + i8 resI8; + UNIT_ASSERT(!TryPyCast(pyLong.Get(), resI8)); +} - bool isOk = TryPyCast<float>(pyFloat.Get(), f); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - } 
+Y_UNIT_TEST(ImplicitFloatCasts) { + TPythonTestEngine engine; + double expected = 3.14159; + TPyObjectPtr pyFloat = PyFloat_FromDouble(expected); - { // to double - double d = PyCast<double>(pyFloat.Get()); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + { // to float + float f = PyCast<float>(pyFloat.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); - bool isOk = TryPyCast<double>(pyFloat.Get(), d); - UNIT_ASSERT(isOk); - UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); - } + bool isOk = TryPyCast<float>(pyFloat.Get(), f); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(f, expected, 0.000001); } + { // to double + double d = PyCast<double>(pyFloat.Get()); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + + bool isOk = TryPyCast<double>(pyFloat.Get(), d); + UNIT_ASSERT(isOk); + UNIT_ASSERT_DOUBLES_EQUAL(d, expected, 0.000001); + } } + +} // Y_UNIT_TEST_SUITE(TPyNumberTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp index 4cc45f11840..fbcb98c323d 100644 --- a/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_optional_ut.cpp @@ -2,55 +2,55 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(FromPyNone) { - Y_UNIT_TEST(FromPyNone) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TOptional<ui32>>( - "def Test(): return None", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(!value); +Y_UNIT_TEST(FromPyNone) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TOptional<ui32>>( + "def Test(): return None", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(!value); }); - } +} - Y_UNIT_TEST(FromPyObject) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TOptional<ui32>>( - "def Test(): return 42", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT_EQUAL(value.Get<ui32>(), 42); - }); - } 
+Y_UNIT_TEST(FromPyObject) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TOptional<ui32>>( + "def Test(): return 42", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.Get<ui32>(), 42); + }); +} - Y_UNIT_TEST(ToPyNone) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TOptional<char*>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(); - }, - "def Test(value):\n" - " assert value == None\n"); - } +Y_UNIT_TEST(ToPyNone) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TOptional<char*>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(); + }, + "def Test(value):\n" + " assert value == None\n"); +} - Y_UNIT_TEST(ToPyFilledOptional) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TOptional<NUdf::TTuple<NUdf::TUtf8, bool>>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - const TOptionalType* optType = - static_cast<const TOptionalType*>(type); - NUdf::TUnboxedValue* items = nullptr; - auto tuple = vb.NewArray(static_cast<const TTupleType*>(optType->GetItemType())->GetElementsCount(), items); - items[0] = vb.NewString("test string"); - items[1] = NUdf::TUnboxedValuePod(false); - return NUdf::TUnboxedValue(tuple); - }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert len(value) == 2\n" - " assert value == ('test string', False)\n"); - } +Y_UNIT_TEST(ToPyFilledOptional) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TOptional<NUdf::TTuple<NUdf::TUtf8, bool>>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + const TOptionalType* optType = + static_cast<const TOptionalType*>(type); + NUdf::TUnboxedValue* items = nullptr; + auto tuple = vb.NewArray(static_cast<const TTupleType*>(optType->GetItemType())->GetElementsCount(), items); + items[0] = vb.NewString("test string"); + items[1] = 
NUdf::TUnboxedValuePod(false); + return NUdf::TUnboxedValue(tuple); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert len(value) == 2\n" + " assert value == ('test string', False)\n"); } +} // Y_UNIT_TEST_SUITE(FromPyNone) diff --git a/yql/essentials/udfs/common/python/bindings/py_ptr.h b/yql/essentials/udfs/common/python/bindings/py_ptr.h index 704629b86b7..66f70cea01c 100644 --- a/yql/essentials/udfs/common/python/bindings/py_ptr.h +++ b/yql/essentials/udfs/common/python/bindings/py_ptr.h @@ -7,8 +7,7 @@ namespace NPython { template <typename T> -class TPyPtrOps -{ +class TPyPtrOps { public: static inline void Ref(T* t) { Y_ASSERT(t); @@ -26,9 +25,7 @@ public: } }; -class TPyObjectPtr: - public NYql::NUdf::TRefCountedPtr<PyObject, TPyPtrOps<PyObject>> -{ +class TPyObjectPtr: public NYql::NUdf::TRefCountedPtr<PyObject, TPyPtrOps<PyObject>> { using TSelf = NYql::NUdf::TRefCountedPtr<PyObject, TPyPtrOps<PyObject>>; public: @@ -37,7 +34,7 @@ public: } inline TPyObjectPtr(PyObject* p) - : TSelf(p, STEAL_REF) // do not increment refcounter by default + : TSelf(p, STEAL_REF) // do not increment refcounter by default { } @@ -66,4 +63,4 @@ public: void Reset(PyObject* p) = delete; }; -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_resource.cpp b/yql/essentials/udfs/common/python/bindings/py_resource.cpp index 050eae0c8ce..33f446aff3b 100644 --- a/yql/essentials/udfs/common/python/bindings/py_resource.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_resource.cpp @@ -21,11 +21,11 @@ void DestroyResourceCapsule(PyObject* obj) { ///////////////////////////////////////////////////////////////////////////// // TResource ///////////////////////////////////////////////////////////////////////////// -class TResource final: public NUdf::TBoxedValue -{ +class TResource final: public NUdf::TBoxedValue { public: TResource(PyObject* value, const NUdf::TStringRef& tag) - : Value_(value, 
TPyObjectPtr::ADD_REF), Tag_(tag) + : Value_(value, TPyObjectPtr::ADD_REF) + , Tag_(tag) { } @@ -52,11 +52,10 @@ private: const char ResourceCapsuleName[] = "YqlResourceCapsule"; TPyObjectPtr ToPyResource( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) { - #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 15) NUdf::TResourceTypeInspector inpector(*ctx->PyCtx->TypeInfoHelper, type); auto tag = inpector.GetTag(); @@ -77,10 +76,9 @@ TPyObjectPtr ToPyResource( } NUdf::TUnboxedValue FromPyResource( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, PyObject* value) { - #if UDF_ABI_COMPATIBILITY_VERSION_CURRENT >= UDF_ABI_COMPATIBILITY_VERSION(2, 15) NUdf::TResourceTypeInspector inpector(*ctx->PyCtx->TypeInfoHelper, type); auto tag = inpector.GetTag(); @@ -93,14 +91,14 @@ NUdf::TUnboxedValue FromPyResource( auto valueTag = resource->GetResourceTag(); if (valueTag != tag) { throw yexception() << "Mismatch of resource tag, expected: " - << tag << ", got: " << valueTag; + << tag << ", got: " << valueTag; } return *resource; } - throw yexception() << "Python object " << PyObjectRepr(value) \ - << " is not a valid resource with tag " << tag; + throw yexception() << "Python object " << PyObjectRepr(value) + << " is not a valid resource with tag " << tag; #else Y_UNUSED(type); if (PyCapsule_CheckExact(value)) { @@ -113,4 +111,4 @@ NUdf::TUnboxedValue FromPyResource( #endif } -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_resource.h b/yql/essentials/udfs/common/python/bindings/py_resource.h index b46b84c84b1..48def547e70 100644 --- a/yql/essentials/udfs/common/python/bindings/py_resource.h +++ b/yql/essentials/udfs/common/python/bindings/py_resource.h 
@@ -8,13 +8,13 @@ namespace NPython { extern const char ResourceCapsuleName[]; TPyObjectPtr ToPyResource( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyResource( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - PyObject* value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* value); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp index 25b43cbf6ae..095e642acdb 100644 --- a/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_resource_ut.cpp @@ -2,7 +2,6 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; extern const char SimpleDataTag[] = "SimpleData"; @@ -15,67 +14,71 @@ struct TSimpleData { TSimpleData(const TString& name, ui32 age) : Name(name) , Age(age) - {} + { + } }; using TSimpleDataResource = NUdf::TBoxedResource<TSimpleData, SimpleDataTag>; Y_UNIT_TEST_SUITE(TPyResourceTest) { - Y_UNIT_TEST(MkqlObject) { - TPythonTestEngine engine; - TPyObjectPtr pyValue = engine.ToPython<NUdf::TResource<SimpleDataTag>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return NUdf::TUnboxedValuePod(new TSimpleDataResource("Jamel", 99)); - }, - "import yql\n" - "\n" - "def Test(value):\n" - " assert str(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n" - " assert repr(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n" - " assert type(value).__name__ == 'PyCapsule'\n" - " return value\n"); - UNIT_ASSERT(!!pyValue); +Y_UNIT_TEST(MkqlObject) { + TPythonTestEngine engine; + TPyObjectPtr pyValue 
= engine.ToPython<NUdf::TResource<SimpleDataTag>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValuePod(new TSimpleDataResource("Jamel", 99)); + }, + "import yql\n" + "\n" + "def Test(value):\n" + " assert str(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n" + " assert repr(value).startswith('<capsule object \"YqlResourceCapsule\" at ')\n" + " assert type(value).__name__ == 'PyCapsule'\n" + " return value\n"); + UNIT_ASSERT(!!pyValue); - engine.ToMiniKQLWithArg<NUdf::TResource<SimpleDataTag>>( - pyValue.Get(), - "import yql\n" - "\n" - "def Test(value):\n" - " return value\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value);; - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_STRINGS_EQUAL(value.GetResourceTag(), SimpleDataTag); - auto simpleData = - reinterpret_cast<TSimpleData*>(value.GetResource()); - UNIT_ASSERT_EQUAL(simpleData->Age, 99); - UNIT_ASSERT_STRINGS_EQUAL(simpleData->Name, "Jamel"); - }); - } + engine.ToMiniKQLWithArg<NUdf::TResource<SimpleDataTag>>( + pyValue.Get(), + "import yql\n" + "\n" + "def Test(value):\n" + " return value\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + ; + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_STRINGS_EQUAL(value.GetResourceTag(), SimpleDataTag); + auto simpleData = + reinterpret_cast<TSimpleData*>(value.GetResource()); + UNIT_ASSERT_EQUAL(simpleData->Age, 99); + UNIT_ASSERT_STRINGS_EQUAL(simpleData->Name, "Jamel"); + }); +} - Y_UNIT_TEST(PythonObject) { - TPythonTestEngine engine; - NUdf::TUnboxedValue mkqlValue = engine.FromPython<NUdf::TResource<PythonTestTag>>( - "class CustomStruct:\n" - " def __init__(self, name, age):\n" - " self.name = name\n" - " self.age = age\n" - "\n" - "def Test():\n" - " return CustomStruct('Jamel', 97)\n"); - UNIT_ASSERT(mkqlValue); - UNIT_ASSERT_STRINGS_EQUAL(mkqlValue.GetResourceTag(), PythonTestTag); +Y_UNIT_TEST(PythonObject) { + TPythonTestEngine engine; + 
NUdf::TUnboxedValue mkqlValue = engine.FromPython<NUdf::TResource<PythonTestTag>>( + "class CustomStruct:\n" + " def __init__(self, name, age):\n" + " self.name = name\n" + " self.age = age\n" + "\n" + "def Test():\n" + " return CustomStruct('Jamel', 97)\n"); + UNIT_ASSERT(mkqlValue); + UNIT_ASSERT_STRINGS_EQUAL(mkqlValue.GetResourceTag(), PythonTestTag); - TPyObjectPtr pyValue = engine.ToPython<NUdf::TResource<PythonTestTag>>( - [mkqlValue](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - return mkqlValue; - }, - "def Test(value):\n" - " assert isinstance(value, CustomStruct)\n" - " assert value.age, 97\n" - " assert value.name, 'Jamel'\n"); - UNIT_ASSERT(!!pyValue); - } + TPyObjectPtr pyValue = engine.ToPython<NUdf::TResource<PythonTestTag>>( + [mkqlValue](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return mkqlValue; + }, + "def Test(value):\n" + " assert isinstance(value, CustomStruct)\n" + " assert value.age, 97\n" + " assert value.name, 'Jamel'\n"); + UNIT_ASSERT(!!pyValue); } +} // Y_UNIT_TEST_SUITE(TPyResourceTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_stream.cpp b/yql/essentials/udfs/common/python/bindings/py_stream.cpp index 24f7e0eb45d..130fc67b83e 100644 --- a/yql/essentials/udfs/common/python/bindings/py_stream.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_stream.cpp @@ -41,88 +41,88 @@ struct TPyStream { } static PyObject* New( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - NUdf::IBoxedValuePtr value); + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + NUdf::IBoxedValuePtr value); static PyObject* Next(PyObject* self); }; #if PY_MAJOR_VERSION >= 3 -#define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) + #define Py_TPFLAGS_HAVE_ITER 0 // NOLINT(readability-identifier-naming) #endif PyTypeObject PyStreamType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.TStream"), 
- INIT_MEMBER(tp_basicsize , sizeof(TPyStream)), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , TPyStream::Dealloc), + INIT_MEMBER(tp_name, "yql.TStream"), + INIT_MEMBER(tp_basicsize, sizeof(TPyStream)), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, TPyStream::Dealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , TPyStream::Repr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , Py_TPFLAGS_HAVE_ITER), - INIT_MEMBER(tp_doc , "yql.TStream object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , PyObject_SelfIter), - INIT_MEMBER(tp_iternext , TPyStream::Next), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - 
INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, TPyStream::Repr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, Py_TPFLAGS_HAVE_ITER), + INIT_MEMBER(tp_doc, "yql.TStream object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, PyObject_SelfIter), + INIT_MEMBER(tp_iternext, TPyStream::Next), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; PyObject* TPyStream::New( - 
const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - NUdf::IBoxedValuePtr value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + NUdf::IBoxedValuePtr value) { TPyStream* stream = new TPyStream; PyObject_INIT(stream, &PyStreamType); @@ -143,18 +143,19 @@ PyObject* TPyStream::Next(PyObject* self) { auto status = NUdf::TBoxedValueAccessor::Fetch(*stream->Value.Get(), item); switch (status) { - case NUdf::EFetchStatus::Ok: - return ToPyObject(stream->CastCtx, stream->ItemType, item) + case NUdf::EFetchStatus::Ok: + return ToPyObject(stream->CastCtx, stream->ItemType, item) .Release(); - case NUdf::EFetchStatus::Finish: - return nullptr; - case NUdf::EFetchStatus::Yield: - PyErr_SetNone(PyYieldIterationException); - return nullptr; - default: - Y_ABORT("Unknown stream status"); + case NUdf::EFetchStatus::Finish: + return nullptr; + case NUdf::EFetchStatus::Yield: + PyErr_SetNone(PyYieldIterationException); + return nullptr; + default: + Y_ABORT("Unknown stream status"); } - } PY_CATCH(nullptr) + } + PY_CATCH(nullptr) } ////////////////////////////////////////////////////////////////////////////// @@ -163,13 +164,13 @@ PyObject* TPyStream::Next(PyObject* self) { class TStreamOverPyIter final: public NUdf::TBoxedValue { public: TStreamOverPyIter( - TPyCastContext::TPtr castCtx, - const NUdf::TType* itemType, - TPyObjectPtr pyIter, - TPyObjectPtr pyIterable, - TPyObjectPtr pyGeneratorCallable, - TPyObjectPtr pyGeneratorCallableClosure, - TPyObjectPtr pyGeneratorCallableArgs) + TPyCastContext::TPtr castCtx, + const NUdf::TType* itemType, + TPyObjectPtr pyIter, + TPyObjectPtr pyIterable, + TPyObjectPtr pyGeneratorCallable, + TPyObjectPtr pyGeneratorCallableClosure, + TPyObjectPtr pyGeneratorCallableArgs) : CastCtx_(std::move(castCtx)) , ItemType_(itemType) , PyIter_(std::move(pyIter)) @@ -215,7 +216,9 @@ private: PyIter_.Reset(); TPyObjectPtr result(PyObject_CallObject(PyGeneratorCallable_.Get(), PyGeneratorCallableArgs_.Get())); if (!result) 
{ - UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" << GetLastErrorAsString()).c_str()); + UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << "Failed to execute:\n" + << GetLastErrorAsString()) + .c_str()); } if (PyGen_Check(result.Get())) { @@ -244,8 +247,7 @@ private: } return NUdf::EFetchStatus::Finish; - } - catch (const yexception& e) { + } catch (const yexception& e) { UdfTerminate((TStringBuilder() << CastCtx_->PyCtx->Pos << e.what()).c_str()); } } @@ -260,14 +262,13 @@ private: TPyObjectPtr PyGeneratorCallableArgs_; }; - ////////////////////////////////////////////////////////////////////////////// // public functions ////////////////////////////////////////////////////////////////////////////// TPyObjectPtr ToPyStream( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value) { return TPyStream::New(castCtx, type, value.AsBoxed()); } @@ -278,8 +279,7 @@ NKikimr::NUdf::TUnboxedValue FromPyStream( const TPyObjectPtr& value, const TPyObjectPtr& originalCallable, const TPyObjectPtr& originalCallableClosure, - const TPyObjectPtr& originalCallableArgs -) + const TPyObjectPtr& originalCallableArgs) { const NUdf::TStreamTypeInspector inspector(*castCtx->PyCtx->TypeInfoHelper, type); const NUdf::TType* itemType = inspector.GetItemType(); @@ -290,7 +290,7 @@ NKikimr::NUdf::TUnboxedValue FromPyStream( UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << GetLastErrorAsString()).c_str()); } return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr, - originalCallable, originalCallableClosure, originalCallableArgs)); + originalCallable, originalCallableClosure, originalCallableArgs)); } if (PyIter_Check(value.Get()) @@ -301,7 +301,7 @@ NKikimr::NUdf::TUnboxedValue FromPyStream( ) { TPyObjectPtr 
iter(value.Get(), TPyObjectPtr::ADD_REF); return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr, - originalCallable, originalCallableClosure, originalCallableArgs)); + originalCallable, originalCallableClosure, originalCallableArgs)); } // assume that this function will returns generator @@ -324,7 +324,7 @@ NKikimr::NUdf::TUnboxedValue FromPyStream( } return NUdf::TUnboxedValuePod(new TStreamOverPyIter(castCtx, itemType, std::move(iter), nullptr, - originalCallable ? value : nullptr, originalCallable ? callableClosure : nullptr, nullptr)); + originalCallable ? value : nullptr, originalCallable ? callableClosure : nullptr, nullptr)); } // must be after checking for callable @@ -337,7 +337,9 @@ NKikimr::NUdf::TUnboxedValue FromPyStream( } UdfTerminate((TStringBuilder() << castCtx->PyCtx->Pos << "Expected iterator, generator, generator factory, " - "or iterable object, but got " << PyObjectRepr(value.Get())).c_str()); + "or iterable object, but got " + << PyObjectRepr(value.Get())) + .c_str()); } } // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_stream.h b/yql/essentials/udfs/common/python/bindings/py_stream.h index f677e23930d..ab28b846433 100644 --- a/yql/essentials/udfs/common/python/bindings/py_stream.h +++ b/yql/essentials/udfs/common/python/bindings/py_stream.h @@ -9,16 +9,16 @@ extern PyTypeObject PyStreamType; extern PyObject* PyYieldIterationException; TPyObjectPtr ToPyStream( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyStream( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - const TPyObjectPtr& value, - const TPyObjectPtr& originalCallable, - const TPyObjectPtr& originalCallableClosure, - const TPyObjectPtr& 
originalCallableArgs); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const TPyObjectPtr& value, + const TPyObjectPtr& originalCallable, + const TPyObjectPtr& originalCallableClosure, + const TPyObjectPtr& originalCallableArgs); } // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp index 4a36f7b8f36..61c0fe5caf3 100644 --- a/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_stream_ut.cpp @@ -2,207 +2,206 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyStreamTest) { - void Ui32StreamValidator(const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - - NUdf::TUnboxedValue item; - ui32 expected = 0; - NUdf::EFetchStatus status; - - while (true) { - status = value.Fetch(item); - if (status != NUdf::EFetchStatus::Ok) break; - - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, expected); - expected++; +void Ui32StreamValidator(const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + + NUdf::TUnboxedValue item; + ui32 expected = 0; + NUdf::EFetchStatus status; + + while (true) { + status = value.Fetch(item); + if (status != NUdf::EFetchStatus::Ok) { + break; } - UNIT_ASSERT_EQUAL(status, NUdf::EFetchStatus::Finish); - UNIT_ASSERT_EQUAL(expected, 10); + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; } - struct TTestStream final: NUdf::TBoxedValue { - TTestStream(ui32 maxValue, ui32 yieldOn = Max<ui32>()) - : Current_(0) - , YieldOn_(yieldOn) - , MaxValue_(maxValue) - { - } - - private: - NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override { - if (Current_ == YieldOn_) { - return NUdf::EFetchStatus::Yield; - } else if (Current_ >= MaxValue_) { - return NUdf::EFetchStatus::Finish; - } - result = 
NUdf::TUnboxedValuePod(Current_++); - return NUdf::EFetchStatus::Ok; - } + UNIT_ASSERT_EQUAL(status, NUdf::EFetchStatus::Finish); + UNIT_ASSERT_EQUAL(expected, 10); +} - ui32 Current_, YieldOn_, MaxValue_; - }; - - Y_UNIT_TEST(FromGenerator) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TStream<ui32>>( - "def Test():\n" - " num = 0\n" - " while num < 10:\n" - " yield num\n" - " num += 1\n", - Ui32StreamValidator); +struct TTestStream final: NUdf::TBoxedValue { + TTestStream(ui32 maxValue, ui32 yieldOn = Max<ui32>()) + : Current_(0) + , YieldOn_(yieldOn) + , MaxValue_(maxValue) + { } - Y_UNIT_TEST(FromGeneratorFactory) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TStream<ui32>>( - "def first_10():\n" - " num = 0\n" - " while num < 10:\n" - " yield num\n" - " num += 1\n" - "def Test():\n" - " return first_10\n", - Ui32StreamValidator); +private: + NUdf::EFetchStatus Fetch(NUdf::TUnboxedValue& result) override { + if (Current_ == YieldOn_) { + return NUdf::EFetchStatus::Yield; + } else if (Current_ >= MaxValue_) { + return NUdf::EFetchStatus::Finish; + } + result = NUdf::TUnboxedValuePod(Current_++); + return NUdf::EFetchStatus::Ok; } - Y_UNIT_TEST(FromIterator) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TStream<ui32>>( - "def Test():\n" - " return iter(range(10))\n", - Ui32StreamValidator); - } + ui32 Current_, YieldOn_, MaxValue_; +}; + +Y_UNIT_TEST(FromGenerator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" + " num = 0\n" + " while num < 10:\n" + " yield num\n" + " num += 1\n", + Ui32StreamValidator); +} + +Y_UNIT_TEST(FromGeneratorFactory) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def first_10():\n" + " num = 0\n" + " while num < 10:\n" + " yield num\n" + " num += 1\n" + "def Test():\n" + " return first_10\n", + Ui32StreamValidator); +} + +Y_UNIT_TEST(FromIterator) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" + " 
return iter(range(10))\n", + Ui32StreamValidator); +} - Y_UNIT_TEST(FromIterable) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TStream<ui32>>( - "def Test():\n" +Y_UNIT_TEST(FromIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" #if PY_MAJOR_VERSION >= 3 - " return range(10)\n", + " return range(10)\n", #else - " return xrange(10)\n", + " return xrange(10)\n", #endif - Ui32StreamValidator); - } + Ui32StreamValidator); +} - Y_UNIT_TEST(FromCustomIterable) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TStream<ui32>>( - "class T:\n" - " def __init__(self, l):\n" - " self.l = l\n" - " def __len__(self):\n" - " return len(self.l)\n" - " def __nonzero__(self):\n" - " return bool(self.l)\n" - " def __iter__(self):\n" - " return iter(self.l)\n" - "\n" - "def Test():\n" - " return T(list(range(10)))\n", - Ui32StreamValidator); - } +Y_UNIT_TEST(FromCustomIterable) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "class T:\n" + " def __init__(self, l):\n" + " self.l = l\n" + " def __len__(self):\n" + " return len(self.l)\n" + " def __nonzero__(self):\n" + " return bool(self.l)\n" + " def __iter__(self):\n" + " return iter(self.l)\n" + "\n" + "def Test():\n" + " return T(list(range(10)))\n", + Ui32StreamValidator); +} - Y_UNIT_TEST(FromList) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TStream<ui32>>( - "def Test():\n" - " return [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", - Ui32StreamValidator); - } +Y_UNIT_TEST(FromList) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "def Test():\n" + " return [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n", + Ui32StreamValidator); +} - Y_UNIT_TEST(ToPython) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TStream<ui32>>( - [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { - return NUdf::TUnboxedValuePod(new TTestStream(10)); - }, - "def Test(value):\n" - " import yql\n" - " assert repr(value) == '<yql.TStream>'\n" - " 
assert type(value).__name__ == 'TStream'\n" - " assert list(value) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n"); - } +Y_UNIT_TEST(ToPython) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TStream<ui32>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + return NUdf::TUnboxedValuePod(new TTestStream(10)); + }, + "def Test(value):\n" + " import yql\n" + " assert repr(value) == '<yql.TStream>'\n" + " assert type(value).__name__ == 'TStream'\n" + " assert list(value) == [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]\n"); +} - Y_UNIT_TEST(ToPythonAndBackAsIs) { - TPythonTestEngine engine; - engine.ToPythonAndBack<NUdf::TStream<ui32>>( - [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { - return NUdf::TUnboxedValuePod(new TTestStream(10)); - }, - "def Test(value): return value", - Ui32StreamValidator - ); - } +Y_UNIT_TEST(ToPythonAndBackAsIs) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TStream<ui32>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + return NUdf::TUnboxedValuePod(new TTestStream(10)); + }, + "def Test(value): return value", + Ui32StreamValidator); +} - Y_UNIT_TEST(YieldingStreamFromPython) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TStream<ui32>>( - "import yql\n" - "def Test():\n" - " yield 0\n" - " yield 1\n" - " yield yql.TYieldIteration\n" - " yield 2\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - - NUdf::TUnboxedValue item; - ui32 expected = 0; - NUdf::EFetchStatus status; - - while ((status = value.Fetch(item)) == NUdf::EFetchStatus::Ok) { - ui32 actual = item.Get<ui32>(); - UNIT_ASSERT_EQUAL(actual, expected); - expected++; - } - - UNIT_ASSERT_EQUAL(status, NUdf::EFetchStatus::Yield); - UNIT_ASSERT_EQUAL(expected, 2); - }); - } +Y_UNIT_TEST(YieldingStreamFromPython) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TStream<ui32>>( + "import yql\n" + "def Test():\n" + " yield 0\n" + " yield 1\n" + " yield yql.TYieldIteration\n" + 
" yield 2\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + + NUdf::TUnboxedValue item; + ui32 expected = 0; + NUdf::EFetchStatus status; + + while ((status = value.Fetch(item)) == NUdf::EFetchStatus::Ok) { + ui32 actual = item.Get<ui32>(); + UNIT_ASSERT_EQUAL(actual, expected); + expected++; + } - Y_UNIT_TEST(YieldingStreamFromCpp) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TStream<ui32>>( - [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { - return NUdf::TUnboxedValuePod(new TTestStream(5, 2)); - }, - "import yql\n" - "def Test(value):\n" - " assert repr(value) == '<yql.TStream>'\n" - " assert type(value).__name__ == 'TStream'\n" - " assert next(value) == 0\n" - " assert next(value) == 1\n" - " try:\n" - " next(value)\n" - " except yql.TYieldIteration:\n" - " pass\n" - " else:\n" - " assert False, 'Expected yql.TYieldIteration'\n"); - } + UNIT_ASSERT_EQUAL(status, NUdf::EFetchStatus::Yield); + UNIT_ASSERT_EQUAL(expected, 2); + }); +} - Y_UNIT_TEST(FromCppListIterator) { - TPythonTestEngine engine; - engine.ToPythonAndBack<NUdf::TListType<ui32>, NUdf::TStream<ui32>>( - [](const TType*, const NUdf::IValueBuilder& vb) { - NUdf::TUnboxedValue *items = nullptr; - const auto a = vb.NewArray(10U, items); - ui32 i = 0U; - std::generate_n(items, 10U, [&i](){ return NUdf::TUnboxedValuePod(i++); }); - return a; - }, - "def Test(value): return iter(value)", - Ui32StreamValidator - ); - } +Y_UNIT_TEST(YieldingStreamFromCpp) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TStream<ui32>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + return NUdf::TUnboxedValuePod(new TTestStream(5, 2)); + }, + "import yql\n" + "def Test(value):\n" + " assert repr(value) == '<yql.TStream>'\n" + " assert type(value).__name__ == 'TStream'\n" + " assert next(value) == 0\n" + " assert next(value) == 1\n" + " try:\n" + " next(value)\n" + " except yql.TYieldIteration:\n" + " pass\n" + " else:\n" + 
" assert False, 'Expected yql.TYieldIteration'\n"); +} + +Y_UNIT_TEST(FromCppListIterator) { + TPythonTestEngine engine; + engine.ToPythonAndBack<NUdf::TListType<ui32>, NUdf::TStream<ui32>>( + [](const TType*, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + const auto a = vb.NewArray(10U, items); + ui32 i = 0U; + std::generate_n(items, 10U, [&i]() { return NUdf::TUnboxedValuePod(i++); }); + return a; + }, + "def Test(value): return iter(value)", + Ui32StreamValidator); } +} // Y_UNIT_TEST_SUITE(TPyStreamTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp index b1f5a13786b..bfe107e44b5 100644 --- a/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_string_ut.cpp @@ -5,94 +5,94 @@ using namespace NPython; Y_UNIT_TEST_SUITE(TPyStringTest) { - template <typename TStringType> - void TestStringCasts() { - TStringType testStr1(TStringBuf("test string")); - TStringBuf strBuf1 = testStr1; - TPyObjectPtr str1 = PyBytes_FromString(strBuf1.data()); - const auto value = PyCast<TStringType>(str1.Get()); - - UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); - - TStringType testStr2(TStringBuf("another test string")); - TStringBuf strBuf2 = testStr2; - TPyObjectPtr str2 = PyCast<TStringType>(testStr2); - - Py_ssize_t size = 0U; - char* buf = nullptr; - const auto rc = PyBytes_AsStringAndSize(str2.Get(), &buf, &size); - UNIT_ASSERT(rc >= 0); - UNIT_ASSERT(buf != nullptr); - UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); - UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); - } - - template <typename TStringType> - void TestBinaryStringCasts() { - TStringType testStr1(TStringBuf("\xa0\xa1"sv)); - TStringBuf strBuf1 = testStr1; - TPyObjectPtr str1 = PyBytes_FromString(strBuf1.data()); - const auto value = PyCast<TStringType>(str1.Get()); - - UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); - - TStringType 
testStr2(TStringBuf("\xf0\x90\x28\xbc"sv)); - TStringBuf strBuf2 = testStr2; - TPyObjectPtr str2 = PyCast<TStringType>(testStr2); - - Py_ssize_t size = 0U; - char* buf = nullptr; - const auto rc = PyBytes_AsStringAndSize(str2.Get(), &buf, &size); - UNIT_ASSERT(rc >= 0); - UNIT_ASSERT(buf != nullptr); - UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); - UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); - } - - template <typename TStringType> - void TestUtf8StringCasts() { - const TStringType testStr1(TStringBuf("тестовая строка")); - TStringBuf strBuf1 = testStr1; - const TPyObjectPtr str1 = PyUnicode_FromString(strBuf1.data()); - const TPyObjectPtr utf8 = PyUnicode_AsUTF8String(str1.Get()); - const auto value = PyCast<TStringType>(utf8.Get()); - UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); - - const TStringType testStr2(TStringBuf("еще одна тестовая строка")); - TStringBuf strBuf2 = testStr2; - const auto str2 = ToPyUnicode<TStringType>(testStr2); - - UNIT_ASSERT(PyUnicode_Check(str2.Get())); - - Py_ssize_t size = 0U; +template <typename TStringType> +void TestStringCasts() { + TStringType testStr1(TStringBuf("test string")); + TStringBuf strBuf1 = testStr1; + TPyObjectPtr str1 = PyBytes_FromString(strBuf1.data()); + const auto value = PyCast<TStringType>(str1.Get()); + + UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); + + TStringType testStr2(TStringBuf("another test string")); + TStringBuf strBuf2 = testStr2; + TPyObjectPtr str2 = PyCast<TStringType>(testStr2); + + Py_ssize_t size = 0U; + char* buf = nullptr; + const auto rc = PyBytes_AsStringAndSize(str2.Get(), &buf, &size); + UNIT_ASSERT(rc >= 0); + UNIT_ASSERT(buf != nullptr); + UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); + UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); +} + +template <typename TStringType> +void TestBinaryStringCasts() { + TStringType testStr1(TStringBuf("\xa0\xa1"sv)); + TStringBuf strBuf1 = testStr1; + TPyObjectPtr str1 = PyBytes_FromString(strBuf1.data()); + const auto value = 
PyCast<TStringType>(str1.Get()); + + UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); + + TStringType testStr2(TStringBuf("\xf0\x90\x28\xbc"sv)); + TStringBuf strBuf2 = testStr2; + TPyObjectPtr str2 = PyCast<TStringType>(testStr2); + + Py_ssize_t size = 0U; + char* buf = nullptr; + const auto rc = PyBytes_AsStringAndSize(str2.Get(), &buf, &size); + UNIT_ASSERT(rc >= 0); + UNIT_ASSERT(buf != nullptr); + UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); + UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); +} + +template <typename TStringType> +void TestUtf8StringCasts() { + const TStringType testStr1(TStringBuf("тестовая строка")); + TStringBuf strBuf1 = testStr1; + const TPyObjectPtr str1 = PyUnicode_FromString(strBuf1.data()); + const TPyObjectPtr utf8 = PyUnicode_AsUTF8String(str1.Get()); + const auto value = PyCast<TStringType>(utf8.Get()); + UNIT_ASSERT_STRINGS_EQUAL(value, testStr1); + + const TStringType testStr2(TStringBuf("еще одна тестовая строка")); + TStringBuf strBuf2 = testStr2; + const auto str2 = ToPyUnicode<TStringType>(testStr2); + + UNIT_ASSERT(PyUnicode_Check(str2.Get())); + + Py_ssize_t size = 0U; #if PY_MAJOR_VERSION >= 3 - const auto buf = PyUnicode_AsUTF8AndSize(str2.Get(), &size); + const auto buf = PyUnicode_AsUTF8AndSize(str2.Get(), &size); #else - char* buf = nullptr; - const TPyObjectPtr pyUtf8Str = PyUnicode_AsUTF8String(str2.Get()); - const auto rc = PyBytes_AsStringAndSize(pyUtf8Str.Get(), &buf, &size); - UNIT_ASSERT(rc >= 0); + char* buf = nullptr; + const TPyObjectPtr pyUtf8Str = PyUnicode_AsUTF8String(str2.Get()); + const auto rc = PyBytes_AsStringAndSize(pyUtf8Str.Get(), &buf, &size); + UNIT_ASSERT(rc >= 0); #endif - UNIT_ASSERT(buf != nullptr); - UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); - UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); - } - - Y_UNIT_TEST(Simple) { - TestStringCasts<TString>(); - TestStringCasts<TStringBuf>(); - TestStringCasts<NUdf::TStringRef>(); - } - - Y_UNIT_TEST(Utf8) { - 
TestUtf8StringCasts<TString>(); - TestUtf8StringCasts<TStringBuf>(); - TestUtf8StringCasts<NUdf::TStringRef>(); - } - - Y_UNIT_TEST(Binary) { - TestBinaryStringCasts<TString>(); - TestBinaryStringCasts<TStringBuf>(); - TestBinaryStringCasts<NUdf::TStringRef>(); - } + UNIT_ASSERT(buf != nullptr); + UNIT_ASSERT_EQUAL(static_cast<size_t>(size), strBuf2.size()); + UNIT_ASSERT_STRINGS_EQUAL(buf, testStr2); +} + +Y_UNIT_TEST(Simple) { + TestStringCasts<TString>(); + TestStringCasts<TStringBuf>(); + TestStringCasts<NUdf::TStringRef>(); +} + +Y_UNIT_TEST(Utf8) { + TestUtf8StringCasts<TString>(); + TestUtf8StringCasts<TStringBuf>(); + TestUtf8StringCasts<NUdf::TStringRef>(); +} + +Y_UNIT_TEST(Binary) { + TestBinaryStringCasts<TString>(); + TestBinaryStringCasts<TStringBuf>(); + TestBinaryStringCasts<NUdf::TStringRef>(); } +} // Y_UNIT_TEST_SUITE(TPyStringTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_struct.cpp b/yql/essentials/udfs/common/python/bindings/py_struct.cpp index bcfd86351fe..745f7feae19 100644 --- a/yql/essentials/udfs/common/python/bindings/py_struct.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_struct.cpp @@ -34,8 +34,7 @@ TPyObjectPtr CreateNewStrucInstance(const TPyCastContext::TPtr& ctx, const NKiki INIT_MEMBER(name, "yql.Struct"), INIT_MEMBER(doc, nullptr), INIT_MEMBER(fields, fields.data()), - INIT_MEMBER(n_in_sequence, int(inspector.GetMembersCount())) - }; + INIT_MEMBER(n_in_sequence, int(inspector.GetMembersCount()))}; const auto typeObject = new PyTypeObject(); if (0 > PyStructSequence_InitType2(typeObject, &desc)) { @@ -48,14 +47,14 @@ TPyObjectPtr CreateNewStrucInstance(const TPyCastContext::TPtr& ctx, const NKiki const TPyObjectPtr object = PyStructSequence_New(it.first->second.GetAs<PyTypeObject>()); #else const auto className = TString("yql.Struct_") += ToString(ctx->StructTypes.size()); - PyObject* metaclass = (PyObject *) &PyClass_Type; + PyObject* metaclass = (PyObject*)&PyClass_Type; const TPyObjectPtr name = 
PyRepr(TStringBuf(className)); const TPyObjectPtr bases = PyTuple_New(0); const TPyObjectPtr dict = PyDict_New(); TPyObjectPtr newClass = PyObject_CallFunctionObjArgs( - metaclass, name.Get(), bases.Get(), dict.Get(), - nullptr); + metaclass, name.Get(), bases.Get(), dict.Get(), + nullptr); if (!newClass) { throw yexception() << "can't create new type: " << GetLastErrorAsString(); } @@ -120,7 +119,7 @@ TPyObjectPtr GetAttrFromPyObject(PyObject* v, TStringBuf name) return PyObject_GetAttr(v, w.Get()); } -} +} // namespace TPyObjectPtr ToPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TType* type, const NUdf::TUnboxedValuePod& value) { @@ -138,8 +137,8 @@ TPyObjectPtr ToPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TType* type const auto item = ToPyObject(ctx, inspector.GetMemberType(i), *ptr++); if (0 > PyObject_SetAttrString(object.Get(), name.data(), item.Get())) { throw yexception() - << "Can't set attr '" << name << "' to python object: " - << GetLastErrorAsString(); + << "Can't set attr '" << name << "' to python object: " + << GetLastErrorAsString(); } #endif } @@ -153,8 +152,8 @@ TPyObjectPtr ToPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TType* type const auto item = ToPyObject(ctx, inspector.GetMemberType(i), value.GetElement(i)); if (0 > PyObject_SetAttrString(object.Get(), name.data(), item.Get())) { throw yexception() - << "Can't set attr '" << name << "' to python object: " - << GetLastErrorAsString(); + << "Can't set attr '" << name << "' to python object: " + << GetLastErrorAsString(); } #endif } @@ -195,7 +194,8 @@ NUdf::TUnboxedValue FromPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TT } if (!errors.empty()) { - throw yexception() << "Failed to convert dict to struct\n" << JoinSeq("\n", errors) << "\nDict repr: " << PyObjectRepr(value); + throw yexception() << "Failed to convert dict to struct\n" + << JoinSeq("\n", errors) << "\nDict repr: " << PyObjectRepr(value); } } else { for (ui32 i = 0; i < membersCount; i++) { 
@@ -222,11 +222,12 @@ NUdf::TUnboxedValue FromPyStruct(const TPyCastContext::TPtr& ctx, const NUdf::TT } if (!errors.empty()) { - throw yexception() << "Failed to convert object to struct\n" << JoinSeq("\n", errors) << "\nObject repr: " << PyObjectRepr(value); + throw yexception() << "Failed to convert object to struct\n" + << JoinSeq("\n", errors) << "\nObject repr: " << PyObjectRepr(value); } } return mkqlStruct; } -} +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_struct.h b/yql/essentials/udfs/common/python/bindings/py_struct.h index 79a380283fb..6f2bdb917ed 100644 --- a/yql/essentials/udfs/common/python/bindings/py_struct.h +++ b/yql/essentials/udfs/common/python/bindings/py_struct.h @@ -6,12 +6,12 @@ namespace NPython { TPyObjectPtr ToPyStruct( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyStruct( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, PyObject* value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, PyObject* value); } // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp index b8662907907..5b5c1bfea6b 100644 --- a/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_struct_ut.cpp @@ -2,329 +2,287 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyStructTest) { - Y_UNIT_TEST(FromPyObject) { - TPythonTestEngine engine; - - ui32 ageIdx = 0, nameIdx = 0; - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("age", &ageIdx) - .AddField<char*>("name", &nameIdx) - .Build(); - - engine.ToMiniKQL(personType, - "class 
Person:\n" - " def __init__(self, age, name):\n" - " self.age = age\n" - " self.name = name\n" - "\n" - "def Test():\n" - " return Person(99, 'Jamel')\n", - [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto name = value.GetElement(nameIdx); - UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel"); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); - }); - } - - Y_UNIT_TEST(FromPyObjectMissingOptionalField) { - TPythonTestEngine engine; - - ui32 ageIdx = 0, nameIdx = 0; - auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("age", &ageIdx) - .AddField("name", optionalStringType, &nameIdx) - .Build(); - - engine.ToMiniKQL(personType, - "class Person:\n" - " def __init__(self, age):\n" - " self.age = age\n" - "\n" - "def Test():\n" - " return Person(99)\n", - [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto name = value.GetElement(nameIdx); - UNIT_ASSERT(!name); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); - }); - } - - Y_UNIT_TEST(FromPyObjectBytesAttrWithNullCharacter) { - TPythonTestEngine engine; - - ui32 ageIdx = 0; - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("a\0ge", &ageIdx) - .Build(); - - engine.ToMiniKQL(personType, - "class Person:\n" - " def __init__(self, age):\n" - " setattr(self, 'a\\0ge', age)\n" - "\n" - "def Test():\n" - " return Person(99)\n", - [ageIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); - }); - } - - Y_UNIT_TEST(FromPyDict) { - TPythonTestEngine engine; - - ui32 ageIdx = 0, nameIdx = 0; - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("age", &ageIdx) - 
.AddField<char*>("name", &nameIdx) - .Build(); - - engine.ToMiniKQL(personType, - "def Test():\n" - " return { 'name': 'Jamel', 'age': 99 }\n", - [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto name = value.GetElement(nameIdx); - UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel"); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); - }); - } - - Y_UNIT_TEST(FromPyDictMissingOptionalField) { - TPythonTestEngine engine; - - ui32 ageIdx = 0, nameIdx = 0; - auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("age", &ageIdx) - .AddField("name", optionalStringType, &nameIdx) - .Build(); - - engine.ToMiniKQL(personType, - "def Test():\n" - " return { 'age': 99 }\n", - [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto name = value.GetElement(nameIdx); - UNIT_ASSERT(!name); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); - }); - } - - Y_UNIT_TEST(FromPyDictBytesKeyWithNullCharacter) { - TPythonTestEngine engine; - - ui32 ageIdx = 0; - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("a\0ge", &ageIdx) - .Build(); - - engine.ToMiniKQL(personType, - "def Test():\n" - " return { b'a\\0ge': 99 }\n", - [ageIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); - }); - } - - Y_UNIT_TEST(FromPyNamedTuple) { - TPythonTestEngine engine; - - ui32 ageIdx = 0, nameIdx = 0; - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("age", &ageIdx) - .AddField<char*>("name", &nameIdx) - .Build(); - - engine.ToMiniKQL(personType, - "from collections import namedtuple\n" - "def Test():\n" - " Person = namedtuple('Person', 'name 
age')\n" - " return Person(age=13, name='Tony')\n", - [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto name = value.GetElement(nameIdx); - UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Tony"); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 13); - }); - } - - Y_UNIT_TEST(FromPyNamedTupleNoneOptionalField) { - TPythonTestEngine engine; - - ui32 ageIdx = 0, nameIdx = 0; - auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<int>("age", &ageIdx) - .AddField("name", optionalStringType, &nameIdx) - .Build(); - - engine.ToMiniKQL(personType, - "from collections import namedtuple\n" - "def Test():\n" - " Pers = namedtuple('Person', 'name age')\n" - " return Pers(name=None, age=15)\n", - [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - auto name = value.GetElement(nameIdx); - UNIT_ASSERT(!name); - auto age = value.GetElement(ageIdx); - UNIT_ASSERT_EQUAL(age.Get<ui32>(), 15); - }); - } - - Y_UNIT_TEST(FromPyEmptyStruct) { - TPythonTestEngine engine; - auto emptyStruct = engine.GetTypeBuilder().Struct()->Build(); - - engine.ToMiniKQL(emptyStruct, - "class Empty: pass\n" - "\n" - "def Test():\n" - " return Empty()\n", - [](const NUdf::TUnboxedValuePod&) {}); - } - - Y_UNIT_TEST(ToPyObject) { - TPythonTestEngine engine; - - ui32 ageIdx = 0, nameIdx = 0, addressIdx = 0, cityIdx = 0, streetIdx = 0, buildingIdx = 0; - auto addressType = engine.GetTypeBuilder().Struct()-> - AddField<NUdf::TUtf8>("city", &cityIdx) - .AddField<NUdf::TUtf8>("street", &streetIdx) - .AddField<ui16>("building", &buildingIdx) - .Build(); - - auto personType = engine.GetTypeBuilder().Struct()-> - AddField<ui16>("age", &ageIdx) - .AddField<NUdf::TUtf8>("name", &nameIdx) - .AddField("address", addressType, &addressIdx) - .Build(); - - - 
engine.ToPython(personType, - [=](const TType* type, const NUdf::IValueBuilder& vb) { - NUdf::TUnboxedValue* items = nullptr; - auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); - items[ageIdx] = NUdf::TUnboxedValuePod(ui16(97)); - items[nameIdx] = vb.NewString("Jamel"); - NUdf::TUnboxedValue* items2 = nullptr; - items[addressIdx] = vb.NewArray(static_cast<const TStructType*>(static_cast<const TStructType*>(type)->GetMemberType(addressIdx))->GetMembersCount(), items2); - items2[cityIdx] = vb.NewString("Moscow");; - items2[streetIdx] = vb.NewString("L'va Tolstogo"); - items2[buildingIdx] = NUdf::TUnboxedValuePod(ui16(16)); - return new_struct; - }, - "def Test(value):\n" - " assert isinstance(value, object)\n" - " assert value.name == 'Jamel'\n" - " assert value.age == 97\n" - " assert value.address.city == 'Moscow'\n" - " assert value.address.building == 16\n" - ); - } - - Y_UNIT_TEST(ToPyObjectKeywordsAsFields) { - TPythonTestEngine engine; - - ui32 passIdx = 0, whileIdx = 0, ifIdx = 0, notIdx = 0; - auto structType = engine.GetTypeBuilder().Struct()-> - AddField<NUdf::TUtf8>("pass", &passIdx) - .AddField<NUdf::TUtf8>("while", &whileIdx) - .AddField<NUdf::TUtf8>("if", &ifIdx) - .AddField<NUdf::TUtf8>("not", &notIdx) - .Build(); - - engine.ToPython(structType, - [=](const TType* type, const NUdf::IValueBuilder& vb) { - NUdf::TUnboxedValue* items = nullptr; - auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); - items[ifIdx] = vb.NewString("You"); - items[whileIdx] = vb.NewString("Shall"); - items[notIdx] = vb.NewString("Not"); - items[passIdx] = vb.NewString("Pass"); - return new_struct; - }, - "def Test(value):\n" - " assert getattr(value, 'if') == 'You'\n" - " assert getattr(value, 'while') == 'Shall'\n" - " assert getattr(value, 'not') == 'Not'\n" - " assert getattr(value, 'pass') == 'Pass'\n" - ); - } +Y_UNIT_TEST(FromPyObject) { + TPythonTestEngine engine; + + ui32 ageIdx =
0, nameIdx = 0; + auto personType = engine.GetTypeBuilder().Struct()->AddField<int>("age", &ageIdx).AddField<char*>("name", &nameIdx).Build(); + + engine.ToMiniKQL(personType, + "class Person:\n" + " def __init__(self, age, name):\n" + " self.age = age\n" + " self.name = name\n" + "\n" + "def Test():\n" + " return Person(99, 'Jamel')\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel"); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); +} + +Y_UNIT_TEST(FromPyObjectMissingOptionalField) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); + auto personType = engine.GetTypeBuilder().Struct()->AddField<int>("age", &ageIdx).AddField("name", optionalStringType, &nameIdx).Build(); + + engine.ToMiniKQL(personType, + "class Person:\n" + " def __init__(self, age):\n" + " self.age = age\n" + "\n" + "def Test():\n" + " return Person(99)\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT(!name); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); +} + +Y_UNIT_TEST(FromPyObjectBytesAttrWithNullCharacter) { + TPythonTestEngine engine; + + ui32 ageIdx = 0; + auto personType = engine.GetTypeBuilder().Struct()->AddField<int>("a\0ge", &ageIdx).Build(); + + engine.ToMiniKQL(personType, + "class Person:\n" + " def __init__(self, age):\n" + " setattr(self, 'a\\0ge', age)\n" + "\n" + "def Test():\n" + " return Person(99)\n", + [ageIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); +} + +Y_UNIT_TEST(FromPyDict) 
{ + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto personType = engine.GetTypeBuilder().Struct()->AddField<int>("age", &ageIdx).AddField<char*>("name", &nameIdx).Build(); + + engine.ToMiniKQL(personType, + "def Test():\n" + " return { 'name': 'Jamel', 'age': 99 }\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Jamel"); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); +} + +Y_UNIT_TEST(FromPyDictMissingOptionalField) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); + auto personType = engine.GetTypeBuilder().Struct()->AddField<int>("age", &ageIdx).AddField("name", optionalStringType, &nameIdx).Build(); + + engine.ToMiniKQL(personType, + "def Test():\n" + " return { 'age': 99 }\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT(!name); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); +} + +Y_UNIT_TEST(FromPyDictBytesKeyWithNullCharacter) { + TPythonTestEngine engine; + + ui32 ageIdx = 0; + auto personType = engine.GetTypeBuilder().Struct()->AddField<int>("a\0ge", &ageIdx).Build(); + + engine.ToMiniKQL(personType, + "def Test():\n" + " return { b'a\\0ge': 99 }\n", + [ageIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 99); + }); +} + +Y_UNIT_TEST(FromPyNamedTuple) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto personType = engine.GetTypeBuilder().Struct()->AddField<int>("age", &ageIdx).AddField<char*>("name", &nameIdx).Build(); + + 
engine.ToMiniKQL(personType, + "from collections import namedtuple\n" + "def Test():\n" + " Person = namedtuple('Person', 'name age')\n" + " return Person(age=13, name='Tony')\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT_STRINGS_EQUAL(name.AsStringRef(), "Tony"); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 13); + }); +} + +Y_UNIT_TEST(FromPyNamedTupleNoneOptionalField) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0; + auto optionalStringType = engine.GetTypeBuilder().Optional()->Item<char*>().Build(); + auto personType = engine.GetTypeBuilder().Struct()->AddField<int>("age", &ageIdx).AddField("name", optionalStringType, &nameIdx).Build(); + + engine.ToMiniKQL(personType, + "from collections import namedtuple\n" + "def Test():\n" + " Pers = namedtuple('Person', 'name age')\n" + " return Pers(name=None, age=15)\n", + [ageIdx, nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + auto name = value.GetElement(nameIdx); + UNIT_ASSERT(!name); + auto age = value.GetElement(ageIdx); + UNIT_ASSERT_EQUAL(age.Get<ui32>(), 15); + }); +} + +Y_UNIT_TEST(FromPyEmptyStruct) { + TPythonTestEngine engine; + auto emptyStruct = engine.GetTypeBuilder().Struct()->Build(); + + engine.ToMiniKQL(emptyStruct, + "class Empty: pass\n" + "\n" + "def Test():\n" + " return Empty()\n", + [](const NUdf::TUnboxedValuePod&) {}); +} + +Y_UNIT_TEST(ToPyObject) { + TPythonTestEngine engine; + + ui32 ageIdx = 0, nameIdx = 0, addressIdx = 0, cityIdx = 0, streetIdx = 0, buildingIdx = 0; + auto addressType = engine.GetTypeBuilder().Struct()->AddField<NUdf::TUtf8>("city", &cityIdx).AddField<NUdf::TUtf8>("street", &streetIdx).AddField<ui16>("building", &buildingIdx).Build(); + + auto personType = engine.GetTypeBuilder().Struct()->AddField<ui16>("age", 
&ageIdx).AddField<NUdf::TUtf8>("name", &nameIdx).AddField("address", addressType, &addressIdx).Build(); + + engine.ToPython(personType, + [=](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); + items[ageIdx] = NUdf::TUnboxedValuePod(ui16(97)); + items[nameIdx] = vb.NewString("Jamel"); + NUdf::TUnboxedValue* items2 = nullptr; + items[addressIdx] = vb.NewArray(static_cast<const TStructType*>(static_cast<const TStructType*>(type)->GetMemberType(addressIdx))->GetMembersCount(), items2); + items2[cityIdx] = vb.NewString("Moscow"); + ; + items2[streetIdx] = vb.NewString("L'va Tolstogo"); + items2[buildingIdx] = NUdf::TUnboxedValuePod(ui16(16)); + return new_struct; + }, + "def Test(value):\n" + " assert isinstance(value, object)\n" + " assert value.name == 'Jamel'\n" + " assert value.age == 97\n" + " assert value.address.city == 'Moscow'\n" + " assert value.address.building == 16\n"); +} + +Y_UNIT_TEST(ToPyObjectKeywordsAsFields) { + TPythonTestEngine engine; + + ui32 passIdx = 0, whileIdx = 0, ifIdx = 0, notIdx = 0; + auto structType = engine.GetTypeBuilder().Struct()->AddField<NUdf::TUtf8>("pass", &passIdx).AddField<NUdf::TUtf8>("while", &whileIdx).AddField<NUdf::TUtf8>("if", &ifIdx).AddField<NUdf::TUtf8>("not", &notIdx).Build(); + + engine.ToPython(structType, + [=](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); + items[ifIdx] = vb.NewString("You"); + items[whileIdx] = vb.NewString("Shall"); + items[notIdx] = vb.NewString("Not"); + items[passIdx] = vb.NewString("Pass"); + return new_struct; + }, + "def Test(value):\n" + " assert getattr(value, 'if') == 'You'\n" + " assert getattr(value, 'while') == 'Shall'\n" + " assert getattr(value, 'not') == 'Not'\n" + " assert getattr(value, 'pass') ==
'Pass'\n"); +} #if PY_MAJOR_VERSION >= 3 // TODO: Fix for python 2 - Y_UNIT_TEST(ToPyObjectTryModify) { - TPythonTestEngine engine; - - ui32 field1Idx = 0, field2Idx = 0; - auto structType = engine.GetTypeBuilder().Struct()-> - AddField<NUdf::TUtf8>("field1", &field1Idx) - .AddField<NUdf::TUtf8>("field2", &field2Idx) - .Build(); - - engine.ToPython(structType, - [=](const TType* type, const NUdf::IValueBuilder& vb) { - NUdf::TUnboxedValue* items = nullptr; - auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); - items[field1Idx] = NUdf::TUnboxedValuePod::Zero(); - items[field2Idx] = NUdf::TUnboxedValuePod::Embedded("empty"); - return new_struct; - }, - "def Test(value):\n" - " try:\n" - " setattr(value, 'field1', 17)\n" - " except AttributeError:\n" - " pass\n" - " else:\n" - " assert False\n" - " try:\n" - " value.field2 = 18\n" - " except AttributeError:\n" - " pass\n" - " else:\n" - " assert False\n" - ); - } +Y_UNIT_TEST(ToPyObjectTryModify) { + TPythonTestEngine engine; + + ui32 field1Idx = 0, field2Idx = 0; + auto structType = engine.GetTypeBuilder().Struct()->AddField<NUdf::TUtf8>("field1", &field1Idx).AddField<NUdf::TUtf8>("field2", &field2Idx).Build(); + + engine.ToPython(structType, + [=](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto new_struct = vb.NewArray(static_cast<const TStructType*>(type)->GetMembersCount(), items); + items[field1Idx] = NUdf::TUnboxedValuePod::Zero(); + items[field2Idx] = NUdf::TUnboxedValuePod::Embedded("empty"); + return new_struct; + }, + "def Test(value):\n" + " try:\n" + " setattr(value, 'field1', 17)\n" + " except AttributeError:\n" + " pass\n" + " else:\n" + " assert False\n" + " try:\n" + " value.field2 = 18\n" + " except AttributeError:\n" + " pass\n" + " else:\n" + " assert False\n"); +} #endif - Y_UNIT_TEST(ToPyObjectEmptyStruct) { - TPythonTestEngine engine; +Y_UNIT_TEST(ToPyObjectEmptyStruct) { + TPythonTestEngine engine; - 
auto personType = engine.GetTypeBuilder().Struct()->Build(); + auto personType = engine.GetTypeBuilder().Struct()->Build(); - engine.ToPython(personType, - [](const TType*, const NUdf::IValueBuilder& vb) { - return vb.NewEmptyList(); - }, - "def Test(value):\n" - " assert isinstance(value, object)\n" + engine.ToPython(personType, + [](const TType*, const NUdf::IValueBuilder& vb) { + return vb.NewEmptyList(); + }, + "def Test(value):\n" + " assert isinstance(value, object)\n" #if PY_MAJOR_VERSION >= 3 - " assert len(value) == 0\n" + " assert len(value) == 0\n" #endif - ); - } + ); } +} // Y_UNIT_TEST_SUITE(TPyStructTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_test_engine.h b/yql/essentials/udfs/common/python/bindings/py_test_engine.h index 6809fc61cff..c2fce35bca3 100644 --- a/yql/essentials/udfs/common/python/bindings/py_test_engine.h +++ b/yql/essentials/udfs/common/python/bindings/py_test_engine.h @@ -13,7 +13,6 @@ #define PYTHON_TEST_TAG "Python2Test" - using namespace NKikimr; using namespace NMiniKQL; @@ -49,15 +48,14 @@ public: , FunctionInfoBuilder_(NYql::UnknownLangVersion, Env_, TypeInfoHelper_, "", nullptr, {}) { HolderFactory_ = MakeHolder<THolderFactory>( - Alloc_.Ref(), - MemInfo_, - nullptr); + Alloc_.Ref(), + MemInfo_, + nullptr); ValueBuilder_ = MakeHolder<TDefaultValueBuilder>(*HolderFactory_, NUdf::EValidatePolicy::Exception); BindTerminator_ = MakeHolder<TBindTerminator>(ValueBuilder_.Get()); Singleton<TPyInitializer>(); CastCtx_ = MakeIntrusive<TPyCastContext>(&GetValueBuilder(), - MakeIntrusive<TPyContext>(TypeInfoHelper_.Get(), NUdf::TStringRef::Of(PYTHON_TEST_TAG), NUdf::TSourcePosition()) - ); + MakeIntrusive<TPyContext>(TypeInfoHelper_.Get(), NUdf::TStringRef::Of(PYTHON_TEST_TAG), NUdf::TSourcePosition())); } ~TPythonTestEngine() { @@ -90,8 +88,8 @@ public: template <typename TChecker> void ToMiniKQLWithArg( - NUdf::TType* udfType, PyObject* argValue, - const TStringBuf& script, TChecker&& checker) + NUdf::TType* 
udfType, PyObject* argValue, + const TStringBuf& script, TChecker&& checker) { TPyObjectPtr args = Py_BuildValue("(O)", argValue); @@ -108,8 +106,8 @@ public: template <typename TExpectedType, typename TChecker> void ToMiniKQLWithArg( - PyObject* argValue, - const TStringBuf& script, TChecker&& checker) + PyObject* argValue, + const TStringBuf& script, TChecker&& checker) { auto type = GetTypeBuilder().SimpleType<TExpectedType>(); ToMiniKQLWithArg<TChecker>(type, argValue, script, std::move(checker)); @@ -143,9 +141,9 @@ public: template <typename TMiniKQLValueBuilder> TPyObjectPtr ToPython( - NUdf::TType* udfType, - TMiniKQLValueBuilder&& builder, - const TStringBuf& script) + NUdf::TType* udfType, + TMiniKQLValueBuilder&& builder, + const TStringBuf& script) { try { TType* type = static_cast<TType*>(udfType); @@ -239,7 +237,7 @@ private: } TPyObjectPtr RunPythonFunction( - const TStringBuf& script, PyObject* args = nullptr) + const TStringBuf& script, PyObject* args = nullptr) { TPyObjectPtr function(CompilePythonFunction(script)); return PyObject_CallObject(function.Get(), args); diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple.cpp b/yql/essentials/udfs/common/python/bindings/py_tuple.cpp index 6cef25ea47f..60842a895fb 100644 --- a/yql/essentials/udfs/common/python/bindings/py_tuple.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_tuple.cpp @@ -58,4 +58,4 @@ NUdf::TUnboxedValue FromPyTuple(const TPyCastContext::TPtr& ctx, const NUdf::TTy throw yexception() << "Expected Tuple or Sequence but got: " << PyObjectRepr(value); } -} +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple.h b/yql/essentials/udfs/common/python/bindings/py_tuple.h index 7d66af9b011..a175c87adf0 100644 --- a/yql/essentials/udfs/common/python/bindings/py_tuple.h +++ b/yql/essentials/udfs/common/python/bindings/py_tuple.h @@ -6,12 +6,12 @@ namespace NPython { TPyObjectPtr ToPyTuple( - const TPyCastContext::TPtr& castCtx, - const 
NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyTuple( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, PyObject* value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, PyObject* value); } // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp index f465f0ebb68..905a5ec382a 100644 --- a/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_tuple_ut.cpp @@ -2,107 +2,106 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyTupleTest) { - Y_UNIT_TEST(FromPyEmptyTuple) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TTuple<>>( - "def Test(): return ()", - [](const NUdf::TUnboxedValuePod&) {}); - } +Y_UNIT_TEST(FromPyEmptyTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<>>( + "def Test(): return ()", + [](const NUdf::TUnboxedValuePod&) {}); +} - Y_UNIT_TEST(FromPyList) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TTuple<int, int, int>>( - "def Test(): return [1, 2, 3]", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); - UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2); - UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3); - }); - } +Y_UNIT_TEST(FromPyList) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<int, int, int>>( + "def Test(): return [1, 2, 3]", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); + UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2); + 
UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3); + }); +} - Y_UNIT_TEST(FromPyIter) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TTuple<int, int, int>>( - "def Test(): return iter({1, 2, 3})", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); - UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2); - UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3); - }); - } +Y_UNIT_TEST(FromPyIter) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<int, int, int>>( + "def Test(): return iter({1, 2, 3})", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); + UNIT_ASSERT_EQUAL(value.GetElement(1).Get<int>(), 2); + UNIT_ASSERT_EQUAL(value.GetElement(2).Get<int>(), 3); + }); +} - Y_UNIT_TEST(FromPyTuple) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TTuple<int, double, char*>>( - "def Test(): return (1, float(2.3), '4')", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); - auto second = value.GetElement(1); - UNIT_ASSERT_DOUBLES_EQUAL(second.Get<double>(), 2.3, 0.0001); - const auto third = value.GetElement(2); - UNIT_ASSERT_EQUAL(third.AsStringRef(), "4"); - }); - } +Y_UNIT_TEST(FromPyTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<int, double, char*>>( + "def Test(): return (1, float(2.3), '4')", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<int>(), 1); + auto second = value.GetElement(1); + UNIT_ASSERT_DOUBLES_EQUAL(second.Get<double>(), 2.3, 0.0001); + const auto third = value.GetElement(2); + UNIT_ASSERT_EQUAL(third.AsStringRef(), "4"); + }); +} - Y_UNIT_TEST(FromPyTupleInTuple) { - TPythonTestEngine engine; - 
engine.ToMiniKQL<NUdf::TTuple<ui32, NUdf::TTuple<ui8, float>, char*>>( - "def Test(): return (1, (2, float(3.4)), '5')", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(value.IsBoxed()); - UNIT_ASSERT_EQUAL(value.GetElement(0).Get<ui32>(), 1); +Y_UNIT_TEST(FromPyTupleInTuple) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTuple<ui32, NUdf::TTuple<ui8, float>, char*>>( + "def Test(): return (1, (2, float(3.4)), '5')", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(value.IsBoxed()); + UNIT_ASSERT_EQUAL(value.GetElement(0).Get<ui32>(), 1); - auto second = value.GetElement(1); - UNIT_ASSERT(second); - UNIT_ASSERT(second.IsBoxed()); - UNIT_ASSERT_EQUAL(second.GetElement(0).Get<ui8>(), 2); - UNIT_ASSERT_DOUBLES_EQUAL( - second.GetElement(1).Get<float>(), 3.4, 0.0001); + auto second = value.GetElement(1); + UNIT_ASSERT(second); + UNIT_ASSERT(second.IsBoxed()); + UNIT_ASSERT_EQUAL(second.GetElement(0).Get<ui8>(), 2); + UNIT_ASSERT_DOUBLES_EQUAL( + second.GetElement(1).Get<float>(), 3.4, 0.0001); - const auto third = value.GetElement(2); - UNIT_ASSERT_EQUAL(third.AsStringRef(), "5"); - }); - } + const auto third = value.GetElement(2); + UNIT_ASSERT_EQUAL(third.AsStringRef(), "5"); + }); +} - Y_UNIT_TEST(ToPyEmptyTuple) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TTuple<>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - NUdf::TUnboxedValue* items = nullptr; - return vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items); - }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert len(value) == 0\n" - " assert value == ()\n"); - } +Y_UNIT_TEST(ToPyEmptyTuple) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTuple<>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + return vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items); + }, + "def Test(value):\n" + " assert 
isinstance(value, tuple)\n" + " assert len(value) == 0\n" + " assert value == ()\n"); +} - Y_UNIT_TEST(ToPyTuple) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TTuple<NUdf::TUtf8, ui64, ui8, float>>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - NUdf::TUnboxedValue* items = nullptr; - auto tuple = vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items); - items[0] = vb.NewString("111"); - items[1] = NUdf::TUnboxedValuePod((ui64) 2); - items[2] = NUdf::TUnboxedValuePod((ui8) 3); - items[3] = NUdf::TUnboxedValuePod((float) 4.5); - return tuple; - }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert len(value) == 4\n" - " assert value == ('111', 2, 3, 4.5)\n"); - } +Y_UNIT_TEST(ToPyTuple) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTuple<NUdf::TUtf8, ui64, ui8, float>>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + NUdf::TUnboxedValue* items = nullptr; + auto tuple = vb.NewArray(static_cast<const TTupleType*>(type)->GetElementsCount(), items); + items[0] = vb.NewString("111"); + items[1] = NUdf::TUnboxedValuePod((ui64)2); + items[2] = NUdf::TUnboxedValuePod((ui8)3); + items[3] = NUdf::TUnboxedValuePod((float)4.5); + return tuple; + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert len(value) == 4\n" + " assert value == ('111', 2, 3, 4.5)\n"); } +} // Y_UNIT_TEST_SUITE(TPyTupleTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp index 08b6b78b168..af1926243fb 100644 --- a/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_tzdate_ut.cpp @@ -4,82 +4,81 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyTzDateTest) { - Y_UNIT_TEST(FromDate) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TTzDate>( - "def Test():\n" - " return (2, 'Europe/Moscow')\n", - 
[](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT_VALUES_EQUAL(value.Get<ui16>(), 2); - UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); - }); - } +Y_UNIT_TEST(FromDate) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTzDate>( + "def Test():\n" + " return (2, 'Europe/Moscow')\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_VALUES_EQUAL(value.Get<ui16>(), 2); + UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + }); +} - Y_UNIT_TEST(FromDatetime) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TTzDatetime>( - "def Test():\n" - " return (2, 'Europe/Moscow')\n", - [](const NUdf::TUnboxedValuePod& value) { +Y_UNIT_TEST(FromDatetime) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTzDatetime>( + "def Test():\n" + " return (2, 'Europe/Moscow')\n", + [](const NUdf::TUnboxedValuePod& value) { UNIT_ASSERT(value); UNIT_ASSERT_VALUES_EQUAL(value.Get<ui32>(), 2); UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); }); - } +} - Y_UNIT_TEST(FromTimestamp) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TTzTimestamp>( - "def Test():\n" - " return (2, 'Europe/Moscow')\n", - [](const NUdf::TUnboxedValuePod& value) { +Y_UNIT_TEST(FromTimestamp) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TTzTimestamp>( + "def Test():\n" + " return (2, 'Europe/Moscow')\n", + [](const NUdf::TUnboxedValuePod& value) { UNIT_ASSERT(value); UNIT_ASSERT_VALUES_EQUAL(value.Get<ui64>(), 2); UNIT_ASSERT_VALUES_EQUAL(value.GetTimezoneId(), NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); }); - } +} - Y_UNIT_TEST(ToDate) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TTzDate>( - [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { - auto ret = NUdf::TUnboxedValuePod((ui16)2); - 
ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); - return ret; - }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert value == (2, 'Europe/Moscow')\n"); - } +Y_UNIT_TEST(ToDate) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTzDate>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { + auto ret = NUdf::TUnboxedValuePod((ui16)2); + ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); + return ret; + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (2, 'Europe/Moscow')\n"); +} - Y_UNIT_TEST(ToDatetime) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TTzDatetime>( - [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { +Y_UNIT_TEST(ToDatetime) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTzDatetime>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { auto ret = NUdf::TUnboxedValuePod((ui32)2); ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); return ret; }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert value == (2, 'Europe/Moscow')\n"); - } + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (2, 'Europe/Moscow')\n"); +} - Y_UNIT_TEST(ToTimestamp) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TTzTimestamp>( - [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { +Y_UNIT_TEST(ToTimestamp) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TTzTimestamp>( + [](const TType* /*type*/, const NUdf::IValueBuilder& /*vb*/) { auto ret = NUdf::TUnboxedValuePod((ui64)2); ret.SetTimezoneId(NKikimr::NMiniKQL::GetTimezoneId("Europe/Moscow")); return ret; }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert value == (2, 'Europe/Moscow')\n"); - } + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (2, 'Europe/Moscow')\n"); } +} // Y_UNIT_TEST_SUITE(TPyTzDateTest) diff --git 
a/yql/essentials/udfs/common/python/bindings/py_utils.cpp b/yql/essentials/udfs/common/python/bindings/py_utils.cpp index d1e0e8b4846..412aebb874b 100644 --- a/yql/essentials/udfs/common/python/bindings/py_utils.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_utils.cpp @@ -8,12 +8,11 @@ #include <regex> - namespace NPython { TPyObjectPtr PyRepr(TStringBuf asciiStr, bool intern) { for (auto c : asciiStr) { - Y_ABORT_UNLESS((c&0x80) == 0, "expected ascii"); + Y_ABORT_UNLESS((c & 0x80) == 0, "expected ascii"); } Py_ssize_t size = static_cast<Py_ssize_t>(asciiStr.size()); @@ -41,7 +40,7 @@ TString PyObjectRepr(PyObject* value) { static constexpr std::string_view truncSuffix = "(truncated)"; const TPyObjectPtr repr(PyObject_Repr(value)); if (!repr) { - return TString("repr error: ") + GetLastErrorAsString(); + return TString("repr error: ") + GetLastErrorAsString(); } TString string; @@ -64,11 +63,13 @@ bool HasEncodingCookie(const TString& source) { // static std::regex encodingRe( - "^[ \\t\\v]*#.*?coding[:=][ \\t]*[-_.a-zA-Z0-9]+.*"); + "^[ \\t\\v]*#.*?coding[:=][ \\t]*[-_.a-zA-Z0-9]+.*"); int i = 0; - for (const auto& it: StringSplitter(source).Split('\n')) { - if (i++ == 2) break; + for (const auto& it : StringSplitter(source).Split('\n')) { + if (i++ == 2) { + break; + } TStringBuf line = it.Token(); if (std::regex_match(line.begin(), line.end(), encodingRe)) { @@ -86,4 +87,4 @@ void PyCleanup() { PySys_SetObject("last_traceback", Py_None); } -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_utils.h b/yql/essentials/udfs/common/python/bindings/py_utils.h index 0c5ef058f1a..cfb3f720836 100644 --- a/yql/essentials/udfs/common/python/bindings/py_utils.h +++ b/yql/essentials/udfs/common/python/bindings/py_utils.h @@ -5,9 +5,9 @@ #include <util/generic/strbuf.h> #ifdef _win_ -#define INIT_MEMBER(member, value) value //member + #define INIT_MEMBER(member, value) value // member #else -#define 
INIT_MEMBER(member, value) .member = (value) + #define INIT_MEMBER(member, value) .member = (value) #endif namespace NPython { @@ -15,7 +15,7 @@ namespace NPython { TPyObjectPtr PyRepr(TStringBuf asciiStr, bool intern = false); template <size_t size> -TPyObjectPtr PyRepr(const char(&str)[size]) { +TPyObjectPtr PyRepr(const char (&str)[size]) { return PyRepr(TStringBuf(str, size - 1), true); } @@ -25,4 +25,4 @@ bool HasEncodingCookie(const TString& source); void PyCleanup(); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp index ce521689b40..9b8e9fa53c4 100644 --- a/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_utils_ut.cpp @@ -2,36 +2,35 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyUtilsTest) { - Y_UNIT_TEST(EncodingCookie) { - UNIT_ASSERT(HasEncodingCookie("# -*- coding: latin-1 -*-")); - UNIT_ASSERT(HasEncodingCookie("# -*- coding:latin-1 -*-")); - UNIT_ASSERT(HasEncodingCookie("# -*- coding=latin-1 -*-")); - UNIT_ASSERT(HasEncodingCookie("# -*- encoding: latin-1 -*-")); - UNIT_ASSERT(HasEncodingCookie("# -*- encoding:latin-1 -*-")); - UNIT_ASSERT(HasEncodingCookie("# -*- encoding=latin-1 -*-")); - UNIT_ASSERT(HasEncodingCookie("# -*- coding: iso-8859-15 -*-")); - UNIT_ASSERT(HasEncodingCookie("# -*- coding: ascii -*-")); - UNIT_ASSERT(HasEncodingCookie( - "# This Python file uses the following encoding: utf-8")); - - // encoding commend on second line - UNIT_ASSERT(HasEncodingCookie( - "#!/usr/local/bin/python\n" - "# -*- coding: iso-8859-15 -*-\n" - "print 'hello'")); - - // missing "coding:" prefix - UNIT_ASSERT(false == HasEncodingCookie("# latin-1")); - - // encoding comment not on line 1 or 2 - UNIT_ASSERT(false == HasEncodingCookie( - "#!/usr/local/bin/python\n" - "#\n" - "# -*- coding: latin-1 -*-\n")); - } 
+Y_UNIT_TEST(EncodingCookie) { + UNIT_ASSERT(HasEncodingCookie("# -*- coding: latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding:latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding=latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- encoding: latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- encoding:latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- encoding=latin-1 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding: iso-8859-15 -*-")); + UNIT_ASSERT(HasEncodingCookie("# -*- coding: ascii -*-")); + UNIT_ASSERT(HasEncodingCookie( + "# This Python file uses the following encoding: utf-8")); + + // encoding commend on second line + UNIT_ASSERT(HasEncodingCookie( + "#!/usr/local/bin/python\n" + "# -*- coding: iso-8859-15 -*-\n" + "print 'hello'")); + + // missing "coding:" prefix + UNIT_ASSERT(false == HasEncodingCookie("# latin-1")); + + // encoding comment not on line 1 or 2 + UNIT_ASSERT(false == HasEncodingCookie( + "#!/usr/local/bin/python\n" + "#\n" + "# -*- coding: latin-1 -*-\n")); } +} // Y_UNIT_TEST_SUITE(TPyUtilsTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_variant.cpp b/yql/essentials/udfs/common/python/bindings/py_variant.cpp index ab222b34323..73764ad3fe5 100644 --- a/yql/essentials/udfs/common/python/bindings/py_variant.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_variant.cpp @@ -7,7 +7,6 @@ #include <yql/essentials/public/udf/udf_value_builder.h> #include <yql/essentials/public/udf/udf_type_inspection.h> - using namespace NKikimr; namespace NPython { @@ -16,9 +15,9 @@ namespace NPython { // public functions ////////////////////////////////////////////////////////////////////////////// TPyObjectPtr ToPyVariant( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) { auto& th = *castCtx->PyCtx->TypeInfoHelper; 
NUdf::TVariantTypeInspector varInsp(th, type); @@ -35,7 +34,7 @@ TPyObjectPtr ToPyVariant( } else if (auto structInsp = NUdf::TStructTypeInspector(th, subType)) { itemType = structInsp.GetMemberType(index); TPyObjectPtr pyName = ToPyUnicode<NUdf::TStringRef>( - structInsp.GetMemberName(index)); + structInsp.GetMemberName(index)); TPyObjectPtr pyItem = ToPyObject(castCtx, itemType, item); return PyTuple_Pack(2, pyName.Get(), pyItem.Get()); } @@ -44,9 +43,9 @@ TPyObjectPtr ToPyVariant( } NUdf::TUnboxedValue FromPyVariant( - const TPyCastContext::TPtr& castCtx, - const NUdf::TType* type, - PyObject* value) + const TPyCastContext::TPtr& castCtx, + const NUdf::TType* type, + PyObject* value) { PY_ENSURE(PyTuple_Check(value), "Expected to get Tuple, but got " << Py_TYPE(value)->tp_name); @@ -54,7 +53,7 @@ NUdf::TUnboxedValue FromPyVariant( Py_ssize_t tupleSize = PyTuple_GET_SIZE(value); PY_ENSURE(tupleSize == 2, "Expected to get Tuple with 2 elements, but got " - << tupleSize << " elements"); + << tupleSize << " elements"); auto& th = *castCtx->PyCtx->TypeInfoHelper; NUdf::TVariantTypeInspector varInsp(th, type); @@ -69,12 +68,12 @@ NUdf::TUnboxedValue FromPyVariant( if (auto tupleInsp = NUdf::TTupleTypeInspector(th, subType)) { PY_ENSURE(index < tupleInsp.GetElementsCount(), "Index must be < " << tupleInsp.GetElementsCount() - << ", but got " << index); + << ", but got " << index); auto* itemType = tupleInsp.GetElementType(index); return castCtx->ValueBuilder->NewVariant(index, FromPyObject(castCtx, itemType, el1)); } else { throw yexception() << "Cannot convert " << PyObjectRepr(value) - << " underlying Variant type is not a Tuple"; + << " underlying Variant type is not a Tuple"; } } else if (TryPyCast(el0, name)) { if (auto structInsp = NUdf::TStructTypeInspector(th, subType)) { @@ -85,13 +84,14 @@ NUdf::TUnboxedValue FromPyVariant( return castCtx->ValueBuilder->NewVariant(index, FromPyObject(castCtx, itemType, el1)); } else { throw yexception() << "Cannot convert " 
<< PyObjectRepr(value) - << " underlying Variant type is not a Struct"; + << " underlying Variant type is not a Struct"; } } else { throw yexception() - << "Expected first Tuple element to either be an int " - "or a str, but got " << Py_TYPE(el0)->tp_name; + << "Expected first Tuple element to either be an int " + "or a str, but got " + << Py_TYPE(el0)->tp_name; } } -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_variant.h b/yql/essentials/udfs/common/python/bindings/py_variant.h index ca97123183b..dd96deba5b5 100644 --- a/yql/essentials/udfs/common/python/bindings/py_variant.h +++ b/yql/essentials/udfs/common/python/bindings/py_variant.h @@ -5,13 +5,13 @@ namespace NPython { TPyObjectPtr ToPyVariant( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyVariant( - const TPyCastContext::TPtr& castCtx, - const NKikimr::NUdf::TType* type, - PyObject* value); + const TPyCastContext::TPtr& castCtx, + const NKikimr::NUdf::TType* type, + PyObject* value); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp index d792449d828..adfdd2a09b7 100644 --- a/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_variant_ut.cpp @@ -3,99 +3,94 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyVariantTest) { - Y_UNIT_TEST(FromPyWithIndex) { - TPythonTestEngine engine; - engine.ToMiniKQL<NUdf::TVariant<float, ui32, char*>>( - "def Test():\n" - " return (2, 'hello')\n", - [](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - 
UNIT_ASSERT_EQUAL(value.GetVariantIndex(), 2); - auto item = value.GetVariantItem(); - UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "hello"); - }); - } +Y_UNIT_TEST(FromPyWithIndex) { + TPythonTestEngine engine; + engine.ToMiniKQL<NUdf::TVariant<float, ui32, char*>>( + "def Test():\n" + " return (2, 'hello')\n", + [](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.GetVariantIndex(), 2); + auto item = value.GetVariantItem(); + UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "hello"); + }); +} - Y_UNIT_TEST(FromPyWithName) { - TPythonTestEngine engine; +Y_UNIT_TEST(FromPyWithName) { + TPythonTestEngine engine; - ui32 ageIdx = 0, nameIdx = 0; - NUdf::TType* personType = engine.GetTypeBuilder().Struct()-> - AddField<ui32>("age", &ageIdx) - .AddField<char*>("name", &nameIdx) - .Build(); + ui32 ageIdx = 0, nameIdx = 0; + NUdf::TType* personType = engine.GetTypeBuilder().Struct()->AddField<ui32>("age", &ageIdx).AddField<char*>("name", &nameIdx).Build(); - NUdf::TType* variantType = engine.GetTypeBuilder() - .Variant()->Over(personType).Build(); + NUdf::TType* variantType = engine.GetTypeBuilder() + .Variant() + ->Over(personType) + .Build(); - engine.ToMiniKQL( - variantType, - "def Test():\n" - " return ('age', 99)\n", - [ageIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT_EQUAL(value.GetVariantIndex(), ageIdx); - auto item = value.GetVariantItem(); - UNIT_ASSERT_EQUAL(item.Get<ui32>(), 99); - }); + engine.ToMiniKQL( + variantType, + "def Test():\n" + " return ('age', 99)\n", + [ageIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.GetVariantIndex(), ageIdx); + auto item = value.GetVariantItem(); + UNIT_ASSERT_EQUAL(item.Get<ui32>(), 99); + }); - engine.ToMiniKQL( - variantType, - "def Test():\n" - " return ('name', 'Jamel')\n", - [nameIdx](const NUdf::TUnboxedValuePod& value) { - UNIT_ASSERT(value); - UNIT_ASSERT_EQUAL(value.GetVariantIndex(), nameIdx); - auto 
item = value.GetVariantItem(); - UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "Jamel"); - }); - } + engine.ToMiniKQL( + variantType, + "def Test():\n" + " return ('name', 'Jamel')\n", + [nameIdx](const NUdf::TUnboxedValuePod& value) { + UNIT_ASSERT(value); + UNIT_ASSERT_EQUAL(value.GetVariantIndex(), nameIdx); + auto item = value.GetVariantItem(); + UNIT_ASSERT_STRINGS_EQUAL(item.AsStringRef(), "Jamel"); + }); +} - Y_UNIT_TEST(ToPyWithIndex) { - TPythonTestEngine engine; - engine.ToPython<NUdf::TVariant<float, ui32, char*>>( - [](const TType* /*type*/, const NUdf::IValueBuilder& vb) { - return vb.NewVariant(1, NUdf::TUnboxedValuePod((ui32) 42)); - }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert value == (1, 42)\n"); - } +Y_UNIT_TEST(ToPyWithIndex) { + TPythonTestEngine engine; + engine.ToPython<NUdf::TVariant<float, ui32, char*>>( + [](const TType* /*type*/, const NUdf::IValueBuilder& vb) { + return vb.NewVariant(1, NUdf::TUnboxedValuePod((ui32)42)); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == (1, 42)\n"); +} - Y_UNIT_TEST(ToPyWithName) { - TPythonTestEngine engine; +Y_UNIT_TEST(ToPyWithName) { + TPythonTestEngine engine; - ui32 ageIdx = 0, nameIdx = 0; - NUdf::TType* personType = engine.GetTypeBuilder().Struct()-> - AddField<ui32>("age", &ageIdx) - .AddField<NUdf::TUtf8>("name", &nameIdx) - .Build(); + ui32 ageIdx = 0, nameIdx = 0; + NUdf::TType* personType = engine.GetTypeBuilder().Struct()->AddField<ui32>("age", &ageIdx).AddField<NUdf::TUtf8>("name", &nameIdx).Build(); - NUdf::TType* variantType = engine.GetTypeBuilder() - .Variant()->Over(personType).Build(); + NUdf::TType* variantType = engine.GetTypeBuilder() + .Variant() + ->Over(personType) + .Build(); - engine.ToPython( - variantType, - [ageIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) { - return vb.NewVariant(ageIdx, NUdf::TUnboxedValuePod((ui32) 99)); - }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " 
assert value == ('age', 99)\n" - ); + engine.ToPython( + variantType, + [ageIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) { + return vb.NewVariant(ageIdx, NUdf::TUnboxedValuePod((ui32)99)); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == ('age', 99)\n"); - engine.ToPython( - variantType, - [nameIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) { - return vb.NewVariant(nameIdx, vb.NewString("Jamel")); - }, - "def Test(value):\n" - " assert isinstance(value, tuple)\n" - " assert value == ('name', 'Jamel')\n" - ); - } + engine.ToPython( + variantType, + [nameIdx](const TType* /*type*/, const NUdf::IValueBuilder& vb) { + return vb.NewVariant(nameIdx, vb.NewString("Jamel")); + }, + "def Test(value):\n" + " assert isinstance(value, tuple)\n" + " assert value == ('name', 'Jamel')\n"); } +} // Y_UNIT_TEST_SUITE(TPyVariantTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_void.cpp b/yql/essentials/udfs/common/python/bindings/py_void.cpp index ef72c052fbc..8214259fb49 100644 --- a/yql/essentials/udfs/common/python/bindings/py_void.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_void.cpp @@ -21,80 +21,78 @@ static void VoidDealloc(PyObject*) { PyTypeObject PyVoidType = { PyVarObject_HEAD_INIT(&PyType_Type, 0) - INIT_MEMBER(tp_name , "yql.Void"), - INIT_MEMBER(tp_basicsize , 0), - INIT_MEMBER(tp_itemsize , 0), - INIT_MEMBER(tp_dealloc , VoidDealloc), + INIT_MEMBER(tp_name, "yql.Void"), + INIT_MEMBER(tp_basicsize, 0), + INIT_MEMBER(tp_itemsize, 0), + INIT_MEMBER(tp_dealloc, VoidDealloc), #if PY_VERSION_HEX < 0x030800b4 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #else INIT_MEMBER(tp_vectorcall_offset, 0), #endif - INIT_MEMBER(tp_getattr , nullptr), - INIT_MEMBER(tp_setattr , nullptr), + INIT_MEMBER(tp_getattr, nullptr), + INIT_MEMBER(tp_setattr, nullptr), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_as_async , nullptr), + INIT_MEMBER(tp_as_async, nullptr), #else - 
INIT_MEMBER(tp_compare , nullptr), + INIT_MEMBER(tp_compare, nullptr), #endif - INIT_MEMBER(tp_repr , VoidRepr), - INIT_MEMBER(tp_as_number , nullptr), - INIT_MEMBER(tp_as_sequence , nullptr), - INIT_MEMBER(tp_as_mapping , nullptr), - INIT_MEMBER(tp_hash , nullptr), - INIT_MEMBER(tp_call , nullptr), - INIT_MEMBER(tp_str , nullptr), - INIT_MEMBER(tp_getattro , nullptr), - INIT_MEMBER(tp_setattro , nullptr), - INIT_MEMBER(tp_as_buffer , nullptr), - INIT_MEMBER(tp_flags , 0), - INIT_MEMBER(tp_doc , "yql.Void object"), - INIT_MEMBER(tp_traverse , nullptr), - INIT_MEMBER(tp_clear , nullptr), - INIT_MEMBER(tp_richcompare , nullptr), - INIT_MEMBER(tp_weaklistoffset , 0), - INIT_MEMBER(tp_iter , nullptr), - INIT_MEMBER(tp_iternext , nullptr), - INIT_MEMBER(tp_methods , nullptr), - INIT_MEMBER(tp_members , nullptr), - INIT_MEMBER(tp_getset , nullptr), - INIT_MEMBER(tp_base , nullptr), - INIT_MEMBER(tp_dict , nullptr), - INIT_MEMBER(tp_descr_get , nullptr), - INIT_MEMBER(tp_descr_set , nullptr), - INIT_MEMBER(tp_dictoffset , 0), - INIT_MEMBER(tp_init , nullptr), - INIT_MEMBER(tp_alloc , nullptr), - INIT_MEMBER(tp_new , nullptr), - INIT_MEMBER(tp_free , nullptr), - INIT_MEMBER(tp_is_gc , nullptr), - INIT_MEMBER(tp_bases , nullptr), - INIT_MEMBER(tp_mro , nullptr), - INIT_MEMBER(tp_cache , nullptr), - INIT_MEMBER(tp_subclasses , nullptr), - INIT_MEMBER(tp_weaklist , nullptr), - INIT_MEMBER(tp_del , nullptr), - INIT_MEMBER(tp_version_tag , 0), + INIT_MEMBER(tp_repr, VoidRepr), + INIT_MEMBER(tp_as_number, nullptr), + INIT_MEMBER(tp_as_sequence, nullptr), + INIT_MEMBER(tp_as_mapping, nullptr), + INIT_MEMBER(tp_hash, nullptr), + INIT_MEMBER(tp_call, nullptr), + INIT_MEMBER(tp_str, nullptr), + INIT_MEMBER(tp_getattro, nullptr), + INIT_MEMBER(tp_setattro, nullptr), + INIT_MEMBER(tp_as_buffer, nullptr), + INIT_MEMBER(tp_flags, 0), + INIT_MEMBER(tp_doc, "yql.Void object"), + INIT_MEMBER(tp_traverse, nullptr), + INIT_MEMBER(tp_clear, nullptr), + INIT_MEMBER(tp_richcompare, nullptr), + 
INIT_MEMBER(tp_weaklistoffset, 0), + INIT_MEMBER(tp_iter, nullptr), + INIT_MEMBER(tp_iternext, nullptr), + INIT_MEMBER(tp_methods, nullptr), + INIT_MEMBER(tp_members, nullptr), + INIT_MEMBER(tp_getset, nullptr), + INIT_MEMBER(tp_base, nullptr), + INIT_MEMBER(tp_dict, nullptr), + INIT_MEMBER(tp_descr_get, nullptr), + INIT_MEMBER(tp_descr_set, nullptr), + INIT_MEMBER(tp_dictoffset, 0), + INIT_MEMBER(tp_init, nullptr), + INIT_MEMBER(tp_alloc, nullptr), + INIT_MEMBER(tp_new, nullptr), + INIT_MEMBER(tp_free, nullptr), + INIT_MEMBER(tp_is_gc, nullptr), + INIT_MEMBER(tp_bases, nullptr), + INIT_MEMBER(tp_mro, nullptr), + INIT_MEMBER(tp_cache, nullptr), + INIT_MEMBER(tp_subclasses, nullptr), + INIT_MEMBER(tp_weaklist, nullptr), + INIT_MEMBER(tp_del, nullptr), + INIT_MEMBER(tp_version_tag, 0), #if PY_MAJOR_VERSION >= 3 - INIT_MEMBER(tp_finalize , nullptr), + INIT_MEMBER(tp_finalize, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b1 - INIT_MEMBER(tp_vectorcall , nullptr), + INIT_MEMBER(tp_vectorcall, nullptr), #endif #if PY_VERSION_HEX >= 0x030800b4 && PY_VERSION_HEX < 0x03090000 - INIT_MEMBER(tp_print , nullptr), + INIT_MEMBER(tp_print, nullptr), #endif }; PyObject PyVoidObject = { - _PyObject_EXTRA_INIT - 1, &PyVoidType -}; + _PyObject_EXTRA_INIT 1, &PyVoidType}; TPyObjectPtr ToPyVoid( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, - const NUdf::TUnboxedValuePod& value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + const NUdf::TUnboxedValuePod& value) { Y_UNUSED(ctx); Y_UNUSED(type); @@ -103,9 +101,9 @@ TPyObjectPtr ToPyVoid( } NUdf::TUnboxedValue FromPyVoid( - const TPyCastContext::TPtr& ctx, - const NUdf::TType* type, - PyObject* value) + const TPyCastContext::TPtr& ctx, + const NUdf::TType* type, + PyObject* value) { Y_UNUSED(ctx); Y_UNUSED(type); @@ -114,4 +112,4 @@ NUdf::TUnboxedValue FromPyVoid( return NUdf::TUnboxedValue::Void(); } -} // namspace NPython +} // namespace NPython diff --git 
a/yql/essentials/udfs/common/python/bindings/py_void.h b/yql/essentials/udfs/common/python/bindings/py_void.h index 3c8203ab6e8..1015a530ff0 100644 --- a/yql/essentials/udfs/common/python/bindings/py_void.h +++ b/yql/essentials/udfs/common/python/bindings/py_void.h @@ -9,13 +9,13 @@ extern PyTypeObject PyVoidType; extern PyObject PyVoidObject; TPyObjectPtr ToPyVoid( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - const NKikimr::NUdf::TUnboxedValuePod& value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + const NKikimr::NUdf::TUnboxedValuePod& value); NKikimr::NUdf::TUnboxedValue FromPyVoid( - const TPyCastContext::TPtr& ctx, - const NKikimr::NUdf::TType* type, - PyObject* value); + const TPyCastContext::TPtr& ctx, + const NKikimr::NUdf::TType* type, + PyObject* value); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp b/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp index e6e8a72768c..e300f395d23 100644 --- a/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_void_ut.cpp @@ -2,36 +2,36 @@ #include <library/cpp/testing/unittest/registar.h> - using namespace NPython; Y_UNIT_TEST_SUITE(TPyVoidTest) { - Y_UNIT_TEST(FromPython) { - TPythonTestEngine engine; - engine.ToMiniKQL<void>( - "import yql\n" - "\n" - "def Test():\n" - " return yql.Void\n", - [](const NUdf::TUnboxedValue& value) { - UNIT_ASSERT(value); - UNIT_ASSERT(false == value.IsBoxed()); - }); - } +Y_UNIT_TEST(FromPython) { + TPythonTestEngine engine; + engine.ToMiniKQL<void>( + "import yql\n" + "\n" + "def Test():\n" + " return yql.Void\n", + [](const NUdf::TUnboxedValue& value) { + UNIT_ASSERT(value); + UNIT_ASSERT(false == value.IsBoxed()); + }); +} - Y_UNIT_TEST(ToPython) { - TPythonTestEngine engine; - engine.ToPython<void>( - [](const TType* type, const NUdf::IValueBuilder& vb) { - Y_UNUSED(type); Y_UNUSED(vb); - 
return NUdf::TUnboxedValue::Void(); - }, - "import yql\n" - "\n" - "def Test(value):\n" - " assert str(value) == 'yql.Void'\n" - " assert repr(value) == 'yql.Void'\n" - " assert isinstance(value, yql.TVoid)\n" - " assert value is yql.Void\n"); - } +Y_UNIT_TEST(ToPython) { + TPythonTestEngine engine; + engine.ToPython<void>( + [](const TType* type, const NUdf::IValueBuilder& vb) { + Y_UNUSED(type); + Y_UNUSED(vb); + return NUdf::TUnboxedValue::Void(); + }, + "import yql\n" + "\n" + "def Test(value):\n" + " assert str(value) == 'yql.Void'\n" + " assert repr(value) == 'yql.Void'\n" + " assert isinstance(value, yql.TVoid)\n" + " assert value is yql.Void\n"); } +} // Y_UNIT_TEST_SUITE(TPyVoidTest) diff --git a/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp b/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp index 11ba4262173..b9a1df22ae2 100644 --- a/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp +++ b/yql/essentials/udfs/common/python/bindings/py_yql_module.cpp @@ -14,84 +14,84 @@ namespace NPython { static PyMethodDef ModuleMethods[] = { - { nullptr, nullptr, 0, nullptr } /* sentinel */ + {nullptr, nullptr, 0, nullptr} /* sentinel */ }; #define MODULE_NAME "yql" #if PY_MAJOR_VERSION >= 3 -#define MODULE_NAME_TYPING "yql.typing" + #define MODULE_NAME_TYPING "yql.typing" #endif #define MODULE_INITIALIZED_ATTRIBUTE "_initialized" PyDoc_STRVAR(ModuleDoc, - "This module provides YQL specific types for Python."); + "This module provides YQL specific types for Python."); #if PY_MAJOR_VERSION >= 3 PyDoc_STRVAR(ModuleDocTyping, - "This module provides annotations for YQL types for Python."); + "This module provides annotations for YQL types for Python."); #endif PyDoc_STRVAR(StopIterationException_doc, - "Can be throwed to yield stream iteration."); + "Can be throwed to yield stream iteration."); -#define PREPARE_TYPE(Name, Type) \ - do { \ - if (PyType_Ready(Type) < 0) { \ +#define PREPARE_TYPE(Name, Type) \ + do { \ + if 
(PyType_Ready(Type) < 0) { \ throw yexception() << "Can't prepare type: " << (Name); \ - } \ + } \ } while (0) -#define REGISTER_TYPE(Name, Type) \ - do { \ - PREPARE_TYPE(Name, Type); \ - Py_INCREF(Type); \ - if (PyModule_AddObject(module, (Name), (PyObject*) Type) < 0) { \ - throw yexception() << "Can't add type: " << (Name); \ - } \ +#define REGISTER_TYPE(Name, Type) \ + do { \ + PREPARE_TYPE(Name, Type); \ + Py_INCREF(Type); \ + if (PyModule_AddObject(module, (Name), (PyObject*)Type) < 0) { \ + throw yexception() << "Can't add type: " << (Name); \ + } \ } while (0) -#define REGISTER_OBJECT(Name, Object) \ - do { \ - if (PyDict_SetItemString(dict, (Name), (PyObject *) (Object)) < 0) \ - throw yexception() << "Can't register object: " << (Name); \ +#define REGISTER_OBJECT(Name, Object) \ + do { \ + if (PyDict_SetItemString(dict, (Name), (PyObject*)(Object)) < 0) \ + throw yexception() << "Can't register object: " << (Name); \ } while (0) -#define REGISTER_EXCEPTION(Name, Object, Doc) \ - do { \ - if (!Object) { \ - Object = PyErr_NewExceptionWithDoc((char*) MODULE_NAME "." Name, Doc, nullptr, nullptr); \ - if (!Object) { \ - throw yexception() << "Can't register exception: " << (Name); \ - } \ - REGISTER_OBJECT(Name, Object); \ - } \ +#define REGISTER_EXCEPTION(Name, Object, Doc) \ + do { \ + if (!Object) { \ + Object = PyErr_NewExceptionWithDoc((char*)MODULE_NAME "." 
Name, Doc, nullptr, nullptr); \ + if (!Object) { \ + throw yexception() << "Can't register exception: " << (Name); \ + } \ + REGISTER_OBJECT(Name, Object); \ + } \ } while (0) #if PY_MAJOR_VERSION >= 3 static PyModuleDef ModuleDefinition = { - PyModuleDef_HEAD_INIT, - INIT_MEMBER(m_name, MODULE_NAME), - INIT_MEMBER(m_doc, ModuleDoc), - INIT_MEMBER(m_size, -1), - INIT_MEMBER(m_methods, ModuleMethods), - INIT_MEMBER(m_slots, nullptr), - INIT_MEMBER(m_traverse, nullptr), - INIT_MEMBER(m_clear, nullptr), - INIT_MEMBER(m_free, nullptr), + PyModuleDef_HEAD_INIT, + INIT_MEMBER(m_name, MODULE_NAME), + INIT_MEMBER(m_doc, ModuleDoc), + INIT_MEMBER(m_size, -1), + INIT_MEMBER(m_methods, ModuleMethods), + INIT_MEMBER(m_slots, nullptr), + INIT_MEMBER(m_traverse, nullptr), + INIT_MEMBER(m_clear, nullptr), + INIT_MEMBER(m_free, nullptr), }; static PyModuleDef ModuleDefinitionTyping = { - PyModuleDef_HEAD_INIT, - INIT_MEMBER(m_name, MODULE_NAME_TYPING), - INIT_MEMBER(m_doc, ModuleDocTyping), - INIT_MEMBER(m_size, -1), - INIT_MEMBER(m_methods, nullptr), - INIT_MEMBER(m_slots, nullptr), - INIT_MEMBER(m_traverse, nullptr), - INIT_MEMBER(m_clear, nullptr), - INIT_MEMBER(m_free, nullptr), + PyModuleDef_HEAD_INIT, + INIT_MEMBER(m_name, MODULE_NAME_TYPING), + INIT_MEMBER(m_doc, ModuleDocTyping), + INIT_MEMBER(m_size, -1), + INIT_MEMBER(m_methods, nullptr), + INIT_MEMBER(m_slots, nullptr), + INIT_MEMBER(m_traverse, nullptr), + INIT_MEMBER(m_clear, nullptr), + INIT_MEMBER(m_free, nullptr), }; PyMODINIT_FUNC PyInit_YQL(void) // NOLINT(readability-identifier-naming) @@ -207,7 +207,7 @@ void InitYqlModule(NYql::NUdf::EPythonFlavor pythonFlavor, bool standalone) { ythrow yexception() << "Can't parse YQL type annotations module"; } - auto processError = [&] (PyObject* obj, TStringBuf message) { + auto processError = [&](PyObject* obj, TStringBuf message) { if (obj) { return; } @@ -248,4 +248,4 @@ void TermYqlModule() { PyYieldIterationException = nullptr; } -} // namspace NPython +} // namespace 
NPython diff --git a/yql/essentials/udfs/common/python/bindings/py_yql_module.h b/yql/essentials/udfs/common/python/bindings/py_yql_module.h index 970471d029e..f621175af50 100644 --- a/yql/essentials/udfs/common/python/bindings/py_yql_module.h +++ b/yql/essentials/udfs/common/python/bindings/py_yql_module.h @@ -8,4 +8,4 @@ void PrepareYqlModule(); void InitYqlModule(NYql::NUdf::EPythonFlavor pythonFlavor, bool standalone = true); void TermYqlModule(); -} // namspace NPython +} // namespace NPython diff --git a/yql/essentials/udfs/common/python/bindings/typing.py b/yql/essentials/udfs/common/python/bindings/typing.py index 0e53ad1e0a4..da32d5b5de9 100644 --- a/yql/essentials/udfs/common/python/bindings/typing.py +++ b/yql/essentials/udfs/common/python/bindings/typing.py @@ -18,9 +18,7 @@ def main(): import yandex.type_info.typing as ti_typing import six except ImportError as e: - raise ImportError( - str(e) + ". Make sure that library/python/type_info is in your PEERDIR list" - ) + raise ImportError(str(e) + ". 
Make sure that library/python/type_info is in your PEERDIR list") from yql import typing @@ -80,10 +78,10 @@ def main(): arg_type = param.stop ti_base.validate_type(arg_type) if param.step is not None: - for x in param.step: - if x != AutoMap: - raise ValueError("Expected AutoMap as parameter flag but got: {}".format(ti_base._with_type(x))) - flags.add(x) + for x in param.step: + if x != AutoMap: + raise ValueError("Expected AutoMap as parameter flag but got: {}".format(ti_base._with_type(x))) + flags.add(x) else: ti_base.validate_type(arg_type) return (name, arg_type, flags) @@ -91,13 +89,17 @@ def main(): @six.python_2_unicode_compatible class GenericCallableAlias(ti_base.Type): def __str__(self): - return ("Callable<(" + - ",".join(_format_arg(x) for x in self.args[:len(self.args)-self.optional_args]) + - ("," if len(self.args) > self.optional_args and self.optional_args else "") + - ("[" if self.optional_args else "") + - ",".join(_format_arg(x) for x in self.args[len(self.args)-self.optional_args:]) + - ("]" if self.optional_args else "") + - ")->" + str(getattr(self, "return")) + ">") + return ( + "Callable<(" + + ",".join(_format_arg(x) for x in self.args[: len(self.args) - self.optional_args]) + + ("," if len(self.args) > self.optional_args and self.optional_args else "") + + ("[" if self.optional_args else "") + + ",".join(_format_arg(x) for x in self.args[len(self.args) - self.optional_args :]) + + ("]" if self.optional_args else "") + + ")->" + + str(getattr(self, "return")) + + ">" + ) def to_yson_type(self): yson_repr = { @@ -108,18 +110,28 @@ def main(): } return yson_repr - class GenericCallable(ti_base.Generic): def __getitem__(self, params): - if not isinstance(params, tuple) or len(params) < 2 or not isinstance(params[0], int) or not ti_typing.is_valid_type(params[1]): - raise ValueError("Expected at least two arguments (integer and type of return value) but got: {}".format(ti_base._with_type(params))) + if ( + not isinstance(params, tuple) + or 
len(params) < 2 + or not isinstance(params[0], int) + or not ti_typing.is_valid_type(params[1]) + ): + raise ValueError( + "Expected at least two arguments (integer and type of return value) but got: {}".format( + ti_base._with_type(params) + ) + ) args = [] for param in params[2:]: name, arg_type, flags = _extract_arg_info(param) args.append((name, arg_type, flags)) if params[0] < 0 or params[0] > len(args): - raise ValueError("Optional argument count - " + str(params[0]) + " out of range [0.." + str(len(args)) + "]") + raise ValueError( + "Optional argument count - " + str(params[0]) + " out of range [0.." + str(len(args)) + "]" + ) attrs = { "optional_args": params[0], diff --git a/yql/essentials/udfs/common/python/bindings/ya.make b/yql/essentials/udfs/common/python/bindings/ya.make index aea3e547176..29dca847ee9 100644 --- a/yql/essentials/udfs/common/python/bindings/ya.make +++ b/yql/essentials/udfs/common/python/bindings/ya.make @@ -2,6 +2,8 @@ PY23_NATIVE_LIBRARY() YQL_ABI_VERSION(2 27 0) +ENABLE(YQL_STYLE_CPP) + SRCS( py_callable.cpp py_cast.cpp diff --git a/yql/essentials/udfs/common/python/bindings/ya.make.test.inc b/yql/essentials/udfs/common/python/bindings/ya.make.test.inc index 67803ad18f4..036c72bc147 100644 --- a/yql/essentials/udfs/common/python/bindings/ya.make.test.inc +++ b/yql/essentials/udfs/common/python/bindings/ya.make.test.inc @@ -1,3 +1,5 @@ +ENABLE(YQL_STYLE_CPP) + SRCS( py_callable_ut.cpp py_cast_ut.cpp diff --git a/yql/essentials/udfs/common/python/main_py3/include/main.h b/yql/essentials/udfs/common/python/main_py3/include/main.h index c96402004e3..f8ae216a196 100644 --- a/yql/essentials/udfs/common/python/main_py3/include/main.h +++ b/yql/essentials/udfs/common/python/main_py3/include/main.h @@ -4,9 +4,7 @@ #ifdef __cplusplus extern "C" { #endif -Y_PUBLIC -int RunPython(int argc, char** argv); + Y_PUBLIC int RunPython(int argc, char** argv); #ifdef __cplusplus } #endif - diff --git 
a/yql/essentials/udfs/common/python/main_py3/main.cpp b/yql/essentials/udfs/common/python/main_py3/main.cpp index edc3c89ca5b..a347c2eb864 100644 --- a/yql/essentials/udfs/common/python/main_py3/main.cpp +++ b/yql/essentials/udfs/common/python/main_py3/main.cpp @@ -1,9 +1,7 @@ #include "main.h" -extern "C" -int RunPythonImpl(int argc, char** argv); +extern "C" int RunPythonImpl(int argc, char** argv); -extern "C" -int RunPython(int argc, char** argv) { +extern "C" int RunPython(int argc, char** argv) { return RunPythonImpl(argc, argv); } diff --git a/yql/essentials/udfs/common/python/main_py3/ya.make b/yql/essentials/udfs/common/python/main_py3/ya.make index 7bbb4d0f304..1b2dc5b61bd 100644 --- a/yql/essentials/udfs/common/python/main_py3/ya.make +++ b/yql/essentials/udfs/common/python/main_py3/ya.make @@ -1,5 +1,7 @@ LIBRARY() +ENABLE(YQL_STYLE_CPP) + USE_PYTHON3() ADDINCL( diff --git a/yql/essentials/udfs/common/python/python_udf/python_function_factory.h b/yql/essentials/udfs/common/python/python_udf/python_function_factory.h index 7d96f67a083..6f81817f609 100644 --- a/yql/essentials/udfs/common/python/python_udf/python_function_factory.h +++ b/yql/essentials/udfs/common/python/python_udf/python_function_factory.h @@ -25,15 +25,14 @@ using namespace NPython; ////////////////////////////////////////////////////////////////////////////// // TPythonFunctionFactory ////////////////////////////////////////////////////////////////////////////// -class TPythonFunctionFactory: public TBoxedValue -{ +class TPythonFunctionFactory: public TBoxedValue { public: TPythonFunctionFactory( - const TStringRef& name, - const TStringRef& tag, - const TType* functionType, - ITypeInfoHelper::TPtr&& helper, - const NYql::NUdf::TSourcePosition& pos) + const TStringRef& name, + const TStringRef& tag, + const TType* functionType, + ITypeInfoHelper::TPtr&& helper, + const NYql::NUdf::TSourcePosition& pos) : Ctx_(new TPyContext(helper, tag, pos)) , FunctionName_(name) , 
FunctionType_(functionType) @@ -47,9 +46,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { TPyCastContext::TPtr castCtx = MakeIntrusive<TPyCastContext>(valueBuilder, Ctx_); // for get propper c-compatible null-terminating string @@ -74,7 +72,8 @@ private: SetupCallableSettings(castCtx, function.Get()); } catch (const yexception& e) { UdfTerminate((TStringBuilder() << Ctx_->Pos << "Failed to setup callable settings: " - << e.what()).c_str()); + << e.what()) + .c_str()); } return FromPyCallable(castCtx, FunctionType_, function.Release()); } @@ -92,7 +91,7 @@ private: cflags.cf_flags = PyCF_SOURCE_IS_UTF8; code.ResetSteal(Py_CompileStringFlags( - source.data(), filename.data(), Py_file_input, &cflags)); + source.data(), filename.data(), Py_file_input, &cflags)); } if (code) { diff --git a/yql/essentials/udfs/common/python/python_udf/python_udf.cpp b/yql/essentials/udfs/common/python/python_udf/python_udf.cpp index a14d9d81c32..fd0743962ba 100644 --- a/yql/essentials/udfs/common/python/python_udf/python_udf.cpp +++ b/yql/essentials/udfs/common/python/python_udf/python_udf.cpp @@ -10,9 +10,9 @@ namespace { #if PY_MAJOR_VERSION >= 3 -#define PYTHON_PROGRAMM_NAME L"YQL::Python3" + #define PYTHON_PROGRAMM_NAME L"YQL::Python3" #else -#define PYTHON_PROGRAMM_NAME "YQL::Python2" + #define PYTHON_PROGRAMM_NAME "YQL::Python2" #endif int AddToPythonPath(const TVector<TStringBuf>& pathVals) @@ -20,9 +20,11 @@ int AddToPythonPath(const TVector<TStringBuf>& pathVals) char pathVar[] = "path"; // PySys_{Get,Set}Object take a non-const char* arg TPyObjectPtr sysPath(PySys_GetObject(pathVar), TPyObjectPtr::ADD_REF); - if (!sysPath) return -1; + if (!sysPath) { + return -1; + } - for (const auto& val: pathVals) { + for (const auto& val : pathVals) { TPyObjectPtr pyStr = PyRepr(val.data()); int rc = 
PyList_Append(sysPath.Get(), pyStr.Get()); if (rc != 0) { @@ -45,11 +47,11 @@ void InitArcadiaPythonRuntime() ////////////////////////////////////////////////////////////////////////////// // TPythonModule ////////////////////////////////////////////////////////////////////////////// -class TPythonModule: public IUdfModule -{ +class TPythonModule: public IUdfModule { public: TPythonModule(const TString& resourceName, EPythonFlavor pythonFlavor, bool standalone = true) - : ResourceName_(resourceName), Standalone_(standalone) + : ResourceName_(resourceName) + , Standalone_(standalone) { if (Standalone_) { Py_SetProgramName(PYTHON_PROGRAMM_NAME); @@ -99,15 +101,15 @@ public: PyCleanup(); } - void GetAllFunctions(IFunctionsSink&) const final {} + void GetAllFunctions(IFunctionsSink&) const final { + } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { Y_UNUSED(typeConfig); if (flags & TFlags::TypesOnly) { @@ -137,44 +139,43 @@ private: // TStubModule ////////////////////////////////////////////////////////////////////////////// class TStubModule: public IUdfModule { - void GetAllFunctions(IFunctionsSink&) const final {} + void GetAllFunctions(IFunctionsSink&) const final { + } void BuildFunctionTypeInfo( - const TStringRef& /*name*/, - TType* /*userType*/, - const TStringRef& /*typeConfig*/, - ui32 flags, - IFunctionTypeInfoBuilder& /*builder*/) const final - { + const TStringRef& /*name*/, + TType* /*userType*/, + const TStringRef& /*typeConfig*/, + ui32 flags, + IFunctionTypeInfoBuilder& /*builder*/) const final { Y_DEBUG_ABORT_UNLESS(flags & TFlags::TypesOnly, - "in stub module this function can be called only for types loading"); + "in stub module this function can be called only for types loading"); } - 
void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } }; } // namespace void NKikimr::NUdf::RegisterYqlPythonUdf( - IRegistrator& registrator, - ui32 flags, - TStringBuf moduleName, - TStringBuf resourceName, - EPythonFlavor pythonFlavor) + IRegistrator& registrator, + ui32 flags, + TStringBuf moduleName, + TStringBuf resourceName, + EPythonFlavor pythonFlavor) { if (flags & IRegistrator::TFlags::TypesOnly) { registrator.AddModule(moduleName, new TStubModule); } else { registrator.AddModule( moduleName, - NKikimr::NUdf::GetYqlPythonUdfModule(resourceName, pythonFlavor, true) - ); + NKikimr::NUdf::GetYqlPythonUdfModule(resourceName, pythonFlavor, true)); } } TUniquePtr<NKikimr::NUdf::IUdfModule> NKikimr::NUdf::GetYqlPythonUdfModule( TStringBuf resourceName, NKikimr::NUdf::EPythonFlavor pythonFlavor, - bool standalone -) { + bool standalone) { return new TPythonModule(TString(resourceName), pythonFlavor, standalone); } diff --git a/yql/essentials/udfs/common/python/python_udf/python_udf.h b/yql/essentials/udfs/common/python/python_udf/python_udf.h index 83b3bb86e6f..ff8f6e3b07a 100644 --- a/yql/essentials/udfs/common/python/python_udf/python_udf.h +++ b/yql/essentials/udfs/common/python/python_udf/python_udf.h @@ -6,7 +6,7 @@ namespace NYql { namespace NUdf { inline constexpr char STANDART_STREAM_PROXY_INJECTION_SCRIPT[] = -R"( + R"( # numpy on import may find installed openblas library and load it, # which in turn causes it to start CPUCOUNT threads # with approx. 
40Mb memory reserved for each thread; @@ -66,11 +66,11 @@ enum class EPythonFlavor { }; void RegisterYqlPythonUdf( - IRegistrator& registrator, - ui32 flags, - TStringBuf moduleName, - TStringBuf resourceName, - EPythonFlavor pythonFlavor); + IRegistrator& registrator, + ui32 flags, + TStringBuf moduleName, + TStringBuf resourceName, + EPythonFlavor pythonFlavor); TUniquePtr<IUdfModule> GetYqlPythonUdfModule( TStringBuf resourceName, diff --git a/yql/essentials/udfs/common/python/python_udf/ya.make b/yql/essentials/udfs/common/python/python_udf/ya.make index 9a2090665a2..124f075c904 100644 --- a/yql/essentials/udfs/common/python/python_udf/ya.make +++ b/yql/essentials/udfs/common/python/python_udf/ya.make @@ -2,6 +2,8 @@ PY23_NATIVE_LIBRARY() YQL_ABI_VERSION(2 27 0) +ENABLE(YQL_STYLE_CPP) + SRCS( python_udf.cpp ) diff --git a/yql/essentials/udfs/common/re2/re2_udf.cpp b/yql/essentials/udfs/common/re2/re2_udf.cpp index 2f1f6dbb529..b13d975cf35 100644 --- a/yql/essentials/udfs/common/re2/re2_udf.cpp +++ b/yql/essentials/udfs/common/re2/re2_udf.cpp @@ -15,14 +15,14 @@ using namespace NUdf; namespace { - template <typename T> - T Id(T x) { - return x; - } +template <typename T> +T Id(T x) { + return x; +} - re2::RE2::Options::Encoding EncodingFromBool(bool x) { - return x ? re2::RE2::Options::Encoding::EncodingUTF8 : re2::RE2::Options::Encoding::EncodingLatin1; - } +re2::RE2::Options::Encoding EncodingFromBool(bool x) { + return x ? 
re2::RE2::Options::Encoding::EncodingUTF8 : re2::RE2::Options::Encoding::EncodingLatin1; +} #define OPTIONS_MAP(xx) \ xx(Utf8, 0, bool, true, set_encoding, EncodingFromBool) \ @@ -39,309 +39,310 @@ namespace { xx(WordBoundary, 11, bool, false, set_word_boundary, Id) \ xx(OneLine, 12, bool, false, set_one_line, Id) - ui64 GetFailProbability() { - auto envResult = TryGetEnv("YQL_RE2_REGEXP_PROBABILITY_FAIL"); - if (!envResult) { - return 0; - } - ui64 result; - bool isValid = TryIntFromString<10, ui64>(envResult->data(), envResult->size(), result); - Y_ENSURE(isValid, TStringBuilder() << "Error while parsing YQL_RE2_REGEXP_PROBABILITY_FAIL. Actual value is: " << *envResult); - return result; +ui64 GetFailProbability() { + auto envResult = TryGetEnv("YQL_RE2_REGEXP_PROBABILITY_FAIL"); + if (!envResult) { + return 0; } + ui64 result; + bool isValid = TryIntFromString<10, ui64>(envResult->data(), envResult->size(), result); + Y_ENSURE(isValid, TStringBuilder() << "Error while parsing YQL_RE2_REGEXP_PROBABILITY_FAIL. 
Actual value is: " << *envResult); + return result; +} - bool ShouldFailOnInvalidRegexp(const std::string_view regexp, NYql::TLangVersion currentLangVersion) { - if (currentLangVersion >= NYql::MakeLangVersion(2025, 3)) { - return true; - } - THashType hash = GetStringHash(regexp) % 100; - static ui64 failProbability = GetFailProbability(); - return hash < failProbability; +bool ShouldFailOnInvalidRegexp(const std::string_view regexp, NYql::TLangVersion currentLangVersion) { + if (currentLangVersion >= NYql::MakeLangVersion(2025, 3)) { + return true; } + THashType hash = GetStringHash(regexp) % 100; + static ui64 failProbability = GetFailProbability(); + return hash < failProbability; +} - RE2::Options CreateDefaultOptions(){ - RE2::Options options; +RE2::Options CreateDefaultOptions() { + RE2::Options options; #define FIELD_HANDLE(name, index, type, defVal, setter, conv) options.setter(conv(defVal)); - OPTIONS_MAP(FIELD_HANDLE) + OPTIONS_MAP(FIELD_HANDLE) #undef FIELD_HANDLE - options.set_log_errors(false); - return options; - } - - TString FormatRegexpError(const RE2& Regexp) { - return TStringBuilder() << "Regexp compilation failed. Regexp: \"" << Regexp.pattern() << "\". Original error is: \"" << Regexp.error() << "\""; - } - - enum EOptionsField: ui32 { - OPTIONS_MAP(ENUM_VALUE_GEN) - Count - }; - - struct TOptionsSchema { - TType* StructType; - ui32 Indices[EOptionsField::Count]; - }; + options.set_log_errors(false); + return options; +} - RE2::Options ExtractOptions(std::string_view pattern, TUnboxedValuePod optionsValue, const TOptionsSchema& schema, bool posix) { - RE2::Options options = CreateDefaultOptions(); +TString FormatRegexpError(const RE2& Regexp) { + return TStringBuilder() << "Regexp compilation failed. Regexp: \"" << Regexp.pattern() << "\". Original error is: \"" << Regexp.error() << "\""; +} - options.set_posix_syntax(posix); - bool needUtf8 = (UTF8Detect(pattern) == UTF8); - options.set_encoding( - needUtf8 - ? 
RE2::Options::Encoding::EncodingUTF8 - : RE2::Options::Encoding::EncodingLatin1); - if (optionsValue) { +enum EOptionsField: ui32 { + OPTIONS_MAP(ENUM_VALUE_GEN) + Count +}; + +struct TOptionsSchema { + TType* StructType; + ui32 Indices[EOptionsField::Count]; +}; + +RE2::Options ExtractOptions(std::string_view pattern, TUnboxedValuePod optionsValue, const TOptionsSchema& schema, bool posix) { + RE2::Options options = CreateDefaultOptions(); + + options.set_posix_syntax(posix); + bool needUtf8 = (UTF8Detect(pattern) == UTF8); + options.set_encoding( + needUtf8 + ? RE2::Options::Encoding::EncodingUTF8 + : RE2::Options::Encoding::EncodingLatin1); + if (optionsValue) { #define FIELD_HANDLE(name, index, type, defVal, setter, conv) options.setter(conv(optionsValue.GetElement(schema.Indices[index]).Get<type>())); - OPTIONS_MAP(FIELD_HANDLE) + OPTIONS_MAP(FIELD_HANDLE) #undef FIELD_HANDLE - options.set_log_errors(false); - } - return options; + options.set_log_errors(false); } + return options; +} - struct TRegexpGroups { - TVector<TString> Names; - TVector<ui32> Indexes; +struct TRegexpGroups { + TVector<TString> Names; + TVector<ui32> Indexes; +}; + +class TRe2Udf: public TBoxedValue { +public: + enum EMode { + MATCH, + GREP, + CAPTURE, + REPLACE, + COUNT, + FIND_AND_CONSUME, }; - class TRe2Udf: public TBoxedValue { + template <bool posix> + class TFactory: public TBoxedValue { public: - enum EMode { - MATCH, - GREP, - CAPTURE, - REPLACE, - COUNT, - FIND_AND_CONSUME, - }; - - template <bool posix> - class TFactory: public TBoxedValue { - public: - TFactory( - EMode mode, - const TOptionsSchema& optionsSchema, - TSourcePosition pos, - NYql::TLangVersion currentlangVersion, - const TRegexpGroups& regexpGroups = TRegexpGroups()) - : Mode_(mode) - , OptionsSchema_(optionsSchema) - , Pos_(pos) - , RegexpGroups_(regexpGroups) - , CurrentLangVersion_(currentlangVersion) - { - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* 
args) const override { - return TUnboxedValuePod( - new TRe2Udf( - valueBuilder, - args[0], - RegexpGroups_, - Mode_, - posix, - OptionsSchema_, - Pos_, - CurrentLangVersion_)); - } - - EMode Mode_; - const TOptionsSchema OptionsSchema_; - TSourcePosition Pos_; - const TRegexpGroups RegexpGroups_; - NYql::TLangVersion CurrentLangVersion_; - }; - - static const TStringRef& Name(EMode mode) { - static auto match = TStringRef::Of("Match"); - static auto grep = TStringRef::Of("Grep"); - static auto capture = TStringRef::Of("Capture"); - static auto replace = TStringRef::Of("Replace"); - static auto count = TStringRef::Of("Count"); - static auto findAndconsume = TStringRef::Of("FindAndConsume"); - - switch (mode) { - case EMode::MATCH: - return match; - case EMode::GREP: - return grep; - case EMode::CAPTURE: - return capture; - case EMode::REPLACE: - return replace; - case EMode::COUNT: - return count; - case EMode::FIND_AND_CONSUME: - return findAndconsume; - } - Y_ABORT("Unexpected mode"); - } - - TRe2Udf( - const IValueBuilder*, - const TUnboxedValuePod& runConfig, - const TRegexpGroups regexpGroups, + TFactory( EMode mode, - bool posix, const TOptionsSchema& optionsSchema, TSourcePosition pos, - NYql::TLangVersion currentLangVersion) - : RegexpGroups_(regexpGroups) - , Mode_(mode) - , Captured_() + NYql::TLangVersion currentlangVersion, + const TRegexpGroups& regexpGroups = TRegexpGroups()) + : Mode_(mode) , OptionsSchema_(optionsSchema) , Pos_(pos) - , CurrentLangVersion_(currentLangVersion) { - try { - auto patternValue = runConfig.GetElement(0); - auto optionsValue = runConfig.GetElement(1); - const std::string_view pattern(patternValue.AsStringRef()); + , RegexpGroups_(regexpGroups) + , CurrentLangVersion_(currentlangVersion) + { + } - RE2::Options options = ExtractOptions(pattern, optionsValue, OptionsSchema_, posix); - Regexp_ = std::make_unique<RE2>(StringPiece(pattern.data(), pattern.size()), options); + private: + TUnboxedValue Run( + const IValueBuilder* 
valueBuilder, + const TUnboxedValuePod* args) const override { + return TUnboxedValuePod( + new TRe2Udf( + valueBuilder, + args[0], + RegexpGroups_, + Mode_, + posix, + OptionsSchema_, + Pos_, + CurrentLangVersion_)); + } - if (!Regexp_->ok() && ShouldFailOnInvalidRegexp(pattern, CurrentLangVersion_)) { - throw yexception() << FormatRegexpError(*Regexp_); - } + EMode Mode_; + const TOptionsSchema OptionsSchema_; + TSourcePosition Pos_; + const TRegexpGroups RegexpGroups_; + NYql::TLangVersion CurrentLangVersion_; + }; - if (mode == EMode::CAPTURE) { - Captured_ = std::make_unique<StringPiece[]>(Regexp_->NumberOfCapturingGroups() + 1); - } + static const TStringRef& Name(EMode mode) { + static auto match = TStringRef::Of("Match"); + static auto grep = TStringRef::Of("Grep"); + static auto capture = TStringRef::Of("Capture"); + static auto replace = TStringRef::Of("Replace"); + static auto count = TStringRef::Of("Count"); + static auto findAndconsume = TStringRef::Of("FindAndConsume"); + + switch (mode) { + case EMode::MATCH: + return match; + case EMode::GREP: + return grep; + case EMode::CAPTURE: + return capture; + case EMode::REPLACE: + return replace; + case EMode::COUNT: + return count; + case EMode::FIND_AND_CONSUME: + return findAndconsume; + } + Y_ABORT("Unexpected mode"); + } + + TRe2Udf( + const IValueBuilder*, + const TUnboxedValuePod& runConfig, + const TRegexpGroups regexpGroups, + EMode mode, + bool posix, + const TOptionsSchema& optionsSchema, + TSourcePosition pos, + NYql::TLangVersion currentLangVersion) + : RegexpGroups_(regexpGroups) + , Mode_(mode) + , Captured_() + , OptionsSchema_(optionsSchema) + , Pos_(pos) + , CurrentLangVersion_(currentLangVersion) + { + try { + auto patternValue = runConfig.GetElement(0); + auto optionsValue = runConfig.GetElement(1); + const std::string_view pattern(patternValue.AsStringRef()); + + RE2::Options options = ExtractOptions(pattern, optionsValue, OptionsSchema_, posix); + Regexp_ = 
std::make_unique<RE2>(StringPiece(pattern.data(), pattern.size()), options); + + if (!Regexp_->ok() && ShouldFailOnInvalidRegexp(pattern, CurrentLangVersion_)) { + throw yexception() << FormatRegexpError(*Regexp_); + } - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + if (mode == EMode::CAPTURE) { + Captured_ = std::make_unique<StringPiece[]>(Regexp_->NumberOfCapturingGroups() + 1); } + + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try { - RE2::Anchor anchor = RE2::UNANCHORED; - if (args[0]) { - const std::string_view input(args[0].AsStringRef()); - const StringPiece piece(input.data(), input.size()); - - switch (Mode_) { - case MATCH: - anchor = RE2::ANCHOR_BOTH; - [[fallthrough]]; - case GREP: - return TUnboxedValuePod(Regexp_->Match(piece, 0, input.size(), anchor, nullptr, 0)); - case CAPTURE: { - const int count = Regexp_->NumberOfCapturingGroups() + 1; - TUnboxedValue* items = nullptr; - const auto result = valueBuilder->NewArray(RegexpGroups_.Names.size(), items); - if (Regexp_->Match(piece, 0, input.size(), anchor, Captured_.get(), count)) { - for (int i = 0; i < count; ++i) { - if (!Captured_[i].empty()) { - items[RegexpGroups_.Indexes[i]] = valueBuilder->SubString(args[0], std::distance(piece.begin(), Captured_[i].begin()), Captured_[i].size()); - } +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + RE2::Anchor anchor = RE2::UNANCHORED; + if (args[0]) { + const std::string_view input(args[0].AsStringRef()); + const StringPiece piece(input.data(), input.size()); + + switch (Mode_) { + case MATCH: + anchor = RE2::ANCHOR_BOTH; + [[fallthrough]]; + case GREP: + return TUnboxedValuePod(Regexp_->Match(piece, 0, input.size(), anchor, nullptr, 0)); + case 
CAPTURE: { + const int count = Regexp_->NumberOfCapturingGroups() + 1; + TUnboxedValue* items = nullptr; + const auto result = valueBuilder->NewArray(RegexpGroups_.Names.size(), items); + if (Regexp_->Match(piece, 0, input.size(), anchor, Captured_.get(), count)) { + for (int i = 0; i < count; ++i) { + if (!Captured_[i].empty()) { + items[RegexpGroups_.Indexes[i]] = valueBuilder->SubString(args[0], std::distance(piece.begin(), Captured_[i].begin()), Captured_[i].size()); } - } else { - return BuildEmptyStruct(valueBuilder); - } - return result; - } - case REPLACE: { - const std::string_view rewriteRef(args[1].AsStringRef()); - const StringPiece rewrite(rewriteRef.data(), rewriteRef.size()); - TString rewriteError; - if (!Regexp_->CheckRewriteString(rewrite, &rewriteError)) { - UdfTerminate((TStringBuilder() << Pos_ << " [rewrite error] " << rewriteError).c_str()); } - std::string result(input); - RE2::GlobalReplace(&result, *Regexp_, rewrite); - return input == result ? TUnboxedValue(args[0]) : valueBuilder->NewString(result); + } else { + return BuildEmptyStruct(valueBuilder); } - case COUNT: { - std::string inputHolder(input); - const ui32 result = RE2::GlobalReplace(&inputHolder, *Regexp_, ""); - return TUnboxedValuePod(result); + return result; + } + case REPLACE: { + const std::string_view rewriteRef(args[1].AsStringRef()); + const StringPiece rewrite(rewriteRef.data(), rewriteRef.size()); + TString rewriteError; + if (!Regexp_->CheckRewriteString(rewrite, &rewriteError)) { + UdfTerminate((TStringBuilder() << Pos_ << " [rewrite error] " << rewriteError).c_str()); } - case FIND_AND_CONSUME: { - StringPiece text(piece); - std::vector<TUnboxedValue> matches; - for (StringPiece w; text.begin() < text.end() && RE2::FindAndConsume(&text, *Regexp_, &w);) { - if (w.size() == 0 && !text.empty()) { - text.remove_prefix(1); - } - matches.emplace_back(valueBuilder->SubString(args[0], std::distance(piece.begin(), w.begin()), w.size())); + std::string result(input); + 
RE2::GlobalReplace(&result, *Regexp_, rewrite); + return input == result ? TUnboxedValue(args[0]) : valueBuilder->NewString(result); + } + case COUNT: { + std::string inputHolder(input); + const ui32 result = RE2::GlobalReplace(&inputHolder, *Regexp_, ""); + return TUnboxedValuePod(result); + } + case FIND_AND_CONSUME: { + StringPiece text(piece); + std::vector<TUnboxedValue> matches; + for (StringPiece w; text.begin() < text.end() && RE2::FindAndConsume(&text, *Regexp_, &w);) { + if (w.size() == 0 && !text.empty()) { + text.remove_prefix(1); } - return valueBuilder->NewList(matches.data(), matches.size()); + matches.emplace_back(valueBuilder->SubString(args[0], std::distance(piece.begin(), w.begin()), w.size())); } + return valueBuilder->NewList(matches.data(), matches.size()); } - Y_ABORT("Unexpected mode"); - } else { - switch (Mode_) { - case MATCH: - case GREP: - return TUnboxedValuePod(false); - case CAPTURE: - return BuildEmptyStruct(valueBuilder); - case REPLACE: - return TUnboxedValuePod(); - case COUNT: - return TUnboxedValuePod::Zero(); - case FIND_AND_CONSUME: - return valueBuilder->NewEmptyList(); - } - Y_ABORT("Unexpected mode"); } - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - - std::unique_ptr<RE2> Regexp_; - const TRegexpGroups RegexpGroups_; - EMode Mode_; - std::unique_ptr<StringPiece[]> Captured_; - const TOptionsSchema OptionsSchema_; - TSourcePosition Pos_; - NYql::TLangVersion CurrentLangVersion_; - - TUnboxedValue BuildEmptyStruct(const IValueBuilder* valueBuilder) const { - TUnboxedValue* items = nullptr; - return valueBuilder->NewArray(RegexpGroups_.Names.size(), items); + Y_ABORT("Unexpected mode"); + } else { + switch (Mode_) { + case MATCH: + case GREP: + return TUnboxedValuePod(false); + case CAPTURE: + return BuildEmptyStruct(valueBuilder); + case REPLACE: + return TUnboxedValuePod(); + case COUNT: + return TUnboxedValuePod::Zero(); + case FIND_AND_CONSUME: + return 
valueBuilder->NewEmptyList(); + } + Y_ABORT("Unexpected mode"); } - }; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + } - SIMPLE_STRICT_UDF(TEscape, char*(char*)) { - const std::string_view input(args[0].AsStringRef()); - const auto& result = RE2::QuoteMeta(StringPiece(input.data(), input.size())); - return input == result ? TUnboxedValue(args[0]) : valueBuilder->NewString(result); + std::unique_ptr<RE2> Regexp_; + const TRegexpGroups RegexpGroups_; + EMode Mode_; + std::unique_ptr<StringPiece[]> Captured_; + const TOptionsSchema OptionsSchema_; + TSourcePosition Pos_; + NYql::TLangVersion CurrentLangVersion_; + + TUnboxedValue BuildEmptyStruct(const IValueBuilder* valueBuilder) const { + TUnboxedValue* items = nullptr; + return valueBuilder->NewArray(RegexpGroups_.Names.size(), items); } +}; - TOptionsSchema MakeOptionsSchema(::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder) { - TOptionsSchema ret; - auto structBuilder = builder.Struct(EOptionsField::Count); +SIMPLE_STRICT_UDF(TEscape, char*(char*)) { + const std::string_view input(args[0].AsStringRef()); + const auto& result = RE2::QuoteMeta(StringPiece(input.data(), input.size())); + return input == result ? TUnboxedValue(args[0]) : valueBuilder->NewString(result); +} + +TOptionsSchema MakeOptionsSchema(::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder) { + TOptionsSchema ret; + auto structBuilder = builder.Struct(EOptionsField::Count); #define FIELD_HANDLE(name, index, type, ...) 
structBuilder->AddField<type>(TStringRef::Of(#name), &ret.Indices[index]); - OPTIONS_MAP(FIELD_HANDLE) + OPTIONS_MAP(FIELD_HANDLE) #undef FIELD_HANDLE - ret.StructType = structBuilder->Build(); - return ret; - } + ret.StructType = structBuilder->Build(); + return ret; +} - class TOptions: public TBoxedValue { - private: - const TOptionsSchema Schema_; +class TOptions: public TBoxedValue { +private: + const TOptionsSchema Schema_; - public: - TOptions(const TOptionsSchema& schema) - : Schema_(schema) - { - } +public: + TOptions(const TOptionsSchema& schema) + : Schema_(schema) + { + } - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - TUnboxedValue* items = nullptr; - const auto result = valueBuilder->NewArray(EOptionsField::Count, items); + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + TUnboxedValue* items = nullptr; + const auto result = valueBuilder->NewArray(EOptionsField::Count, items); #define FIELD_HANDLE(name, index, type, defVal, ...) 
\ { \ auto structIndex = Schema_.Indices[index]; \ @@ -352,294 +353,295 @@ namespace { } \ } - OPTIONS_MAP(FIELD_HANDLE) + OPTIONS_MAP(FIELD_HANDLE) #undef FIELD_HANDLE - return result; - } + return result; + } - static const ::NKikimr::NUdf::TStringRef& Name() { - static auto name = ::NKikimr::NUdf::TStringRef::Of("Options"); - return name; - } + static const ::NKikimr::NUdf::TStringRef& Name() { + static auto name = ::NKikimr::NUdf::TStringRef::Of("Options"); + return name; + } - static bool DeclareSignature( - const ::NKikimr::NUdf::TStringRef& name, - ::NKikimr::NUdf::TType* userType, - ::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - builder.IsStrict(); + static bool DeclareSignature( + const ::NKikimr::NUdf::TStringRef& name, + ::NKikimr::NUdf::TType* userType, + ::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + builder.IsStrict(); - auto argsBuilder = builder.Args(); + auto argsBuilder = builder.Args(); #define FIELD_HANDLE(name, index, type, ...) 
argsBuilder->Add<TOptional<type>>().Name(TStringRef::Of(#name)); - OPTIONS_MAP(FIELD_HANDLE) + OPTIONS_MAP(FIELD_HANDLE) #undef FIELD_HANDLE - auto optionsSchema = MakeOptionsSchema(builder); - builder.Returns(optionsSchema.StructType); - builder.OptionalArgs(EOptionsField::Count); - if (!typesOnly) { - builder.Implementation(new TOptions(optionsSchema)); - } - - return true; - } else { - return false; + auto optionsSchema = MakeOptionsSchema(builder); + builder.Returns(optionsSchema.StructType); + builder.OptionalArgs(EOptionsField::Count); + if (!typesOnly) { + builder.Implementation(new TOptions(optionsSchema)); } - } - }; - template <bool posix> - class TIsValidRegexp: public TBoxedValue { - public: - TIsValidRegexp(const TOptionsSchema optionsSchema) - : OptionsSchema_(std::move(optionsSchema)) - { + return true; + } else { + return false; } + } +}; + +template <bool posix> +class TIsValidRegexp: public TBoxedValue { +public: + TIsValidRegexp(const TOptionsSchema optionsSchema) + : OptionsSchema_(std::move(optionsSchema)) + { + } - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - Y_UNUSED(valueBuilder); - if (!args[0]) { - return TUnboxedValuePod(false); - } - RE2::Options options = ExtractOptions(args[0].AsStringRef(), args[1], OptionsSchema_, posix); - RE2 regexp(args[0].AsStringRef(), options); - return TUnboxedValuePod(regexp.ok()); + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + Y_UNUSED(valueBuilder); + if (!args[0]) { + return TUnboxedValuePod(false); } + RE2::Options options = ExtractOptions(args[0].AsStringRef(), args[1], OptionsSchema_, posix); + RE2 regexp(args[0].AsStringRef(), options); + return TUnboxedValuePod(regexp.ok()); + } - static const ::NKikimr::NUdf::TStringRef& Name() { - static auto name = ::NKikimr::NUdf::TStringRef::Of("IsValidRegexp"); - return name; - } + static const ::NKikimr::NUdf::TStringRef& Name() { + static 
auto name = ::NKikimr::NUdf::TStringRef::Of("IsValidRegexp"); + return name; + } - static bool DeclareSignature( - const ::NKikimr::NUdf::TStringRef& name, - ::NKikimr::NUdf::TType* userType, - ::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - TOptionsSchema optionsSchema = MakeOptionsSchema(builder); - auto optOptionsStructType = builder.Optional()->Item(optionsSchema.StructType).Build(); - builder.Args() - ->Add(builder.Optional()->Item(builder.SimpleType<char*>())) - .Add(optOptionsStructType) - .Done() - .Returns(builder.SimpleType<bool>()); - - builder.OptionalArgs(1); - if (!typesOnly) { - builder.Implementation(new TIsValidRegexp(std::move(optionsSchema))); - } - builder.IsStrict(); - return true; - } else { - return false; + static bool DeclareSignature( + const ::NKikimr::NUdf::TStringRef& name, + ::NKikimr::NUdf::TType* userType, + ::NKikimr::NUdf::IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + TOptionsSchema optionsSchema = MakeOptionsSchema(builder); + auto optOptionsStructType = builder.Optional()->Item(optionsSchema.StructType).Build(); + builder.Args() + ->Add(builder.Optional()->Item(builder.SimpleType<char*>())) + .Add(optOptionsStructType) + .Done() + .Returns(builder.SimpleType<bool>()); + + builder.OptionalArgs(1); + if (!typesOnly) { + builder.Implementation(new TIsValidRegexp(std::move(optionsSchema))); } + builder.IsStrict(); + return true; + } else { + return false; } + } - private: - const TOptionsSchema OptionsSchema_; - }; - - SIMPLE_UDF_WITH_OPTIONAL_ARGS(TPatternFromLike, char*(char*, TOptional<char*>), 1) { - const std::string_view input(args[0].AsStringRef()); - const bool hasEscape = bool(args[1]); - char escape = 0; - if (hasEscape) { - const std::string_view escapeRef(args[1].AsStringRef()); - if (escapeRef.size() != 1U) { - UdfTerminate((TStringBuilder() << GetPos() << " Escape should be single 
character").c_str()); - } - escape = escapeRef.front(); +private: + const TOptionsSchema OptionsSchema_; +}; + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TPatternFromLike, char*(char*, TOptional<char*>), 1) { + const std::string_view input(args[0].AsStringRef()); + const bool hasEscape = bool(args[1]); + char escape = 0; + if (hasEscape) { + const std::string_view escapeRef(args[1].AsStringRef()); + if (escapeRef.size() != 1U) { + UdfTerminate((TStringBuilder() << GetPos() << " Escape should be single character").c_str()); } - const TString escaped(RE2::QuoteMeta(StringPiece(input.data(), input.size()))); - - TStringBuilder result; - result << "(?s)"; - bool slash = false; - bool escapeOn = false; - - for (const char& c : escaped) { - switch (c) { - case '\\': - if (slash) { - result << "\\\\"; - } - slash = !slash; - break; - case '%': - if (escapeOn) { - result << "\\%"; - escapeOn = false; - } else { - result << ".*"; - } - slash = false; - break; - case '_': + escape = escapeRef.front(); + } + const TString escaped(RE2::QuoteMeta(StringPiece(input.data(), input.size()))); + + TStringBuilder result; + result << "(?s)"; + bool slash = false; + bool escapeOn = false; + + for (const char& c : escaped) { + switch (c) { + case '\\': + if (slash) { + result << "\\\\"; + } + slash = !slash; + break; + case '%': + if (escapeOn) { + result << "\\%"; + escapeOn = false; + } else { + result << ".*"; + } + slash = false; + break; + case '_': + if (escapeOn) { + result << "\\_"; + escapeOn = false; + } else { + result << '.'; + } + slash = false; + break; + default: + if (hasEscape && c == escape) { if (escapeOn) { - result << "\\_"; - escapeOn = false; - } else { - result << '.'; + result << RE2::QuoteMeta(StringPiece(&c, 1)); } - slash = false; - break; - default: - if (hasEscape && c == escape) { - if (escapeOn) { - result << RE2::QuoteMeta(StringPiece(&c, 1)); - } - escapeOn = !escapeOn; - } else { - if (slash) - result << '\\'; - result << c; - escapeOn = false; + escapeOn = 
!escapeOn; + } else { + if (slash) { + result << '\\'; } - slash = false; - break; - } + result << c; + escapeOn = false; + } + slash = false; + break; } - return valueBuilder->NewString(result); - } - - TType* MakeRunConfigType(IFunctionTypeInfoBuilder& builder, TType* optOptionsStructType) { - return builder.Tuple()->Add<char*>().Add(optOptionsStructType).Build(); } + return valueBuilder->NewString(result); +} - template <bool posix> - class TRe2Module: public IUdfModule { - public: - TStringRef Name() const { - return posix ? TStringRef::Of("Re2posix") : TStringRef::Of("Re2"); - } - - void CleanupOnTerminate() const final { - } - - void GetAllFunctions(IFunctionsSink& sink) const final { - sink.Add(TRe2Udf::Name(TRe2Udf::EMode::MATCH)); - sink.Add(TRe2Udf::Name(TRe2Udf::EMode::GREP)); - sink.Add(TRe2Udf::Name(TRe2Udf::EMode::CAPTURE))->SetTypeAwareness(); - sink.Add(TRe2Udf::Name(TRe2Udf::EMode::REPLACE)); - sink.Add(TRe2Udf::Name(TRe2Udf::EMode::COUNT)); - sink.Add(TRe2Udf::Name(TRe2Udf::EMode::FIND_AND_CONSUME)); - sink.Add(TEscape::Name()); - sink.Add(TPatternFromLike::Name()); - sink.Add(TOptions::Name()); - sink.Add(TIsValidRegexp<posix>::Name()); - } +TType* MakeRunConfigType(IFunctionTypeInfoBuilder& builder, TType* optOptionsStructType) { + return builder.Tuple()->Add<char*>().Add(optOptionsStructType).Build(); +} - void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final try { - Y_UNUSED(userType); - TOptionsSchema optionsSchema = MakeOptionsSchema(builder); - auto optOptionsStructType = builder.Optional()->Item(optionsSchema.StructType).Build(); +template <bool posix> +class TRe2Module: public IUdfModule { +public: + TStringRef Name() const { + return posix ? 
TStringRef::Of("Re2posix") : TStringRef::Of("Re2"); + } - bool typesOnly = (flags & TFlags::TypesOnly); - bool isMatch = (TRe2Udf::Name(TRe2Udf::EMode::MATCH) == name); - bool isGrep = (TRe2Udf::Name(TRe2Udf::EMode::GREP) == name); - bool isCapture = (TRe2Udf::Name(TRe2Udf::EMode::CAPTURE) == name); - bool isReplace = (TRe2Udf::Name(TRe2Udf::EMode::REPLACE) == name); - bool isCount = (TRe2Udf::Name(TRe2Udf::EMode::COUNT) == name); - bool isFindAndConsume = (TRe2Udf::Name(TRe2Udf::FIND_AND_CONSUME) == name); + void CleanupOnTerminate() const final { + } - if (isMatch || isGrep) { - builder.SimpleSignature<bool(TOptional<char*>)>() - .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::MATCH)); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::GREP)); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::CAPTURE))->SetTypeAwareness(); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::REPLACE)); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::COUNT)); + sink.Add(TRe2Udf::Name(TRe2Udf::EMode::FIND_AND_CONSUME)); + sink.Add(TEscape::Name()); + sink.Add(TPatternFromLike::Name()); + sink.Add(TOptions::Name()); + sink.Add(TIsValidRegexp<posix>::Name()); + } - if (!typesOnly) { - const auto mode = isMatch ? 
TRe2Udf::EMode::MATCH : TRe2Udf::EMode::GREP; - builder.Implementation(new TRe2Udf::TFactory<posix>(mode, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); - } - } else if (isCapture) { - TRegexpGroups groups; - auto optionalStringType = builder.Optional()->Item<char*>().Build(); - auto structBuilder = builder.Struct(); - RE2::Options options = CreateDefaultOptions(); - RE2 regexp(StringPiece(typeConfig.Data(), typeConfig.Size()), options); - if (!regexp.ok()) { - builder.SetError(FormatRegexpError(regexp)); - return; - } - const auto& groupNames = regexp.CapturingGroupNames(); - int groupCount = regexp.NumberOfCapturingGroups(); - if (groupCount >= 0) { - std::unordered_set<std::string_view> groupNamesSet; - int unnamedCount = 0; - ++groupCount; - groups.Indexes.resize(groupCount); - groups.Names.resize(groupCount); - for (int i = 0; i < groupCount; ++i) { - TString fieldName; - auto it = groupNames.find(i); - if (it != groupNames.end()) { - if (!groupNamesSet.insert(it->second).second) { - builder.SetError( - TStringBuilder() << "Regexp contains duplicate capturing group name: " << it->second); - return; - } - fieldName = it->second; - } else { - fieldName = "_" + ToString(unnamedCount); - ++unnamedCount; + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final try { + Y_UNUSED(userType); + TOptionsSchema optionsSchema = MakeOptionsSchema(builder); + auto optOptionsStructType = builder.Optional()->Item(optionsSchema.StructType).Build(); + + bool typesOnly = (flags & TFlags::TypesOnly); + bool isMatch = (TRe2Udf::Name(TRe2Udf::EMode::MATCH) == name); + bool isGrep = (TRe2Udf::Name(TRe2Udf::EMode::GREP) == name); + bool isCapture = (TRe2Udf::Name(TRe2Udf::EMode::CAPTURE) == name); + bool isReplace = (TRe2Udf::Name(TRe2Udf::EMode::REPLACE) == name); + bool isCount = (TRe2Udf::Name(TRe2Udf::EMode::COUNT) == name); + bool 
isFindAndConsume = (TRe2Udf::Name(TRe2Udf::FIND_AND_CONSUME) == name); + + if (isMatch || isGrep) { + builder.SimpleSignature<bool(TOptional<char*>)>() + .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + + if (!typesOnly) { + const auto mode = isMatch ? TRe2Udf::EMode::MATCH : TRe2Udf::EMode::GREP; + builder.Implementation(new TRe2Udf::TFactory<posix>(mode, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); + } + } else if (isCapture) { + TRegexpGroups groups; + auto optionalStringType = builder.Optional()->Item<char*>().Build(); + auto structBuilder = builder.Struct(); + RE2::Options options = CreateDefaultOptions(); + RE2 regexp(StringPiece(typeConfig.Data(), typeConfig.Size()), options); + if (!regexp.ok()) { + builder.SetError(FormatRegexpError(regexp)); + return; + } + const auto& groupNames = regexp.CapturingGroupNames(); + int groupCount = regexp.NumberOfCapturingGroups(); + if (groupCount >= 0) { + std::unordered_set<std::string_view> groupNamesSet; + int unnamedCount = 0; + ++groupCount; + groups.Indexes.resize(groupCount); + groups.Names.resize(groupCount); + for (int i = 0; i < groupCount; ++i) { + TString fieldName; + auto it = groupNames.find(i); + if (it != groupNames.end()) { + if (!groupNamesSet.insert(it->second).second) { + builder.SetError( + TStringBuilder() << "Regexp contains duplicate capturing group name: " << it->second); + return; } - groups.Names[i] = fieldName; - structBuilder->AddField(fieldName, optionalStringType, &groups.Indexes[i]); - } - builder.Args(1)->Add(optionalStringType).Done().Returns(structBuilder->Build()).RunConfig(MakeRunConfigType(builder, optOptionsStructType)); - - if (!typesOnly) { - builder.Implementation( - new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::CAPTURE, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer(), groups)); + fieldName = it->second; + } else { + fieldName = "_" + ToString(unnamedCount); + ++unnamedCount; } - - } else { - 
Y_ENSURE(regexp.ok()); - builder.SetError("Regexp contains no capturing groups"); + groups.Names[i] = fieldName; + structBuilder->AddField(fieldName, optionalStringType, &groups.Indexes[i]); } - } else if (isReplace) { - builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() - .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + builder.Args(1)->Add(optionalStringType).Done().Returns(structBuilder->Build()).RunConfig(MakeRunConfigType(builder, optOptionsStructType)); if (!typesOnly) { - builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::REPLACE, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); + builder.Implementation( + new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::CAPTURE, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer(), groups)); } - } else if (isCount) { - builder.SimpleSignature<ui32(TOptional<char*>)>() - .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); - if (!typesOnly) { - builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::COUNT, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); - } - } else if (isFindAndConsume) { - builder.SimpleSignature<TListType<char*>(TOptional<char*>)>() - .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); - if (!typesOnly) { - builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::FIND_AND_CONSUME, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); - } - } else if (!( - TEscape::DeclareSignature(name, userType, builder, typesOnly) || - TPatternFromLike::DeclareSignature(name, userType, builder, typesOnly) || - TOptions::DeclareSignature(name, userType, builder, typesOnly) || - TIsValidRegexp<posix>::DeclareSignature(name, userType, builder, typesOnly))) { - builder.SetError( - TStringBuilder() << "Unknown function name: " << TString(name)); + } else { + Y_ENSURE(regexp.ok()); + builder.SetError("Regexp contains no capturing groups"); } - } 
catch (const std::exception& e) { - builder.SetError(CurrentExceptionMessage()); + } else if (isReplace) { + builder.SimpleSignature<TOptional<char*>(TOptional<char*>, char*)>() + .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + + if (!typesOnly) { + builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::REPLACE, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); + } + } else if (isCount) { + builder.SimpleSignature<ui32(TOptional<char*>)>() + .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + + if (!typesOnly) { + builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::COUNT, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); + } + } else if (isFindAndConsume) { + builder.SimpleSignature<TListType<char*>(TOptional<char*>)>() + .RunConfig(MakeRunConfigType(builder, optOptionsStructType)); + if (!typesOnly) { + builder.Implementation(new TRe2Udf::TFactory<posix>(TRe2Udf::EMode::FIND_AND_CONSUME, optionsSchema, builder.GetSourcePosition(), builder.GetCurrentLangVer())); + } + } else if (!( + TEscape::DeclareSignature(name, userType, builder, typesOnly) || + TPatternFromLike::DeclareSignature(name, userType, builder, typesOnly) || + TOptions::DeclareSignature(name, userType, builder, typesOnly) || + TIsValidRegexp<posix>::DeclareSignature(name, userType, builder, typesOnly))) { + builder.SetError( + TStringBuilder() << "Unknown function name: " << TString(name)); } - }; + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); + } +}; -} +} // namespace REGISTER_MODULES( TRe2Module<false>, diff --git a/yql/essentials/udfs/common/re2/ya.make b/yql/essentials/udfs/common/re2/ya.make index ca8be7370ba..895f75bd6cc 100644 --- a/yql/essentials/udfs/common/re2/ya.make +++ b/yql/essentials/udfs/common/re2/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(re2_udf) 43 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( re2_udf.cpp diff --git 
a/yql/essentials/udfs/common/set/set_udf.cpp b/yql/essentials/udfs/common/set/set_udf.cpp index a7851ec8633..86e9dae50d6 100644 --- a/yql/essentials/udfs/common/set/set_udf.cpp +++ b/yql/essentials/udfs/common/set/set_udf.cpp @@ -18,7 +18,8 @@ private: protected: TSetBase(THash hash, TEquals equals) : Set_(1, hash, equals) - {} + { + } void Init(const TUnboxedValuePod& value, ui32 maxSize) { MaxSize_ = maxSize ? maxSize : std::numeric_limits<ui32>::max(); @@ -89,8 +90,7 @@ public: template <EDataSlot Slot> class TSetData - : public TSetBase<TUnboxedValueHash<Slot>, TUnboxedValueEquals<Slot>> -{ + : public TSetBase<TUnboxedValueHash<Slot>, TUnboxedValueEquals<Slot>> { public: using TBase = TSetBase<TUnboxedValueHash<Slot>, TUnboxedValueEquals<Slot>>; @@ -130,27 +130,26 @@ struct TGenericEquals { }; class TSetGeneric - : public TSetBase<TGenericHash, TGenericEquals> -{ + : public TSetBase<TGenericHash, TGenericEquals> { public: using TBase = TSetBase<TGenericHash, TGenericEquals>; TSetGeneric(const TUnboxedValuePod& value, ui32 maxSize, - IHash::TPtr hash, IEquate::TPtr equate) + IHash::TPtr hash, IEquate::TPtr equate) : TBase(TGenericHash{hash}, TGenericEquals{equate}) { TBase::Init(value, maxSize); } TSetGeneric(const TSetGeneric& left, const TSetGeneric& right, - IHash::TPtr hash, IEquate::TPtr equate) + IHash::TPtr hash, IEquate::TPtr equate) : TBase(TGenericHash{hash}, TGenericEquals{equate}) { TBase::Merge(left, right); } TSetGeneric(const TUnboxedValuePod& serialized, - IHash::TPtr hash, IEquate::TPtr equate) + IHash::TPtr hash, IEquate::TPtr equate) : TBase(TGenericHash{hash}, TGenericEquals{equate}) { TBase::Deserialize(serialized); @@ -158,14 +157,13 @@ public: }; extern const char SetResourceNameGeneric[] = "Set.SetResource.Generic"; -class TSetResource: - public TBoxedResource<TSetGeneric, SetResourceNameGeneric> -{ +class TSetResource: public TBoxedResource<TSetGeneric, SetResourceNameGeneric> { public: template <typename... 
Args> inline TSetResource(Args&&... args) : TBoxedResource(std::forward<Args>(args)...) - {} + { + } }; template <EDataSlot Slot> @@ -182,7 +180,6 @@ TSetResource* GetSetResource(const TUnboxedValuePod& arg) { return static_cast<TSetResource*>(arg.AsBoxed().Get()); } - template <EDataSlot Slot> class TSetCreateData: public TBoxedValue { private: @@ -201,7 +198,8 @@ public: TSetCreate(IHash::TPtr hash, IEquate::TPtr equate) : Hash_(hash) , Equate_(equate) - {} + { + } private: IHash::TPtr Hash_; @@ -279,7 +277,8 @@ public: TSetDeserialize(IHash::TPtr hash, IEquate::TPtr equate) : Hash_(hash) , Equate_(equate) - {} + { + } private: IHash::TPtr Hash_; @@ -308,7 +307,8 @@ public: TSetMerge(IHash::TPtr hash, IEquate::TPtr equate) : Hash_(hash) , Equate_(equate) - {} + { + } private: IHash::TPtr Hash_; @@ -330,26 +330,24 @@ private: } }; - -#define MAKE_RESOURCE(slot, ...) \ -extern const char SetResourceName##slot[] = "Set.SetResource."#slot; \ -template <> \ -class TSetResourceData<EDataSlot::slot>: \ - public TBoxedResource<TSetData<EDataSlot::slot>, SetResourceName##slot> \ -{ \ -public: \ - template <typename... Args> \ - inline TSetResourceData(Args&&... args) \ - : TBoxedResource(std::forward<Args>(args)...) \ - {} \ -}; +#define MAKE_RESOURCE(slot, ...) \ + extern const char SetResourceName##slot[] = "Set.SetResource." #slot; \ + template <> \ + class TSetResourceData<EDataSlot::slot>: public TBoxedResource<TSetData<EDataSlot::slot>, SetResourceName##slot> { \ + public: \ + template <typename... Args> \ + inline TSetResourceData(Args&&... args) \ + : TBoxedResource(std::forward<Args>(args)...) \ + { \ + } \ + }; UDF_TYPE_ID_MAP(MAKE_RESOURCE) -#define MAKE_IMPL(operation, slot) \ -case EDataSlot::slot: \ - builder.Implementation(new operation<EDataSlot::slot>); \ - break; +#define MAKE_IMPL(operation, slot) \ + case EDataSlot::slot: \ + builder.Implementation(new operation<EDataSlot::slot>); \ + break; #define MAKE_CREATE(slot, ...) 
MAKE_IMPL(TSetCreateData, slot) #define MAKE_ADD_VALUE(slot, ...) MAKE_IMPL(TSetAddValueData, slot) @@ -359,11 +357,10 @@ case EDataSlot::slot: \ #define MAKE_MERGE(slot, ...) MAKE_IMPL(TSetMergeData, slot) #define MAKE_GET_RESULT(slot, ...) MAKE_IMPL(TSetGetResultData, slot) -#define MAKE_TYPE(slot, ...) \ -case EDataSlot::slot: \ - setType = builder.Resource(SetResourceName##slot); \ - break; - +#define MAKE_TYPE(slot, ...) \ + case EDataSlot::slot: \ + setType = builder.Resource(SetResourceName##slot); \ + break; static const auto CreateName = TStringRef::Of("Create"); static const auto AddValueName = TStringRef::Of("AddValue"); @@ -397,8 +394,7 @@ public: TType* userType, const TStringRef& typeConfig, ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + IFunctionTypeInfoBuilder& builder) const final { Y_UNUSED(typeConfig); try { @@ -449,7 +445,7 @@ public: setType = builder.Resource(SetResourceNameGeneric); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_TYPE) + UDF_TYPE_ID_MAP(MAKE_TYPE) } } @@ -463,7 +459,7 @@ public: builder.Implementation(new TSetCreate(hash, equate)); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_CREATE) + UDF_TYPE_ID_MAP(MAKE_CREATE) } } } @@ -479,7 +475,7 @@ public: builder.Implementation(new TSetAddValue); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_ADD_VALUE) + UDF_TYPE_ID_MAP(MAKE_ADD_VALUE) } } } @@ -495,7 +491,7 @@ public: builder.Implementation(new TSetWasChanged); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_WAS_CHANGED) + UDF_TYPE_ID_MAP(MAKE_WAS_CHANGED) } } } @@ -511,7 +507,7 @@ public: builder.Implementation(new TSetMerge(hash, equate)); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_MERGE) + UDF_TYPE_ID_MAP(MAKE_MERGE) } } } @@ -527,7 +523,7 @@ public: builder.Implementation(new TSetSerialize); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_SERIALIZE) + UDF_TYPE_ID_MAP(MAKE_SERIALIZE) } } } @@ -541,7 +537,7 @@ public: builder.Implementation(new TSetDeserialize(hash, equate)); } else { switch 
(*slot) { - UDF_TYPE_ID_MAP(MAKE_DESERIALIZE) + UDF_TYPE_ID_MAP(MAKE_DESERIALIZE) } } } @@ -559,7 +555,7 @@ public: builder.Implementation(new TSetGetResult); } else { switch (*slot) { - UDF_TYPE_ID_MAP(MAKE_GET_RESULT) + UDF_TYPE_ID_MAP(MAKE_GET_RESULT) } } } diff --git a/yql/essentials/udfs/common/set/ya.make b/yql/essentials/udfs/common/set/ya.make index 9c235a3084a..21a51dba968 100644 --- a/yql/essentials/udfs/common/set/ya.make +++ b/yql/essentials/udfs/common/set/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(set_udf) 28 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( set_udf.cpp diff --git a/yql/essentials/udfs/common/stat/stat_udf_ut.cpp b/yql/essentials/udfs/common/stat/stat_udf_ut.cpp index 2a033ff31e4..cb4c03ffa22 100644 --- a/yql/essentials/udfs/common/stat/stat_udf_ut.cpp +++ b/yql/essentials/udfs/common/stat/stat_udf_ut.cpp @@ -11,353 +11,349 @@ namespace NYql { using namespace NKikimr::NMiniKQL; - namespace NUdf { - extern NUdf::TUniquePtr<NUdf::IUdfModule> CreateStatModule(); +namespace NUdf { +extern NUdf::TUniquePtr<NUdf::IUdfModule> CreateStatModule(); +} // namespace NUdf + +Y_UNIT_TEST_SUITE(TUDFStatTest) { +Y_UNIT_TEST(SimplePercentile) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(10000000); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + + TRuntimeNode pgmDigest; + { + auto param1 = pgmBuilder.NewDataLiteral<double>(0.0); + TVector<TRuntimeNode> params = {param1}; + 
pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); } - Y_UNIT_TEST_SUITE(TUDFStatTest) { - Y_UNIT_TEST(SimplePercentile) { - auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); - auto randomProvider = CreateDeterministicRandomProvider(1); - auto timeProvider = CreateDeterministicTimeProvider(10000000); - NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); - mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); - TScopedAlloc alloc(__LOCATION__); - TTypeEnvironment env(alloc); - TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); - auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); - auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); - auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); - - TRuntimeNode pgmDigest; - { - auto param1 = pgmBuilder.NewDataLiteral<double>(0.0); - TVector<TRuntimeNode> params = {param1}; - pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); - } - - for (int n = 1; n < 10; n += 1) { - auto param2 = pgmBuilder.NewDataLiteral((double)n); - TVector<TRuntimeNode> params = {pgmDigest, param2}; - pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); - } - - TRuntimeNode pgmReturn; - { - auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); - TVector<TRuntimeNode> params = {pgmDigest, param2}; - pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); - } - - TExploringNodeVisitor explorer; - explorer.Walk(pgmReturn.GetNode(), env); - TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), - NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); - auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); - auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); - auto value = graph->GetValue(); - UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 8.5, 0.001); - } + for (int n = 1; n 
< 10; n += 1) { + auto param2 = pgmBuilder.NewDataLiteral((double)n); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); + } + + TRuntimeNode pgmReturn; + { + auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); + } + + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 8.5, 0.001); +} - Y_UNIT_TEST(SimplePercentileSpecific) { - auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); - auto randomProvider = CreateDeterministicRandomProvider(1); - auto timeProvider = CreateDeterministicTimeProvider(1); - NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); - mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); - TScopedAlloc alloc(__LOCATION__); - TTypeEnvironment env(alloc); - TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); - auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); - auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); - auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); - - TRuntimeNode pgmDigest; - { - auto param1 = pgmBuilder.NewDataLiteral<double>(75.0); - TVector<TRuntimeNode> params = {param1}; - pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); - } - - TVector<double> vals = {800, 20, 150}; - for (auto val : vals) { - auto param2 = pgmBuilder.NewDataLiteral(val); - 
TVector<TRuntimeNode> params = {pgmDigest, param2}; - pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); - } - - TRuntimeNode pgmReturn; - { - auto param2 = pgmBuilder.NewDataLiteral<double>(0.5); - TVector<TRuntimeNode> params = {pgmDigest, param2}; - pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); - } - - TExploringNodeVisitor explorer; - explorer.Walk(pgmReturn.GetNode(), env); - TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), - NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); - auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); - auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); - auto value = graph->GetValue(); - Cerr << value.Get<double>() << Endl; - //~ UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 9.0, 0.001); +Y_UNIT_TEST(SimplePercentileSpecific) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + + TRuntimeNode pgmDigest; + { + auto param1 = pgmBuilder.NewDataLiteral<double>(75.0); + TVector<TRuntimeNode> params = {param1}; + pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); + } + + TVector<double> vals = {800, 20, 150}; + for (auto val : vals) { + auto param2 = pgmBuilder.NewDataLiteral(val); + TVector<TRuntimeNode> params = {pgmDigest, 
param2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); + } + + TRuntimeNode pgmReturn; + { + auto param2 = pgmBuilder.NewDataLiteral<double>(0.5); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); + } + + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + Cerr << value.Get<double>() << Endl; + //~ UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 9.0, 0.001); +} + +Y_UNIT_TEST(SerializedPercentile) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + auto udfTDigest_Serialize = pgmBuilder.Udf("Stat.TDigest_Serialize"); + auto udfTDigest_Deserialize = pgmBuilder.Udf("Stat.TDigest_Deserialize"); + + TRuntimeNode pgmDigest; + { + auto param1 = pgmBuilder.NewDataLiteral<double>(0.0); + TVector<TRuntimeNode> params = {param1}; + pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); + } + + for (int n = 1; n < 10; n += 1) { + auto param2 = 
pgmBuilder.NewDataLiteral((double)n); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); + } + + TRuntimeNode pgmSerializedData; + { + TVector<TRuntimeNode> params = {pgmDigest}; + pgmSerializedData = pgmBuilder.Apply(udfTDigest_Serialize, params); + } + + TRuntimeNode pgmDigest2; + { + TVector<TRuntimeNode> params = {pgmSerializedData}; + pgmDigest2 = pgmBuilder.Apply(udfTDigest_Deserialize, params); + } + + TRuntimeNode pgmReturn; + { + auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); + TVector<TRuntimeNode> params = {pgmDigest2, param2}; + pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); + } + + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 8.5, 0.001); +} + +Y_UNIT_TEST(SerializedMergedPercentile) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + auto udfTDigest_Serialize = 
pgmBuilder.Udf("Stat.TDigest_Serialize"); + auto udfTDigest_Deserialize = pgmBuilder.Udf("Stat.TDigest_Deserialize"); + auto udfTDigest_Merge = pgmBuilder.Udf("Stat.TDigest_Merge"); + + TVector<TRuntimeNode> pgmSerializedDataVector; + + for (int i = 0; i < 100; i += 10) { + TRuntimeNode pgmDigest; + { + auto param1 = pgmBuilder.NewDataLiteral(double(i) / 10); + TVector<TRuntimeNode> params = {param1}; + pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); } - Y_UNIT_TEST(SerializedPercentile) { - auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); - auto randomProvider = CreateDeterministicRandomProvider(1); - auto timeProvider = CreateDeterministicTimeProvider(1); - NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); - mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); - TScopedAlloc alloc(__LOCATION__); - TTypeEnvironment env(alloc); - TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); - auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); - auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); - auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); - auto udfTDigest_Serialize = pgmBuilder.Udf("Stat.TDigest_Serialize"); - auto udfTDigest_Deserialize = pgmBuilder.Udf("Stat.TDigest_Deserialize"); - - TRuntimeNode pgmDigest; - { - auto param1 = pgmBuilder.NewDataLiteral<double>(0.0); - TVector<TRuntimeNode> params = {param1}; - pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); - } - - for (int n = 1; n < 10; n += 1) { - auto param2 = pgmBuilder.NewDataLiteral((double)n); - TVector<TRuntimeNode> params = {pgmDigest, param2}; - pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); - } - - TRuntimeNode pgmSerializedData; - { - TVector<TRuntimeNode> params = {pgmDigest}; - pgmSerializedData = pgmBuilder.Apply(udfTDigest_Serialize, params); - } - - TRuntimeNode pgmDigest2; - { - TVector<TRuntimeNode> params = {pgmSerializedData}; 
- pgmDigest2 = pgmBuilder.Apply(udfTDigest_Deserialize, params); - } - - TRuntimeNode pgmReturn; - { - auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); - TVector<TRuntimeNode> params = {pgmDigest2, param2}; - pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); - } - - TExploringNodeVisitor explorer; - explorer.Walk(pgmReturn.GetNode(), env); - TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), - NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); - auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); - auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); - auto value = graph->GetValue(); - UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 8.5, 0.001); + for (int n = i + 1; n < i + 10; n += 1) { + auto param2 = pgmBuilder.NewDataLiteral(double(n) / 10); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); } - Y_UNIT_TEST(SerializedMergedPercentile) { - auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); - auto randomProvider = CreateDeterministicRandomProvider(1); - auto timeProvider = CreateDeterministicTimeProvider(1); - NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); - mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); - TScopedAlloc alloc(__LOCATION__); - TTypeEnvironment env(alloc); - TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); - auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); - auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); - auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); - auto udfTDigest_Serialize = pgmBuilder.Udf("Stat.TDigest_Serialize"); - auto udfTDigest_Deserialize = pgmBuilder.Udf("Stat.TDigest_Deserialize"); - auto udfTDigest_Merge = pgmBuilder.Udf("Stat.TDigest_Merge"); - - 
TVector<TRuntimeNode> pgmSerializedDataVector; - - for (int i = 0; i < 100; i += 10) { - TRuntimeNode pgmDigest; - { - auto param1 = pgmBuilder.NewDataLiteral(double(i) / 10); - TVector<TRuntimeNode> params = {param1}; - pgmDigest = pgmBuilder.Apply(udfTDigest_Create, params); - } - - for (int n = i + 1; n < i + 10; n += 1) { - auto param2 = pgmBuilder.NewDataLiteral(double(n) / 10); - TVector<TRuntimeNode> params = {pgmDigest, param2}; - pgmDigest = pgmBuilder.Apply(udfTDigest_AddValue, params); - } - - TRuntimeNode pgmSerializedData; - { - TVector<TRuntimeNode> params = {pgmDigest}; - pgmSerializedData = pgmBuilder.Apply(udfTDigest_Serialize, params); - } - pgmSerializedDataVector.push_back(pgmSerializedData); - } - - TRuntimeNode pgmDigest; - for (size_t i = 0; i < pgmSerializedDataVector.size(); ++i) { - TRuntimeNode pgmDigest2; - { - TVector<TRuntimeNode> params = {pgmSerializedDataVector[i]}; - pgmDigest2 = pgmBuilder.Apply(udfTDigest_Deserialize, params); - } - if (!pgmDigest) { - pgmDigest = pgmDigest2; - } else { - TVector<TRuntimeNode> params = {pgmDigest, pgmDigest2}; - pgmDigest = pgmBuilder.Apply(udfTDigest_Merge, params); - } - } - - TRuntimeNode pgmReturn; - { - auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); - TVector<TRuntimeNode> params = {pgmDigest, param2}; - pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); - } - - TExploringNodeVisitor explorer; - explorer.Walk(pgmReturn.GetNode(), env); - TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), - NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); - auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); - auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); - auto value = graph->GetValue(); - UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 8.95, 0.001); + TRuntimeNode pgmSerializedData; + { + TVector<TRuntimeNode> params = {pgmDigest}; + 
pgmSerializedData = pgmBuilder.Apply(udfTDigest_Serialize, params); } + pgmSerializedDataVector.push_back(pgmSerializedData); + } - static double GetParetoRandomNumber(double a) { - return 1 / pow(RandomNumber<double>(), double(1) / a); + TRuntimeNode pgmDigest; + for (size_t i = 0; i < pgmSerializedDataVector.size(); ++i) { + TRuntimeNode pgmDigest2; + { + TVector<TRuntimeNode> params = {pgmSerializedDataVector[i]}; + pgmDigest2 = pgmBuilder.Apply(udfTDigest_Deserialize, params); + } + if (!pgmDigest) { + pgmDigest = pgmDigest2; + } else { + TVector<TRuntimeNode> params = {pgmDigest, pgmDigest2}; + pgmDigest = pgmBuilder.Apply(udfTDigest_Merge, params); } + } + + TRuntimeNode pgmReturn; + { + auto param2 = pgmBuilder.NewDataLiteral<double>(0.9); + TVector<TRuntimeNode> params = {pgmDigest, param2}; + pgmReturn = pgmBuilder.Apply(udfTDigest_GetPercentile, params); + } - Y_UNIT_TEST(BigPercentile) { - auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); - auto randomProvider = CreateDeterministicRandomProvider(1); - auto timeProvider = CreateDeterministicTimeProvider(1); - NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); - mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); - TScopedAlloc alloc(__LOCATION__); - TTypeEnvironment env(alloc); - TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); - auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); - auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); - auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); - const size_t NUMBERS = 100000; - const double PERCENTILE = 0.99; - const double THRESHOLD = 0.0004; // at q=0.99 threshold is 4*delta*0.0099 - TVector<double> randomNumbers1; - TVector<TRuntimeNode> randomNumbers2; - randomNumbers1.reserve(NUMBERS); - randomNumbers2.reserve(NUMBERS); - for (size_t n = 0; n < NUMBERS; ++n) { - double randomNumber = GetParetoRandomNumber(10); - 
randomNumbers1.push_back(randomNumber); - randomNumbers2.push_back(pgmBuilder.NewDataLiteral(randomNumber)); - } - TRuntimeNode bigList = pgmBuilder.AsList(randomNumbers2); - auto pgmDigest = - pgmBuilder.Fold1(bigList, - [&](TRuntimeNode item) { + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT_DOUBLES_EQUAL(value.Get<double>(), 8.95, 0.001); +} + +static double GetParetoRandomNumber(double a) { + return 1 / pow(RandomNumber<double>(), double(1) / a); +} + +Y_UNIT_TEST(BigPercentile) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + const size_t NUMBERS = 100000; + const double PERCENTILE = 0.99; + const double THRESHOLD = 0.0004; // at q=0.99 threshold is 4*delta*0.0099 + TVector<double> randomNumbers1; + TVector<TRuntimeNode> randomNumbers2; + randomNumbers1.reserve(NUMBERS); + randomNumbers2.reserve(NUMBERS); + for (size_t n = 0; n < NUMBERS; ++n) { + double randomNumber = GetParetoRandomNumber(10); + 
randomNumbers1.push_back(randomNumber); + randomNumbers2.push_back(pgmBuilder.NewDataLiteral(randomNumber)); + } + TRuntimeNode bigList = pgmBuilder.AsList(randomNumbers2); + auto pgmDigest = + pgmBuilder.Fold1(bigList, + [&](TRuntimeNode item) { std::array<TRuntimeNode, 1> args; args[0] = item; - return pgmBuilder.Apply(udfTDigest_Create, args); - }, - [&](TRuntimeNode item, TRuntimeNode state) { + return pgmBuilder.Apply(udfTDigest_Create, args); }, + [&](TRuntimeNode item, TRuntimeNode state) { std::array<TRuntimeNode, 2> args; args[0] = state; args[1] = item; - return pgmBuilder.Apply(udfTDigest_AddValue, args); - }); - TRuntimeNode pgmReturn = - pgmBuilder.Map(pgmDigest, [&](TRuntimeNode item) { - auto param2 = pgmBuilder.NewDataLiteral(PERCENTILE); - std::array<TRuntimeNode, 2> args; - args[0] = item; - args[1] = param2; - return pgmBuilder.Apply(udfTDigest_GetPercentile, args); - }); - - TExploringNodeVisitor explorer; - explorer.Walk(pgmReturn.GetNode(), env); - TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), - NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); - auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); - auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); - auto value = graph->GetValue(); - UNIT_ASSERT(value); - double digestValue = value.Get<double>(); - std::sort(randomNumbers1.begin(), randomNumbers1.end()); - // This gives us a 1-based index of the last value <= digestValue - auto index = std::upper_bound(randomNumbers1.begin(), randomNumbers1.end(), digestValue) - randomNumbers1.begin(); - // See https://en.wikipedia.org/wiki/Percentile#First_Variant.2C - double p = (index - 0.5) / double(randomNumbers1.size()); - UNIT_ASSERT_DOUBLES_EQUAL(p, PERCENTILE, THRESHOLD); - } + return pgmBuilder.Apply(udfTDigest_AddValue, args); }); + TRuntimeNode pgmReturn = + pgmBuilder.Map(pgmDigest, [&](TRuntimeNode item) 
{ + auto param2 = pgmBuilder.NewDataLiteral(PERCENTILE); + std::array<TRuntimeNode, 2> args; + args[0] = item; + args[1] = param2; + return pgmBuilder.Apply(udfTDigest_GetPercentile, args); + }); + + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT(value); + double digestValue = value.Get<double>(); + std::sort(randomNumbers1.begin(), randomNumbers1.end()); + // This gives us a 1-based index of the last value <= digestValue + auto index = std::upper_bound(randomNumbers1.begin(), randomNumbers1.end(), digestValue) - randomNumbers1.begin(); + // See https://en.wikipedia.org/wiki/Percentile#First_Variant.2C + double p = (index - 0.5) / double(randomNumbers1.size()); + UNIT_ASSERT_DOUBLES_EQUAL(p, PERCENTILE, THRESHOLD); +} - Y_UNIT_TEST(CentroidPrecision) { - auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); - auto randomProvider = CreateDeterministicRandomProvider(1); - auto timeProvider = CreateDeterministicTimeProvider(1); - NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); - mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); - TScopedAlloc alloc(__LOCATION__); - TTypeEnvironment env(alloc); - TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); - auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); - auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); - auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); - const size_t NUMBERS = 100000; - const double PERCENTILE = 0.25; - const double minValue = 1.0; - const double 
maxValue = 100.0; - const double majorityValue = 50.0; - TVector<TRuntimeNode> numbers; - numbers.reserve(NUMBERS); - for (size_t n = 0; n < NUMBERS - 2; ++n) { - numbers.push_back(pgmBuilder.NewDataLiteral(majorityValue)); - } - numbers.push_back(pgmBuilder.NewDataLiteral(minValue)); - numbers.push_back(pgmBuilder.NewDataLiteral(maxValue)); - TRuntimeNode bigList = pgmBuilder.AsList(numbers); - auto pgmDigest = - pgmBuilder.Fold1(bigList, - [&](TRuntimeNode item) { +Y_UNIT_TEST(CentroidPrecision) { + auto mutableFunctionRegistry = CreateFunctionRegistry(CreateBuiltinRegistry())->Clone(); + auto randomProvider = CreateDeterministicRandomProvider(1); + auto timeProvider = CreateDeterministicTimeProvider(1); + NUdf::TUniquePtr<NUdf::IUdfModule> module = NUdf::CreateStatModule(); + mutableFunctionRegistry->AddModule("", "Stat", std::move(module)); + TScopedAlloc alloc(__LOCATION__); + TTypeEnvironment env(alloc); + TProgramBuilder pgmBuilder(env, *mutableFunctionRegistry); + auto udfTDigest_Create = pgmBuilder.Udf("Stat.TDigest_Create"); + auto udfTDigest_AddValue = pgmBuilder.Udf("Stat.TDigest_AddValue"); + auto udfTDigest_GetPercentile = pgmBuilder.Udf("Stat.TDigest_GetPercentile"); + const size_t NUMBERS = 100000; + const double PERCENTILE = 0.25; + const double minValue = 1.0; + const double maxValue = 100.0; + const double majorityValue = 50.0; + TVector<TRuntimeNode> numbers; + numbers.reserve(NUMBERS); + for (size_t n = 0; n < NUMBERS - 2; ++n) { + numbers.push_back(pgmBuilder.NewDataLiteral(majorityValue)); + } + numbers.push_back(pgmBuilder.NewDataLiteral(minValue)); + numbers.push_back(pgmBuilder.NewDataLiteral(maxValue)); + TRuntimeNode bigList = pgmBuilder.AsList(numbers); + auto pgmDigest = + pgmBuilder.Fold1(bigList, + [&](TRuntimeNode item) { std::array<TRuntimeNode, 1> args; args[0] = item; - return pgmBuilder.Apply(udfTDigest_Create, args); - }, - [&](TRuntimeNode item, TRuntimeNode state) { + return pgmBuilder.Apply(udfTDigest_Create, args); }, + 
[&](TRuntimeNode item, TRuntimeNode state) { std::array<TRuntimeNode, 2> args; args[0] = state; args[1] = item; - return pgmBuilder.Apply(udfTDigest_AddValue, args); - }); - TRuntimeNode pgmReturn = - pgmBuilder.Map(pgmDigest, [&](TRuntimeNode item) { - auto param2 = pgmBuilder.NewDataLiteral(PERCENTILE); - std::array<TRuntimeNode, 2> args; - args[0] = item; - args[1] = param2; - return pgmBuilder.Apply(udfTDigest_GetPercentile, args); - }); - - TExploringNodeVisitor explorer; - explorer.Walk(pgmReturn.GetNode(), env); - TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), - NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); - auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); - auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); - auto value = graph->GetValue(); - UNIT_ASSERT(value); - double digestValue = value.Get<double>(); - UNIT_ASSERT_EQUAL(digestValue, majorityValue); - } - } + return pgmBuilder.Apply(udfTDigest_AddValue, args); }); + TRuntimeNode pgmReturn = + pgmBuilder.Map(pgmDigest, [&](TRuntimeNode item) { + auto param2 = pgmBuilder.NewDataLiteral(PERCENTILE); + std::array<TRuntimeNode, 2> args; + args[0] = item; + args[1] = param2; + return pgmBuilder.Apply(udfTDigest_GetPercentile, args); + }); + + TExploringNodeVisitor explorer; + explorer.Walk(pgmReturn.GetNode(), env); + TComputationPatternOpts opts(alloc.Ref(), env, GetBuiltinFactory(), mutableFunctionRegistry.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + auto pattern = MakeComputationPattern(explorer, pgmReturn, {}, opts); + auto graph = pattern->Clone(opts.ToComputationOptions(*randomProvider, *timeProvider)); + auto value = graph->GetValue(); + UNIT_ASSERT(value); + double digestValue = value.Get<double>(); + UNIT_ASSERT_EQUAL(digestValue, majorityValue); } +} // Y_UNIT_TEST_SUITE(TUDFStatTest) +} // namespace 
NYql diff --git a/yql/essentials/udfs/common/stat/static/stat_udf.h b/yql/essentials/udfs/common/stat/static/stat_udf.h index 3ab6dbb20b4..e1d637132b7 100644 --- a/yql/essentials/udfs/common/stat/static/stat_udf.h +++ b/yql/essentials/udfs/common/stat/static/stat_udf.h @@ -9,57 +9,57 @@ using namespace NYql; using namespace NUdf; namespace { - extern const char DigestResourceName[] = "Stat.TDigestResource"; +extern const char DigestResourceName[] = "Stat.TDigestResource"; - typedef TBoxedResource<TDigest, DigestResourceName> TDigestResource; - typedef TRefCountedPtr<TDigestResource> TDigestResourcePtr; +typedef TBoxedResource<TDigest, DigestResourceName> TDigestResource; +typedef TRefCountedPtr<TDigestResource> TDigestResourcePtr; - SIMPLE_UDF_WITH_OPTIONAL_ARGS(TTDigest_Create, TResource<DigestResourceName>(double, TOptional<double>, TOptional<double>), 2) { - Y_UNUSED(valueBuilder); - const double delta = args[1].GetOrDefault<double>(0.01); - const double K = args[2].GetOrDefault<double>(25.0); - if (delta == 0 || K / delta < 1) { - UdfTerminate((TStringBuilder() << GetPos() << " Invalid combination of delta/K values").c_str()); - } - - return TUnboxedValuePod(new TDigestResource(delta, K, args[0].Get<double>(), true)); +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TTDigest_Create, TResource<DigestResourceName>(double, TOptional<double>, TOptional<double>), 2) { + Y_UNUSED(valueBuilder); + const double delta = args[1].GetOrDefault<double>(0.01); + const double K = args[2].GetOrDefault<double>(25.0); + if (delta == 0 || K / delta < 1) { + UdfTerminate((TStringBuilder() << GetPos() << " Invalid combination of delta/K values").c_str()); } - SIMPLE_STRICT_UDF(TTDigest_AddValue, TResource<DigestResourceName>(TResource<DigestResourceName>, double)) { - Y_UNUSED(valueBuilder); - TDigestResource::Validate(args[0]); - TDigestResource* resource = static_cast<TDigestResource*>(args[0].AsBoxed().Get()); - resource->Get()->AddValue(args[1].Get<double>()); - return 
TUnboxedValuePod(resource); - } + return TUnboxedValuePod(new TDigestResource(delta, K, args[0].Get<double>(), true)); +} - SIMPLE_STRICT_UDF(TTDigest_GetPercentile, double(TResource<DigestResourceName>, double)) { - Y_UNUSED(valueBuilder); - TDigestResource::Validate(args[0]); - return TUnboxedValuePod(static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get()->GetPercentile(args[1].Get<double>())); - } +SIMPLE_STRICT_UDF(TTDigest_AddValue, TResource<DigestResourceName>(TResource<DigestResourceName>, double)) { + Y_UNUSED(valueBuilder); + TDigestResource::Validate(args[0]); + TDigestResource* resource = static_cast<TDigestResource*>(args[0].AsBoxed().Get()); + resource->Get()->AddValue(args[1].Get<double>()); + return TUnboxedValuePod(resource); +} - SIMPLE_STRICT_UDF(TTDigest_Serialize, char*(TResource<DigestResourceName>)) { - TDigestResource::Validate(args[0]); - return valueBuilder->NewString(static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get()->Serialize()); - } +SIMPLE_STRICT_UDF(TTDigest_GetPercentile, double(TResource<DigestResourceName>, double)) { + Y_UNUSED(valueBuilder); + TDigestResource::Validate(args[0]); + return TUnboxedValuePod(static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get()->GetPercentile(args[1].Get<double>())); +} - SIMPLE_UDF(TTDigest_Deserialize, TResource<DigestResourceName>(char*)) { - Y_UNUSED(valueBuilder); - return TUnboxedValuePod(new TDigestResource(TString(args[0].AsStringRef()), true)); - } +SIMPLE_STRICT_UDF(TTDigest_Serialize, char*(TResource<DigestResourceName>)) { + TDigestResource::Validate(args[0]); + return valueBuilder->NewString(static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get()->Serialize()); +} - SIMPLE_STRICT_UDF(TTDigest_Merge, TResource<DigestResourceName>(TResource<DigestResourceName>, TResource<DigestResourceName>)) { - Y_UNUSED(valueBuilder); - TDigestResource::Validate(args[0]); - TDigestResource::Validate(args[1]); - return TUnboxedValuePod(new TDigestResource( - 
static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get(), - static_cast<TDigestResource*>(args[1].AsBoxed().Get())->Get(), - true)); - } +SIMPLE_UDF(TTDigest_Deserialize, TResource<DigestResourceName>(char*)) { + Y_UNUSED(valueBuilder); + return TUnboxedValuePod(new TDigestResource(TString(args[0].AsStringRef()), true)); +} - /* +SIMPLE_STRICT_UDF(TTDigest_Merge, TResource<DigestResourceName>(TResource<DigestResourceName>, TResource<DigestResourceName>)) { + Y_UNUSED(valueBuilder); + TDigestResource::Validate(args[0]); + TDigestResource::Validate(args[1]); + return TUnboxedValuePod(new TDigestResource( + static_cast<TDigestResource*>(args[0].AsBoxed().Get())->Get(), + static_cast<TDigestResource*>(args[1].AsBoxed().Get())->Get(), + true)); +} + +/* * * TODO: Memory tracking * @@ -67,12 +67,12 @@ namespace { * */ - SIMPLE_MODULE(TStatModule, - TTDigest_Create, - TTDigest_AddValue, - TTDigest_GetPercentile, - TTDigest_Serialize, - TTDigest_Deserialize, - TTDigest_Merge) +SIMPLE_MODULE(TStatModule, + TTDigest_Create, + TTDigest_AddValue, + TTDigest_GetPercentile, + TTDigest_Serialize, + TTDigest_Deserialize, + TTDigest_Merge) -} +} // namespace diff --git a/yql/essentials/udfs/common/stat/static/static_udf.cpp b/yql/essentials/udfs/common/stat/static/static_udf.cpp index 3cb1d88a1c8..571d3ca8d2a 100644 --- a/yql/essentials/udfs/common/stat/static/static_udf.cpp +++ b/yql/essentials/udfs/common/stat/static/static_udf.cpp @@ -1,10 +1,10 @@ #include "stat_udf.h" namespace NYql { - namespace NUdf { - NUdf::TUniquePtr<NUdf::IUdfModule> CreateStatModule() { - return new TStatModule(); - } - - } +namespace NUdf { +NUdf::TUniquePtr<NUdf::IUdfModule> CreateStatModule() { + return new TStatModule(); } + +} // namespace NUdf +} // namespace NYql diff --git a/yql/essentials/udfs/common/stat/static/ya.make b/yql/essentials/udfs/common/stat/static/ya.make index f3cc7842eea..892e8a34e93 100644 --- a/yql/essentials/udfs/common/stat/static/ya.make +++ 
b/yql/essentials/udfs/common/stat/static/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( static_udf.cpp stat_udf.h diff --git a/yql/essentials/udfs/common/stat/ut/ya.make b/yql/essentials/udfs/common/stat/ut/ya.make index fdce51cbdfb..1ab9bbb1432 100644 --- a/yql/essentials/udfs/common/stat/ut/ya.make +++ b/yql/essentials/udfs/common/stat/ut/ya.make @@ -1,5 +1,7 @@ UNITTEST_FOR(yql/essentials/udfs/common/stat/static) +ENABLE(YQL_STYLE_CPP) + SRCS( ../stat_udf_ut.cpp ) diff --git a/yql/essentials/udfs/common/stat/ya.make b/yql/essentials/udfs/common/stat/ya.make index d1e622b4447..8d7535044aa 100644 --- a/yql/essentials/udfs/common/stat/ya.make +++ b/yql/essentials/udfs/common/stat/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(stat_udf) 28 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( stat_udf.cpp diff --git a/yql/essentials/udfs/common/streaming/streaming_udf.cpp b/yql/essentials/udfs/common/streaming/streaming_udf.cpp index 63fa55e9e80..2229e8ff9e7 100644 --- a/yql/essentials/udfs/common/streaming/streaming_udf.cpp +++ b/yql/essentials/udfs/common/streaming/streaming_udf.cpp @@ -23,807 +23,809 @@ using namespace NKikimr; using namespace NUdf; namespace { - // Cyclic Read-Write buffer. - // Not thread safe, synchronization between reader and writer threads - // should be managed externally. - class TCyclicRWBuffer { - public: - TCyclicRWBuffer(size_t capacity) - : Buffer_(capacity) - , Finished_(false) - , DataStart_(0) - , DataSize_(0) - { - Buffer_.Resize(capacity); - } - - bool IsFinished() const { - return Finished_; - } - - void Finish() { - Finished_ = true; - } +// Cyclic Read-Write buffer. +// Not thread safe, synchronization between reader and writer threads +// should be managed externally. 
+class TCyclicRWBuffer { +public: + TCyclicRWBuffer(size_t capacity) + : Buffer_(capacity) + , Finished_(false) + , DataStart_(0) + , DataSize_(0) + { + Buffer_.Resize(capacity); + } - bool HasData() const { - return DataSize_ > 0; - } + bool IsFinished() const { + return Finished_; + } + + void Finish() { + Finished_ = true; + } + + bool HasData() const { + return DataSize_ > 0; + } + + size_t GetDataSize() const { + return DataSize_; + } + + void GetData(const char*& ptr, size_t& len) const { + size_t readSize = GetDataRegionSize(DataStart_, DataSize_); + ptr = Buffer_.Data() + DataStart_; + len = readSize; + } + + void CommitRead(size_t len) { + Y_DEBUG_ABORT_UNLESS(len <= GetDataRegionSize(DataStart_, DataSize_)); + + DataStart_ = GetBufferPosition(DataStart_ + len); + DataSize_ -= len; + } + + bool CanWrite() const { + return WriteSize() > 0; + } + + size_t WriteSize() const { + return Buffer_.Size() - DataSize_; + } + + size_t Write(const char*& ptr, size_t& len) { + if (!CanWrite()) { + return 0; + } + + size_t bytesWritten = 0; + size_t bytesToWrite = std::min(len, WriteSize()); + while (bytesToWrite > 0) { + size_t writeStart = GetWriteStart(); + size_t writeSize = GetDataRegionSize(writeStart, bytesToWrite); + + MemCopy(Data(writeStart), ptr, writeSize); + + DataSize_ += writeSize; + bytesWritten += writeSize; + bytesToWrite -= writeSize; + + ptr += writeSize; + len -= writeSize; + } + + return bytesWritten; + } + + size_t Write(IZeroCopyInput& input) { + const void* ptr; + size_t dataLen = input.Next(&ptr, WriteSize()); + const char* dataPtr = reinterpret_cast<const char*>(ptr); + return Write(dataPtr, dataLen); + } + +private: + size_t GetBufferPosition(size_t pos) const { + return pos % Buffer_.Size(); + } + + size_t GetDataRegionSize(size_t start, size_t size) const { + Y_DEBUG_ABORT_UNLESS(start < Buffer_.Size()); + + return std::min(size, Buffer_.Size() - start); + } + + size_t GetWriteStart() const { + return GetBufferPosition(DataStart_ + 
DataSize_); + } + + char* Data(size_t pos) { + Y_DEBUG_ABORT_UNLESS(pos < Buffer_.Size()); + + return (Buffer_.Data() + pos); + } + +private: + TBuffer Buffer_; + + bool Finished_; + + size_t DataStart_; + size_t DataSize_; +}; + +struct TStreamingParams { +public: + const size_t DefaultProcessPollLatencyMs = 5 * 1000; // 5 seconds + const size_t DefaultInputBufferSizeBytes = 4 * 1024 * 1024; // 4MB + const size_t DefaultOutputBufferSizeBytes = 16 * 1024 * 1024; // 16MB + const char* DefaultInputDelimiter = "\n"; + const char* DefaultOutputDelimiter = "\n"; + +public: + TUnboxedValue InputStreamObj; + TString CommandLine; + TUnboxedValue ArgumentsList; + TString InputDelimiter; + TString OutputDelimiter; + size_t InputBufferSizeBytes; + size_t OutputBufferSizeBytes; + size_t ProcessPollLatencyMs; + + TStreamingParams() + : InputDelimiter(DefaultInputDelimiter) + , OutputDelimiter(DefaultOutputDelimiter) + , InputBufferSizeBytes(DefaultInputBufferSizeBytes) + , OutputBufferSizeBytes(DefaultOutputBufferSizeBytes) + , ProcessPollLatencyMs(DefaultProcessPollLatencyMs) + { + } +}; + +struct TThreadSyncData { + TMutex BuffersMutex; + TCondVar InputBufferCanReadCond; + TCondVar MainThreadHasWorkCond; + TCondVar OutputBufferCanWriteCond; +}; + +class TStringListBufferedInputStream: public IInputStream { +public: + TStringListBufferedInputStream(TUnboxedValue rowsStream, const TString& delimiter, size_t bufferSizeBytes, + TThreadSyncData& syncData, TSourcePosition pos) + : RowsStream_(rowsStream) + , Delimiter_(delimiter) + , SyncData_(syncData) + , Pos_(pos) + , DelimiterMatcher_(delimiter) + , DelimiterInput_(delimiter) + , Buffer_(bufferSizeBytes) + , CurReadMode_(ReadMode::Start) + { + } + + TStringListBufferedInputStream(const TStringListBufferedInputStream&) = delete; + TStringListBufferedInputStream& operator=(const TStringListBufferedInputStream&) = delete; + + TCyclicRWBuffer& GetBuffer() { + return Buffer_; + } + + // Fetch input from upstream list iterator to the 
buffer. + // Called from Main thread. + EFetchStatus FetchInput() { + with_lock (SyncData_.BuffersMutex) { + Y_DEBUG_ABORT_UNLESS(!Buffer_.HasData()); + Y_DEBUG_ABORT_UNLESS(Buffer_.CanWrite()); + + bool receivedYield = false; + + while (Buffer_.CanWrite() && CurReadMode_ != ReadMode::Done && !receivedYield) { + switch (CurReadMode_) { + case ReadMode::Start: { + auto status = ReadNextString(); + if (status == EFetchStatus::Yield) { + receivedYield = true; + break; + } - size_t GetDataSize() const { - return DataSize_; - } + CurReadMode_ = (status == EFetchStatus::Ok) + ? ReadMode::String + : ReadMode::Done; - void GetData(const char*& ptr, size_t& len) const { - size_t readSize = GetDataRegionSize(DataStart_, DataSize_); - ptr = Buffer_.Data() + DataStart_; - len = readSize; - } + break; + } - void CommitRead(size_t len) { - Y_DEBUG_ABORT_UNLESS(len <= GetDataRegionSize(DataStart_, DataSize_)); + case ReadMode::String: + if (CurStringInput_.Exhausted()) { + DelimiterInput_.Reset(Delimiter_.data(), Delimiter_.size()); + CurReadMode_ = ReadMode::Delimiter; + break; + } - DataStart_ = GetBufferPosition(DataStart_ + len); - DataSize_ -= len; - } + Buffer_.Write(CurStringInput_); + break; - bool CanWrite() const { - return WriteSize() > 0; - } + case ReadMode::Delimiter: + if (DelimiterInput_.Exhausted()) { + CurReadMode_ = ReadMode::Start; + break; + } - size_t WriteSize() const { - return Buffer_.Size() - DataSize_; - } + Buffer_.Write(DelimiterInput_); + break; - size_t Write(const char*& ptr, size_t& len) { - if (!CanWrite()) { - return 0; + default: + break; + } } - size_t bytesWritten = 0; - size_t bytesToWrite = std::min(len, WriteSize()); - while (bytesToWrite > 0) { - size_t writeStart = GetWriteStart(); - size_t writeSize = GetDataRegionSize(writeStart, bytesToWrite); - - MemCopy(Data(writeStart), ptr, writeSize); - - DataSize_ += writeSize; - bytesWritten += writeSize; - bytesToWrite -= writeSize; - - ptr += writeSize; - len -= writeSize; + if (CurReadMode_ 
== ReadMode::Done) { + Buffer_.Finish(); } - return bytesWritten; + SyncData_.InputBufferCanReadCond.Signal(); + return receivedYield ? EFetchStatus::Yield : EFetchStatus::Ok; } + } - size_t Write(IZeroCopyInput& input) { - const void* ptr; - size_t dataLen = input.Next(&ptr, WriteSize()); - const char* dataPtr = reinterpret_cast<const char*>(ptr); - return Write(dataPtr, dataLen); - } - - private: - size_t GetBufferPosition(size_t pos) const { - return pos % Buffer_.Size(); - } - - size_t GetDataRegionSize(size_t start, size_t size) const { - Y_DEBUG_ABORT_UNLESS(start < Buffer_.Size()); +private: + // Read data to pass into the child process input pipe. + // Called from Communicate thread. + size_t DoRead(void* buf, size_t len) override { + try { + with_lock (SyncData_.BuffersMutex) { + while (!Buffer_.HasData() && !Buffer_.IsFinished()) { + SyncData_.MainThreadHasWorkCond.Signal(); + SyncData_.InputBufferCanReadCond.WaitI(SyncData_.BuffersMutex); + } - return std::min(size, Buffer_.Size() - start); - } + if (!Buffer_.HasData()) { + Y_DEBUG_ABORT_UNLESS(Buffer_.IsFinished()); + return 0; + } - size_t GetWriteStart() const { - return GetBufferPosition(DataStart_ + DataSize_); - } + const char* dataPtr; + size_t dataLen; + Buffer_.GetData(dataPtr, dataLen); - char* Data(size_t pos) { - Y_DEBUG_ABORT_UNLESS(pos < Buffer_.Size()); + size_t bytesRead = std::min(dataLen, len); + Y_DEBUG_ABORT_UNLESS(bytesRead > 0); + memcpy(buf, dataPtr, bytesRead); + Buffer_.CommitRead(bytesRead); + return bytesRead; + } - return (Buffer_.Data() + pos); + ythrow yexception(); + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - private: - TBuffer Buffer_; - - bool Finished_; - - size_t DataStart_; - size_t DataSize_; - }; - - struct TStreamingParams { - public: - const size_t DefaultProcessPollLatencyMs = 5 * 1000; // 5 seconds - const size_t DefaultInputBufferSizeBytes = 4 * 1024 * 1024; // 4MB - const size_t 
DefaultOutputBufferSizeBytes = 16 * 1024 * 1024; // 16MB - const char* DefaultInputDelimiter = "\n"; - const char* DefaultOutputDelimiter = "\n"; - - public: - TUnboxedValue InputStreamObj; - TString CommandLine; - TUnboxedValue ArgumentsList; - TString InputDelimiter; - TString OutputDelimiter; - size_t InputBufferSizeBytes; - size_t OutputBufferSizeBytes; - size_t ProcessPollLatencyMs; - - TStreamingParams() - : InputDelimiter(DefaultInputDelimiter) - , OutputDelimiter(DefaultOutputDelimiter) - , InputBufferSizeBytes(DefaultInputBufferSizeBytes) - , OutputBufferSizeBytes(DefaultOutputBufferSizeBytes) - , ProcessPollLatencyMs(DefaultProcessPollLatencyMs) - { + EFetchStatus ReadNextString() { + TUnboxedValue item; + EFetchStatus status = RowsStream_.Fetch(item); + switch (status) { + case EFetchStatus::Yield: + case EFetchStatus::Finish: + return status; + default: + break; } - }; - struct TThreadSyncData { - TMutex BuffersMutex; - TCondVar InputBufferCanReadCond; - TCondVar MainThreadHasWorkCond; - TCondVar OutputBufferCanWriteCond; - }; + CurString_ = item.GetElement(0); + CurStringInput_.Reset(CurString_.AsStringRef().Data(), CurString_.AsStringRef().Size()); - class TStringListBufferedInputStream: public IInputStream { - public: - TStringListBufferedInputStream(TUnboxedValue rowsStream, const TString& delimiter, size_t bufferSizeBytes, - TThreadSyncData& syncData, TSourcePosition pos) - : RowsStream_(rowsStream) - , Delimiter_(delimiter) - , SyncData_(syncData) - , Pos_(pos) - , DelimiterMatcher_(delimiter) - , DelimiterInput_(delimiter) - , Buffer_(bufferSizeBytes) - , CurReadMode_(ReadMode::Start) + // Check that input string doesn't contain delimiters + const char* match; + Y_UNUSED(match); + if (DelimiterMatcher_.SubStr( + CurString_.AsStringRef().Data(), + CurString_.AsStringRef().Data() + CurString_.AsStringRef().Size(), + match)) { + ythrow yexception() << "Delimiter found in input string."; } - TStringListBufferedInputStream(const 
TStringListBufferedInputStream&) = delete; - TStringListBufferedInputStream& operator=(const TStringListBufferedInputStream&) = delete; + return EFetchStatus::Ok; + } - TCyclicRWBuffer& GetBuffer() { - return Buffer_; - } +private: + enum class ReadMode { + Start, + String, + Delimiter, + Done + }; - // Fetch input from upstream list iterator to the buffer. - // Called from Main thread. - EFetchStatus FetchInput() { + TUnboxedValue RowsStream_; + TString Delimiter_; + TThreadSyncData& SyncData_; + TSourcePosition Pos_; + + TKMPMatcher DelimiterMatcher_; + TUnboxedValue CurString_; + TMemoryInput CurStringInput_; + TMemoryInput DelimiterInput_; + + TCyclicRWBuffer Buffer_; + + ReadMode CurReadMode_; +}; + +class TStringListBufferedOutputStream: public IOutputStream { +public: + TStringListBufferedOutputStream(const TString& delimiter, size_t stringBufferSizeBytes, + TStringListBufferedInputStream& inputStream, TThreadSyncData& syncData) + : Delimiter_(delimiter) + , InputStream_(inputStream) + , SyncData_(syncData) + , HasDelimiterMatch_(false) + , DelimiterMatcherCallback_(HasDelimiterMatch_) + , DelimiterMatcher_(delimiter.data(), delimiter.data() + delimiter.size(), &DelimiterMatcherCallback_) + , Buffer_(stringBufferSizeBytes) + { + } + + TStringListBufferedOutputStream(const TStringListBufferedOutputStream&) = delete; + TStringListBufferedOutputStream& operator=(const TStringListBufferedOutputStream&) = delete; + + // Get string record from buffer. + // Called from Main thread. 
+ EFetchStatus FetchNextString(TString& str) { + while (!HasDelimiterMatch_) { with_lock (SyncData_.BuffersMutex) { - Y_DEBUG_ABORT_UNLESS(!Buffer_.HasData()); - Y_DEBUG_ABORT_UNLESS(Buffer_.CanWrite()); + bool inputHasData; + bool bufferNeedsData; - bool receivedYield = false; + do { + inputHasData = InputStream_.GetBuffer().HasData() || InputStream_.GetBuffer().IsFinished(); + bufferNeedsData = !Buffer_.HasData() && !Buffer_.IsFinished(); - while (Buffer_.CanWrite() && CurReadMode_ != ReadMode::Done && !receivedYield) { - switch (CurReadMode_) { - case ReadMode::Start: { - auto status = ReadNextString(); - if (status == EFetchStatus::Yield) { - receivedYield = true; - break; - } - - CurReadMode_ = (status == EFetchStatus::Ok) - ? ReadMode::String - : ReadMode::Done; - - break; - } - - case ReadMode::String: - if (CurStringInput_.Exhausted()) { - DelimiterInput_.Reset(Delimiter_.data(), Delimiter_.size()); - CurReadMode_ = ReadMode::Delimiter; - break; - } - - Buffer_.Write(CurStringInput_); - break; - - case ReadMode::Delimiter: - if (DelimiterInput_.Exhausted()) { - CurReadMode_ = ReadMode::Start; - break; - } - - Buffer_.Write(DelimiterInput_); - break; + if (inputHasData && bufferNeedsData) { + SyncData_.MainThreadHasWorkCond.WaitI(SyncData_.BuffersMutex); + } + } while (inputHasData && bufferNeedsData); - default: - break; + if (!inputHasData) { + auto status = InputStream_.FetchInput(); + if (status == EFetchStatus::Yield) { + return EFetchStatus::Yield; } } - if (CurReadMode_ == ReadMode::Done) { - Buffer_.Finish(); + if (bufferNeedsData) { + continue; } - SyncData_.InputBufferCanReadCond.Signal(); - return receivedYield ? EFetchStatus::Yield : EFetchStatus::Ok; - } - } - - private: - // Read data to pass into the child process input pipe. - // Called from Communicate thread. 
- size_t DoRead(void* buf, size_t len) override { - try { - with_lock (SyncData_.BuffersMutex) { - while (!Buffer_.HasData() && !Buffer_.IsFinished()) { - SyncData_.MainThreadHasWorkCond.Signal(); - SyncData_.InputBufferCanReadCond.WaitI(SyncData_.BuffersMutex); - } - - if (!Buffer_.HasData()) { - Y_DEBUG_ABORT_UNLESS(Buffer_.IsFinished()); - return 0; - } + if (!Buffer_.HasData()) { + Y_DEBUG_ABORT_UNLESS(Buffer_.IsFinished()); + str = TString(TStringBuf(CurrentString_.Data(), CurrentString_.Size())); + CurrentString_.Clear(); + return str.empty() ? EFetchStatus::Finish : EFetchStatus::Ok; + } - const char* dataPtr; - size_t dataLen; - Buffer_.GetData(dataPtr, dataLen); + const char* data; + size_t size; + Buffer_.GetData(data, size); - size_t bytesRead = std::min(dataLen, len); - Y_DEBUG_ABORT_UNLESS(bytesRead > 0); - memcpy(buf, dataPtr, bytesRead); - Buffer_.CommitRead(bytesRead); - return bytesRead; + size_t read = 0; + while (!HasDelimiterMatch_ && read < size) { + DelimiterMatcher_.Push(data[read]); + ++read; } - ythrow yexception(); - } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); - } - } - - EFetchStatus ReadNextString() { - TUnboxedValue item; - EFetchStatus status = RowsStream_.Fetch(item); - switch (status) { - case EFetchStatus::Yield: - case EFetchStatus::Finish: - return status; - default: - break; - } + Y_DEBUG_ABORT_UNLESS(read > 0); + CurrentString_.Append(data, read); + bool signalCanWrite = !Buffer_.CanWrite(); + Buffer_.CommitRead(read); - CurString_ = item.GetElement(0); - CurStringInput_.Reset(CurString_.AsStringRef().Data(), CurString_.AsStringRef().Size()); - - // Check that input string doesn't contain delimiters - const char* match; - Y_UNUSED(match); - if (DelimiterMatcher_.SubStr( - CurString_.AsStringRef().Data(), - CurString_.AsStringRef().Data() + CurString_.AsStringRef().Size(), - match)) - { - ythrow yexception() << "Delimiter found in input string."; + if (signalCanWrite) 
{ + SyncData_.OutputBufferCanWriteCond.Signal(); + } } - - return EFetchStatus::Ok; } - private: - enum class ReadMode { - Start, - String, - Delimiter, - Done - }; - - TUnboxedValue RowsStream_; - TString Delimiter_; - TThreadSyncData& SyncData_; - TSourcePosition Pos_; - - TKMPMatcher DelimiterMatcher_; - TUnboxedValue CurString_; - TMemoryInput CurStringInput_; - TMemoryInput DelimiterInput_; - - TCyclicRWBuffer Buffer_; - - ReadMode CurReadMode_; - }; - - class TStringListBufferedOutputStream: public IOutputStream { - public: - TStringListBufferedOutputStream(const TString& delimiter, size_t stringBufferSizeBytes, - TStringListBufferedInputStream& inputStream, TThreadSyncData& syncData) - : Delimiter_(delimiter) - , InputStream_(inputStream) - , SyncData_(syncData) - , HasDelimiterMatch_(false) - , DelimiterMatcherCallback_(HasDelimiterMatch_) - , DelimiterMatcher_(delimiter.data(), delimiter.data() + delimiter.size(), &DelimiterMatcherCallback_) - , Buffer_(stringBufferSizeBytes) - { - } - - TStringListBufferedOutputStream(const TStringListBufferedOutputStream&) = delete; - TStringListBufferedOutputStream& operator=(const TStringListBufferedOutputStream&) = delete; - - // Get string record from buffer. - // Called from Main thread. 
- EFetchStatus FetchNextString(TString& str) { - while (!HasDelimiterMatch_) { - with_lock (SyncData_.BuffersMutex) { - bool inputHasData; - bool bufferNeedsData; - - do { - inputHasData = InputStream_.GetBuffer().HasData() || InputStream_.GetBuffer().IsFinished(); - bufferNeedsData = !Buffer_.HasData() && !Buffer_.IsFinished(); - - if (inputHasData && bufferNeedsData) { - SyncData_.MainThreadHasWorkCond.WaitI(SyncData_.BuffersMutex); - } - } while (inputHasData && bufferNeedsData); - - if (!inputHasData) { - auto status = InputStream_.FetchInput(); - if (status == EFetchStatus::Yield) { - return EFetchStatus::Yield; - } - } + Y_DEBUG_ABORT_UNLESS(CurrentString_.Size() >= Delimiter_.size()); + str = TString(TStringBuf(CurrentString_.Data(), CurrentString_.Size() - Delimiter_.size())); + CurrentString_.Clear(); + HasDelimiterMatch_ = false; - if (bufferNeedsData) { - continue; - } + return EFetchStatus::Ok; + } - if (!Buffer_.HasData()) { - Y_DEBUG_ABORT_UNLESS(Buffer_.IsFinished()); - str = TString(TStringBuf(CurrentString_.Data(), CurrentString_.Size())); - CurrentString_.Clear(); - return str.empty() ? EFetchStatus::Finish : EFetchStatus::Ok; - } + TCyclicRWBuffer& GetBuffer() { + return Buffer_; + } - const char* data; - size_t size; - Buffer_.GetData(data, size); +private: + // Write data from child process output to buffer. + // Called from Communicate thread. 
+ void DoWrite(const void* buf, size_t len) override { + const char* curStrPos = reinterpret_cast<const char*>(buf); + size_t curStrLen = len; - size_t read = 0; - while (!HasDelimiterMatch_ && read < size) { - DelimiterMatcher_.Push(data[read]); - ++read; - } - - Y_DEBUG_ABORT_UNLESS(read > 0); - CurrentString_.Append(data, read); - bool signalCanWrite = !Buffer_.CanWrite(); - Buffer_.CommitRead(read); - - if (signalCanWrite) { - SyncData_.OutputBufferCanWriteCond.Signal(); - } + while (curStrLen > 0) { + with_lock (SyncData_.BuffersMutex) { + while (!Buffer_.CanWrite() && !Buffer_.IsFinished()) { + SyncData_.OutputBufferCanWriteCond.WaitI(SyncData_.BuffersMutex); } - } - - Y_DEBUG_ABORT_UNLESS(CurrentString_.Size() >= Delimiter_.size()); - str = TString(TStringBuf(CurrentString_.Data(), CurrentString_.Size() - Delimiter_.size())); - CurrentString_.Clear(); - HasDelimiterMatch_ = false; - return EFetchStatus::Ok; - } - - TCyclicRWBuffer& GetBuffer() { - return Buffer_; - } - - private: - // Write data from child process output to buffer. - // Called from Communicate thread. 
- void DoWrite(const void* buf, size_t len) override { - const char* curStrPos = reinterpret_cast<const char*>(buf); - size_t curStrLen = len; - - while (curStrLen > 0) { - with_lock (SyncData_.BuffersMutex) { - while (!Buffer_.CanWrite() && !Buffer_.IsFinished()) { - SyncData_.OutputBufferCanWriteCond.WaitI(SyncData_.BuffersMutex); - } - - if (Buffer_.IsFinished()) { - return; - } + if (Buffer_.IsFinished()) { + return; + } - bool signalCanRead = !Buffer_.HasData(); - Buffer_.Write(curStrPos, curStrLen); + bool signalCanRead = !Buffer_.HasData(); + Buffer_.Write(curStrPos, curStrLen); - if (signalCanRead) { - SyncData_.MainThreadHasWorkCond.Signal(); - } + if (signalCanRead) { + SyncData_.MainThreadHasWorkCond.Signal(); } } } + } - void DoFinish() override { - IOutputStream::DoFinish(); + void DoFinish() override { + IOutputStream::DoFinish(); - with_lock (SyncData_.BuffersMutex) { - Buffer_.Finish(); - SyncData_.MainThreadHasWorkCond.Signal(); - } + with_lock (SyncData_.BuffersMutex) { + Buffer_.Finish(); + SyncData_.MainThreadHasWorkCond.Signal(); } + } - private: - class MatcherCallback: public TKMPStreamMatcher<char>::ICallback { - public: - MatcherCallback(bool& hasMatch) - : HasMatch_(hasMatch) - { - } - - void OnMatch(const char* begin, const char* end) override { - Y_UNUSED(begin); - Y_UNUSED(end); - - HasMatch_ = true; - } - - private: - bool& HasMatch_; - }; - - private: - TString Delimiter_; - TStringListBufferedInputStream& InputStream_; - TThreadSyncData& SyncData_; - - bool HasDelimiterMatch_; - MatcherCallback DelimiterMatcherCallback_; - TKMPStreamMatcher<char> DelimiterMatcher_; - - TBuffer CurrentString_; - - TCyclicRWBuffer Buffer_; - }; - - class TStreamingOutputListIterator { +private: + class MatcherCallback: public TKMPStreamMatcher<char>::ICallback { public: - TStreamingOutputListIterator(const TStreamingParams& params, const IValueBuilder* valueBuilder, TSourcePosition pos) - : StreamingParams_(params) - , ValueBuilder_(valueBuilder) - , 
Pos_(pos) + MatcherCallback(bool& hasMatch) + : HasMatch_(hasMatch) { } - TStreamingOutputListIterator(const TStreamingOutputListIterator&) = delete; - TStreamingOutputListIterator& operator=(const TStreamingOutputListIterator&) = delete; + void OnMatch(const char* begin, const char* end) override { + Y_UNUSED(begin); + Y_UNUSED(end); - ~TStreamingOutputListIterator() { - if (ShellCommand_) { - Y_DEBUG_ABORT_UNLESS(InputStream_ && OutputStream_); + HasMatch_ = true; + } - try { - ShellCommand_->Terminate(); - } catch (const std::exception& e) { - Cerr << CurrentExceptionMessage(); - } + private: + bool& HasMatch_; + }; - // Let Communicate thread finish. - with_lock (ThreadSyncData_.BuffersMutex) { - InputStream_->GetBuffer().Finish(); - OutputStream_->GetBuffer().Finish(); - ThreadSyncData_.InputBufferCanReadCond.Signal(); - ThreadSyncData_.OutputBufferCanWriteCond.Signal(); - } +private: + TString Delimiter_; + TStringListBufferedInputStream& InputStream_; + TThreadSyncData& SyncData_; - ShellCommand_->Wait(); - } - } + bool HasDelimiterMatch_; + MatcherCallback DelimiterMatcherCallback_; + TKMPStreamMatcher<char> DelimiterMatcher_; - EFetchStatus Fetch(TUnboxedValue& result) { - try { - EFetchStatus status = EFetchStatus::Ok; - - if (!ProcessStarted()) { - StartProcess(); + TBuffer CurrentString_; - // Don't try to fetch data if there was a problem starting the process, - // this causes infinite wait on Windows system due to incorrect ShellCommand behavior. 
- if (ShellCommand_->GetStatus() != TShellCommand::SHELL_RUNNING && ShellCommand_->GetStatus() != TShellCommand::SHELL_FINISHED) { - status = EFetchStatus::Finish; - } - } + TCyclicRWBuffer Buffer_; +}; - if (status == EFetchStatus::Ok) { - status = OutputStream_->FetchNextString(CurrentRecord_); - } +class TStreamingOutputListIterator { +public: + TStreamingOutputListIterator(const TStreamingParams& params, const IValueBuilder* valueBuilder, TSourcePosition pos) + : StreamingParams_(params) + , ValueBuilder_(valueBuilder) + , Pos_(pos) + { + } - if (status == EFetchStatus::Finish) { - switch (ShellCommand_->GetStatus()) { - case TShellCommand::SHELL_FINISHED: - break; - case TShellCommand::SHELL_INTERNAL_ERROR: - ythrow yexception() << "Internal error running process: " << ShellCommand_->GetInternalError(); - break; - case TShellCommand::SHELL_ERROR: - ythrow yexception() << "Error running user process: " << ShellCommand_->GetError(); - break; - default: - ythrow yexception() << "Unexpected shell command status: " << (int)ShellCommand_->GetStatus(); - } - return EFetchStatus::Finish; - } + TStreamingOutputListIterator(const TStreamingOutputListIterator&) = delete; + TStreamingOutputListIterator& operator=(const TStreamingOutputListIterator&) = delete; - if (status == EFetchStatus::Ok) { - TUnboxedValue* items = nullptr; - result = ValueBuilder_->NewArray(1, items); - *items = ValueBuilder_->NewString(TStringRef(CurrentRecord_.data(), CurrentRecord_.size())); - } + ~TStreamingOutputListIterator() { + if (ShellCommand_) { + Y_DEBUG_ABORT_UNLESS(InputStream_ && OutputStream_); - return status; + try { + ShellCommand_->Terminate(); } catch (const std::exception& e) { - UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); + Cerr << CurrentExceptionMessage(); } - } - private: - void StartProcess() { - InputStream_.Reset(new TStringListBufferedInputStream( - StreamingParams_.InputStreamObj, StreamingParams_.InputDelimiter, - 
StreamingParams_.InputBufferSizeBytes, ThreadSyncData_, Pos_)); - - OutputStream_.Reset(new TStringListBufferedOutputStream( - StreamingParams_.OutputDelimiter, StreamingParams_.OutputBufferSizeBytes, *InputStream_, - ThreadSyncData_)); - - TShellCommandOptions opt; - opt.SetAsync(true).SetUseShell(false).SetLatency(StreamingParams_.ProcessPollLatencyMs).SetInputStream(InputStream_.Get()).SetOutputStream(OutputStream_.Get()).SetCloseStreams(true).SetCloseAllFdsOnExec(true); - - TList<TString> commandArguments; - auto argumetsIterator = StreamingParams_.ArgumentsList.GetListIterator(); - for (TUnboxedValue item; argumetsIterator.Next(item);) { - commandArguments.emplace_back(TStringBuf(item.AsStringRef())); + // Let Communicate thread finish. + with_lock (ThreadSyncData_.BuffersMutex) { + InputStream_->GetBuffer().Finish(); + OutputStream_->GetBuffer().Finish(); + ThreadSyncData_.InputBufferCanReadCond.Signal(); + ThreadSyncData_.OutputBufferCanWriteCond.Signal(); } - ShellCommand_.Reset(new TShellCommand(StreamingParams_.CommandLine, commandArguments, opt)); - ShellCommand_->Run(); - } - - bool ProcessStarted() const { - return !!ShellCommand_; + ShellCommand_->Wait(); } + } - private: - TStreamingParams StreamingParams_; - const IValueBuilder* ValueBuilder_; - TSourcePosition Pos_; - - TThreadSyncData ThreadSyncData_; + EFetchStatus Fetch(TUnboxedValue& result) { + try { + EFetchStatus status = EFetchStatus::Ok; - THolder<TShellCommand> ShellCommand_; - THolder<TStringListBufferedInputStream> InputStream_; - THolder<TStringListBufferedOutputStream> OutputStream_; + if (!ProcessStarted()) { + StartProcess(); - TString CurrentRecord_; - }; - - class TStreamingOutput: public TBoxedValue { - public: - TStreamingOutput(const TStreamingParams& params, const IValueBuilder* valueBuilder, TSourcePosition pos) - : StreamingParams_(params) - , ValueBuilder_(valueBuilder) - , Pos_(pos) - { - } - - TStreamingOutput(const TStreamingOutput&) = delete; - TStreamingOutput& 
operator=(const TStreamingOutput&) = delete; - - private: - EFetchStatus Fetch(TUnboxedValue& result) override { - if (IsFinished_) { - return EFetchStatus::Finish; + // Don't try to fetch data if there was a problem starting the process, + // this causes infinite wait on Windows system due to incorrect ShellCommand behavior. + if (ShellCommand_->GetStatus() != TShellCommand::SHELL_RUNNING && ShellCommand_->GetStatus() != TShellCommand::SHELL_FINISHED) { + status = EFetchStatus::Finish; + } } - if (!Iterator_) { - Iterator_.Reset(new TStreamingOutputListIterator(StreamingParams_, ValueBuilder_, Pos_)); + if (status == EFetchStatus::Ok) { + status = OutputStream_->FetchNextString(CurrentRecord_); } - auto ret = Iterator_->Fetch(result); + if (status == EFetchStatus::Finish) { + switch (ShellCommand_->GetStatus()) { + case TShellCommand::SHELL_FINISHED: + break; + case TShellCommand::SHELL_INTERNAL_ERROR: + ythrow yexception() << "Internal error running process: " << ShellCommand_->GetInternalError(); + break; + case TShellCommand::SHELL_ERROR: + ythrow yexception() << "Error running user process: " << ShellCommand_->GetError(); + break; + default: + ythrow yexception() << "Unexpected shell command status: " << (int)ShellCommand_->GetStatus(); + } + return EFetchStatus::Finish; + } - if (ret == EFetchStatus::Finish) { - IsFinished_ = true; - Iterator_.Reset(); + if (status == EFetchStatus::Ok) { + TUnboxedValue* items = nullptr; + result = ValueBuilder_->NewArray(1, items); + *items = ValueBuilder_->NewString(TStringRef(CurrentRecord_.data(), CurrentRecord_.size())); } - return ret; + return status; + } catch (const std::exception& e) { + UdfTerminate((TStringBuilder() << Pos_ << " " << e.what()).c_str()); } + } - TStreamingParams StreamingParams_; - const IValueBuilder* ValueBuilder_; - TSourcePosition Pos_; - bool IsFinished_ = false; - THolder<TStreamingOutputListIterator> Iterator_; - }; +private: + void StartProcess() { + InputStream_.Reset(new 
TStringListBufferedInputStream( + StreamingParams_.InputStreamObj, StreamingParams_.InputDelimiter, + StreamingParams_.InputBufferSizeBytes, ThreadSyncData_, Pos_)); - class TStreamingScriptOutput: public TStreamingOutput { - public: - TStreamingScriptOutput(const TStreamingParams& params, const IValueBuilder* valueBuilder, - TSourcePosition pos, const TString& script, const TString& scriptFilename) - : TStreamingOutput(params, valueBuilder, pos) - , ScriptFileHandle_(scriptFilename) - { - auto scriptStripped = StripBeforeShebang(script); - ScriptFileHandle_.Write(scriptStripped.data(), scriptStripped.size()); - ScriptFileHandle_.Close(); + OutputStream_.Reset(new TStringListBufferedOutputStream( + StreamingParams_.OutputDelimiter, StreamingParams_.OutputBufferSizeBytes, *InputStream_, + ThreadSyncData_)); - if (Chmod(ScriptFileHandle_.Name().c_str(), MODE0755) != 0) { - ythrow yexception() << "Chmod failed for script file:" << ScriptFileHandle_.Name() - << " with error: " << LastSystemErrorText(); - } - } - - private: - static TString StripBeforeShebang(const TString& script) { - auto shebangIndex = script.find("#!"); - if (shebangIndex != TString::npos) { - auto scriptStripped = StripStringLeft(script); + TShellCommandOptions opt; + opt.SetAsync(true).SetUseShell(false).SetLatency(StreamingParams_.ProcessPollLatencyMs).SetInputStream(InputStream_.Get()).SetOutputStream(OutputStream_.Get()).SetCloseStreams(true).SetCloseAllFdsOnExec(true); - if (scriptStripped.size() == script.size() - shebangIndex) { - return scriptStripped; - } - } - - return script; + TList<TString> commandArguments; + auto argumetsIterator = StreamingParams_.ArgumentsList.GetListIterator(); + for (TUnboxedValue item; argumetsIterator.Next(item);) { + commandArguments.emplace_back(TStringBuf(item.AsStringRef())); } - TTempFileHandle ScriptFileHandle_; - }; + ShellCommand_.Reset(new TShellCommand(StreamingParams_.CommandLine, commandArguments, opt)); + ShellCommand_->Run(); + } - class 
TStreamingProcess: public TBoxedValue { - public: - TStreamingProcess(TSourcePosition pos) - : Pos_(pos) - {} + bool ProcessStarted() const { + return !!ShellCommand_; + } - private: - TUnboxedValue Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override { - auto inputListArg = args[0]; - auto commandLineArg = args[1].AsStringRef(); - auto argumentsArg = args[2]; - auto inputDelimiterArg = args[3]; - auto outputDelimiterArg = args[4]; - - Y_DEBUG_ABORT_UNLESS(inputListArg.IsBoxed()); - - TStreamingParams params; - params.InputStreamObj = TUnboxedValuePod(inputListArg); - params.CommandLine = TString(TStringBuf(commandLineArg)); - params.ArgumentsList = !argumentsArg - ? valueBuilder->NewEmptyList() - : TUnboxedValue(argumentsArg.GetOptionalValue()); - - if (inputDelimiterArg) { - params.InputDelimiter = TString(TStringBuf(inputDelimiterArg.AsStringRef())); - } - if (outputDelimiterArg) { - params.OutputDelimiter = TString(TStringBuf(outputDelimiterArg.AsStringRef())); - } +private: + TStreamingParams StreamingParams_; + const IValueBuilder* ValueBuilder_; + TSourcePosition Pos_; - return TUnboxedValuePod(new TStreamingOutput(params, valueBuilder, Pos_)); - } + TThreadSyncData ThreadSyncData_; - public: - static TStringRef Name() { - static auto name = TStringRef::Of("Process"); - return name; - } + THolder<TShellCommand> ShellCommand_; + THolder<TStringListBufferedInputStream> InputStream_; + THolder<TStringListBufferedOutputStream> OutputStream_; - private: - TSourcePosition Pos_; - }; + TString CurrentRecord_; +}; - class TStreamingProcessInline: public TBoxedValue { - public: - TStreamingProcessInline(TSourcePosition pos) - : Pos_(pos) - {} +class TStreamingOutput: public TBoxedValue { +public: + TStreamingOutput(const TStreamingParams& params, const IValueBuilder* valueBuilder, TSourcePosition pos) + : StreamingParams_(params) + , ValueBuilder_(valueBuilder) + , Pos_(pos) + { + } - private: - TUnboxedValue Run(const IValueBuilder* 
valueBuilder, - const TUnboxedValuePod* args) const override { - auto inputListArg = args[0]; - auto scriptArg = args[1].AsStringRef(); - auto argumentsArg = args[2]; - auto inputDelimiterArg = args[3]; - auto outputDelimiterArg = args[4]; - - TString script(scriptArg); - TString scriptFilename = MakeTempName("."); - - TStreamingParams params; - params.InputStreamObj = TUnboxedValuePod(inputListArg); - params.CommandLine = scriptFilename; - params.ArgumentsList = !argumentsArg - ? valueBuilder->NewEmptyList() - : TUnboxedValue(argumentsArg.GetOptionalValue()); - - if (inputDelimiterArg) { - params.InputDelimiter = TString(TStringBuf(inputDelimiterArg.AsStringRef())); - } - if (outputDelimiterArg) { - params.OutputDelimiter = TString(TStringBuf(outputDelimiterArg.AsStringRef())); - } + TStreamingOutput(const TStreamingOutput&) = delete; + TStreamingOutput& operator=(const TStreamingOutput&) = delete; - return TUnboxedValuePod(new TStreamingScriptOutput(params, valueBuilder, Pos_, script, scriptFilename)); +private: + EFetchStatus Fetch(TUnboxedValue& result) override { + if (IsFinished_) { + return EFetchStatus::Finish; } - public: - static TStringRef Name() { - static auto name = TStringRef::Of("ProcessInline"); - return name; + if (!Iterator_) { + Iterator_.Reset(new TStreamingOutputListIterator(StreamingParams_, ValueBuilder_, Pos_)); } - private: - TSourcePosition Pos_; - }; + auto ret = Iterator_->Fetch(result); - class TStreamingModule: public IUdfModule { - public: - TStringRef Name() const { - return TStringRef::Of("Streaming"); + if (ret == EFetchStatus::Finish) { + IsFinished_ = true; + Iterator_.Reset(); } - void CleanupOnTerminate() const final { - } + return ret; + } - void GetAllFunctions(IFunctionsSink& sink) const final { - sink.Add(TStreamingProcess::Name()); - sink.Add(TStreamingProcessInline::Name()); - } + TStreamingParams StreamingParams_; + const IValueBuilder* ValueBuilder_; + TSourcePosition Pos_; + bool IsFinished_ = false; + 
THolder<TStreamingOutputListIterator> Iterator_; +}; - void BuildFunctionTypeInfo( - const TStringRef& name, - NUdf::TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const override { - try { - Y_UNUSED(userType); - Y_UNUSED(typeConfig); +class TStreamingScriptOutput: public TStreamingOutput { +public: + TStreamingScriptOutput(const TStreamingParams& params, const IValueBuilder* valueBuilder, + TSourcePosition pos, const TString& script, const TString& scriptFilename) + : TStreamingOutput(params, valueBuilder, pos) + , ScriptFileHandle_(scriptFilename) + { + auto scriptStripped = StripBeforeShebang(script); + ScriptFileHandle_.Write(scriptStripped.data(), scriptStripped.size()); + ScriptFileHandle_.Close(); - bool typesOnly = (flags & TFlags::TypesOnly); + if (Chmod(ScriptFileHandle_.Name().c_str(), MODE0755) != 0) { + ythrow yexception() << "Chmod failed for script file:" << ScriptFileHandle_.Name() + << " with error: " << LastSystemErrorText(); + } + } - auto optionalStringType = builder.Optional()->Item<char*>().Build(); - auto rowType = builder.Struct(1)->AddField("Data", TDataType<char*>::Id, nullptr).Build(); - auto rowsType = builder.Stream()->Item(rowType).Build(); - auto stringListType = builder.List()->Item(TDataType<char*>::Id).Build(); - auto optionalStringListType = builder.Optional()->Item(stringListType).Build(); +private: + static TString StripBeforeShebang(const TString& script) { + auto shebangIndex = script.find("#!"); + if (shebangIndex != TString::npos) { + auto scriptStripped = StripStringLeft(script); - if (TStreamingProcess::Name() == name) { - builder.Returns(rowsType).Args()->Add(rowsType).Add<char*>().Add(optionalStringListType).Add(optionalStringType).Add(optionalStringType).Done().OptionalArgs(3); + if (scriptStripped.size() == script.size() - shebangIndex) { + return scriptStripped; + } + } - if (!typesOnly) { - builder.Implementation(new TStreamingProcess(builder.GetSourcePosition())); 
- } + return script; + } + + TTempFileHandle ScriptFileHandle_; +}; + +class TStreamingProcess: public TBoxedValue { +public: + TStreamingProcess(TSourcePosition pos) + : Pos_(pos) + { + } + +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + auto inputListArg = args[0]; + auto commandLineArg = args[1].AsStringRef(); + auto argumentsArg = args[2]; + auto inputDelimiterArg = args[3]; + auto outputDelimiterArg = args[4]; + + Y_DEBUG_ABORT_UNLESS(inputListArg.IsBoxed()); + + TStreamingParams params; + params.InputStreamObj = TUnboxedValuePod(inputListArg); + params.CommandLine = TString(TStringBuf(commandLineArg)); + params.ArgumentsList = !argumentsArg + ? valueBuilder->NewEmptyList() + : TUnboxedValue(argumentsArg.GetOptionalValue()); + + if (inputDelimiterArg) { + params.InputDelimiter = TString(TStringBuf(inputDelimiterArg.AsStringRef())); + } + if (outputDelimiterArg) { + params.OutputDelimiter = TString(TStringBuf(outputDelimiterArg.AsStringRef())); + } + + return TUnboxedValuePod(new TStreamingOutput(params, valueBuilder, Pos_)); + } + +public: + static TStringRef Name() { + static auto name = TStringRef::Of("Process"); + return name; + } + +private: + TSourcePosition Pos_; +}; + +class TStreamingProcessInline: public TBoxedValue { +public: + TStreamingProcessInline(TSourcePosition pos) + : Pos_(pos) + { + } + +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { + auto inputListArg = args[0]; + auto scriptArg = args[1].AsStringRef(); + auto argumentsArg = args[2]; + auto inputDelimiterArg = args[3]; + auto outputDelimiterArg = args[4]; + + TString script(scriptArg); + TString scriptFilename = MakeTempName("."); + + TStreamingParams params; + params.InputStreamObj = TUnboxedValuePod(inputListArg); + params.CommandLine = scriptFilename; + params.ArgumentsList = !argumentsArg + ? 
valueBuilder->NewEmptyList() + : TUnboxedValue(argumentsArg.GetOptionalValue()); + + if (inputDelimiterArg) { + params.InputDelimiter = TString(TStringBuf(inputDelimiterArg.AsStringRef())); + } + if (outputDelimiterArg) { + params.OutputDelimiter = TString(TStringBuf(outputDelimiterArg.AsStringRef())); + } + + return TUnboxedValuePod(new TStreamingScriptOutput(params, valueBuilder, Pos_, script, scriptFilename)); + } + +public: + static TStringRef Name() { + static auto name = TStringRef::Of("ProcessInline"); + return name; + } + +private: + TSourcePosition Pos_; +}; + +class TStreamingModule: public IUdfModule { +public: + TStringRef Name() const { + return TStringRef::Of("Streaming"); + } + + void CleanupOnTerminate() const final { + } + + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(TStreamingProcess::Name()); + sink.Add(TStreamingProcessInline::Name()); + } + + void BuildFunctionTypeInfo( + const TStringRef& name, + NUdf::TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const override { + try { + Y_UNUSED(userType); + Y_UNUSED(typeConfig); + + bool typesOnly = (flags & TFlags::TypesOnly); + + auto optionalStringType = builder.Optional()->Item<char*>().Build(); + auto rowType = builder.Struct(1)->AddField("Data", TDataType<char*>::Id, nullptr).Build(); + auto rowsType = builder.Stream()->Item(rowType).Build(); + auto stringListType = builder.List()->Item(TDataType<char*>::Id).Build(); + auto optionalStringListType = builder.Optional()->Item(stringListType).Build(); + + if (TStreamingProcess::Name() == name) { + builder.Returns(rowsType).Args()->Add(rowsType).Add<char*>().Add(optionalStringListType).Add(optionalStringType).Add(optionalStringType).Done().OptionalArgs(3); + + if (!typesOnly) { + builder.Implementation(new TStreamingProcess(builder.GetSourcePosition())); } + } - if (TStreamingProcessInline::Name() == name) { - 
builder.Returns(rowsType).Args()->Add(rowsType).Add<char*>().Add(optionalStringListType).Add(optionalStringType).Add(optionalStringType).Done().OptionalArgs(3); + if (TStreamingProcessInline::Name() == name) { + builder.Returns(rowsType).Args()->Add(rowsType).Add<char*>().Add(optionalStringListType).Add(optionalStringType).Add(optionalStringType).Done().OptionalArgs(3); - if (!typesOnly) { - builder.Implementation(new TStreamingProcessInline(builder.GetSourcePosition())); - } + if (!typesOnly) { + builder.Implementation(new TStreamingProcessInline(builder.GetSourcePosition())); } - } catch (const std::exception& e) { - builder.SetError(CurrentExceptionMessage()); } + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); } - }; + } +}; -} +} // namespace REGISTER_MODULES(TStreamingModule) diff --git a/yql/essentials/udfs/common/streaming/ya.make b/yql/essentials/udfs/common/streaming/ya.make index 9b080a7f86f..320490adcd2 100644 --- a/yql/essentials/udfs/common/streaming/ya.make +++ b/yql/essentials/udfs/common/streaming/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( streaming_udf.cpp ) diff --git a/yql/essentials/udfs/common/string/string_udf.cpp b/yql/essentials/udfs/common/string/string_udf.cpp index b1dbb528cbb..6574bacbeea 100644 --- a/yql/essentials/udfs/common/string/string_udf.cpp +++ b/yql/essentials/udfs/common/string/string_udf.cpp @@ -83,7 +83,7 @@ TString ReverseBits(const TStringRef input) { END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) // 'unsafe' udf is actually strict - it returns null on any exception -#define STRING_UNSAFE_UDF(udfName, function) \ +#define STRING_UNSAFE_UDF(udfName, function) \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, TOptional<char*>(TOptional<char*>)) { \ EMPTY_RESULT_ON_EMPTY_ARG(0); \ const TStringBuf input(args[0].AsStringRef()); \ @@ -96,8 +96,7 @@ TString ReverseBits(const TStringRef input) { } \ \ struct T##udfName##KernelExec \ - : public 
TUnaryKernelExec<T##udfName##KernelExec> \ - { \ + : public TUnaryKernelExec<T##udfName##KernelExec> { \ template <typename TSink> \ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ if (!arg1) { \ @@ -119,7 +118,7 @@ TString ReverseBits(const TStringRef input) { // NOTE: The functions below are marked as deprecated, so block implementation // is not required for them SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), - builder.SetMaxLangVer(NYql::MakeLangVersion(2025, 1))) { + builder.SetMaxLangVer(NYql::MakeLangVersion(2025, 1))) { EMPTY_RESULT_ON_EMPTY_ARG(0) const TStringBuf input(args[0].AsStringRef()); try { @@ -144,7 +143,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ } -#define STROKA_ASCII_CASE_UDF(udfName, function) \ +#define STROKA_ASCII_CASE_UDF(udfName, function) \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap<char*>)) { \ TString input(args[0].AsStringRef()); \ if (input.function()) { \ @@ -155,8 +154,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ \ struct T##udfName##KernelExec \ - : public TUnaryKernelExec<T##udfName##KernelExec> \ - { \ + : public TUnaryKernelExec<T##udfName##KernelExec> { \ template <typename TSink> \ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ TString input(arg1.AsStringRef()); \ @@ -170,31 +168,29 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) - -#define STROKA_FIND_UDF(udfName, function) \ - SIMPLE_STRICT_UDF(T##udfName, bool(TOptional<char*>, char*)) { \ - Y_UNUSED(valueBuilder); \ - if (args[0]) { \ - const TStringBuf haystack(args[0].AsStringRef()); \ - const TStringBuf needle(args[1].AsStringRef()); \ - return TUnboxedValuePod(haystack.function(needle)); \ - } else { \ - return TUnboxedValuePod(false); \ - } \ +#define STROKA_FIND_UDF(udfName, function) \ + 
SIMPLE_STRICT_UDF(T##udfName, bool(TOptional<char*>, char*)) { \ + Y_UNUSED(valueBuilder); \ + if (args[0]) { \ + const TStringBuf haystack(args[0].AsStringRef()); \ + const TStringBuf needle(args[1].AsStringRef()); \ + return TUnboxedValuePod(haystack.function(needle)); \ + } else { \ + return TUnboxedValuePod(false); \ + } \ } -#define STRING_TWO_ARGS_UDF_DEPRECATED_2025_02(udfName, function) \ - SIMPLE_STRICT_UDF_OPTIONS(T##udfName, bool(TOptional<char*>, char*), \ - builder.SetMaxLangVer(NYql::MakeLangVersion(2025, 1))) \ - { \ - Y_UNUSED(valueBuilder); \ - if (args[0]) { \ - const TStringBuf haystack(args[0].AsStringRef()); \ - const TStringBuf needle(args[1].AsStringRef()); \ - return TUnboxedValuePod(function(haystack, needle)); \ - } else { \ - return TUnboxedValuePod(false); \ - } \ +#define STRING_TWO_ARGS_UDF_DEPRECATED_2025_02(udfName, function) \ + SIMPLE_STRICT_UDF_OPTIONS(T##udfName, bool(TOptional<char*>, char*), \ + builder.SetMaxLangVer(NYql::MakeLangVersion(2025, 1))) { \ + Y_UNUSED(valueBuilder); \ + if (args[0]) { \ + const TStringBuf haystack(args[0].AsStringRef()); \ + const TStringBuf needle(args[1].AsStringRef()); \ + return TUnboxedValuePod(function(haystack, needle)); \ + } else { \ + return TUnboxedValuePod(false); \ + } \ } #define STRING_ASCII_CMP_IGNORE_CASE_UDF(udfName, function, minVersion) \ @@ -209,12 +205,10 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ \ struct T##udfName##KernelExec \ - : public TBinaryKernelExec<T##udfName##KernelExec> \ - { \ + : public TBinaryKernelExec<T##udfName##KernelExec> { \ template <typename TSink> \ static void Process(const IValueBuilder*, TBlockItem arg1, \ - TBlockItem arg2, const TSink& sink) \ - { \ + TBlockItem arg2, const TSink& sink) { \ if (arg1) { \ const TStringBuf haystack(arg1.AsStringRef()); \ const TStringBuf needle(arg2.AsStringRef()); \ @@ -226,9 +220,8 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), }; \ \ 
BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(T##udfName, \ - bool(TOptional<char*>, char*), \ - builder.SetMinLangVer(minVersion)) \ - { \ + bool(TOptional<char*>, char*), \ + builder.SetMinLangVer(minVersion)) { \ Y_UNUSED(valueBuilder); \ return udfName##Impl(args); \ } \ @@ -236,8 +229,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) \ \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T_yql_##udfName, \ - bool(TOptional<char*>, char*)) \ - { \ + bool(TOptional<char*>, char*)) { \ Y_UNUSED(valueBuilder); \ return udfName##Impl(args); \ } \ @@ -263,8 +255,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ \ struct T##function##KernelExec \ - : public TUnaryKernelExec<T##function##KernelExec> \ - { \ + : public TUnaryKernelExec<T##function##KernelExec> { \ template <typename TSink> \ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ if (arg1) { \ @@ -285,58 +276,54 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) - - -#define STRING_STREAM_PAD_FORMATTER_UDF(function) \ - BEGIN_SIMPLE_ARROW_UDF_WITH_OPTIONAL_ARGS(T##function, \ - char*(TAutoMap<char*>, ui64, TOptional<char*>), 1) \ - { \ - TStringStream result; \ - const TStringBuf input(args[0].AsStringRef()); \ - char paddingSymbol = ' '; \ - if (args[2]) { \ - TStringBuf filler = args[2].AsStringRef(); \ - if (filler.Size() != 1) { \ - ythrow yexception() << "Not 1 symbol in paddingSymbol"; \ - } \ - paddingSymbol = filler[0]; \ - } \ - const ui64 padLen = args[1].Get<ui64>(); \ - if (padLen > padLim) { \ - ythrow yexception() << "Padding length (" << padLen << ") exceeds maximum: " << padLim; \ - } \ - result << function(input, padLen, paddingSymbol); \ - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ - } \ - \ - struct T##function##KernelExec \ - : public 
TGenericKernelExec<T##function##KernelExec, 3> \ - { \ - template <typename TSink> \ - static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { \ - TStringStream result; \ - const TStringBuf input(args.GetElement(0).AsStringRef()); \ - char paddingSymbol = ' '; \ - if (args.GetElement(2)) { \ - TStringBuf filler = args.GetElement(2).AsStringRef(); \ - if (filler.Size() != 1) { \ - ythrow yexception() << "Not 1 symbol in paddingSymbol"; \ - } \ - paddingSymbol = filler[0]; \ - } \ - const ui64 padLen = args.GetElement(1).Get<ui64>(); \ - if (padLen > padLim) { \ - ythrow yexception() << "Padding length (" << padLen \ - << ") exceeds maximum: " << padLim; \ - } \ - result << function(input, padLen, paddingSymbol); \ - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ - } \ - }; \ - \ +#define STRING_STREAM_PAD_FORMATTER_UDF(function) \ + BEGIN_SIMPLE_ARROW_UDF_WITH_OPTIONAL_ARGS(T##function, \ + char*(TAutoMap<char*>, ui64, TOptional<char*>), 1) { \ + TStringStream result; \ + const TStringBuf input(args[0].AsStringRef()); \ + char paddingSymbol = ' '; \ + if (args[2]) { \ + TStringBuf filler = args[2].AsStringRef(); \ + if (filler.Size() != 1) { \ + ythrow yexception() << "Not 1 symbol in paddingSymbol"; \ + } \ + paddingSymbol = filler[0]; \ + } \ + const ui64 padLen = args[1].Get<ui64>(); \ + if (padLen > padLim) { \ + ythrow yexception() << "Padding length (" << padLen << ") exceeds maximum: " << padLim; \ + } \ + result << function(input, padLen, paddingSymbol); \ + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); \ + } \ + \ + struct T##function##KernelExec \ + : public TGenericKernelExec<T##function##KernelExec, 3> { \ + template <typename TSink> \ + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { \ + TStringStream result; \ + const TStringBuf input(args.GetElement(0).AsStringRef()); \ + char paddingSymbol = ' '; \ + if (args.GetElement(2)) { \ + TStringBuf filler = 
args.GetElement(2).AsStringRef(); \ + if (filler.Size() != 1) { \ + ythrow yexception() << "Not 1 symbol in paddingSymbol"; \ + } \ + paddingSymbol = filler[0]; \ + } \ + const ui64 padLen = args.GetElement(1).Get<ui64>(); \ + if (padLen > padLim) { \ + ythrow yexception() << "Padding length (" << padLen \ + << ") exceeds maximum: " << padLim; \ + } \ + result << function(input, padLen, paddingSymbol); \ + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); \ + } \ + }; \ + \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) -#define STRING_STREAM_NUM_FORMATTER_UDF(function, argType) \ +#define STRING_STREAM_NUM_FORMATTER_UDF(function, argType) \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap<argType>)) { \ TStringStream result; \ result << function(args[0].Get<argType>()); \ @@ -344,8 +331,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ \ struct T##function##KernelExec \ - : public TUnaryKernelExec<T##function##KernelExec> \ - { \ + : public TUnaryKernelExec<T##function##KernelExec> { \ template <typename TSink> \ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ TStringStream result; \ @@ -356,7 +342,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) -#define STRING_STREAM_TEXT_FORMATTER_UDF(function) \ +#define STRING_STREAM_TEXT_FORMATTER_UDF(function) \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##function, char*(TAutoMap<char*>)) { \ TStringStream result; \ const TStringBuf input(args[0].AsStringRef()); \ @@ -365,8 +351,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ \ struct T##function##KernelExec \ - : public TUnaryKernelExec<T##function##KernelExec> \ - { \ + : public TUnaryKernelExec<T##function##KernelExec> { \ template <typename TSink> \ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ TStringStream result; \ @@ 
-378,8 +363,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), \ END_SIMPLE_ARROW_UDF(T##function, T##function##KernelExec::Do) - -#define STRING_STREAM_HRSZ_FORMATTER_UDF(udfName, hrSize) \ +#define STRING_STREAM_HRSZ_FORMATTER_UDF(udfName, hrSize) \ BEGIN_SIMPLE_STRICT_ARROW_UDF(T##udfName, char*(TAutoMap<ui64>)) { \ TStringStream result; \ result << HumanReadableSize(args[0].Get<ui64>(), hrSize); \ @@ -387,8 +371,7 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } \ \ struct T##udfName##KernelExec \ - : public TUnaryKernelExec<T##udfName##KernelExec> \ - { \ + : public TUnaryKernelExec<T##udfName##KernelExec> { \ template <typename TSink> \ static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { \ TStringStream result; \ @@ -415,11 +398,11 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), XX(ReverseBytes, ReverseBytes, NYql::MakeLangVersion(2025, 2)) \ XX(ReverseBits, ReverseBits, NYql::MakeLangVersion(2025, 2)) -#define STRING_UNSAFE_UDF_MAP(XX) \ - XX(Base32Decode, Base32Decode) \ - XX(Base32StrictDecode, Base32StrictDecode) \ - XX(Base64Decode, Base64Decode) \ - XX(Base64StrictDecode, Base64StrictDecode) \ +#define STRING_UNSAFE_UDF_MAP(XX) \ + XX(Base32Decode, Base32Decode) \ + XX(Base32StrictDecode, Base32StrictDecode) \ + XX(Base64Decode, Base64Decode) \ + XX(Base64StrictDecode, Base64StrictDecode) \ XX(HexDecode, HexDecode) // NOTE: The functions below are marked as deprecated, so block implementation @@ -492,196 +475,212 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), XX(HumanReadableQuantity, SF_QUANTITY) \ XX(HumanReadableBytes, SF_BYTES) +BEGIN_SIMPLE_STRICT_ARROW_UDF(TCollapseText, char*(TAutoMap<char*>, ui64)) { + TString input(args[0].AsStringRef()); + ui64 maxLength = args[1].Get<ui64>(); + CollapseText(input, maxLength); + return valueBuilder->NewString(input); +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TCollapseText, 
char*(TAutoMap<char*>, ui64)) { - TString input(args[0].AsStringRef()); - ui64 maxLength = args[1].Get<ui64>(); +struct TCollapseTextKernelExec + : public TBinaryKernelExec<TCollapseTextKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + TString input(arg1.AsStringRef()); + ui64 maxLength = arg2.Get<ui64>(); CollapseText(input, maxLength); - return valueBuilder->NewString(input); + return sink(TBlockItem(input)); } +}; - struct TCollapseTextKernelExec - : public TBinaryKernelExec<TCollapseTextKernelExec> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - TString input(arg1.AsStringRef()); - ui64 maxLength = arg2.Get<ui64>(); - CollapseText(input, maxLength); - return sink(TBlockItem(input)); - } - }; - - END_SIMPLE_ARROW_UDF(TCollapseText, TCollapseTextKernelExec::Do); +END_SIMPLE_ARROW_UDF(TCollapseText, TCollapseTextKernelExec::Do); - - BEGIN_SIMPLE_STRICT_ARROW_UDF(TContains, bool(TOptional<char*>, char*)) { - Y_UNUSED(valueBuilder); - if (!args[0]) - return TUnboxedValuePod(false); - - const TStringBuf haystack(args[0].AsStringRef()); - const TStringBuf needle(args[1].AsStringRef()); - return TUnboxedValuePod(haystack.Contains(needle)); +BEGIN_SIMPLE_STRICT_ARROW_UDF(TContains, bool(TOptional<char*>, char*)) { + Y_UNUSED(valueBuilder); + if (!args[0]) { + return TUnboxedValuePod(false); } - struct TContainsKernelExec : public TBinaryKernelExec<TContainsKernelExec> { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - if (!arg1) - return sink(TBlockItem(false)); + const TStringBuf haystack(args[0].AsStringRef()); + const TStringBuf needle(args[1].AsStringRef()); + return TUnboxedValuePod(haystack.Contains(needle)); +} - const TStringBuf haystack(arg1.AsStringRef()); - const TStringBuf needle(arg2.AsStringRef()); - 
sink(TBlockItem(haystack.Contains(needle))); +struct TContainsKernelExec: public TBinaryKernelExec<TContainsKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + if (!arg1) { + return sink(TBlockItem(false)); } - }; - - END_SIMPLE_ARROW_UDF(TContains, TContainsKernelExec::Do); - static bool IgnoreCaseComparator(char a, char b) { - return AsciiToUpper(a) == AsciiToUpper(b); + const TStringBuf haystack(arg1.AsStringRef()); + const TStringBuf needle(arg2.AsStringRef()); + sink(TBlockItem(haystack.Contains(needle))); } +}; - struct TAsciiContainsIgnoreCaseKernelExec - : public TBinaryKernelExec<TAsciiContainsIgnoreCaseKernelExec> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - if (!arg1) { - return sink(TBlockItem(arg2 ? false : true)); - } +END_SIMPLE_ARROW_UDF(TContains, TContainsKernelExec::Do); - const TStringBuf haystack(arg1.AsStringRef()); - const TStringBuf needle(arg2.AsStringRef()); - if (haystack.empty()) { - return sink(TBlockItem((needle.empty()))); - } - const auto found = std::search(haystack.cbegin(), haystack.cend(), - needle.cbegin(), needle.cend(), IgnoreCaseComparator); - sink(TBlockItem(found != haystack.cend())); - } - }; +static bool IgnoreCaseComparator(char a, char b) { + return AsciiToUpper(a) == AsciiToUpper(b); +} - TUnboxedValuePod AsciiContainsIgnoreCaseImpl(const TUnboxedValuePod* args) { - if (!args[0]) { - return TUnboxedValuePod(false); +struct TAsciiContainsIgnoreCaseKernelExec + : public TBinaryKernelExec<TAsciiContainsIgnoreCaseKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + if (!arg1) { + return sink(TBlockItem(arg2 ? 
false : true)); } - const TStringBuf haystack(args[0].AsStringRef()); - const TStringBuf needle(args[1].AsStringRef()); + const TStringBuf haystack(arg1.AsStringRef()); + const TStringBuf needle(arg2.AsStringRef()); if (haystack.empty()) { - return TUnboxedValuePod(needle.empty()); + return sink(TBlockItem((needle.empty()))); } const auto found = std::search(haystack.cbegin(), haystack.cend(), needle.cbegin(), needle.cend(), IgnoreCaseComparator); - return TUnboxedValuePod(found != haystack.cend()); + sink(TBlockItem(found != haystack.cend())); } +}; - BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(TAsciiContainsIgnoreCase, bool(TOptional<char*>, char*), - builder.SetMinLangVer(NYql::MakeLangVersion(2025, 2))) - { - Y_UNUSED(valueBuilder); - return AsciiContainsIgnoreCaseImpl(args); +TUnboxedValuePod AsciiContainsIgnoreCaseImpl(const TUnboxedValuePod* args) { + if (!args[0]) { + return TUnboxedValuePod(false); } - END_SIMPLE_ARROW_UDF(TAsciiContainsIgnoreCase, TAsciiContainsIgnoreCaseKernelExec::Do); - - BEGIN_SIMPLE_STRICT_ARROW_UDF(T_yql_AsciiContainsIgnoreCase, bool(TOptional<char*>, char*)) - { - Y_UNUSED(valueBuilder); - return AsciiContainsIgnoreCaseImpl(args); + const TStringBuf haystack(args[0].AsStringRef()); + const TStringBuf needle(args[1].AsStringRef()); + if (haystack.empty()) { + return TUnboxedValuePod(needle.empty()); } + const auto found = std::search(haystack.cbegin(), haystack.cend(), + needle.cbegin(), needle.cend(), IgnoreCaseComparator); + return TUnboxedValuePod(found != haystack.cend()); +} - END_SIMPLE_ARROW_UDF(T_yql_AsciiContainsIgnoreCase, TAsciiContainsIgnoreCaseKernelExec::Do); - - BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceAll, char*(TAutoMap<char*>, char*, char*)) { - if (TString result(args[0].AsStringRef()); SubstGlobal(result, args[1].AsStringRef(), args[2].AsStringRef())) - return valueBuilder->NewString(result); - else - return args[0]; - } +BEGIN_SIMPLE_STRICT_ARROW_UDF_OPTIONS(TAsciiContainsIgnoreCase, bool(TOptional<char*>, char*), + 
builder.SetMinLangVer(NYql::MakeLangVersion(2025, 2))) +{ + Y_UNUSED(valueBuilder); + return AsciiContainsIgnoreCaseImpl(args); +} - struct TReplaceAllKernelExec - : public TGenericKernelExec<TReplaceAllKernelExec, 3> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { - TString result(args.GetElement(0).AsStringRef()); - const TStringBuf what(args.GetElement(1).AsStringRef()); - const TStringBuf with(args.GetElement(2).AsStringRef()); - if (SubstGlobal(result, what, with)) { - return sink(TBlockItem(result)); - } else { - return sink(args.GetElement(0)); - } - } - }; +END_SIMPLE_ARROW_UDF(TAsciiContainsIgnoreCase, TAsciiContainsIgnoreCaseKernelExec::Do); - END_SIMPLE_ARROW_UDF(TReplaceAll, TReplaceAllKernelExec::Do) +BEGIN_SIMPLE_STRICT_ARROW_UDF(T_yql_AsciiContainsIgnoreCase, bool(TOptional<char*>, char*)) +{ + Y_UNUSED(valueBuilder); + return AsciiContainsIgnoreCaseImpl(args); +} +END_SIMPLE_ARROW_UDF(T_yql_AsciiContainsIgnoreCase, TAsciiContainsIgnoreCaseKernelExec::Do); - BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceFirst, char*(TAutoMap<char*>, char*, char*)) { - std::string result(args[0].AsStringRef()); - const std::string_view what(args[1].AsStringRef()); - if (const auto index = result.find(what); index != std::string::npos) { - result.replace(index, what.size(), std::string_view(args[2].AsStringRef())); - return valueBuilder->NewString(result); - } +BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceAll, char*(TAutoMap<char*>, char*, char*)) { + if (TString result(args[0].AsStringRef()); SubstGlobal(result, args[1].AsStringRef(), args[2].AsStringRef())) { + return valueBuilder->NewString(result); + } else { return args[0]; } +} - struct TReplaceFirstKernelExec - : public TGenericKernelExec<TReplaceFirstKernelExec, 3> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { - std::string result(args.GetElement(0).AsStringRef()); - const std::string_view 
what(args.GetElement(1).AsStringRef()); - const std::string_view with(args.GetElement(2).AsStringRef()); - if (const auto index = result.find(what); index != std::string::npos) { - result.replace(index, what.size(), with); - return sink(TBlockItem(result)); - } +struct TReplaceAllKernelExec + : public TGenericKernelExec<TReplaceAllKernelExec, 3> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { + TString result(args.GetElement(0).AsStringRef()); + const TStringBuf what(args.GetElement(1).AsStringRef()); + const TStringBuf with(args.GetElement(2).AsStringRef()); + if (SubstGlobal(result, what, with)) { + return sink(TBlockItem(result)); + } else { return sink(args.GetElement(0)); } - }; + } +}; - END_SIMPLE_ARROW_UDF(TReplaceFirst, TReplaceFirstKernelExec::Do) +END_SIMPLE_ARROW_UDF(TReplaceAll, TReplaceAllKernelExec::Do) +BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceFirst, char*(TAutoMap<char*>, char*, char*)) { + std::string result(args[0].AsStringRef()); + const std::string_view what(args[1].AsStringRef()); + if (const auto index = result.find(what); index != std::string::npos) { + result.replace(index, what.size(), std::string_view(args[2].AsStringRef())); + return valueBuilder->NewString(result); + } + return args[0]; +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceLast, char*(TAutoMap<char*>, char*, char*)) { - std::string result(args[0].AsStringRef()); - const std::string_view what(args[1].AsStringRef()); - if (const auto index = result.rfind(what); index != std::string::npos) { - result.replace(index, what.size(), std::string_view(args[2].AsStringRef())); - return valueBuilder->NewString(result); +struct TReplaceFirstKernelExec + : public TGenericKernelExec<TReplaceFirstKernelExec, 3> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { + std::string result(args.GetElement(0).AsStringRef()); + const std::string_view 
what(args.GetElement(1).AsStringRef()); + const std::string_view with(args.GetElement(2).AsStringRef()); + if (const auto index = result.find(what); index != std::string::npos) { + result.replace(index, what.size(), with); + return sink(TBlockItem(result)); } - return args[0]; + return sink(args.GetElement(0)); } +}; - struct TReplaceLastKernelExec - : public TGenericKernelExec<TReplaceLastKernelExec, 3> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { - std::string result(args.GetElement(0).AsStringRef()); - const std::string_view what(args.GetElement(1).AsStringRef()); - const std::string_view with(args.GetElement(2).AsStringRef()); - if (const auto index = result.rfind(what); index != std::string::npos) { - result.replace(index, what.size(), with); - return sink(TBlockItem(result)); - } - return sink(args.GetElement(0)); +END_SIMPLE_ARROW_UDF(TReplaceFirst, TReplaceFirstKernelExec::Do) + +BEGIN_SIMPLE_STRICT_ARROW_UDF(TReplaceLast, char*(TAutoMap<char*>, char*, char*)) { + std::string result(args[0].AsStringRef()); + const std::string_view what(args[1].AsStringRef()); + if (const auto index = result.rfind(what); index != std::string::npos) { + result.replace(index, what.size(), std::string_view(args[2].AsStringRef())); + return valueBuilder->NewString(result); + } + return args[0]; +} + +struct TReplaceLastKernelExec + : public TGenericKernelExec<TReplaceLastKernelExec, 3> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem args, const TSink& sink) { + std::string result(args.GetElement(0).AsStringRef()); + const std::string_view what(args.GetElement(1).AsStringRef()); + const std::string_view with(args.GetElement(2).AsStringRef()); + if (const auto index = result.rfind(what); index != std::string::npos) { + result.replace(index, what.size(), with); + return sink(TBlockItem(result)); } - }; + return sink(args.GetElement(0)); + } +}; - 
END_SIMPLE_ARROW_UDF(TReplaceLast, TReplaceLastKernelExec::Do) +END_SIMPLE_ARROW_UDF(TReplaceLast, TReplaceLastKernelExec::Do) +BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveAll, char*(TAutoMap<char*>, char*)) { + std::string input(args[0].AsStringRef()); + const std::string_view remove(args[1].AsStringRef()); + std::array<bool, 256> chars{}; + for (const ui8 c : remove) { + chars[c] = true; + } + size_t tpos = 0; + for (const ui8 c : input) { + if (!chars[c]) { + input[tpos++] = c; + } + } + if (tpos != input.size()) { + input.resize(tpos); + return valueBuilder->NewString(input); + } + return args[0]; +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveAll, char*(TAutoMap<char*>, char*)) { - std::string input(args[0].AsStringRef()); - const std::string_view remove(args[1].AsStringRef()); +struct TRemoveAllKernelExec + : public TBinaryKernelExec<TRemoveAllKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + std::string input(arg1.AsStringRef()); + const std::string_view remove(arg2.AsStringRef()); std::array<bool, 256> chars{}; for (const ui8 c : remove) { chars[c] = true; @@ -694,42 +693,36 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), } if (tpos != input.size()) { input.resize(tpos); - return valueBuilder->NewString(input); + return sink(TBlockItem(input)); } - return args[0]; + sink(arg1); } +}; - struct TRemoveAllKernelExec - : public TBinaryKernelExec<TRemoveAllKernelExec> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - std::string input(arg1.AsStringRef()); - const std::string_view remove(arg2.AsStringRef()); - std::array<bool, 256> chars{}; - for (const ui8 c : remove) { - chars[c] = true; - } - size_t tpos = 0; - for (const ui8 c : input) { - if (!chars[c]) { - input[tpos++] = c; - } - } - if (tpos != input.size()) { - input.resize(tpos); - return sink(TBlockItem(input)); - 
} - sink(arg1); - } - }; - - END_SIMPLE_ARROW_UDF(TRemoveAll, TRemoveAllKernelExec::Do) +END_SIMPLE_ARROW_UDF(TRemoveAll, TRemoveAllKernelExec::Do) +BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveFirst, char*(TAutoMap<char*>, char*)) { + std::string input(args[0].AsStringRef()); + const std::string_view remove(args[1].AsStringRef()); + std::array<bool, 256> chars{}; + for (const ui8 c : remove) { + chars[c] = true; + } + for (auto it = input.cbegin(); it != input.cend(); ++it) { + if (chars[static_cast<ui8>(*it)]) { + input.erase(it); + return valueBuilder->NewString(input); + } + } + return args[0]; +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveFirst, char*(TAutoMap<char*>, char*)) { - std::string input(args[0].AsStringRef()); - const std::string_view remove(args[1].AsStringRef()); +struct TRemoveFirstKernelExec + : public TBinaryKernelExec<TRemoveFirstKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + std::string input(arg1.AsStringRef()); + const std::string_view remove(arg2.AsStringRef()); std::array<bool, 256> chars{}; for (const ui8 c : remove) { chars[c] = true; @@ -737,39 +730,37 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), for (auto it = input.cbegin(); it != input.cend(); ++it) { if (chars[static_cast<ui8>(*it)]) { input.erase(it); - return valueBuilder->NewString(input); + return sink(TBlockItem(input)); } } - return args[0]; + sink(arg1); } +}; - struct TRemoveFirstKernelExec - : public TBinaryKernelExec<TRemoveFirstKernelExec> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - std::string input(arg1.AsStringRef()); - const std::string_view remove(arg2.AsStringRef()); - std::array<bool, 256> chars{}; - for (const ui8 c : remove) { - chars[c] = true; - } - for (auto it = input.cbegin(); it != input.cend(); ++it) { - if (chars[static_cast<ui8>(*it)]) { - 
input.erase(it); - return sink(TBlockItem(input)); - } - } - sink(arg1); - } - }; - - END_SIMPLE_ARROW_UDF(TRemoveFirst, TRemoveFirstKernelExec::Do) +END_SIMPLE_ARROW_UDF(TRemoveFirst, TRemoveFirstKernelExec::Do) +BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveLast, char*(TAutoMap<char*>, char*)) { + std::string input(args[0].AsStringRef()); + const std::string_view remove(args[1].AsStringRef()); + std::array<bool, 256> chars{}; + for (const ui8 c : remove) { + chars[c] = true; + } + for (auto it = input.crbegin(); it != input.crend(); ++it) { + if (chars[static_cast<ui8>(*it)]) { + input.erase(input.crend() - it - 1, 1); + return valueBuilder->NewString(input); + } + } + return args[0]; +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TRemoveLast, char*(TAutoMap<char*>, char*)) { - std::string input(args[0].AsStringRef()); - const std::string_view remove(args[1].AsStringRef()); +struct TRemoveLastKernelExec + : public TBinaryKernelExec<TRemoveLastKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + std::string input(arg1.AsStringRef()); + const std::string_view remove(arg2.AsStringRef()); std::array<bool, 256> chars{}; for (const ui8 c : remove) { chars[c] = true; @@ -777,347 +768,318 @@ SIMPLE_STRICT_UDF_OPTIONS(TReverse, TOptional<char*>(TOptional<char*>), for (auto it = input.crbegin(); it != input.crend(); ++it) { if (chars[static_cast<ui8>(*it)]) { input.erase(input.crend() - it - 1, 1); - return valueBuilder->NewString(input); + return sink(TBlockItem(input)); } } - return args[0]; + sink(arg1); } +}; + +END_SIMPLE_ARROW_UDF(TRemoveLast, TRemoveLastKernelExec::Do) + +// NOTE: String::Find is marked as deprecated, so block implementation is +// not required for them. Hence, only the scalar one is provided. 
+SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) { + Y_UNUSED(valueBuilder); + const TStringBuf haystack(args[0].AsStringRef()); + const TStringBuf needle(args[1].AsStringRef()); + const ui64 pos = args[2].GetOrDefault<ui64>(0); + return TUnboxedValuePod(haystack.find(needle, pos)); +} - struct TRemoveLastKernelExec - : public TBinaryKernelExec<TRemoveLastKernelExec> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - std::string input(arg1.AsStringRef()); - const std::string_view remove(arg2.AsStringRef()); - std::array<bool, 256> chars{}; - for (const ui8 c : remove) { - chars[c] = true; - } - for (auto it = input.crbegin(); it != input.crend(); ++it) { - if (chars[static_cast<ui8>(*it)]) { - input.erase(input.crend() - it - 1, 1); - return sink(TBlockItem(input)); - } - } - sink(arg1); - } - }; - - END_SIMPLE_ARROW_UDF(TRemoveLast, TRemoveLastKernelExec::Do) - +// NOTE: String::ReverseFind is marked as deprecated, so block +// implementation is not required for them. Hence, only the scalar one is +// provided. +SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TReverseFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) { + Y_UNUSED(valueBuilder); + const TStringBuf haystack(args[0].AsStringRef()); + const TStringBuf needle(args[1].AsStringRef()); + const ui64 pos = args[2].GetOrDefault<ui64>(TStringBuf::npos); + return TUnboxedValuePod(haystack.rfind(needle, pos)); +} - // NOTE: String::Find is marked as deprecated, so block implementation is - // not required for them. Hence, only the scalar one is provided. 
- SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) { - Y_UNUSED(valueBuilder); - const TStringBuf haystack(args[0].AsStringRef()); - const TStringBuf needle(args[1].AsStringRef()); - const ui64 pos = args[2].GetOrDefault<ui64>(0); - return TUnboxedValuePod(haystack.find(needle, pos)); - } +// NOTE: String::Substring is marked as deprecated, so block implementation +// is not required for them. Hence, only the scalar one is provided. +SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSubstring, char*(TAutoMap<char*>, TOptional<ui64>, TOptional<ui64>), 1) { + const TStringBuf input(args[0].AsStringRef()); + const ui64 from = args[1].GetOrDefault<ui64>(0); + const ui64 count = args[2].GetOrDefault<ui64>(TStringBuf::npos); + return valueBuilder->NewString(input.substr(from, count)); +} - // NOTE: String::ReverseFind is marked as deprecated, so block - // implementation is not required for them. Hence, only the scalar one is - // provided. - SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TReverseFind, i64(TAutoMap<char*>, char*, TOptional<ui64>), 1) { - Y_UNUSED(valueBuilder); - const TStringBuf haystack(args[0].AsStringRef()); - const TStringBuf needle(args[1].AsStringRef()); - const ui64 pos = args[2].GetOrDefault<ui64>(TStringBuf::npos); - return TUnboxedValuePod(haystack.rfind(needle, pos)); +using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>; + +template <typename TIt> +static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& input, + const std::string_view::const_iterator from, + const TIt& it, + TTmpVector& result) { + for (const auto& elem : it) { + result.emplace_back(valueBuilder->SubString(input, std::distance(from, elem.TokenStart()), std::distance(elem.TokenStart(), elem.TokenDelim()))); } - - // NOTE: String::Substring is marked as deprecated, so block implementation - // is not required for them. Hence, only the scalar one is provided. 
- SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSubstring, char*(TAutoMap<char*>, TOptional<ui64>, TOptional<ui64>), 1) { - const TStringBuf input(args[0].AsStringRef()); - const ui64 from = args[1].GetOrDefault<ui64>(0); - const ui64 count = args[2].GetOrDefault<ui64>(TStringBuf::npos); - return valueBuilder->NewString(input.substr(from, count)); +} +template <typename TIt> +static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& input, + const std::string_view::const_iterator from, + TIt& it, + bool skipEmpty, + TTmpVector& result) { + if (skipEmpty) { + SplitToListImpl(valueBuilder, input, from, it.SkipEmpty(), result); + } else { + SplitToListImpl(valueBuilder, input, from, it, result); } +} - using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>; - - template <typename TIt> - static void SplitToListImpl( - const IValueBuilder* valueBuilder, - const TUnboxedValue& input, - const std::string_view::const_iterator from, - const TIt& it, - TTmpVector& result) { - for (const auto& elem : it) { - result.emplace_back(valueBuilder->SubString(input, std::distance(from, elem.TokenStart()), std::distance(elem.TokenStart(), elem.TokenDelim()))); - } - } - template <typename TIt> - static void SplitToListImpl( - const IValueBuilder* valueBuilder, - const TUnboxedValue& input, - const std::string_view::const_iterator from, - TIt& it, - bool skipEmpty, - TTmpVector& result) { - if (skipEmpty) { - SplitToListImpl(valueBuilder, input, from, it.SkipEmpty(), result); +constexpr char delimeterStringName[] = "DelimeterString"; +constexpr char skipEmptyName[] = "SkipEmpty"; +constexpr char limitName[] = "Limit"; +using TDelimeterStringArg = TNamedArg<bool, delimeterStringName>; +using TSkipEmptyArg = TNamedArg<bool, skipEmptyName>; +using TLimitArg = TNamedArg<ui64, limitName>; + +SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSplitToList, TListType<char*>(TOptional<char*>, + char*, + TDelimeterStringArg, + TSkipEmptyArg, + TLimitArg), + 3) { + 
TTmpVector result; + if (args[0]) { + const std::string_view input(args[0].AsStringRef()); + const std::string_view delimeter(args[1].AsStringRef()); + const bool delimiterString = args[2].GetOrDefault<bool>(true); + const bool skipEmpty = args[3].GetOrDefault<bool>(false); + const auto limit = args[4].GetOrDefault<ui64>(0); + if (delimiterString) { + if (limit) { + auto it = StringSplitter(input).SplitByString(delimeter).Limit(limit + 1); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } else { + auto it = StringSplitter(input).SplitByString(delimeter); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } } else { - SplitToListImpl(valueBuilder, input, from, it, result); - } - } - - constexpr char delimeterStringName[] = "DelimeterString"; - constexpr char skipEmptyName[] = "SkipEmpty"; - constexpr char limitName[] = "Limit"; - using TDelimeterStringArg = TNamedArg<bool, delimeterStringName>; - using TSkipEmptyArg = TNamedArg<bool, skipEmptyName>; - using TLimitArg = TNamedArg<ui64, limitName>; - - - SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TSplitToList, TListType<char*>( - TOptional<char*>, - char*, - TDelimeterStringArg, - TSkipEmptyArg, - TLimitArg - ), - 3) { - TTmpVector result; - if (args[0]) { - const std::string_view input(args[0].AsStringRef()); - const std::string_view delimeter(args[1].AsStringRef()); - const bool delimiterString = args[2].GetOrDefault<bool>(true); - const bool skipEmpty = args[3].GetOrDefault<bool>(false); - const auto limit = args[4].GetOrDefault<ui64>(0); - if (delimiterString) { - if (limit) { - auto it = StringSplitter(input).SplitByString(delimeter).Limit(limit + 1); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } else { - auto it = StringSplitter(input).SplitByString(delimeter); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } + if (limit) { + auto it = 
StringSplitter(input).SplitBySet(TString(delimeter).c_str()).Limit(limit + 1); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); } else { - if (limit) { - auto it = StringSplitter(input).SplitBySet(TString(delimeter).c_str()).Limit(limit + 1); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } else { - auto it = StringSplitter(input).SplitBySet(TString(delimeter).c_str()); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } + auto it = StringSplitter(input).SplitBySet(TString(delimeter).c_str()); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); } } - return valueBuilder->NewList(result.data(), result.size()); } + return valueBuilder->NewList(result.data(), result.size()); +} - SIMPLE_STRICT_UDF(TJoinFromList, char*(TAutoMap<TListType<TOptional<char*>>>, char*)) { - const TStringBuf delimeter(args[1].AsStringRef()); +SIMPLE_STRICT_UDF(TJoinFromList, char*(TAutoMap<TListType<TOptional<char*>>>, char*)) { + const TStringBuf delimeter(args[1].AsStringRef()); - // Construct the string in-place if the list is eager. - if (auto elems = args[0].GetElements()) { - ui64 elemCount = args[0].GetListLength(); - ui64 valueCount = 0; - ui64 resultLength = 0; + // Construct the string in-place if the list is eager. 
+ if (auto elems = args[0].GetElements()) { + ui64 elemCount = args[0].GetListLength(); + ui64 valueCount = 0; + ui64 resultLength = 0; - for (ui64 i = 0; i != elemCount; ++i) { - if (elems[i]) { - resultLength += elems[i].AsStringRef().Size(); - ++valueCount; - } - } - if (valueCount > 0) { - resultLength += (valueCount - 1) * delimeter.size(); + for (ui64 i = 0; i != elemCount; ++i) { + if (elems[i]) { + resultLength += elems[i].AsStringRef().Size(); + ++valueCount; } + } + if (valueCount > 0) { + resultLength += (valueCount - 1) * delimeter.size(); + } - TUnboxedValue result = valueBuilder->NewStringNotFilled(resultLength); - if (!resultLength) { - return result; - } + TUnboxedValue result = valueBuilder->NewStringNotFilled(resultLength); + if (!resultLength) { + return result; + } - const auto buffer = result.AsStringRef(); - auto it = buffer.Data(); - const auto bufferEnd = buffer.Data() + buffer.Size(); - for (ui64 i = 0; i != elemCount; ++i) { - if (elems[i]) { - TStringBuf curStr = elems[i].AsStringRef(); - memcpy(it, curStr.data(), curStr.size()); - it += curStr.size(); - - // Last element just has been written. - if (it == bufferEnd) { - break; - } - memcpy(it, delimeter.data(), delimeter.size()); - it += delimeter.size(); + const auto buffer = result.AsStringRef(); + auto it = buffer.Data(); + const auto bufferEnd = buffer.Data() + buffer.Size(); + for (ui64 i = 0; i != elemCount; ++i) { + if (elems[i]) { + TStringBuf curStr = elems[i].AsStringRef(); + memcpy(it, curStr.data(), curStr.size()); + it += curStr.size(); + + // Last element just has been written. + if (it == bufferEnd) { + break; } + memcpy(it, delimeter.data(), delimeter.size()); + it += delimeter.size(); } - return result; } + return result; + } - auto input = args[0].GetListIterator(); + auto input = args[0].GetListIterator(); - // Since UnboxedValue can embed small strings, iterating over the list may invalidate StringRefs, thus a copy is required. 
- TVector<TString, TStdAllocatorForUdf<TString>> items; - if (args[0].HasFastListLength()) { - items.reserve(args[0].GetListLength()); - } + // Since UnboxedValue can embed small strings, iterating over the list may invalidate StringRefs, thus a copy is required. + TVector<TString, TStdAllocatorForUdf<TString>> items; + if (args[0].HasFastListLength()) { + items.reserve(args[0].GetListLength()); + } - for (TUnboxedValue current; input.Next(current);) { - if (current) { - items.emplace_back(current.AsStringRef()); - } + for (TUnboxedValue current; input.Next(current);) { + if (current) { + items.emplace_back(current.AsStringRef()); } - - return valueBuilder->NewString(JoinSeq(delimeter, items)); } - BEGIN_SIMPLE_STRICT_ARROW_UDF(TLevensteinDistance, ui64(TAutoMap<char*>, TAutoMap<char*>)) { - Y_UNUSED(valueBuilder); - const TStringBuf left(args[0].AsStringRef()); - const TStringBuf right(args[1].AsStringRef()); - const ui64 result = NLevenshtein::Distance(left, right); - return TUnboxedValuePod(result); - } + return valueBuilder->NewString(JoinSeq(delimeter, items)); +} - struct TLevensteinDistanceKernelExec : public TBinaryKernelExec<TLevensteinDistanceKernelExec> { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - const std::string_view left(arg1.AsStringRef()); - const std::string_view right(arg2.AsStringRef()); - const ui64 result = NLevenshtein::Distance(left, right); - sink(TBlockItem(result)); - } - }; +BEGIN_SIMPLE_STRICT_ARROW_UDF(TLevensteinDistance, ui64(TAutoMap<char*>, TAutoMap<char*>)) { + Y_UNUSED(valueBuilder); + const TStringBuf left(args[0].AsStringRef()); + const TStringBuf right(args[1].AsStringRef()); + const ui64 result = NLevenshtein::Distance(left, right); + return TUnboxedValuePod(result); +} - END_SIMPLE_ARROW_UDF(TLevensteinDistance, TLevensteinDistanceKernelExec::Do); +struct TLevensteinDistanceKernelExec: public TBinaryKernelExec<TLevensteinDistanceKernelExec> 
{ + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { + const std::string_view left(arg1.AsStringRef()); + const std::string_view right(arg2.AsStringRef()); + const ui64 result = NLevenshtein::Distance(left, right); + sink(TBlockItem(result)); + } +}; +END_SIMPLE_ARROW_UDF(TLevensteinDistance, TLevensteinDistanceKernelExec::Do); +BEGIN_SIMPLE_STRICT_ARROW_UDF(THumanReadableDuration, char*(TAutoMap<ui64>)) { + TStringStream result; + result << HumanReadable(TDuration::MicroSeconds(args[0].Get<ui64>())); + return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(THumanReadableDuration, char*(TAutoMap<ui64>)) { +struct THumanReadableDurationKernelExec + : public TUnaryKernelExec<THumanReadableDurationKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { TStringStream result; - result << HumanReadable(TDuration::MicroSeconds(args[0].Get<ui64>())); - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); + result << HumanReadable(TDuration::MicroSeconds(arg1.Get<ui64>())); + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); } +}; - struct THumanReadableDurationKernelExec - : public TUnaryKernelExec<THumanReadableDurationKernelExec> - { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, const TSink& sink) { - TStringStream result; - result << HumanReadable(TDuration::MicroSeconds(arg1.Get<ui64>())); - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); - } - }; - - END_SIMPLE_ARROW_UDF(THumanReadableDuration, THumanReadableDurationKernelExec::Do) +END_SIMPLE_ARROW_UDF(THumanReadableDuration, THumanReadableDurationKernelExec::Do) +BEGIN_SIMPLE_STRICT_ARROW_UDF(TPrec, char*(TAutoMap<double>, ui64)) { + TStringStream result; + result << Prec(args[0].Get<double>(), args[1].Get<ui64>()); + return 
valueBuilder->NewString(TStringRef(result.Data(), result.Size())); +} - BEGIN_SIMPLE_STRICT_ARROW_UDF(TPrec, char*(TAutoMap<double>, ui64)) { +struct TPrecKernelExec: public TBinaryKernelExec<TPrecKernelExec> { + template <typename TSink> + static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { TStringStream result; - result << Prec(args[0].Get<double>(), args[1].Get<ui64>()); - return valueBuilder->NewString(TStringRef(result.Data(), result.Size())); + result << Prec(arg1.Get<double>(), arg2.Get<ui64>()); + sink(TBlockItem(TStringRef(result.Data(), result.Size()))); } +}; - struct TPrecKernelExec : public TBinaryKernelExec<TPrecKernelExec> { - template <typename TSink> - static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { - TStringStream result; - result << Prec(arg1.Get<double>(), arg2.Get<ui64>()); - sink(TBlockItem(TStringRef(result.Data(), result.Size()))); - } - }; +END_SIMPLE_ARROW_UDF(TPrec, TPrecKernelExec::Do) - END_SIMPLE_ARROW_UDF(TPrec, TPrecKernelExec::Do) +SIMPLE_STRICT_UDF(TToByteList, TListType<ui8>(char*)) { + const TStringBuf input(args[0].AsStringRef()); + TUnboxedValue* items = nullptr; + TUnboxedValue result = valueBuilder->NewArray(input.size(), items); + for (const unsigned char c : input) { + *items++ = TUnboxedValuePod(c); + } + return result; +} +SIMPLE_STRICT_UDF(TFromByteList, char*(TListType<ui8>)) { + auto input = args[0]; - SIMPLE_STRICT_UDF(TToByteList, TListType<ui8>(char*)) { - const TStringBuf input(args[0].AsStringRef()); - TUnboxedValue* items = nullptr; - TUnboxedValue result = valueBuilder->NewArray(input.size(), items); - for (const unsigned char c : input) { - *items++ = TUnboxedValuePod(c); + if (auto elems = input.GetElements()) { + const auto elemCount = input.GetListLength(); + TUnboxedValue result = valueBuilder->NewStringNotFilled(input.GetListLength()); + auto bufferPtr = result.AsStringRef().Data(); + for (ui64 i = 0; i != 
elemCount; ++i) { + *(bufferPtr++) = elems[i].Get<ui8>(); } return result; } - SIMPLE_STRICT_UDF(TFromByteList, char*(TListType<ui8>)) { - auto input = args[0]; + std::vector<char, NKikimr::NUdf::TStdAllocatorForUdf<char>> buffer; + buffer.reserve(TUnboxedValuePod::InternalBufferSize); - if (auto elems = input.GetElements()) { - const auto elemCount = input.GetListLength(); - TUnboxedValue result = valueBuilder->NewStringNotFilled(input.GetListLength()); - auto bufferPtr = result.AsStringRef().Data(); - for (ui64 i = 0; i != elemCount; ++i) { - *(bufferPtr++) = elems[i].Get<ui8>(); - } - return result; - } - - std::vector<char, NKikimr::NUdf::TStdAllocatorForUdf<char>> buffer; - buffer.reserve(TUnboxedValuePod::InternalBufferSize); - - const auto& iter = input.GetListIterator(); - for (NUdf::TUnboxedValue item; iter.Next(item); ) { - buffer.push_back(item.Get<ui8>()); - } - - return valueBuilder->NewString(TStringRef(buffer.data(), buffer.size())); + const auto& iter = input.GetListIterator(); + for (NUdf::TUnboxedValue item; iter.Next(item);) { + buffer.push_back(item.Get<ui8>()); } + return valueBuilder->NewString(TStringRef(buffer.data(), buffer.size())); +} + #define STRING_REGISTER_UDF(udfName, ...) T##udfName, #define STRING_OPT_REGISTER_UDF(udfName, ...) 
T_yql_##udfName, - STRING_UDF_MAP(STRING_UDF) - STRING_UNSAFE_UDF_MAP(STRING_UNSAFE_UDF) - STROKA_CASE_UDF_MAP(STROKA_CASE_UDF) - STROKA_ASCII_CASE_UDF_MAP(STROKA_ASCII_CASE_UDF) - STROKA_FIND_UDF_MAP(STROKA_FIND_UDF) - STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(STRING_TWO_ARGS_UDF_DEPRECATED_2025_02) - STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_ASCII_CMP_IGNORE_CASE_UDF) - IS_ASCII_UDF_MAP(IS_ASCII_UDF) - - static constexpr ui64 padLim = 1000000; - STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_STREAM_PAD_FORMATTER_UDF) - STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_STREAM_NUM_FORMATTER_UDF) - STRING_STREAM_TEXT_FORMATTER_UDF_MAP(STRING_STREAM_TEXT_FORMATTER_UDF) - STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(STRING_STREAM_HRSZ_FORMATTER_UDF) - - SIMPLE_MODULE(TStringModule, - STRING_UDF_MAP(STRING_REGISTER_UDF) - STRING_UNSAFE_UDF_MAP(STRING_REGISTER_UDF) - STROKA_UDF_MAP(STRING_REGISTER_UDF) - STROKA_CASE_UDF_MAP(STRING_REGISTER_UDF) - STROKA_ASCII_CASE_UDF_MAP(STRING_REGISTER_UDF) - STROKA_FIND_UDF_MAP(STRING_REGISTER_UDF) - STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(STRING_REGISTER_UDF) - STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_REGISTER_UDF) - STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_OPT_REGISTER_UDF) - IS_ASCII_UDF_MAP(STRING_REGISTER_UDF) - STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) - STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) - STRING_STREAM_TEXT_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) - STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) - TReverse, - TCollapseText, - TReplaceAll, - TReplaceFirst, - TReplaceLast, - TRemoveAll, - TRemoveFirst, - TRemoveLast, - TContains, - TAsciiContainsIgnoreCase, - T_yql_AsciiContainsIgnoreCase, - TFind, - TReverseFind, - TSubstring, - TSplitToList, - TJoinFromList, - TLevensteinDistance, - THumanReadableDuration, - TPrec, - TToByteList, - TFromByteList) - } // namespace +STRING_UDF_MAP(STRING_UDF) +STRING_UNSAFE_UDF_MAP(STRING_UNSAFE_UDF) +STROKA_CASE_UDF_MAP(STROKA_CASE_UDF) 
+STROKA_ASCII_CASE_UDF_MAP(STROKA_ASCII_CASE_UDF) +STROKA_FIND_UDF_MAP(STROKA_FIND_UDF) +STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(STRING_TWO_ARGS_UDF_DEPRECATED_2025_02) +STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_ASCII_CMP_IGNORE_CASE_UDF) +IS_ASCII_UDF_MAP(IS_ASCII_UDF) + +static constexpr ui64 padLim = 1000000; +STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_STREAM_PAD_FORMATTER_UDF) +STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_STREAM_NUM_FORMATTER_UDF) +STRING_STREAM_TEXT_FORMATTER_UDF_MAP(STRING_STREAM_TEXT_FORMATTER_UDF) +STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(STRING_STREAM_HRSZ_FORMATTER_UDF) + +SIMPLE_MODULE(TStringModule, + STRING_UDF_MAP(STRING_REGISTER_UDF) + STRING_UNSAFE_UDF_MAP(STRING_REGISTER_UDF) + STROKA_UDF_MAP(STRING_REGISTER_UDF) + STROKA_CASE_UDF_MAP(STRING_REGISTER_UDF) + STROKA_ASCII_CASE_UDF_MAP(STRING_REGISTER_UDF) + STROKA_FIND_UDF_MAP(STRING_REGISTER_UDF) + STRING_TWO_ARGS_UDF_MAP_DEPRECATED_2025_02(STRING_REGISTER_UDF) + STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_REGISTER_UDF) + STRING_ASCII_CMP_IGNORE_CASE_UDF_MAP(STRING_OPT_REGISTER_UDF) + IS_ASCII_UDF_MAP(STRING_REGISTER_UDF) + STRING_STREAM_PAD_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) + STRING_STREAM_NUM_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) + STRING_STREAM_TEXT_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) + STRING_STREAM_HRSZ_FORMATTER_UDF_MAP(STRING_REGISTER_UDF) + TReverse, + TCollapseText, + TReplaceAll, + TReplaceFirst, + TReplaceLast, + TRemoveAll, + TRemoveFirst, + TRemoveLast, + TContains, + TAsciiContainsIgnoreCase, + T_yql_AsciiContainsIgnoreCase, + TFind, + TReverseFind, + TSubstring, + TSplitToList, + TJoinFromList, + TLevensteinDistance, + THumanReadableDuration, + TPrec, + TToByteList, + TFromByteList) +} // namespace REGISTER_MODULES(TStringModule) diff --git a/yql/essentials/udfs/common/string/ya.make b/yql/essentials/udfs/common/string/ya.make index bd83c78b8c1..1f8c6d4926d 100644 --- a/yql/essentials/udfs/common/string/ya.make +++ b/yql/essentials/udfs/common/string/ya.make 
@@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(string_udf) 43 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( string_udf.cpp diff --git a/yql/essentials/udfs/common/top/top_udf.cpp b/yql/essentials/udfs/common/top/top_udf.cpp index a9b3d3e430d..41a0136f411 100644 --- a/yql/essentials/udfs/common/top/top_udf.cpp +++ b/yql/essentials/udfs/common/top/top_udf.cpp @@ -71,11 +71,13 @@ class TTopKeeperContainer { size_t MaxSize_ = 0; bool Finalized_ = false; TCompare Compare_; + public: explicit TTopKeeperContainer(TCompare compare) : Keeper_(0, compare) , Compare_(compare) - {} + { + } TVector<TValue, TAllocator> GetInternal() { if (OrderedSet_) { @@ -152,7 +154,8 @@ protected: protected: explicit TTopKeeperWrapperBase(TCompare compare) : Keeper_(compare) - {} + { + } void Init(const TUnboxedValuePod& value, ui32 maxSize) { Keeper_.SetMaxSize(maxSize); @@ -220,7 +223,8 @@ protected: protected: explicit TTopKeeperPairWrapperBase(TCompare compare) : Keeper_(compare) - {} + { + } void Init(const TUnboxedValuePod& key, const TUnboxedValuePod& payload, ui32 maxSize) { Keeper_.SetMaxSize(maxSize); @@ -284,14 +288,12 @@ public: } }; - template <EDataSlot Slot, bool HasKey, bool IsTop> class TTopKeeperDataWrapper; template <EDataSlot Slot, bool IsTop> class TTopKeeperDataWrapper<Slot, false, IsTop> - : public TTopKeeperWrapperBase<TDataCompare<Slot, IsTop>> -{ + : public TTopKeeperWrapperBase<TDataCompare<Slot, IsTop>> { public: using TBase = TTopKeeperWrapperBase<TDataCompare<Slot, IsTop>>; @@ -316,8 +318,7 @@ public: template <EDataSlot Slot, bool IsTop> class TTopKeeperDataWrapper<Slot, true, IsTop> - : public TTopKeeperPairWrapperBase<TDataPairCompare<Slot, IsTop>> -{ + : public TTopKeeperPairWrapperBase<TDataPairCompare<Slot, IsTop>> { public: using TBase = TTopKeeperPairWrapperBase<TDataPairCompare<Slot, IsTop>>; @@ -345,8 +346,7 @@ class TTopKeeperWrapper; template <bool IsTop> class TTopKeeperWrapper<false, IsTop> - : public TTopKeeperWrapperBase<TGenericCompare<IsTop>> -{ + : public 
TTopKeeperWrapperBase<TGenericCompare<IsTop>> { public: using TBase = TTopKeeperWrapperBase<TGenericCompare<IsTop>>; @@ -371,8 +371,7 @@ public: template <bool IsTop> class TTopKeeperWrapper<true, IsTop> - : public TTopKeeperPairWrapperBase<TGenericPairCompare<IsTop>> -{ + : public TTopKeeperPairWrapperBase<TGenericPairCompare<IsTop>> { public: using TBase = TTopKeeperPairWrapperBase<TGenericPairCompare<IsTop>>; @@ -395,7 +394,6 @@ public: } }; - template <EDataSlot Slot, bool HasKey, bool IsTop> class TTopResourceData; @@ -414,9 +412,8 @@ TTopResource<HasKey, IsTop>* GetTopResource(const TUnboxedValuePod& arg) { return static_cast<TTopResource<HasKey, IsTop>*>(arg.AsBoxed().Get()); } - template <EDataSlot Slot, bool HasKey, bool IsTop> -class TTopCreateData : public TBoxedValue { +class TTopCreateData: public TBoxedValue { private: template <bool HasKey_ = HasKey, typename std::enable_if_t<!HasKey_>* = nullptr> TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { @@ -436,7 +433,7 @@ private: }; template <bool HasKey, bool IsTop> -class TTopCreate : public TBoxedValue { +class TTopCreate: public TBoxedValue { private: template <bool HasKey_ = HasKey, typename std::enable_if_t<!HasKey_>* = nullptr> TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { @@ -457,14 +454,15 @@ private: public: explicit TTopCreate(ICompare::TPtr compare) : Compare_(compare) - {} + { + } private: ICompare::TPtr Compare_; }; template <EDataSlot Slot, bool HasKey, bool IsTop> -class TTopAddValueData : public TBoxedValue { +class TTopAddValueData: public TBoxedValue { private: template <bool HasKey_ = HasKey, typename std::enable_if_t<!HasKey_>* = nullptr> TUnboxedValue RunImpl(const TUnboxedValuePod* args) const { @@ -486,7 +484,7 @@ private: }; template <bool HasKey, bool IsTop> -class TTopAddValue : public TBoxedValue { +class TTopAddValue: public TBoxedValue { private: template <bool HasKey_ = HasKey, typename std::enable_if_t<!HasKey_>* = nullptr> TUnboxedValue 
RunImpl(const TUnboxedValuePod* args) const { @@ -508,11 +506,12 @@ private: public: explicit TTopAddValue(ICompare::TPtr) - {} + { + } }; template <EDataSlot Slot, bool HasKey, bool IsTop> -class TTopSerializeData : public TBoxedValue { +class TTopSerializeData: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { auto resource = GetTopResourceData<Slot, HasKey, IsTop>(args[0]); @@ -521,7 +520,7 @@ private: }; template <bool HasKey, bool IsTop> -class TTopSerialize : public TBoxedValue { +class TTopSerialize: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { auto resource = GetTopResource<HasKey, IsTop>(args[0]); @@ -530,11 +529,12 @@ private: public: explicit TTopSerialize(ICompare::TPtr) - {} + { + } }; template <EDataSlot Slot, bool HasKey, bool IsTop> -class TTopDeserializeData : public TBoxedValue { +class TTopDeserializeData: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { return TUnboxedValuePod(new TTopResourceData<Slot, HasKey, IsTop>(args[0])); @@ -542,7 +542,7 @@ private: }; template <bool HasKey, bool IsTop> -class TTopDeserialize : public TBoxedValue { +class TTopDeserialize: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { return TUnboxedValuePod(new TTopResource<HasKey, IsTop>(args[0], Compare_)); @@ -551,14 +551,15 @@ private: public: explicit TTopDeserialize(ICompare::TPtr compare) : Compare_(compare) - {} + { + } private: ICompare::TPtr Compare_; }; template <EDataSlot Slot, bool HasKey, bool IsTop> -class TTopMergeData : public TBoxedValue { +class TTopMergeData: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { auto left = GetTopResourceData<Slot, HasKey, IsTop>(args[0]); @@ -568,7 +569,7 
@@ private: }; template <bool HasKey, bool IsTop> -class TTopMerge : public TBoxedValue { +class TTopMerge: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { auto left = GetTopResource<HasKey, IsTop>(args[0]); @@ -579,14 +580,15 @@ private: public: explicit TTopMerge(ICompare::TPtr compare) : Compare_(compare) - {} + { + } private: ICompare::TPtr Compare_; }; template <EDataSlot Slot, bool HasKey, bool IsTop> -class TTopGetResultData : public TBoxedValue { +class TTopGetResultData: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { auto resource = GetTopResourceData<Slot, HasKey, IsTop>(args[0]); @@ -595,7 +597,7 @@ private: }; template <bool HasKey, bool IsTop> -class TTopGetResult : public TBoxedValue { +class TTopGetResult: public TBoxedValue { private: TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const override { auto resource = GetTopResource<HasKey, IsTop>(args[0]); @@ -604,25 +606,24 @@ private: public: explicit TTopGetResult(ICompare::TPtr) - {} + { + } }; - -#define RESOURCE(slot, hasKey, isTop) \ -extern const char TopResourceName_##slot##_##hasKey##_##isTop[] = \ - "Top.TopResource."#slot"."#hasKey"."#isTop; \ -template <> \ -class TTopResourceData<EDataSlot::slot, hasKey, isTop>: \ - public TBoxedResource< \ - TTopKeeperDataWrapper<EDataSlot::slot, hasKey, isTop>, \ - TopResourceName_##slot##_##hasKey##_##isTop> \ -{ \ -public: \ - template <typename... Args> \ - inline TTopResourceData(Args&&... args) \ - : TBoxedResource(std::forward<Args>(args)...) \ - {} \ -}; +#define RESOURCE(slot, hasKey, isTop) \ + extern const char TopResourceName_##slot##_##hasKey##_##isTop[] = \ + "Top.TopResource." #slot "." #hasKey "." 
#isTop; \ + template <> \ + class TTopResourceData<EDataSlot::slot, hasKey, isTop>: public TBoxedResource< \ + TTopKeeperDataWrapper<EDataSlot::slot, hasKey, isTop>, \ + TopResourceName_##slot##_##hasKey##_##isTop> { \ + public: \ + template <typename... Args> \ + inline TTopResourceData(Args&&... args) \ + : TBoxedResource(std::forward<Args>(args)...) \ + { \ + } \ + }; #define RESOURCE_00(slot, ...) RESOURCE(slot, false, false) #define RESOURCE_01(slot, ...) RESOURCE(slot, false, true) @@ -679,52 +680,50 @@ UDF_TYPE_ID_MAP(RESOURCE_11) #define TYPE_10(slot, ...) MAKE_TYPE(slot, true, false) #define TYPE_11(slot, ...) MAKE_TYPE(slot, true, true) -#define PARAMETRIZE(action) \ - if (hasKey) { \ - if (isTop) { \ - switch (*slot) { \ - UDF_TYPE_ID_MAP(action##_11) \ - } \ - } else { \ - switch (*slot) { \ - UDF_TYPE_ID_MAP(action##_10) \ - } \ - } \ - } else { \ - if (isTop) { \ - switch (*slot) { \ - UDF_TYPE_ID_MAP(action##_01) \ - } \ - } else { \ - switch (*slot) { \ - UDF_TYPE_ID_MAP(action##_00) \ - } \ - } \ - } - - -#define RESOURCE_GENERIC(hasKey, isTop) \ -extern const char TopResourceName_Generic_##hasKey##_##isTop[] = \ - "Top.TopResource.Generic."#hasKey"."#isTop; \ -template <> \ -class TTopResource<hasKey, isTop>: \ - public TBoxedResource< \ - TTopKeeperWrapper<hasKey, isTop>, \ - TopResourceName_Generic_##hasKey##_##isTop> \ -{ \ -public: \ - template <typename... Args> \ - inline TTopResource(Args&&... args) \ - : TBoxedResource(std::forward<Args>(args)...) 
\ - {} \ -}; +#define PARAMETRIZE(action) \ + if (hasKey) { \ + if (isTop) { \ + switch (*slot) { \ + UDF_TYPE_ID_MAP(action##_11) \ + } \ + } else { \ + switch (*slot) { \ + UDF_TYPE_ID_MAP(action##_10) \ + } \ + } \ + } else { \ + if (isTop) { \ + switch (*slot) { \ + UDF_TYPE_ID_MAP(action##_01) \ + } \ + } else { \ + switch (*slot) { \ + UDF_TYPE_ID_MAP(action##_00) \ + } \ + } \ + } + +#define RESOURCE_GENERIC(hasKey, isTop) \ + extern const char TopResourceName_Generic_##hasKey##_##isTop[] = \ + "Top.TopResource.Generic." #hasKey "." #isTop; \ + template <> \ + class TTopResource<hasKey, isTop>: public TBoxedResource< \ + TTopKeeperWrapper<hasKey, isTop>, \ + TopResourceName_Generic_##hasKey##_##isTop> { \ + public: \ + template <typename... Args> \ + inline TTopResource(Args&&... args) \ + : TBoxedResource(std::forward<Args>(args)...) \ + { \ + } \ + }; RESOURCE_GENERIC(false, false) RESOURCE_GENERIC(false, true) RESOURCE_GENERIC(true, false) RESOURCE_GENERIC(true, true) -#define MAKE_IMPL_GENERIC(operation, hasKey, isTop) \ +#define MAKE_IMPL_GENERIC(operation, hasKey, isTop) \ builder.Implementation(new operation<hasKey, isTop>(compare)); #define CREATE_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopCreate, hasKey, isTop) @@ -734,7 +733,7 @@ RESOURCE_GENERIC(true, true) #define DESERIALIZE_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopDeserialize, hasKey, isTop) #define GET_RESULT_GENERIC(hasKey, isTop) MAKE_IMPL_GENERIC(TTopGetResult, hasKey, isTop) -#define TYPE_GENERIC(hasKey, isTop) \ +#define TYPE_GENERIC(hasKey, isTop) \ topType = builder.Resource(TopResourceName_Generic_##hasKey##_##isTop); #define PARAMETRIZE_GENERIC(action) \ @@ -752,7 +751,6 @@ RESOURCE_GENERIC(true, true) } \ } - static const auto CreateName = TStringRef::Of("Create"); static const auto AddValueName = TStringRef::Of("AddValue"); static const auto SerializeName = TStringRef::Of("Serialize"); @@ -760,7 +758,7 @@ static const auto DeserializeName = TStringRef::Of("Deserialize"); 
static const auto MergeName = TStringRef::Of("Merge"); static const auto GetResultName = TStringRef::Of("GetResult"); -class TTopModule : public IUdfModule { +class TTopModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("Top"); @@ -783,8 +781,7 @@ public: TType* userType, const TStringRef& typeConfig, ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + IFunctionTypeInfoBuilder& builder) const final { Y_UNUSED(typeConfig); try { @@ -951,4 +948,3 @@ public: } // namespace REGISTER_MODULES(TTopModule) - diff --git a/yql/essentials/udfs/common/top/ya.make b/yql/essentials/udfs/common/top/ya.make index 4a8cdf859e3..f0818f2b34d 100644 --- a/yql/essentials/udfs/common/top/ya.make +++ b/yql/essentials/udfs/common/top/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(top_udf) 28 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( top_udf.cpp diff --git a/yql/essentials/udfs/common/topfreq/static/static_udf.cpp b/yql/essentials/udfs/common/topfreq/static/static_udf.cpp index 4075bfa9c2b..40e478c5276 100644 --- a/yql/essentials/udfs/common/topfreq/static/static_udf.cpp +++ b/yql/essentials/udfs/common/topfreq/static/static_udf.cpp @@ -1,10 +1,10 @@ #include "topfreq_udf.h" namespace NYql { - namespace NUdf { - NUdf::TUniquePtr<NUdf::IUdfModule> CreateTopFreqModule() { - return new TTopFreqModule(); - } - - } +namespace NUdf { +NUdf::TUniquePtr<NUdf::IUdfModule> CreateTopFreqModule() { + return new TTopFreqModule(); } + +} // namespace NUdf +} // namespace NYql diff --git a/yql/essentials/udfs/common/topfreq/static/topfreq.cpp b/yql/essentials/udfs/common/topfreq/static/topfreq.cpp index c118b52d0a1..321bfd5a667 100644 --- a/yql/essentials/udfs/common/topfreq/static/topfreq.cpp +++ b/yql/essentials/udfs/common/topfreq/static/topfreq.cpp @@ -8,7 +8,8 @@ using namespace NUdf; template <typename THash, typename TEquals> TTopFreqBase<THash, TEquals>::TTopFreqBase(THash hash, TEquals equals) : Indices_(0, hash, equals) -{} +{ +} template <typename THash, 
typename TEquals> void TTopFreqBase<THash, TEquals>::Init(const TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize) { @@ -179,21 +180,21 @@ UDF_TYPE_ID_MAP(INSTANCE_FOR) #undef INSTANCE_FOR TTopFreqGeneric::TTopFreqGeneric(const TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize, - IHash::TPtr hash, IEquate::TPtr equate) + IHash::TPtr hash, IEquate::TPtr equate) : TBase(TGenericHash{hash}, TGenericEquals{equate}) { TBase::Init(value, minSize, maxSize); } TTopFreqGeneric::TTopFreqGeneric(const TTopFreqGeneric& topFreq1, const TTopFreqGeneric& topFreq2, - IHash::TPtr hash, IEquate::TPtr equate) + IHash::TPtr hash, IEquate::TPtr equate) : TBase(TGenericHash{hash}, TGenericEquals{equate}) { TBase::Merge(topFreq1, topFreq2); } TTopFreqGeneric::TTopFreqGeneric(const TUnboxedValuePod& serialized, - IHash::TPtr hash, IEquate::TPtr equate) + IHash::TPtr hash, IEquate::TPtr equate) : TBase(TGenericHash{hash}, TGenericEquals{equate}) { TBase::Deserialize(serialized); @@ -210,4 +211,3 @@ TUnboxedValue TTopFreqGeneric::Get(const IValueBuilder* builder, ui32 resultSize void TTopFreqGeneric::AddValue(const TUnboxedValuePod& value) { TBase::AddValue(value); } - diff --git a/yql/essentials/udfs/common/topfreq/static/topfreq.h b/yql/essentials/udfs/common/topfreq/static/topfreq.h index b10574f33f6..c2d9d78d14c 100644 --- a/yql/essentials/udfs/common/topfreq/static/topfreq.h +++ b/yql/essentials/udfs/common/topfreq/static/topfreq.h @@ -42,9 +42,8 @@ protected: template <NKikimr::NUdf::EDataSlot Slot> class TTopFreqData : public TTopFreqBase< - NKikimr::NUdf::TUnboxedValueHash<Slot>, - NKikimr::NUdf::TUnboxedValueEquals<Slot>> -{ + NKikimr::NUdf::TUnboxedValueHash<Slot>, + NKikimr::NUdf::TUnboxedValueEquals<Slot>> { public: using TBase = TTopFreqBase< NKikimr::NUdf::TUnboxedValueHash<Slot>, @@ -72,24 +71,22 @@ struct TGenericEquals { bool operator()( const NKikimr::NUdf::TUnboxedValuePod& left, - const NKikimr::NUdf::TUnboxedValuePod& right) const - { + const 
NKikimr::NUdf::TUnboxedValuePod& right) const { return Equate->Equals(left, right); } }; class TTopFreqGeneric - : public TTopFreqBase<TGenericHash, TGenericEquals> -{ + : public TTopFreqBase<TGenericHash, TGenericEquals> { public: using TBase = TTopFreqBase<TGenericHash, TGenericEquals>; TTopFreqGeneric(const NKikimr::NUdf::TUnboxedValuePod& value, const ui32 minSize, const ui32 maxSize, - NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); + NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); TTopFreqGeneric(const TTopFreqGeneric& topFreq1, const TTopFreqGeneric& topFreq2, - NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); + NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); TTopFreqGeneric(const NKikimr::NUdf::TUnboxedValuePod& serialized, - NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); + NKikimr::NUdf::IHash::TPtr hash, NKikimr::NUdf::IEquate::TPtr equate); NKikimr::NUdf::TUnboxedValue Serialize(const NKikimr::NUdf::IValueBuilder* builder); NKikimr::NUdf::TUnboxedValue Get(const NKikimr::NUdf::IValueBuilder* builder, ui32 resultSize); diff --git a/yql/essentials/udfs/common/topfreq/static/topfreq_udf.h b/yql/essentials/udfs/common/topfreq/static/topfreq_udf.h index 93f8c3fd587..aef26def698 100644 --- a/yql/essentials/udfs/common/topfreq/static/topfreq_udf.h +++ b/yql/essentials/udfs/common/topfreq/static/topfreq_udf.h @@ -15,177 +15,176 @@ using namespace NYql; using namespace NUdf; namespace { - extern const char TopFreqResourceNameGeneric[] = "TopFreq.TopFreqResource.Generic"; - class TTopFreqResource: - public TBoxedResource<TTopFreqGeneric, TopFreqResourceNameGeneric> +extern const char TopFreqResourceNameGeneric[] = "TopFreq.TopFreqResource.Generic"; +class TTopFreqResource: public TBoxedResource<TTopFreqGeneric, TopFreqResourceNameGeneric> { +public: + template <typename... Args> + inline TTopFreqResource(Args&&... 
args) + : TBoxedResource(std::forward<Args>(args)...) { - public: - template <typename... Args> - inline TTopFreqResource(Args&&... args) - : TBoxedResource(std::forward<Args>(args)...) - {} - }; - - template <EDataSlot Slot> - class TTopFreqResourceData; - - template <EDataSlot Slot> - TTopFreqResourceData<Slot>* GetTopFreqResourceData(const TUnboxedValuePod& arg) { - TTopFreqResourceData<Slot>::Validate(arg); - return static_cast<TTopFreqResourceData<Slot>*>(arg.AsBoxed().Get()); } - - TTopFreqResource* GetTopFreqResource(const TUnboxedValuePod& arg) { - TTopFreqResource::Validate(arg); - return static_cast<TTopFreqResource*>(arg.AsBoxed().Get()); +}; + +template <EDataSlot Slot> +class TTopFreqResourceData; + +template <EDataSlot Slot> +TTopFreqResourceData<Slot>* GetTopFreqResourceData(const TUnboxedValuePod& arg) { + TTopFreqResourceData<Slot>::Validate(arg); + return static_cast<TTopFreqResourceData<Slot>*>(arg.AsBoxed().Get()); +} + +TTopFreqResource* GetTopFreqResource(const TUnboxedValuePod& arg) { + TTopFreqResource::Validate(arg); + return static_cast<TTopFreqResource*>(arg.AsBoxed().Get()); +} + +template <EDataSlot Slot> +class TTopFreqCreateData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + ui32 minSize = args[1].Get<ui32>(); + return TUnboxedValuePod(new TTopFreqResourceData<Slot>(args[0], minSize, minSize * 2)); } +}; +class TTopFreqCreate: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + ui32 minSize = args[1].Get<ui32>(); + return TUnboxedValuePod(new TTopFreqResource(args[0], minSize, minSize * 2, Hash_, Equate_)); + } - template <EDataSlot Slot> - class TTopFreqCreateData: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - ui32 minSize = args[1].Get<ui32>(); - return TUnboxedValuePod(new TTopFreqResourceData<Slot>(args[0], minSize, minSize * 2)); - } - 
}; - - class TTopFreqCreate: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - ui32 minSize = args[1].Get<ui32>(); - return TUnboxedValuePod(new TTopFreqResource(args[0], minSize, minSize * 2, Hash_, Equate_)); - } - - public: - TTopFreqCreate(IHash::TPtr hash, IEquate::TPtr equate) - : Hash_(hash) - , Equate_(equate) - {} - - private: - IHash::TPtr Hash_; - IEquate::TPtr Equate_; - }; - - template <EDataSlot Slot> - class TTopFreqAddValueData: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - const auto topFreq = GetTopFreqResourceData<Slot>(args[0]); - topFreq->Get()->AddValue(args[1]); - return TUnboxedValuePod(topFreq); - } - }; - - class TTopFreqAddValue: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - const auto topFreq = GetTopFreqResource(args[0]); - topFreq->Get()->AddValue(args[1]); - return TUnboxedValuePod(topFreq); - } - }; - - template <EDataSlot Slot> - class TTopFreqSerializeData: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { - return GetTopFreqResourceData<Slot>(args[0])->Get()->Serialize(valueBuilder); - } - }; - - class TTopFreqSerialize: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { - return GetTopFreqResource(args[0])->Get()->Serialize(valueBuilder); - } - }; - - template <EDataSlot Slot> - class TTopFreqDeserializeData: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - return TUnboxedValuePod(new TTopFreqResourceData<Slot>(args[0])); - } - }; +public: + TTopFreqCreate(IHash::TPtr hash, IEquate::TPtr equate) + : Hash_(hash) + , Equate_(equate) + { + } - class TTopFreqDeserialize: public TBoxedValue { - private: - TUnboxedValue Run(const 
IValueBuilder*, const TUnboxedValuePod* args) const { - return TUnboxedValuePod(new TTopFreqResource(args[0], Hash_, Equate_)); - } +private: + IHash::TPtr Hash_; + IEquate::TPtr Equate_; +}; + +template <EDataSlot Slot> +class TTopFreqAddValueData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + const auto topFreq = GetTopFreqResourceData<Slot>(args[0]); + topFreq->Get()->AddValue(args[1]); + return TUnboxedValuePod(topFreq); + } +}; + +class TTopFreqAddValue: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + const auto topFreq = GetTopFreqResource(args[0]); + topFreq->Get()->AddValue(args[1]); + return TUnboxedValuePod(topFreq); + } +}; - public: - TTopFreqDeserialize(IHash::TPtr hash, IEquate::TPtr equate) - : Hash_(hash) - , Equate_(equate) - {} +template <EDataSlot Slot> +class TTopFreqSerializeData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { + return GetTopFreqResourceData<Slot>(args[0])->Get()->Serialize(valueBuilder); + } +}; - private: - IHash::TPtr Hash_; - IEquate::TPtr Equate_; - }; +class TTopFreqSerialize: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { + return GetTopFreqResource(args[0])->Get()->Serialize(valueBuilder); + } +}; - template <EDataSlot Slot> - class TTopFreqMergeData: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - const auto topFreq0 = GetTopFreqResourceData<Slot>(args[0]); - const auto topFreq1 = GetTopFreqResourceData<Slot>(args[1]); - return TUnboxedValuePod(new TTopFreqResourceData<Slot>(*topFreq0->Get(), *topFreq1->Get())); - } - }; +template <EDataSlot Slot> +class TTopFreqDeserializeData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* 
args) const { + return TUnboxedValuePod(new TTopFreqResourceData<Slot>(args[0])); + } +}; - class TTopFreqMerge: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { - const auto topFreq0 = GetTopFreqResource(args[0]); - const auto topFreq1 = GetTopFreqResource(args[1]); - return TUnboxedValuePod(new TTopFreqResource(*topFreq0->Get(), *topFreq1->Get(), Hash_, Equate_)); - } +class TTopFreqDeserialize: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + return TUnboxedValuePod(new TTopFreqResource(args[0], Hash_, Equate_)); + } - public: - TTopFreqMerge(IHash::TPtr hash, IEquate::TPtr equate) - : Hash_(hash) - , Equate_(equate) - {} +public: + TTopFreqDeserialize(IHash::TPtr hash, IEquate::TPtr equate) + : Hash_(hash) + , Equate_(equate) + { + } - private: - IHash::TPtr Hash_; - IEquate::TPtr Equate_; - }; +private: + IHash::TPtr Hash_; + IEquate::TPtr Equate_; +}; + +template <EDataSlot Slot> +class TTopFreqMergeData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + const auto topFreq0 = GetTopFreqResourceData<Slot>(args[0]); + const auto topFreq1 = GetTopFreqResourceData<Slot>(args[1]); + return TUnboxedValuePod(new TTopFreqResourceData<Slot>(*topFreq0->Get(), *topFreq1->Get())); + } +}; + +class TTopFreqMerge: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const { + const auto topFreq0 = GetTopFreqResource(args[0]); + const auto topFreq1 = GetTopFreqResource(args[1]); + return TUnboxedValuePod(new TTopFreqResource(*topFreq0->Get(), *topFreq1->Get(), Hash_, Equate_)); + } - template <EDataSlot Slot> - class TTopFreqGetData: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { - return GetTopFreqResourceData<Slot>(args[0])->Get()->Get(valueBuilder, 
args[1].Get<ui32>()); - } - }; +public: + TTopFreqMerge(IHash::TPtr hash, IEquate::TPtr equate) + : Hash_(hash) + , Equate_(equate) + { + } - class TTopFreqGet: public TBoxedValue { - private: - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { - return GetTopFreqResource(args[0])->Get()->Get(valueBuilder, args[1].Get<ui32>()); - } - }; +private: + IHash::TPtr Hash_; + IEquate::TPtr Equate_; +}; +template <EDataSlot Slot> +class TTopFreqGetData: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { + return GetTopFreqResourceData<Slot>(args[0])->Get()->Get(valueBuilder, args[1].Get<ui32>()); + } +}; -#define MAKE_RESOURCE(slot, ...) \ - extern const char TopFreqResourceName##slot[] = "TopFreq.TopFreqResource."#slot; \ - template <> \ - class TTopFreqResourceData<EDataSlot::slot>: \ - public TBoxedResource<TTopFreqData<EDataSlot::slot>, TopFreqResourceName##slot> \ - { \ - public: \ - template <typename... Args> \ - inline TTopFreqResourceData(Args&&... args) \ - : TBoxedResource(std::forward<Args>(args)...) \ - {} \ +class TTopFreqGet: public TBoxedValue { +private: + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const { + return GetTopFreqResource(args[0])->Get()->Get(valueBuilder, args[1].Get<ui32>()); + } +}; + +#define MAKE_RESOURCE(slot, ...) \ + extern const char TopFreqResourceName##slot[] = "TopFreq.TopFreqResource." #slot; \ + template <> \ + class TTopFreqResourceData<EDataSlot::slot>: public TBoxedResource<TTopFreqData<EDataSlot::slot>, TopFreqResourceName##slot> { \ + public: \ + template <typename... Args> \ + inline TTopFreqResourceData(Args&&... args) \ + : TBoxedResource(std::forward<Args>(args)...) 
\ + { \ + } \ }; - UDF_TYPE_ID_MAP(MAKE_RESOURCE) +UDF_TYPE_ID_MAP(MAKE_RESOURCE) #define MAKE_IMPL(operation, slot) \ case EDataSlot::slot: \ @@ -204,190 +203,188 @@ namespace { topFreqType = builder.Resource(TopFreqResourceName##slot); \ break; +static const auto CreateName = TStringRef::Of("TopFreq_Create"); +static const auto AddValueName = TStringRef::Of("TopFreq_AddValue"); +static const auto SerializeName = TStringRef::Of("TopFreq_Serialize"); +static const auto DeserializeName = TStringRef::Of("TopFreq_Deserialize"); +static const auto MergeName = TStringRef::Of("TopFreq_Merge"); +static const auto GetName = TStringRef::Of("TopFreq_Get"); + +class TTopFreqModule: public IUdfModule { +public: + TStringRef Name() const { + return TStringRef::Of("TopFreq"); + } - static const auto CreateName = TStringRef::Of("TopFreq_Create"); - static const auto AddValueName = TStringRef::Of("TopFreq_AddValue"); - static const auto SerializeName = TStringRef::Of("TopFreq_Serialize"); - static const auto DeserializeName = TStringRef::Of("TopFreq_Deserialize"); - static const auto MergeName = TStringRef::Of("TopFreq_Merge"); - static const auto GetName = TStringRef::Of("TopFreq_Get"); - - class TTopFreqModule: public IUdfModule { - public: - TStringRef Name() const { - return TStringRef::Of("TopFreq"); - } + void CleanupOnTerminate() const final { + } - void CleanupOnTerminate() const final { - } + void GetAllFunctions(IFunctionsSink& sink) const final { + sink.Add(CreateName)->SetTypeAwareness(); + sink.Add(AddValueName)->SetTypeAwareness(); + sink.Add(SerializeName)->SetTypeAwareness(); + sink.Add(DeserializeName)->SetTypeAwareness(); + sink.Add(MergeName)->SetTypeAwareness(); + sink.Add(GetName)->SetTypeAwareness(); + } - void GetAllFunctions(IFunctionsSink& sink) const final { - sink.Add(CreateName)->SetTypeAwareness(); - sink.Add(AddValueName)->SetTypeAwareness(); - sink.Add(SerializeName)->SetTypeAwareness(); - sink.Add(DeserializeName)->SetTypeAwareness(); - 
sink.Add(MergeName)->SetTypeAwareness(); - sink.Add(GetName)->SetTypeAwareness(); - } + void BuildFunctionTypeInfo( + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { + Y_UNUSED(typeConfig); - void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { - Y_UNUSED(typeConfig); + try { + const bool typesOnly = (flags & TFlags::TypesOnly); + builder.UserType(userType); - try { - const bool typesOnly = (flags & TFlags::TypesOnly); - builder.UserType(userType); + auto typeHelper = builder.TypeInfoHelper(); - auto typeHelper = builder.TypeInfoHelper(); + auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() != 3) { + builder.SetError("User type is not a 3-tuple"); + return; + } - auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); - if (!userTypeInspector || userTypeInspector.GetElementsCount() != 3) { - builder.SetError("User type is not a 3-tuple"); + bool isGeneric = false; + IHash::TPtr hash; + IEquate::TPtr equate; + TMaybe<EDataSlot> slot; + + auto valueType = userTypeInspector.GetElementType(2); + auto valueTypeInspector = TDataTypeInspector(*typeHelper, valueType); + if (!valueTypeInspector) { + isGeneric = true; + hash = builder.MakeHash(valueType); + equate = builder.MakeEquate(valueType); + if (!hash || !equate) { return; } - - bool isGeneric = false; - IHash::TPtr hash; - IEquate::TPtr equate; - TMaybe<EDataSlot> slot; - - auto valueType = userTypeInspector.GetElementType(2); - auto valueTypeInspector = TDataTypeInspector(*typeHelper, valueType); - if (!valueTypeInspector) { - isGeneric = true; - hash = builder.MakeHash(valueType); - equate = builder.MakeEquate(valueType); - if (!hash || !equate) { - return; - } - } else { - slot = 
FindDataSlot(valueTypeInspector.GetTypeId()); - if (!slot) { - builder.SetError("Unknown data type"); - return; - } - const auto& features = NUdf::GetDataTypeInfo(*slot).Features; - if (!(features & NUdf::CanHash) || !(features & NUdf::CanEquate)) { - builder.SetError("Data type is not hashable or equatable"); - return; - } + } else { + slot = FindDataSlot(valueTypeInspector.GetTypeId()); + if (!slot) { + builder.SetError("Unknown data type"); + return; + } + const auto& features = NUdf::GetDataTypeInfo(*slot).Features; + if (!(features & NUdf::CanHash) || !(features & NUdf::CanEquate)) { + builder.SetError("Data type is not hashable or equatable"); + return; } + } - auto serializedItemType = builder.Tuple()->Add<ui64>().Add(valueType).Build(); - auto serializedListType = builder.List()->Item(serializedItemType).Build(); - auto serializedType = builder.Tuple()->Add<ui32>().Add<ui32>().Add(serializedListType).Build(); + auto serializedItemType = builder.Tuple()->Add<ui64>().Add(valueType).Build(); + auto serializedListType = builder.List()->Item(serializedItemType).Build(); + auto serializedType = builder.Tuple()->Add<ui32>().Add<ui32>().Add(serializedListType).Build(); - TType* topFreqType = nullptr; - if (isGeneric) { - topFreqType = builder.Resource(TopFreqResourceNameGeneric); - } else { - switch (*slot) { + TType* topFreqType = nullptr; + if (isGeneric) { + topFreqType = builder.Resource(TopFreqResourceNameGeneric); + } else { + switch (*slot) { UDF_TYPE_ID_MAP(MAKE_TYPE) - } } + } - if (name == CreateName) { - builder.Args()->Add(valueType).Add<ui32>().Done().Returns(topFreqType); + if (name == CreateName) { + builder.Args()->Add(valueType).Add<ui32>().Done().Returns(topFreqType); - if (!typesOnly) { - if (isGeneric) { - builder.Implementation(new TTopFreqCreate(hash, equate)); - } else { - switch (*slot) { + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreqCreate(hash, equate)); + } else { + switch (*slot) { 
UDF_TYPE_ID_MAP(MAKE_CREATE) - } } } - builder.IsStrict(); } + builder.IsStrict(); + } - if (name == AddValueName) { - builder.Args()->Add(topFreqType).Add(valueType).Done().Returns(topFreqType); + if (name == AddValueName) { + builder.Args()->Add(topFreqType).Add(valueType).Done().Returns(topFreqType); - if (!typesOnly) { - if (isGeneric) { - builder.Implementation(new TTopFreqAddValue); - } else { - switch (*slot) { + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreqAddValue); + } else { + switch (*slot) { UDF_TYPE_ID_MAP(MAKE_ADD_VALUE) - } } } - builder.IsStrict(); } + builder.IsStrict(); + } - if (name == MergeName) { - builder.Args()->Add(topFreqType).Add(topFreqType).Done().Returns(topFreqType); + if (name == MergeName) { + builder.Args()->Add(topFreqType).Add(topFreqType).Done().Returns(topFreqType); - if (!typesOnly) { - if (isGeneric) { - builder.Implementation(new TTopFreqMerge(hash, equate)); - } else { - switch (*slot) { + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreqMerge(hash, equate)); + } else { + switch (*slot) { UDF_TYPE_ID_MAP(MAKE_MERGE) - } } } - builder.IsStrict(); } + builder.IsStrict(); + } - if (name == SerializeName) { - builder.Args()->Add(topFreqType).Done().Returns(serializedType); + if (name == SerializeName) { + builder.Args()->Add(topFreqType).Done().Returns(serializedType); - if (!typesOnly) { - if (isGeneric) { - builder.Implementation(new TTopFreqSerialize); - } else { - switch (*slot) { + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreqSerialize); + } else { + switch (*slot) { UDF_TYPE_ID_MAP(MAKE_SERIALIZE) - } } } - builder.IsStrict(); } + builder.IsStrict(); + } - if (name == DeserializeName) { - builder.Args()->Add(serializedType).Done().Returns(topFreqType); + if (name == DeserializeName) { + builder.Args()->Add(serializedType).Done().Returns(topFreqType); - if (!typesOnly) { - if (isGeneric) { - builder.Implementation(new 
TTopFreqDeserialize(hash, equate)); - } else { - switch (*slot) { + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreqDeserialize(hash, equate)); + } else { + switch (*slot) { UDF_TYPE_ID_MAP(MAKE_DESERIALIZE) - } } } } + } - if (name == GetName) { - ui32 indexF, indexV; - auto itemType = builder.Struct()->AddField<ui64>("Frequency", &indexF).AddField("Value", valueType, &indexV).Build(); - auto resultType = builder.List()->Item(itemType).Build(); + if (name == GetName) { + ui32 indexF, indexV; + auto itemType = builder.Struct()->AddField<ui64>("Frequency", &indexF).AddField("Value", valueType, &indexV).Build(); + auto resultType = builder.List()->Item(itemType).Build(); - builder.Args()->Add(topFreqType).Add<ui32>().Done().Returns(resultType); + builder.Args()->Add(topFreqType).Add<ui32>().Done().Returns(resultType); - if (!typesOnly) { - if (isGeneric) { - builder.Implementation(new TTopFreqGet); - } else { - switch (*slot) { + if (!typesOnly) { + if (isGeneric) { + builder.Implementation(new TTopFreqGet); + } else { + switch (*slot) { UDF_TYPE_ID_MAP(MAKE_GET) - } } } - builder.IsStrict(); } - - } catch (const std::exception& e) { - builder.SetError(CurrentExceptionMessage()); + builder.IsStrict(); } + + } catch (const std::exception& e) { + builder.SetError(CurrentExceptionMessage()); } - }; + } +}; } // namespace diff --git a/yql/essentials/udfs/common/topfreq/static/ya.make b/yql/essentials/udfs/common/topfreq/static/ya.make index 95838f33c49..94379d474a9 100644 --- a/yql/essentials/udfs/common/topfreq/static/ya.make +++ b/yql/essentials/udfs/common/topfreq/static/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( static_udf.cpp topfreq.cpp diff --git a/yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp b/yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp index 9ce7b8561fb..51d02f43b2a 100644 --- a/yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp +++ 
b/yql/essentials/udfs/common/topfreq/topfreq_udf_ut.cpp @@ -10,442 +10,443 @@ #include <yql/essentials/udfs/common/topfreq/static/topfreq_udf.h> namespace NYql { - using namespace NKikimr::NMiniKQL; - namespace NUdf { - extern NUdf::TUniquePtr<NUdf::IUdfModule> CreateTopFreqModule(); +using namespace NKikimr::NMiniKQL; +namespace NUdf { +extern NUdf::TUniquePtr<NUdf::IUdfModule> CreateTopFreqModule(); +} // namespace NUdf + +class TSetup { +public: + TSetup() + : MutableFunctionRegistry_(CreateFunctionRegistry(CreateBuiltinRegistry())->Clone()) + , RandomProvider_(CreateDeterministicRandomProvider(1)) + , TimeProvider_(CreateDeterministicTimeProvider(10000000)) + , Alloc_(__LOCATION__) + , Env_(Alloc_) + { + MutableFunctionRegistry_->AddModule("", "TopFreq", NUdf::CreateTopFreqModule()); + PgmBuidler_.Reset(new TProgramBuilder(Env_, *MutableFunctionRegistry_)); } - class TSetup { - public: - TSetup() - : MutableFunctionRegistry_(CreateFunctionRegistry(CreateBuiltinRegistry())->Clone()) - , RandomProvider_(CreateDeterministicRandomProvider(1)) - , TimeProvider_(CreateDeterministicTimeProvider(10000000)) - , Alloc_(__LOCATION__) - , Env_(Alloc_) - { - MutableFunctionRegistry_->AddModule("", "TopFreq", NUdf::CreateTopFreqModule()); - PgmBuidler_.Reset(new TProgramBuilder(Env_, *MutableFunctionRegistry_)); + TProgramBuilder& GetProgramBuilder() { + return *PgmBuidler_.Get(); + } + + NUdf::TUnboxedValue GetValue(TRuntimeNode& node) { + Explorer_.Walk(node.GetNode(), Env_); + + TComputationPatternOpts opts(Alloc_.Ref(), Env_, GetBuiltinFactory(), + MutableFunctionRegistry_.Get(), + NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); + Pattern_ = MakeComputationPattern(Explorer_, node, {}, opts); + Graph_ = Pattern_->Clone(opts.ToComputationOptions(*RandomProvider_, *TimeProvider_)); + + return Graph_->GetValue(); + } + +private: + using IMutableFunctionRegistryPtr = TIntrusivePtr<IMutableFunctionRegistry>; + using IRandomProviderPtr = 
TIntrusivePtr<IRandomProvider>; + using ITimeProviderPtr = TIntrusivePtr<ITimeProvider>; + + IMutableFunctionRegistryPtr MutableFunctionRegistry_; + IRandomProviderPtr RandomProvider_; + ITimeProviderPtr TimeProvider_; + TScopedAlloc Alloc_; + TTypeEnvironment Env_; + THolder<TProgramBuilder> PgmBuidler_; + IComputationPattern::TPtr Pattern_; + THolder<IComputationGraph> Graph_; + TExploringNodeVisitor Explorer_; +}; + +Y_UNIT_TEST_SUITE(TUDFTopFreqTest) { +Y_UNIT_TEST(SimpleTopFreq) { + TSetup setup; + TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); + + const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<i32>::Id); + const auto emptyStructType = pgmBuilder.NewEmptyStructType(); + const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Int32"); + const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); + + const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); + const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); + auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); + + auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); + auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); + auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); + + auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); + auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); + auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); + + TRuntimeNode pgmTopFreq; + { + auto val = pgmBuilder.NewDataLiteral<i32>(3); + auto param = pgmBuilder.NewDataLiteral<ui32>(10); + + TVector<TRuntimeNode> params = {val, param}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); + } + + for (int n = 0; n < 9; n++) { + auto value = 
pgmBuilder.NewDataLiteral<i32>(1); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + for (int n = 0; n < 7; n++) { + auto value = pgmBuilder.NewDataLiteral<i32>(4); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + TRuntimeNode pgmReturn; + { + auto param = pgmBuilder.NewDataLiteral<ui32>(4); + TVector<TRuntimeNode> params = {pgmTopFreq, param}; + pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); + } + + auto value = setup.GetValue(pgmReturn); + + auto listIterator = value.GetListIterator(); + + TUnboxedValue item; + + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 1); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 9); + + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 4); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 7); + + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 3); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 1); + + UNIT_ASSERT(!listIterator.Next(item)); +} + +Y_UNIT_TEST(MergingTopFreq) { + TSetup setup; + TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); + + const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); + const auto emptyStructType = pgmBuilder.NewEmptyStructType(); + const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Uint64"); + const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); + + const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); + const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); + auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); + + auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); + auto addValueUserType = 
pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); + auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); + + auto mergeArgsType = pgmBuilder.NewTupleType({resourceType, resourceType}); + auto mergeUserType = pgmBuilder.NewTupleType({mergeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Merge = pgmBuilder.Udf("TopFreq.TopFreq_Merge", TRuntimeNode(), mergeUserType); + + auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); + auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); + auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); + + TRuntimeNode pgmTopFreq; + { + auto value = pgmBuilder.NewDataLiteral<ui64>(1); + auto param = pgmBuilder.NewDataLiteral<ui32>(1); + TVector<TRuntimeNode> params = {value, param}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); + } + + for (int n = 0; n < 1; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(1); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + for (int n = 0; n < 4; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(5); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + for (int n = 0; n < 1; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(3); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + TRuntimeNode pgmTopFreq2; + { + auto value = pgmBuilder.NewDataLiteral<ui64>(1); + auto param = pgmBuilder.NewDataLiteral<ui32>(1); + TVector<TRuntimeNode> params = {value, param}; + pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_Create, params); + } + + for (int n = 0; n < 5; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(1); + TVector<TRuntimeNode> params = {pgmTopFreq2, value}; + pgmTopFreq2 = 
pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + for (int n = 0; n < 5; n++) { + auto value = pgmBuilder.NewDataLiteral<ui64>(5); + TVector<TRuntimeNode> params = {pgmTopFreq2, value}; + pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + TRuntimeNode pgmTopFreq3; + { + TVector<TRuntimeNode> params = {pgmTopFreq, pgmTopFreq2}; + pgmTopFreq3 = pgmBuilder.Apply(udfTopFreq_Merge, params); + } + + TRuntimeNode pgmReturn; + { + auto param = pgmBuilder.NewDataLiteral<ui32>(1); + TVector<TRuntimeNode> params = {pgmTopFreq3, param}; + pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); + } + + auto value = setup.GetValue(pgmReturn); + + auto listIterator = value.GetListIterator(); + + TUnboxedValue item; + + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<ui64>(), 5); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 9); + + UNIT_ASSERT(!listIterator.Next(item)); +} + +Y_UNIT_TEST(SerializedTopFreq) { + TSetup setup; + TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); + + const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<bool>::Id); + const auto emptyStructType = pgmBuilder.NewEmptyStructType(); + const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Bool"); + const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); + const auto ui64Type = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); + + const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); + const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); + auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); + + auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); + auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); + auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); + + auto 
getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); + auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); + auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); + + auto serializeArgsType = pgmBuilder.NewTupleType({resourceType}); + auto serializeUserType = pgmBuilder.NewTupleType({serializeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Serialize = pgmBuilder.Udf("TopFreq.TopFreq_Serialize", TRuntimeNode(), serializeUserType); + + auto serializedType = pgmBuilder.NewTupleType({ui32Type, ui32Type, + pgmBuilder.NewListType(pgmBuilder.NewTupleType({ui64Type, valueType}))}); + + auto deserializeArgsType = pgmBuilder.NewTupleType({serializedType}); + auto deserializeUserType = pgmBuilder.NewTupleType({deserializeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Deserialize = pgmBuilder.Udf("TopFreq.TopFreq_Deserialize", TRuntimeNode(), deserializeUserType); + + TRuntimeNode pgmTopFreq; + { + auto value = pgmBuilder.NewDataLiteral<bool>(true); + auto param = pgmBuilder.NewDataLiteral<ui32>(10); + TVector<TRuntimeNode> params = {value, param}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); + } + + for (int n = 0; n < 7; n++) { + auto value = pgmBuilder.NewDataLiteral<bool>(true); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + for (int n = 0; n < 10; n++) { + auto value = pgmBuilder.NewDataLiteral<bool>(false); + TVector<TRuntimeNode> params = {pgmTopFreq, value}; + pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); + } + + TRuntimeNode pgmSerializedTopFreq; + { + TVector<TRuntimeNode> params = {pgmTopFreq}; + pgmSerializedTopFreq = pgmBuilder.Apply(udfTopFreq_Serialize, params); + } + + TRuntimeNode pgmDeserializedTopFreq; + { + TVector<TRuntimeNode> params = {pgmSerializedTopFreq}; + pgmDeserializedTopFreq = pgmBuilder.Apply(udfTopFreq_Deserialize, params); + } + + 
TRuntimeNode pgmReturn; + { + auto param = pgmBuilder.NewDataLiteral<ui32>(3); + TVector<TRuntimeNode> params = {pgmDeserializedTopFreq, param}; + pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); + } + + auto value = setup.GetValue(pgmReturn); + + auto listIterator = value.GetListIterator(); + + TUnboxedValue item; + + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<bool>(), false); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 10); + + UNIT_ASSERT(listIterator.Next(item)); + UNIT_ASSERT_EQUAL(item.GetElement(1).Get<bool>(), true); + UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 8); + + UNIT_ASSERT(!listIterator.Next(item)); +} + +Y_UNIT_TEST(ApproxTopFreq) { + TSetup setup; + TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); + + const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); + const auto emptyStructType = pgmBuilder.NewEmptyStructType(); + const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Uint64"); + const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); + const auto ui64Type = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); + + const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); + const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); + auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); + + auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); + auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); + auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); + + auto mergeArgsType = pgmBuilder.NewTupleType({resourceType, resourceType}); + auto mergeUserType = pgmBuilder.NewTupleType({mergeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Merge = pgmBuilder.Udf("TopFreq.TopFreq_Merge", TRuntimeNode(), 
mergeUserType); + + auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); + auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); + auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); + + auto serializeArgsType = pgmBuilder.NewTupleType({resourceType}); + auto serializeUserType = pgmBuilder.NewTupleType({serializeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Serialize = pgmBuilder.Udf("TopFreq.TopFreq_Serialize", TRuntimeNode(), serializeUserType); + + auto serializedType = pgmBuilder.NewTupleType({ui32Type, ui32Type, + pgmBuilder.NewListType(pgmBuilder.NewTupleType({ui64Type, valueType}))}); + + auto deserializeArgsType = pgmBuilder.NewTupleType({serializedType}); + auto deserializeUserType = pgmBuilder.NewTupleType({deserializeArgsType, emptyStructType, valueType}); + auto udfTopFreq_Deserialize = pgmBuilder.Udf("TopFreq.TopFreq_Deserialize", TRuntimeNode(), deserializeUserType); + + static const ui64 BigNum = 20; + static const ui64 BigEach = 5000; + static const ui64 SmallNum = 500; + static const ui64 SmallEach = 20; + static const ui64 Total = BigNum * BigEach + SmallNum * SmallEach; + static const i32 AskFor = 25; + static const ui64 BlockSize = 200; + static const ui64 BlockCount = 10; + static const i32 WorksIfAtLeast = 15; + + std::array<ui64, Total> values; + std::array<TRuntimeNode, BlockCount> pgmTopFreqs; + + i32 curIndex = 0; + for (ui64 i = 1; i <= BigNum; i++) { + for (ui64 j = 0; j < BigEach; j++) { + values[curIndex++] = i; } + } - TProgramBuilder& GetProgramBuilder() { - return *PgmBuidler_.Get(); + for (ui64 i = BigNum + 1; i <= BigNum + SmallNum; i++) { + for (ui64 j = 0; j < SmallEach; j++) { + values[curIndex++] = i; } + } - NUdf::TUnboxedValue GetValue(TRuntimeNode& node) { - Explorer_.Walk(node.GetNode(), Env_); + Shuffle(values.begin(), values.end()); - TComputationPatternOpts opts(Alloc_.Ref(), Env_, GetBuiltinFactory(), - 
MutableFunctionRegistry_.Get(), - NUdf::EValidateMode::None, NUdf::EValidatePolicy::Fail, "", EGraphPerProcess::Multi); - Pattern_ = MakeComputationPattern(Explorer_, node, {}, opts); - Graph_ = Pattern_->Clone(opts.ToComputationOptions(*RandomProvider_, *TimeProvider_)); + TVector<TRuntimeNode> params; + TRuntimeNode param; + TRuntimeNode pgmvalue; - return Graph_->GetValue(); + for (ui64 i = 0; i < BlockCount; i++) { + { + pgmvalue = pgmBuilder.NewDataLiteral<ui64>(values[i * BlockSize]); + param = pgmBuilder.NewDataLiteral<ui32>(AskFor); + params = {pgmvalue, param}; + pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Create, params); } - private: - using IMutableFunctionRegistryPtr = TIntrusivePtr<IMutableFunctionRegistry>; - using IRandomProviderPtr = TIntrusivePtr<IRandomProvider>; - using ITimeProviderPtr = TIntrusivePtr<ITimeProvider>; - - IMutableFunctionRegistryPtr MutableFunctionRegistry_; - IRandomProviderPtr RandomProvider_; - ITimeProviderPtr TimeProvider_; - TScopedAlloc Alloc_; - TTypeEnvironment Env_; - THolder<TProgramBuilder> PgmBuidler_; - IComputationPattern::TPtr Pattern_; - THolder<IComputationGraph> Graph_; - TExploringNodeVisitor Explorer_; - }; - - Y_UNIT_TEST_SUITE(TUDFTopFreqTest) { - Y_UNIT_TEST(SimpleTopFreq) { - TSetup setup; - TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); - - const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<i32>::Id); - const auto emptyStructType = pgmBuilder.NewEmptyStructType(); - const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Int32"); - const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); - - const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); - const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); - auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); - - auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); - 
auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); - auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); - - auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); - auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); - auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); - - TRuntimeNode pgmTopFreq; - { - auto val = pgmBuilder.NewDataLiteral<i32>(3); - auto param = pgmBuilder.NewDataLiteral<ui32>(10); - - TVector<TRuntimeNode> params = {val, param}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); - } - - for (int n = 0; n < 9; n++) { - auto value = pgmBuilder.NewDataLiteral<i32>(1); - TVector<TRuntimeNode> params = {pgmTopFreq, value}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } - - for (int n = 0; n < 7; n++) { - auto value = pgmBuilder.NewDataLiteral<i32>(4); - TVector<TRuntimeNode> params = {pgmTopFreq, value}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } - - TRuntimeNode pgmReturn; - { - auto param = pgmBuilder.NewDataLiteral<ui32>(4); - TVector<TRuntimeNode> params = {pgmTopFreq, param}; - pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); - } - - auto value = setup.GetValue(pgmReturn); - - auto listIterator = value.GetListIterator(); - - TUnboxedValue item; - - UNIT_ASSERT(listIterator.Next(item)); - UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 1); - UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 9); - - UNIT_ASSERT(listIterator.Next(item)); - UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 4); - UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 7); - - UNIT_ASSERT(listIterator.Next(item)); - UNIT_ASSERT_EQUAL(item.GetElement(1).Get<i32>(), 3); - UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 1); - - UNIT_ASSERT(!listIterator.Next(item)); + for (ui64 j = i * BlockSize + 1; j < (i + 1) * BlockSize; j++) { + 
pgmvalue = pgmBuilder.NewDataLiteral<ui64>(values[j]); + params = {pgmTopFreqs[i], pgmvalue}; + pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_AddValue, params); } - Y_UNIT_TEST(MergingTopFreq) { - TSetup setup; - TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); - - const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); - const auto emptyStructType = pgmBuilder.NewEmptyStructType(); - const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Uint64"); - const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); - - const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); - const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); - auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); - - auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); - auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); - auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); - - auto mergeArgsType = pgmBuilder.NewTupleType({resourceType, resourceType}); - auto mergeUserType = pgmBuilder.NewTupleType({mergeArgsType, emptyStructType, valueType}); - auto udfTopFreq_Merge = pgmBuilder.Udf("TopFreq.TopFreq_Merge", TRuntimeNode(), mergeUserType); - - auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); - auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); - auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); - - TRuntimeNode pgmTopFreq; - { - auto value = pgmBuilder.NewDataLiteral<ui64>(1); - auto param = pgmBuilder.NewDataLiteral<ui32>(1); - TVector<TRuntimeNode> params = {value, param}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); - } - - for (int n = 0; n < 1; n++) { - auto value = 
pgmBuilder.NewDataLiteral<ui64>(1); - TVector<TRuntimeNode> params = {pgmTopFreq, value}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } - - for (int n = 0; n < 4; n++) { - auto value = pgmBuilder.NewDataLiteral<ui64>(5); - TVector<TRuntimeNode> params = {pgmTopFreq, value}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } - - for (int n = 0; n < 1; n++) { - auto value = pgmBuilder.NewDataLiteral<ui64>(3); - TVector<TRuntimeNode> params = {pgmTopFreq, value}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } - - TRuntimeNode pgmTopFreq2; - { - auto value = pgmBuilder.NewDataLiteral<ui64>(1); - auto param = pgmBuilder.NewDataLiteral<ui32>(1); - TVector<TRuntimeNode> params = {value, param}; - pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_Create, params); - } - - for (int n = 0; n < 5; n++) { - auto value = pgmBuilder.NewDataLiteral<ui64>(1); - TVector<TRuntimeNode> params = {pgmTopFreq2, value}; - pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } - - for (int n = 0; n < 5; n++) { - auto value = pgmBuilder.NewDataLiteral<ui64>(5); - TVector<TRuntimeNode> params = {pgmTopFreq2, value}; - pgmTopFreq2 = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } - - TRuntimeNode pgmTopFreq3; - { - TVector<TRuntimeNode> params = {pgmTopFreq, pgmTopFreq2}; - pgmTopFreq3 = pgmBuilder.Apply(udfTopFreq_Merge, params); - } - - TRuntimeNode pgmReturn; - { - auto param = pgmBuilder.NewDataLiteral<ui32>(1); - TVector<TRuntimeNode> params = {pgmTopFreq3, param}; - pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); - } - - auto value = setup.GetValue(pgmReturn); - - auto listIterator = value.GetListIterator(); - - TUnboxedValue item; - - UNIT_ASSERT(listIterator.Next(item)); - UNIT_ASSERT_EQUAL(item.GetElement(1).Get<ui64>(), 5); - UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 9); - - UNIT_ASSERT(!listIterator.Next(item)); + { + params = {pgmTopFreqs[i]}; + pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Serialize, params); 
} + } - Y_UNIT_TEST(SerializedTopFreq) { - TSetup setup; - TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); - - const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<bool>::Id); - const auto emptyStructType = pgmBuilder.NewEmptyStructType(); - const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Bool"); - const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); - const auto ui64Type = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); - - const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); - const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); - auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); - - auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); - auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); - auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); - - auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); - auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); - auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); - - auto serializeArgsType = pgmBuilder.NewTupleType({resourceType}); - auto serializeUserType = pgmBuilder.NewTupleType({serializeArgsType, emptyStructType, valueType}); - auto udfTopFreq_Serialize = pgmBuilder.Udf("TopFreq.TopFreq_Serialize", TRuntimeNode(), serializeUserType); - - auto serializedType = pgmBuilder.NewTupleType({ui32Type, ui32Type, - pgmBuilder.NewListType(pgmBuilder.NewTupleType({ui64Type, valueType}))}); - - auto deserializeArgsType = pgmBuilder.NewTupleType({serializedType}); - auto deserializeUserType = pgmBuilder.NewTupleType({deserializeArgsType, emptyStructType, valueType}); - auto udfTopFreq_Deserialize = 
pgmBuilder.Udf("TopFreq.TopFreq_Deserialize", TRuntimeNode(), deserializeUserType); - - TRuntimeNode pgmTopFreq; - { - auto value = pgmBuilder.NewDataLiteral<bool>(true); - auto param = pgmBuilder.NewDataLiteral<ui32>(10); - TVector<TRuntimeNode> params = {value, param}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); - } - - for (int n = 0; n < 7; n++) { - auto value = pgmBuilder.NewDataLiteral<bool>(true); - TVector<TRuntimeNode> params = {pgmTopFreq, value}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } - - for (int n = 0; n < 10; n++) { - auto value = pgmBuilder.NewDataLiteral<bool>(false); - TVector<TRuntimeNode> params = {pgmTopFreq, value}; - pgmTopFreq = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } - - TRuntimeNode pgmSerializedTopFreq; - { - TVector<TRuntimeNode> params = {pgmTopFreq}; - pgmSerializedTopFreq = pgmBuilder.Apply(udfTopFreq_Serialize, params); - } - - TRuntimeNode pgmDeserializedTopFreq; - { - TVector<TRuntimeNode> params = {pgmSerializedTopFreq}; - pgmDeserializedTopFreq = pgmBuilder.Apply(udfTopFreq_Deserialize, params); - } - - TRuntimeNode pgmReturn; - { - auto param = pgmBuilder.NewDataLiteral<ui32>(3); - TVector<TRuntimeNode> params = {pgmDeserializedTopFreq, param}; - pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); - } - - auto value = setup.GetValue(pgmReturn); - - auto listIterator = value.GetListIterator(); - - TUnboxedValue item; - - UNIT_ASSERT(listIterator.Next(item)); - UNIT_ASSERT_EQUAL(item.GetElement(1).Get<bool>(), false); - UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 10); - - UNIT_ASSERT(listIterator.Next(item)); - UNIT_ASSERT_EQUAL(item.GetElement(1).Get<bool>(), true); - UNIT_ASSERT_EQUAL(item.GetElement(0).Get<ui64>(), 8); - - UNIT_ASSERT(!listIterator.Next(item)); - } + TRuntimeNode pgmMainTopFreq; + { + pgmvalue = pgmBuilder.NewDataLiteral<ui64>(Total + 2); + param = pgmBuilder.NewDataLiteral<ui32>(AskFor); + params = {pgmvalue, param}; + pgmMainTopFreq = 
pgmBuilder.Apply(udfTopFreq_Create, params); + } - Y_UNIT_TEST(ApproxTopFreq) { - TSetup setup; - TProgramBuilder& pgmBuilder = setup.GetProgramBuilder(); - - const auto valueType = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); - const auto emptyStructType = pgmBuilder.NewEmptyStructType(); - const auto resourceType = pgmBuilder.NewResourceType("TopFreq.TopFreqResource.Uint64"); - const auto ui32Type = pgmBuilder.NewDataType(NUdf::TDataType<ui32>::Id); - const auto ui64Type = pgmBuilder.NewDataType(NUdf::TDataType<ui64>::Id); - - const auto createArgsType = pgmBuilder.NewTupleType({valueType, ui32Type}); - const auto createUserType = pgmBuilder.NewTupleType({createArgsType, emptyStructType, valueType}); - auto udfTopFreq_Create = pgmBuilder.Udf("TopFreq.TopFreq_Create", TRuntimeNode(), createUserType); - - auto addValueArgsType = pgmBuilder.NewTupleType({resourceType, valueType}); - auto addValueUserType = pgmBuilder.NewTupleType({addValueArgsType, emptyStructType, valueType}); - auto udfTopFreq_AddValue = pgmBuilder.Udf("TopFreq.TopFreq_AddValue", TRuntimeNode(), addValueUserType); - - auto mergeArgsType = pgmBuilder.NewTupleType({resourceType, resourceType}); - auto mergeUserType = pgmBuilder.NewTupleType({mergeArgsType, emptyStructType, valueType}); - auto udfTopFreq_Merge = pgmBuilder.Udf("TopFreq.TopFreq_Merge", TRuntimeNode(), mergeUserType); - - auto getArgsType = pgmBuilder.NewTupleType({resourceType, ui32Type}); - auto getUserType = pgmBuilder.NewTupleType({getArgsType, emptyStructType, valueType}); - auto udfTopFreq_Get = pgmBuilder.Udf("TopFreq.TopFreq_Get", TRuntimeNode(), getUserType); - - auto serializeArgsType = pgmBuilder.NewTupleType({resourceType}); - auto serializeUserType = pgmBuilder.NewTupleType({serializeArgsType, emptyStructType, valueType}); - auto udfTopFreq_Serialize = pgmBuilder.Udf("TopFreq.TopFreq_Serialize", TRuntimeNode(), serializeUserType); - - auto serializedType = pgmBuilder.NewTupleType({ui32Type, ui32Type, - 
pgmBuilder.NewListType(pgmBuilder.NewTupleType({ui64Type, valueType}))}); - - auto deserializeArgsType = pgmBuilder.NewTupleType({serializedType}); - auto deserializeUserType = pgmBuilder.NewTupleType({deserializeArgsType, emptyStructType, valueType}); - auto udfTopFreq_Deserialize = pgmBuilder.Udf("TopFreq.TopFreq_Deserialize", TRuntimeNode(), deserializeUserType); - - static const ui64 BigNum = 20; - static const ui64 BigEach = 5000; - static const ui64 SmallNum = 500; - static const ui64 SmallEach = 20; - static const ui64 Total = BigNum * BigEach + SmallNum * SmallEach; - static const i32 AskFor = 25; - static const ui64 BlockSize = 200; - static const ui64 BlockCount = 10; - static const i32 WorksIfAtLeast = 15; - - std::array<ui64, Total> values; - std::array<TRuntimeNode, BlockCount> pgmTopFreqs; - - i32 curIndex = 0; - for (ui64 i = 1; i <= BigNum; i++) { - for (ui64 j = 0; j < BigEach; j++) { - values[curIndex++] = i; - } - } - - for (ui64 i = BigNum + 1; i <= BigNum + SmallNum; i++) { - for (ui64 j = 0; j < SmallEach; j++) { - values[curIndex++] = i; - } - } - - Shuffle(values.begin(), values.end()); - - TVector<TRuntimeNode> params; - TRuntimeNode param; - TRuntimeNode pgmvalue; - - for (ui64 i = 0; i < BlockCount; i++) { - { - pgmvalue = pgmBuilder.NewDataLiteral<ui64>(values[i * BlockSize]); - param = pgmBuilder.NewDataLiteral<ui32>(AskFor); - params = {pgmvalue, param}; - pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Create, params); - } - - for (ui64 j = i * BlockSize + 1; j < (i + 1) * BlockSize; j++) { - pgmvalue = pgmBuilder.NewDataLiteral<ui64>(values[j]); - params = {pgmTopFreqs[i], pgmvalue}; - pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_AddValue, params); - } - - { - params = {pgmTopFreqs[i]}; - pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Serialize, params); - } - } - - TRuntimeNode pgmMainTopFreq; - { - pgmvalue = pgmBuilder.NewDataLiteral<ui64>(Total + 2); - param = pgmBuilder.NewDataLiteral<ui32>(AskFor); - params = {pgmvalue, param}; - 
pgmMainTopFreq = pgmBuilder.Apply(udfTopFreq_Create, params); - } - - for (ui64 i = 0; i < BlockCount; i++) { - params = {pgmTopFreqs[i]}; - pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Deserialize, params); - - params = {pgmMainTopFreq, pgmTopFreqs[i]}; - pgmMainTopFreq = pgmBuilder.Apply(udfTopFreq_Merge, params); - } - - TRuntimeNode pgmReturn; - { - param = pgmBuilder.NewDataLiteral<ui32>(AskFor); - params = {pgmMainTopFreq, param}; - pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); - } - - auto value = setup.GetValue(pgmReturn); - - auto listIterator = value.GetListIterator(); - - ui32 found = 0; - - for (ui64 i = 0; i < AskFor; i++) { - TUnboxedValue item; - - UNIT_ASSERT(listIterator.Next(item)); - ui64 current = item.GetElement(1).Get<ui64>(); - if (current <= BigNum) - found++; - } - - UNIT_ASSERT(!listIterator.Skip()); - UNIT_ASSERT(found >= WorksIfAtLeast); + for (ui64 i = 0; i < BlockCount; i++) { + params = {pgmTopFreqs[i]}; + pgmTopFreqs[i] = pgmBuilder.Apply(udfTopFreq_Deserialize, params); + + params = {pgmMainTopFreq, pgmTopFreqs[i]}; + pgmMainTopFreq = pgmBuilder.Apply(udfTopFreq_Merge, params); + } + + TRuntimeNode pgmReturn; + { + param = pgmBuilder.NewDataLiteral<ui32>(AskFor); + params = {pgmMainTopFreq, param}; + pgmReturn = pgmBuilder.Apply(udfTopFreq_Get, params); + } + + auto value = setup.GetValue(pgmReturn); + + auto listIterator = value.GetListIterator(); + + ui32 found = 0; + + for (ui64 i = 0; i < AskFor; i++) { + TUnboxedValue item; + + UNIT_ASSERT(listIterator.Next(item)); + ui64 current = item.GetElement(1).Get<ui64>(); + if (current <= BigNum) { + found++; } } + + UNIT_ASSERT(!listIterator.Skip()); + UNIT_ASSERT(found >= WorksIfAtLeast); } +} // Y_UNIT_TEST_SUITE(TUDFTopFreqTest) +} // namespace NYql diff --git a/yql/essentials/udfs/common/topfreq/ut/ya.make b/yql/essentials/udfs/common/topfreq/ut/ya.make index 142aea4ebfd..3d0b1b859a8 100644 --- a/yql/essentials/udfs/common/topfreq/ut/ya.make +++ 
b/yql/essentials/udfs/common/topfreq/ut/ya.make @@ -1,5 +1,7 @@ UNITTEST_FOR(yql/essentials/udfs/common/topfreq/static) +ENABLE(YQL_STYLE_CPP) + SRCS( ../topfreq_udf_ut.cpp ) diff --git a/yql/essentials/udfs/common/topfreq/ya.make b/yql/essentials/udfs/common/topfreq/ya.make index 2c91204fed6..3871d8f13ad 100644 --- a/yql/essentials/udfs/common/topfreq/ya.make +++ b/yql/essentials/udfs/common/topfreq/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(topfreq_udf) 28 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( topfreq_udf.cpp diff --git a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp index 3e90765e405..4c31f3d5612 100644 --- a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp +++ b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.cpp @@ -1 +1 @@ -#include "unicode_base_udf.h"
\ No newline at end of file +#include "unicode_base_udf.h" diff --git a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h index 850990d7ba9..df930831dea 100644 --- a/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h +++ b/yql/essentials/udfs/common/unicode_base/lib/unicode_base_udf.h @@ -30,522 +30,523 @@ namespace { template <typename... Args> \ static auto Execute(Args&&... args) = delete; - inline constexpr bool IsAscii(wchar32 c) noexcept { - return ::IsAscii(c); - } - - template <class It> - struct TIsUnicodeSpaceAdapter { - bool operator()(const It& it) const noexcept { - return IsSpace(*it); - } - }; +inline constexpr bool IsAscii(wchar32 c) noexcept { + return ::IsAscii(c); +} - template <class It> - TIsUnicodeSpaceAdapter<It> IsUnicodeSpaceAdapter(It) { - return {}; +template <class It> +struct TIsUnicodeSpaceAdapter { + bool operator()(const It& it) const noexcept { + return IsSpace(*it); } +}; - struct TNoChangesTag {}; +template <class It> +TIsUnicodeSpaceAdapter<It> IsUnicodeSpaceAdapter(It) { + return {}; +} - template <typename TDerived> - struct TScalarOperationMixin { - static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) - requires requires { TDerived::Execute(TStringRef()); } - { - Y_DEBUG_ABORT_UNLESS(IsUtf8(args[0].AsStringRef())); - auto executeResult = TDerived::Execute(args[0].AsStringRef()); - return ProcessResult(builder, std::move(executeResult), args); - } +struct TNoChangesTag {}; - static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) - requires requires { TDerived::Execute(TMaybe<TStringRef>(TStringRef())); } - { - auto executeResult = TDerived::Execute(args[0] ? 
TMaybe<TStringRef>(args[0].AsStringRef()) : Nothing()); - return ProcessResult(builder, std::move(executeResult), args); - } +template <typename TDerived> +struct TScalarOperationMixin { + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef()); } + { + Y_DEBUG_ABORT_UNLESS(IsUtf8(args[0].AsStringRef())); + auto executeResult = TDerived::Execute(args[0].AsStringRef()); + return ProcessResult(builder, std::move(executeResult), args); + } - static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) - requires requires { TDerived::Execute(TStringRef(), TStringRef()); } - { - auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef()); - return ProcessResult(builder, std::move(executeResult), args); - } + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TMaybe<TStringRef>(TStringRef())); } + { + auto executeResult = TDerived::Execute(args[0] ? TMaybe<TStringRef>(args[0].AsStringRef()) : Nothing()); + return ProcessResult(builder, std::move(executeResult), args); + } - static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) - requires requires { TDerived::Execute(TStringRef(), TMaybe<ui16>()); } - { - auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1] ? 
TMaybe<ui16>(args[1].Get<ui16>()) : Nothing()); - return ProcessResult(builder, std::move(executeResult), args); - } + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef(), TStringRef()); } + { + auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef()); + return ProcessResult(builder, std::move(executeResult), args); + } - static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) - requires requires { TDerived::Execute(TStringRef(), TStringRef(), TStringRef()); } - { - auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef(), args[2].AsStringRef()); - return ProcessResult(builder, std::move(executeResult), args); - } + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef(), TMaybe<ui16>()); } + { + auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1] ? TMaybe<ui16>(args[1].Get<ui16>()) : Nothing()); + return ProcessResult(builder, std::move(executeResult), args); + } - static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) - requires requires { TDerived::Execute(TStringRef(), TStringRef(), TMaybe<ui64>()); } - { - auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef(), args[2] ? 
TMaybe<ui64>(args[2].Get<ui64>()) : Nothing()); - return ProcessResult(builder, std::move(executeResult), args); - } + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef(), TStringRef(), TStringRef()); } + { + auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef(), args[2].AsStringRef()); + return ProcessResult(builder, std::move(executeResult), args); + } - static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) - requires requires { TDerived::Execute(TStringRef(), TMaybe<ui64>(), TMaybe<ui64>()); } - { - auto executeResult = TDerived::Execute(args[0].AsStringRef(), - args[1] ? TMaybe<ui64>(args[1].Get<ui64>()) : Nothing(), - args[2] ? TMaybe<ui64>(args[2].Get<ui64>()) : Nothing()); - return ProcessResult(builder, std::move(executeResult), args); - } - private: - static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TString& newString, const TUnboxedValuePod*) { - return builder->NewString(newString); - } + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef(), TStringRef(), TMaybe<ui64>()); } + { + auto executeResult = TDerived::Execute(args[0].AsStringRef(), args[1].AsStringRef(), args[2] ? TMaybe<ui64>(args[2].Get<ui64>()) : Nothing()); + return ProcessResult(builder, std::move(executeResult), args); + } - static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TStringBuf newString, const TUnboxedValuePod*) { - return builder->NewString(newString); - } + static TUnboxedValue DoExecute(const IValueBuilder* builder, const TUnboxedValuePod* args) + requires requires { TDerived::Execute(TStringRef(), TMaybe<ui64>(), TMaybe<ui64>()); } + { + auto executeResult = TDerived::Execute(args[0].AsStringRef(), + args[1] ? TMaybe<ui64>(args[1].Get<ui64>()) : Nothing(), + args[2] ? 
TMaybe<ui64>(args[2].Get<ui64>()) : Nothing()); + return ProcessResult(builder, std::move(executeResult), args); + } - template <typename T> - static TUnboxedValue ProcessResult(const IValueBuilder* builder, const std::variant<TNoChangesTag, T>& newValue, const TUnboxedValuePod* initialArg) { - if (std::holds_alternative<T>(newValue)) { - return ProcessResult(builder, std::move(std::get<T>(newValue)), initialArg); - } else { - return initialArg[0]; - } - } +private: + static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TString& newString, const TUnboxedValuePod*) { + return builder->NewString(newString); + } - template <typename T> - static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TMaybe<T>& newValue, const TUnboxedValuePod* initialArg) { - if (newValue.Defined()) { - return ProcessResult(builder, *newValue, initialArg); - } else { - return TUnboxedValuePod(); - } - } + static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TStringBuf newString, const TUnboxedValuePod*) { + return builder->NewString(newString); + } - template <typename T, typename = std::enable_if_t<TPrimitiveDataType<T>::Result>> - static TUnboxedValue ProcessResult(const IValueBuilder* builder, T result, const TUnboxedValuePod*) { - Y_UNUSED(builder); - return TUnboxedValuePod(result); + template <typename T> + static TUnboxedValue ProcessResult(const IValueBuilder* builder, const std::variant<TNoChangesTag, T>& newValue, const TUnboxedValuePod* initialArg) { + if (std::holds_alternative<T>(newValue)) { + return ProcessResult(builder, std::move(std::get<T>(newValue)), initialArg); + } else { + return initialArg[0]; } - }; + } - template <typename TDerived> - struct TBlockOperationMixin { - template <typename TSink> - static void BlockDoExecute(const TBlockItem arg, const TSink& sink) - requires requires { TDerived::Execute(TStringRef()); } - { - Y_DEBUG_ABORT_UNLESS(IsUtf8(arg.AsStringRef())); - auto executeResult = 
TDerived::Execute(arg.AsStringRef()); - TBlockItem boxedValue = ProcessResult(executeResult, arg); - sink(boxedValue); + template <typename T> + static TUnboxedValue ProcessResult(const IValueBuilder* builder, const TMaybe<T>& newValue, const TUnboxedValuePod* initialArg) { + if (newValue.Defined()) { + return ProcessResult(builder, *newValue, initialArg); + } else { + return TUnboxedValuePod(); } + } - template <typename TSink> - static void BlockDoExecute(const TBlockItem arg, const TSink& sink) - requires requires { TDerived::Execute(TMaybe<TStringRef>(TStringRef())); } - { - auto executeResult = TDerived::Execute(arg ? TMaybe<TStringRef>(arg.AsStringRef()) : Nothing()); - TBlockItem boxedValue = ProcessResult(executeResult, arg); - sink(boxedValue); - } + template <typename T, typename = std::enable_if_t<TPrimitiveDataType<T>::Result>> + static TUnboxedValue ProcessResult(const IValueBuilder* builder, T result, const TUnboxedValuePod*) { + Y_UNUSED(builder); + return TUnboxedValuePod(result); + } +}; + +template <typename TDerived> +struct TBlockOperationMixin { + template <typename TSink> + static void BlockDoExecute(const TBlockItem arg, const TSink& sink) + requires requires { TDerived::Execute(TStringRef()); } + { + Y_DEBUG_ABORT_UNLESS(IsUtf8(arg.AsStringRef())); + auto executeResult = TDerived::Execute(arg.AsStringRef()); + TBlockItem boxedValue = ProcessResult(executeResult, arg); + sink(boxedValue); + } - template <typename TSink> - static void BlockDoExecute(const TBlockItem arg1, const TBlockItem arg2, const TSink& sink) - requires requires { TDerived::Execute(TStringRef(), TStringRef()); } - { - auto executeResult = TDerived::Execute(arg1.AsStringRef(), - arg2.AsStringRef()); - TBlockItem boxedValue = ProcessResult(executeResult, arg1); - sink(boxedValue); - } + template <typename TSink> + static void BlockDoExecute(const TBlockItem arg, const TSink& sink) + requires requires { TDerived::Execute(TMaybe<TStringRef>(TStringRef())); } + { + auto 
executeResult = TDerived::Execute(arg ? TMaybe<TStringRef>(arg.AsStringRef()) : Nothing()); + TBlockItem boxedValue = ProcessResult(executeResult, arg); + sink(boxedValue); + } - template <typename TSink> - static void BlockDoExecute(const TBlockItem arg1, const TBlockItem arg2, const TSink& sink) - requires requires { TDerived::Execute(TStringRef(), TMaybe<ui16>()); } - { - auto executeResult = TDerived::Execute(arg1.AsStringRef(), arg2 ? TMaybe<ui16>(arg2.Get<ui16>()) : Nothing()); - TBlockItem boxedValue = ProcessResult(executeResult, arg1); - sink(boxedValue); - } + template <typename TSink> + static void BlockDoExecute(const TBlockItem arg1, const TBlockItem arg2, const TSink& sink) + requires requires { TDerived::Execute(TStringRef(), TStringRef()); } + { + auto executeResult = TDerived::Execute(arg1.AsStringRef(), + arg2.AsStringRef()); + TBlockItem boxedValue = ProcessResult(executeResult, arg1); + sink(boxedValue); + } - template <typename TSink> - static void BlockDoExecute(const TBlockItem args, const TSink& sink) - requires(requires { TDerived::Execute(TStringRef(), TStringRef(), TStringRef()); }) - { - auto executeResult = TDerived::Execute(args.GetElement(0).AsStringRef(), - args.GetElement(1).AsStringRef(), - args.GetElement(2).AsStringRef()); - TBlockItem boxedValue = ProcessResult(executeResult, args.GetElement(0)); - sink(boxedValue); - } + template <typename TSink> + static void BlockDoExecute(const TBlockItem arg1, const TBlockItem arg2, const TSink& sink) + requires requires { TDerived::Execute(TStringRef(), TMaybe<ui16>()); } + { + auto executeResult = TDerived::Execute(arg1.AsStringRef(), arg2 ? 
TMaybe<ui16>(arg2.Get<ui16>()) : Nothing()); + TBlockItem boxedValue = ProcessResult(executeResult, arg1); + sink(boxedValue); + } - template <typename TSink> - static void BlockDoExecute(const TBlockItem args, const TSink& sink) - requires(requires { TDerived::Execute(TStringRef(), TStringRef(), TMaybe<ui64>(0ULL)); }) - { - auto executeResult = TDerived::Execute(args.GetElement(0).AsStringRef(), - args.GetElement(1).AsStringRef(), - (args.GetElement(2) ? TMaybe<ui64>(args.GetElement(2).Get<ui64>()) : Nothing())); - TBlockItem boxedValue = ProcessResult(executeResult, args.GetElement(0)); - sink(boxedValue); - } + template <typename TSink> + static void BlockDoExecute(const TBlockItem args, const TSink& sink) + requires(requires { TDerived::Execute(TStringRef(), TStringRef(), TStringRef()); }) + { + auto executeResult = TDerived::Execute(args.GetElement(0).AsStringRef(), + args.GetElement(1).AsStringRef(), + args.GetElement(2).AsStringRef()); + TBlockItem boxedValue = ProcessResult(executeResult, args.GetElement(0)); + sink(boxedValue); + } - template <typename TSink> - static void BlockDoExecute(const TBlockItem args, const TSink& sink) - requires(requires { TDerived::Execute(TStringRef(), TMaybe<ui64>(0ULL), TMaybe<ui64>(0ULL)); }) - { - auto executeResult = TDerived::Execute(args.GetElement(0).AsStringRef(), - (args.GetElement(1) ? TMaybe<ui64>(args.GetElement(1).Get<ui64>()) : Nothing()), - (args.GetElement(2) ? TMaybe<ui64>(args.GetElement(2).Get<ui64>()) : Nothing())); - TBlockItem boxedValue = ProcessResult(executeResult, args.GetElement(0)); - sink(boxedValue); - } + template <typename TSink> + static void BlockDoExecute(const TBlockItem args, const TSink& sink) + requires(requires { TDerived::Execute(TStringRef(), TStringRef(), TMaybe<ui64>(0ULL)); }) + { + auto executeResult = TDerived::Execute(args.GetElement(0).AsStringRef(), + args.GetElement(1).AsStringRef(), + (args.GetElement(2) ? 
TMaybe<ui64>(args.GetElement(2).Get<ui64>()) : Nothing())); + TBlockItem boxedValue = ProcessResult(executeResult, args.GetElement(0)); + sink(boxedValue); + } - private: - static TBlockItem ProcessResult(const TString& newString, const TBlockItem arg) { - Y_UNUSED(arg); - return TBlockItem(newString); - } + template <typename TSink> + static void BlockDoExecute(const TBlockItem args, const TSink& sink) + requires(requires { TDerived::Execute(TStringRef(), TMaybe<ui64>(0ULL), TMaybe<ui64>(0ULL)); }) + { + auto executeResult = TDerived::Execute(args.GetElement(0).AsStringRef(), + (args.GetElement(1) ? TMaybe<ui64>(args.GetElement(1).Get<ui64>()) : Nothing()), + (args.GetElement(2) ? TMaybe<ui64>(args.GetElement(2).Get<ui64>()) : Nothing())); + TBlockItem boxedValue = ProcessResult(executeResult, args.GetElement(0)); + sink(boxedValue); + } - static TBlockItem ProcessResult(const TStringBuf newString, const TBlockItem arg) { - Y_UNUSED(arg); - return TBlockItem(newString); - } +private: + static TBlockItem ProcessResult(const TString& newString, const TBlockItem arg) { + Y_UNUSED(arg); + return TBlockItem(newString); + } - template <typename T> - static TBlockItem ProcessResult(const TMaybe<T>& newValue, const TBlockItem arg) { - if (newValue.Defined()) { - return ProcessResult(*newValue, arg); - } else { - return TBlockItem(); - } - } + static TBlockItem ProcessResult(const TStringBuf newString, const TBlockItem arg) { + Y_UNUSED(arg); + return TBlockItem(newString); + } - template <typename T> - static TBlockItem ProcessResult(const std::variant<TNoChangesTag, T>& newValue, const TBlockItem arg) { - if (std::holds_alternative<T>(newValue)) { - return ProcessResult(std::get<T>(newValue), arg); - } else { - return arg; - } + template <typename T> + static TBlockItem ProcessResult(const TMaybe<T>& newValue, const TBlockItem arg) { + if (newValue.Defined()) { + return ProcessResult(*newValue, arg); + } else { + return TBlockItem(); } + } - template <typename T, 
typename = std::enable_if_t<TPrimitiveDataType<T>::Result>> - static TBlockItem ProcessResult(T result, const TBlockItem arg) { - Y_UNUSED(arg); - return TBlockItem(result); + template <typename T> + static TBlockItem ProcessResult(const std::variant<TNoChangesTag, T>& newValue, const TBlockItem arg) { + if (std::holds_alternative<T>(newValue)) { + return ProcessResult(std::get<T>(newValue), arg); + } else { + return arg; } - }; + } - template <typename TDerived> - struct TOperationMixin: public TBlockOperationMixin<TDerived>, public TScalarOperationMixin<TDerived> {}; + template <typename T, typename = std::enable_if_t<TPrimitiveDataType<T>::Result>> + static TBlockItem ProcessResult(T result, const TBlockItem arg) { + Y_UNUSED(arg); + return TBlockItem(result); + } +}; - template <auto mode> - struct TNormalizeUTF8: public TOperationMixin<TNormalizeUTF8<mode>> { - static TString Execute(TStringRef arg) { - const TUtf16String& input = UTF8ToWide(arg.Data(), arg.Size()); - return WideToUTF8(Normalize<mode>(input)); - } - DISABLE_IMPICT_ARGUMENT_CAST; - }; +template <typename TDerived> +struct TOperationMixin: public TBlockOperationMixin<TDerived>, public TScalarOperationMixin<TDerived> {}; - template <bool (*Function)(wchar32)> - struct TCheckAllChars: public TOperationMixin<TCheckAllChars<Function>> { - static bool Execute(TStringRef arg) { - const TStringBuf input(arg); - wchar32 rune; - const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin()); - const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end()); - while (cur != last) { - ReadUTF8CharAndAdvance(rune, cur, last); - if (!static_cast<bool (*)(wchar32)>(Function)(rune)) { - return false; - } - } - return true; - } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - template <bool (*Function)(TUtf16String&, size_t pos, size_t count)> - struct TStringToStringMapper: public TOperationMixin<TStringToStringMapper<Function>> { - static std::variant<TNoChangesTag, TString> 
Execute(TStringRef arg) { - if (auto wide = UTF8ToWide(arg); - static_cast<bool (*)(TUtf16String&, size_t pos, size_t count)>(Function)(wide, 0, TUtf16String::npos)) { - return WideToUTF8(std::move(wide)); - } else { - return TNoChangesTag{}; +template <auto mode> +struct TNormalizeUTF8: public TOperationMixin<TNormalizeUTF8<mode>> { + static TString Execute(TStringRef arg) { + const TUtf16String& input = UTF8ToWide(arg.Data(), arg.Size()); + return WideToUTF8(Normalize<mode>(input)); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +template <bool (*Function)(wchar32)> +struct TCheckAllChars: public TOperationMixin<TCheckAllChars<Function>> { + static bool Execute(TStringRef arg) { + const TStringBuf input(arg); + wchar32 rune; + const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin()); + const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end()); + while (cur != last) { + ReadUTF8CharAndAdvance(rune, cur, last); + if (!static_cast<bool (*)(wchar32)>(Function)(rune)) { + return false; } } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TLengthGetter: public TOperationMixin<TLengthGetter> { - static ui64 Execute(TStringRef inputRef) { - size_t result; - GetNumberOfUTF8Chars(inputRef.Data(), inputRef.Size(), result); - return static_cast<ui64>(result); + return true; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +template <bool (*Function)(TUtf16String&, size_t pos, size_t count)> +struct TStringToStringMapper: public TOperationMixin<TStringToStringMapper<Function>> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef arg) { + if (auto wide = UTF8ToWide(arg); + static_cast<bool (*)(TUtf16String&, size_t pos, size_t count)>(Function)(wide, 0, TUtf16String::npos)) { + return WideToUTF8(std::move(wide)); + } else { + return TNoChangesTag{}; } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TReverser: public TOperationMixin<TReverser> { - static TString Execute(TStringRef inputRef) { - auto wide = UTF8ToWide(inputRef); - 
ReverseInPlace(wide); - return WideToUTF8(wide); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +struct TLengthGetter: public TOperationMixin<TLengthGetter> { + static ui64 Execute(TStringRef inputRef) { + size_t result; + GetNumberOfUTF8Chars(inputRef.Data(), inputRef.Size(), result); + return static_cast<ui64>(result); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +struct TReverser: public TOperationMixin<TReverser> { + static TString Execute(TStringRef inputRef) { + auto wide = UTF8ToWide(inputRef); + ReverseInPlace(wide); + return WideToUTF8(wide); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +struct TStripper: public TOperationMixin<TStripper> { + static TString Execute(TStringRef inputRef) { + const TUtf32String input = UTF8ToUTF32<true>(inputRef); + const auto& result = StripString(input, IsUnicodeSpaceAdapter(input.begin())); + return WideToUTF8(result); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +struct TAllRemover: public TOperationMixin<TAllRemover> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef removeRef) { + TUtf32String input = UTF8ToUTF32<true>(inputRef); + const TUtf32String remove = UTF8ToUTF32<true>(removeRef); + const std::unordered_set<wchar32> chars(remove.cbegin(), remove.cend()); + size_t tpos = 0; + for (const wchar32 c : input) { + if (!chars.contains(c)) { + input[tpos++] = c; + } } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TStripper: public TOperationMixin<TStripper> { - static TString Execute(TStringRef inputRef) { - const TUtf32String input = UTF8ToUTF32<true>(inputRef); - const auto& result = StripString(input, IsUnicodeSpaceAdapter(input.begin())); - return WideToUTF8(result); + if (tpos != input.size()) { + input.resize(tpos); + return WideToUTF8(input); } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TAllRemover: public TOperationMixin<TAllRemover> { - static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef removeRef) { - TUtf32String input = 
UTF8ToUTF32<true>(inputRef); - const TUtf32String remove = UTF8ToUTF32<true>(removeRef); - const std::unordered_set<wchar32> chars(remove.cbegin(), remove.cend()); - size_t tpos = 0; - for (const wchar32 c : input) { - if (!chars.contains(c)) { - input[tpos++] = c; - } - } - if (tpos != input.size()) { - input.resize(tpos); + return TNoChangesTag{}; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +struct TFirstRemover: public TOperationMixin<TFirstRemover> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef removeRef) { + TUtf32String input = UTF8ToUTF32<true>(inputRef); + const auto remove = UTF8ToUTF32<true>(removeRef); + const std::unordered_set<wchar32> chars(remove.cbegin(), remove.cend()); + for (auto it = input.cbegin(); it != input.cend(); ++it) { + if (chars.contains(*it)) { + input.erase(it); return WideToUTF8(input); } - return TNoChangesTag{}; } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TFirstRemover: public TOperationMixin<TFirstRemover> { - static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef removeRef) { - TUtf32String input = UTF8ToUTF32<true>(inputRef); - const auto remove = UTF8ToUTF32<true>(removeRef); - const std::unordered_set<wchar32> chars(remove.cbegin(), remove.cend()); - for (auto it = input.cbegin(); it != input.cend(); ++it) { - if (chars.contains(*it)) { - input.erase(it); - return WideToUTF8(input); - } + return TNoChangesTag{}; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +struct TUnicodeSetMatcher: public TOperationMixin<TUnicodeSetMatcher> { + static bool Execute(TStringRef inputRef, TStringRef customCategoryRef) { + const TStringBuf input(inputRef); + const TUtf16String& customCategory = UTF8ToWide(customCategoryRef); + TUnicodeSet unicodeSet; + try { + unicodeSet.Parse(customCategory); + } catch (...) 
{ + throw yexception() << "Failed to parse unicode set: " << CurrentExceptionMessage(); + } + wchar32 rune; + const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin()); + const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end()); + while (cur != last) { + ReadUTF8CharAndAdvance(rune, cur, last); + if (!unicodeSet.Has(rune)) { + return false; } - return TNoChangesTag{}; } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TUnicodeSetMatcher: public TOperationMixin<TUnicodeSetMatcher> { - static bool Execute(TStringRef inputRef, TStringRef customCategoryRef) { - const TStringBuf input(inputRef); - const TUtf16String& customCategory = UTF8ToWide(customCategoryRef); - TUnicodeSet unicodeSet; - try { - unicodeSet.Parse(customCategory); - } catch (...) { - throw yexception() << "Failed to parse unicode set: " << CurrentExceptionMessage(); - } - wchar32 rune; - const unsigned char* cur = reinterpret_cast<const unsigned char*>(input.begin()); - const unsigned char* last = reinterpret_cast<const unsigned char*>(input.end()); - while (cur != last) { - ReadUTF8CharAndAdvance(rune, cur, last); - if (!unicodeSet.Has(rune)) { - return false; - } + return true; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +struct TLevensteinDistanceFinder: public TOperationMixin<TLevensteinDistanceFinder> { + static ui64 Execute(TStringRef leftRef, TStringRef rightRef) { + const TStringBuf left(leftRef); + const TStringBuf right(rightRef); + const auto& leftUtf32 = UTF8ToUTF32<true>(left); + const auto& rightUtf32 = UTF8ToUTF32<true>(right); + return NLevenshtein::Distance(leftUtf32, rightUtf32); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +struct TLastRemoval: public TOperationMixin<TLastRemoval> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef removeRef) { + TUtf32String input = UTF8ToUTF32<true>(inputRef); + const TUtf32String remove = UTF8ToUTF32<true>(removeRef); + const std::unordered_set<wchar32> 
chars(remove.cbegin(), remove.cend()); + for (auto it = input.crbegin(); it != input.crend(); ++it) { + if (chars.contains(*it)) { + input.erase(input.crend() - it - 1, 1); + return WideToUTF8(input); } - return true; - } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TLevensteinDistanceFinder: public TOperationMixin<TLevensteinDistanceFinder> { - static ui64 Execute(TStringRef leftRef, TStringRef rightRef) { - const TStringBuf left(leftRef); - const TStringBuf right(rightRef); - const auto& leftUtf32 = UTF8ToUTF32<true>(left); - const auto& rightUtf32 = UTF8ToUTF32<true>(right); - return NLevenshtein::Distance(leftUtf32, rightUtf32); } - DISABLE_IMPICT_ARGUMENT_CAST; - }; + return TNoChangesTag{}; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - struct TLastRemoval: public TOperationMixin<TLastRemoval> { - static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef removeRef) { - TUtf32String input = UTF8ToUTF32<true>(inputRef); - const TUtf32String remove = UTF8ToUTF32<true>(removeRef); - const std::unordered_set<wchar32> chars(remove.cbegin(), remove.cend()); - for (auto it = input.crbegin(); it != input.crend(); ++it) { - if (chars.contains(*it)) { - input.erase(input.crend() - it - 1, 1); - return WideToUTF8(input); - } - } +struct TAllReplacer: public TOperationMixin<TAllReplacer> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef whatReplace, TStringRef toReplace) { + if (TString result(inputRef); SubstGlobal(result, whatReplace, toReplace)) { + return result; + } else { return TNoChangesTag{}; } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TAllReplacer: public TOperationMixin<TAllReplacer> { - static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef whatReplace, TStringRef toReplace) { - if (TString result(inputRef); SubstGlobal(result, whatReplace, toReplace)) { - return result; - } else { - return TNoChangesTag{}; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +struct 
TFirstReplacer: public TOperationMixin<TFirstReplacer> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef whatReplace, TStringRef toReplace) { + std::string result(inputRef); + const std::string_view what(whatReplace); + if (const auto index = result.find(what); index != std::string::npos) { + result.replace(index, what.size(), std::string_view(toReplace)); + return result; + } + return TNoChangesTag{}; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +struct TLastReplacer: public TOperationMixin<TLastReplacer> { + static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef whatReplace, TStringRef toReplace) { + std::string result(inputRef); + const std::string_view what(whatReplace); + if (const auto index = result.rfind(what); index != std::string::npos) { + result.replace(index, what.size(), std::string_view(toReplace)); + return result; + } + return TNoChangesTag{}; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +struct TFinder: public TOperationMixin<TFinder> { + static TMaybe<ui64> Execute(TStringRef inputRef, TStringRef whatFind, TMaybe<ui64> whereFind) { + const std::string_view string(inputRef); + const std::string_view needle(whatFind); + std::string_view::size_type pos = 0U; + + if (auto p = whereFind.GetOrElse(0ULL)) { + for (auto ptr = string.data(); p && pos < string.size(); --p) { + const auto width = WideCharSize(*ptr); + pos += width; + ptr += width; } } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - struct TFirstReplacer: public TOperationMixin<TFirstReplacer> { - static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef whatReplace, TStringRef toReplace) { - std::string result(inputRef); - const std::string_view what(whatReplace); - if (const auto index = result.find(what); index != std::string::npos) { - result.replace(index, what.size(), std::string_view(toReplace)); - return result; - } - return TNoChangesTag{}; + if (const auto find = string.find(needle, pos); 
std::string_view::npos != find) { + size_t result; + GetNumberOfUTF8Chars(string.data(), find, result); + return static_cast<ui64>(result); } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TLastReplacer: public TOperationMixin<TLastReplacer> { - static std::variant<TNoChangesTag, TString> Execute(TStringRef inputRef, TStringRef whatReplace, TStringRef toReplace) { - std::string result(inputRef); - const std::string_view what(whatReplace); - if (const auto index = result.rfind(what); index != std::string::npos) { - result.replace(index, what.size(), std::string_view(toReplace)); - return result; + return Nothing(); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +struct TRFinder: public TOperationMixin<TRFinder> { + static TMaybe<ui64> Execute(TStringRef inputRef, TStringRef whatFind, TMaybe<ui64> whereFind) { + const std::string_view string(inputRef); + const std::string_view needle(whatFind); + std::string_view::size_type pos = std::string_view::npos; + + if (auto p = whereFind.GetOrElse(std::string_view::npos); std::string_view::npos != p) { + pos = 0ULL; + for (auto ptr = string.data(); p && pos < string.size(); --p) { + const auto width = WideCharSize(*ptr); + pos += width; + ptr += width; } - return TNoChangesTag{}; } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TFinder: public TOperationMixin<TFinder> { - static TMaybe<ui64> Execute(TStringRef inputRef, TStringRef whatFind, TMaybe<ui64> whereFind) { - const std::string_view string(inputRef); - const std::string_view needle(whatFind); - std::string_view::size_type pos = 0U; - - if (auto p = whereFind.GetOrElse(0ULL)) { - for (auto ptr = string.data(); p && pos < string.size(); --p) { - const auto width = WideCharSize(*ptr); - pos += width; - ptr += width; - } - } - if (const auto find = string.find(needle, pos); std::string_view::npos != find) { - size_t result; - GetNumberOfUTF8Chars(string.data(), find, result); - return static_cast<ui64>(result); - } - return Nothing(); + if (const auto find = 
string.rfind(needle, pos); std::string_view::npos != find) { + size_t result; + GetNumberOfUTF8Chars(string.data(), find, result); + return static_cast<ui64>(result); } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - - struct TRFinder: public TOperationMixin<TRFinder> { - static TMaybe<ui64> Execute(TStringRef inputRef, TStringRef whatFind, TMaybe<ui64> whereFind) { - const std::string_view string(inputRef); - const std::string_view needle(whatFind); - std::string_view::size_type pos = std::string_view::npos; - - if (auto p = whereFind.GetOrElse(std::string_view::npos); std::string_view::npos != p) { - pos = 0ULL; - for (auto ptr = string.data(); p && pos < string.size(); --p) { - const auto width = WideCharSize(*ptr); - pos += width; - ptr += width; - } - } + return Nothing(); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - if (const auto find = string.rfind(needle, pos); std::string_view::npos != find) { - size_t result; - GetNumberOfUTF8Chars(string.data(), find, result); - return static_cast<ui64>(result); - } +template <bool strict> +struct TToUint64Converter: public TOperationMixin<TToUint64Converter<strict>> { + static TNothing Terminate(const char* message) { + if constexpr (strict) { return Nothing(); + } else { + throw yexception() << message; } - DISABLE_IMPICT_ARGUMENT_CAST; }; - template <bool strict> - struct TToUint64Converter: public TOperationMixin<TToUint64Converter<strict>> { - static TNothing Terminate(const char* message) { - if constexpr (strict) { - return Nothing(); - } else { - throw yexception() << message; - } + static TMaybe<ui64> Execute(TStringRef inputRef, TMaybe<ui16> inputBase) { + const TString inputStr(inputRef); + const char* input = inputStr.data(); + const int base = inputBase.GetOrElse(0); + char* pos = nullptr; + auto prevErrno = errno; + errno = 0; + Y_DEFER { + errno = prevErrno; }; - - static TMaybe<ui64> Execute(TStringRef inputRef, TMaybe<ui16> inputBase) { - const TString inputStr(inputRef); - const char* input = inputStr.data(); - 
const int base = inputBase.GetOrElse(0); - char* pos = nullptr; - auto prevErrno = errno; - errno = 0; - Y_DEFER { - errno = prevErrno; - }; - unsigned long long res = std::strtoull(input, &pos, base); - if (!res && errno == EINVAL) { - return Terminate("Incorrect base"); - } - - ui64 ret = static_cast<ui64>(res); - if (!res && pos == input) { - return Terminate("Input string is not a number"); - } else if ((res == ULLONG_MAX && errno == ERANGE) || ret != res) { - return Terminate("Converted value falls out of Uint64 range"); - } else if (*pos) { - return Terminate("Input string contains junk after the number"); - } - return ret; + unsigned long long res = std::strtoull(input, &pos, base); + if (!res && errno == EINVAL) { + return Terminate("Incorrect base"); } - DISABLE_IMPICT_ARGUMENT_CAST; - }; - struct TUtf8Checker: public TOperationMixin<TUtf8Checker> { - static bool Execute(TMaybe<TStringRef> inputRef) { - if (!inputRef.Defined()) { - return false; - } - return IsUtf8(*inputRef); + ui64 ret = static_cast<ui64>(res); + if (!res && pos == input) { + return Terminate("Input string is not a number"); + } else if ((res == ULLONG_MAX && errno == ERANGE) || ret != res) { + return Terminate("Converted value falls out of Uint64 range"); + } else if (*pos) { + return Terminate("Input string contains junk after the number"); } - DISABLE_IMPICT_ARGUMENT_CAST; - }; + return ret; + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; - struct TSubstringGetter: public TOperationMixin<TSubstringGetter> { - static TStringBuf Execute(TStringRef inputRef Y_LIFETIME_BOUND, TMaybe<ui64> inputFrom, TMaybe<ui64> inputLen) { - const TStringBuf input(inputRef); - size_t from = inputFrom.GetOrElse(0); - size_t len = inputLen.GetOrElse(TStringBuf::npos); - return SubstrUTF8(input, from, len); +struct TUtf8Checker: public TOperationMixin<TUtf8Checker> { + static bool Execute(TMaybe<TStringRef> inputRef) { + if (!inputRef.Defined()) { + return false; } - DISABLE_IMPICT_ARGUMENT_CAST; - }; + return 
IsUtf8(*inputRef); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; + +struct TSubstringGetter: public TOperationMixin<TSubstringGetter> { + static TStringBuf Execute(TStringRef inputRef Y_LIFETIME_BOUND, TMaybe<ui64> inputFrom, TMaybe<ui64> inputLen) { + const TStringBuf input(inputRef); + size_t from = inputFrom.GetOrElse(0); + size_t len = inputLen.GetOrElse(TStringBuf::npos); + return SubstrUTF8(input, from, len); + } + DISABLE_IMPICT_ARGUMENT_CAST; +}; -#define DEFINE_UTF8_OPERATION_STRICT(udfName, Executor, signature, optArgs) \ +#define DEFINE_UTF8_OPERATION_STRICT(udfName, Executor, signature, optArgs) \ BEGIN_SIMPLE_STRICT_ARROW_UDF_WITH_OPTIONAL_ARGS(T##udfName, signature, optArgs) { \ return Executor::DoExecute(valueBuilder, args); \ } \ @@ -599,226 +600,226 @@ namespace { \ END_SIMPLE_ARROW_UDF(T##udfName, T##udfName##KernelExec::Do) - DEFINE_UTF8_OPERATION_STRICT(IsUtf, TUtf8Checker, bool(TOptional<char*>), /*optArgs=*/1); - - DEFINE_UTF8_OPERATION_STRICT(Normalize, TNormalizeUTF8<NFC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(NormalizeNFD, TNormalizeUTF8<NFD>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(NormalizeNFC, TNormalizeUTF8<NFC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(NormalizeNFKD, TNormalizeUTF8<NFKD>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(NormalizeNFKC, TNormalizeUTF8<NFKC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - - DEFINE_UTF8_OPERATION_STRICT(IsAscii, TCheckAllChars<IsAscii>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(IsSpace, TCheckAllChars<IsSpace>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(IsUpper, TCheckAllChars<IsUpper>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(IsLower, TCheckAllChars<IsLower>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(IsDigit, TCheckAllChars<IsDigit>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - 
DEFINE_UTF8_OPERATION_STRICT(IsAlpha, TCheckAllChars<IsAlpha>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(IsAlnum, TCheckAllChars<IsAlnum>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(IsHex, TCheckAllChars<IsHexdigit>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); - - DEFINE_UTF8_OPERATION_STRICT(ToTitle, TStringToStringMapper<ToTitle>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(ToUpper, TStringToStringMapper<ToUpper>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(ToLower, TStringToStringMapper<ToLower>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - - DEFINE_UTF8_OPERATION_STRICT(GetLength, TLengthGetter, ui64(TAutoMap<TUtf8>), /*optArgs=*/0); - - DEFINE_UTF8_OPERATION_STRICT(Reverse, TReverser, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_STRICT(Strip, TStripper, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_MANY_STRICT(Substring, TSubstringGetter, TUtf8(TAutoMap<TUtf8>, TOptional<ui64>, TOptional<ui64>), /*argsCount=*/3, /*optArgs=*/1); - - DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveAll, TAllRemover, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveFirst, TFirstRemover, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_BIN_NOT_STRICT(IsUnicodeSet, TUnicodeSetMatcher, bool(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_BIN_STRICT(LevensteinDistance, TLevensteinDistanceFinder, ui64(TAutoMap<TUtf8>, TAutoMap<TUtf8>), /*optArgs=*/0); - DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveLast, TLastRemoval, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); - - DEFINE_UTF8_OPERATION_MANY_STRICT(ReplaceAll, TAllReplacer, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8), /*argsCount=*/3, /*optionalArgs=*/0); - DEFINE_UTF8_OPERATION_MANY_STRICT(ReplaceFirst, TFirstReplacer, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8), /*argsCount=*/3, /*optionalArgs=*/0); - DEFINE_UTF8_OPERATION_MANY_STRICT(ReplaceLast, 
TLastReplacer, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8), /*argsCount=*/3, /*optionalArgs=*/0); - - DEFINE_UTF8_OPERATION_MANY_STRICT(Find, TFinder, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), /*argsCount=*/3, /*optionalArgs=*/1); - DEFINE_UTF8_OPERATION_MANY_STRICT(RFind, TRFinder, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), /*argsCount=*/3, /*optionalArgs=*/1); - - DEFINE_UTF8_OPERATION_BIN_NOT_STRICT(ToUint64, TToUint64Converter</*strict=*/false>, ui64(TAutoMap<TUtf8>, TOptional<ui16>), /*optionalArgs=*/1); - DEFINE_UTF8_OPERATION_BIN_STRICT(TryToUint64, TToUint64Converter</*strict=*/true>, TOptional<ui64>(TAutoMap<TUtf8>, TOptional<ui16>), /*optionalArgs=*/1); - - using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>; - - template <typename TIt> - static void SplitToListImpl( - const IValueBuilder* valueBuilder, - const TUnboxedValue& input, - const std::string_view::const_iterator from, - const TIt& it, - TTmpVector& result) { - for (const auto& elem : it) { - result.emplace_back(valueBuilder->SubString(input, std::distance(from, elem.TokenStart()), std::distance(elem.TokenStart(), elem.TokenDelim()))); - } +DEFINE_UTF8_OPERATION_STRICT(IsUtf, TUtf8Checker, bool(TOptional<char*>), /*optArgs=*/1); + +DEFINE_UTF8_OPERATION_STRICT(Normalize, TNormalizeUTF8<NFC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(NormalizeNFD, TNormalizeUTF8<NFD>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(NormalizeNFC, TNormalizeUTF8<NFC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(NormalizeNFKD, TNormalizeUTF8<NFKD>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(NormalizeNFKC, TNormalizeUTF8<NFKC>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); + +DEFINE_UTF8_OPERATION_STRICT(IsAscii, TCheckAllChars<IsAscii>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(IsSpace, TCheckAllChars<IsSpace>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); 
+DEFINE_UTF8_OPERATION_STRICT(IsUpper, TCheckAllChars<IsUpper>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(IsLower, TCheckAllChars<IsLower>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(IsDigit, TCheckAllChars<IsDigit>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(IsAlpha, TCheckAllChars<IsAlpha>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(IsAlnum, TCheckAllChars<IsAlnum>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(IsHex, TCheckAllChars<IsHexdigit>, bool(TAutoMap<TUtf8>), /*optArgs=*/0); + +DEFINE_UTF8_OPERATION_STRICT(ToTitle, TStringToStringMapper<ToTitle>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(ToUpper, TStringToStringMapper<ToUpper>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(ToLower, TStringToStringMapper<ToLower>, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); + +DEFINE_UTF8_OPERATION_STRICT(GetLength, TLengthGetter, ui64(TAutoMap<TUtf8>), /*optArgs=*/0); + +DEFINE_UTF8_OPERATION_STRICT(Reverse, TReverser, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_STRICT(Strip, TStripper, TUtf8(TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_MANY_STRICT(Substring, TSubstringGetter, TUtf8(TAutoMap<TUtf8>, TOptional<ui64>, TOptional<ui64>), /*argsCount=*/3, /*optArgs=*/1); + +DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveAll, TAllRemover, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveFirst, TFirstRemover, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_BIN_NOT_STRICT(IsUnicodeSet, TUnicodeSetMatcher, bool(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_BIN_STRICT(LevensteinDistance, TLevensteinDistanceFinder, ui64(TAutoMap<TUtf8>, TAutoMap<TUtf8>), /*optArgs=*/0); +DEFINE_UTF8_OPERATION_BIN_STRICT(RemoveLast, TLastRemoval, TUtf8(TAutoMap<TUtf8>, TUtf8), /*optArgs=*/0); + 
+DEFINE_UTF8_OPERATION_MANY_STRICT(ReplaceAll, TAllReplacer, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8), /*argsCount=*/3, /*optionalArgs=*/0); +DEFINE_UTF8_OPERATION_MANY_STRICT(ReplaceFirst, TFirstReplacer, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8), /*argsCount=*/3, /*optionalArgs=*/0); +DEFINE_UTF8_OPERATION_MANY_STRICT(ReplaceLast, TLastReplacer, TUtf8(TAutoMap<TUtf8>, TUtf8, TUtf8), /*argsCount=*/3, /*optionalArgs=*/0); + +DEFINE_UTF8_OPERATION_MANY_STRICT(Find, TFinder, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), /*argsCount=*/3, /*optionalArgs=*/1); +DEFINE_UTF8_OPERATION_MANY_STRICT(RFind, TRFinder, TOptional<ui64>(TAutoMap<TUtf8>, TUtf8, TOptional<ui64>), /*argsCount=*/3, /*optionalArgs=*/1); + +DEFINE_UTF8_OPERATION_BIN_NOT_STRICT(ToUint64, TToUint64Converter</*strict=*/false>, ui64(TAutoMap<TUtf8>, TOptional<ui16>), /*optionalArgs=*/1); +DEFINE_UTF8_OPERATION_BIN_STRICT(TryToUint64, TToUint64Converter</*strict=*/true>, TOptional<ui64>(TAutoMap<TUtf8>, TOptional<ui16>), /*optionalArgs=*/1); + +using TTmpVector = TSmallVec<TUnboxedValue, TUnboxedValue::TAllocator>; + +template <typename TIt> +static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& input, + const std::string_view::const_iterator from, + const TIt& it, + TTmpVector& result) { + for (const auto& elem : it) { + result.emplace_back(valueBuilder->SubString(input, std::distance(from, elem.TokenStart()), std::distance(elem.TokenStart(), elem.TokenDelim()))); } +} - template <typename TIt> - static void SplitToListImpl( - const IValueBuilder* valueBuilder, - const TUnboxedValue& input, - const TUtf32String::const_iterator start, - const TIt& it, - TTmpVector& result) { - const std::string_view& original = input.AsStringRef(); - size_t charPos = 0U, bytePos = 0U; - for (const auto& elem : it) { - for (const size_t next = std::distance(start, elem.TokenStart()); charPos < next; ++charPos) - bytePos += WideCharSize(original[bytePos]); - const auto from = bytePos; - - 
for (const size_t next = charPos + std::distance(elem.TokenStart(), elem.TokenDelim()); charPos < next; ++charPos) - bytePos += WideCharSize(original[bytePos]); - const auto size = bytePos - from; - result.emplace_back(valueBuilder->SubString(input, from, size)); - } +template <typename TIt> +static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& input, + const TUtf32String::const_iterator start, + const TIt& it, + TTmpVector& result) { + const std::string_view& original = input.AsStringRef(); + size_t charPos = 0U, bytePos = 0U; + for (const auto& elem : it) { + for (const size_t next = std::distance(start, elem.TokenStart()); charPos < next; ++charPos) { + bytePos += WideCharSize(original[bytePos]); + } + const auto from = bytePos; + + for (const size_t next = charPos + std::distance(elem.TokenStart(), elem.TokenDelim()); charPos < next; ++charPos) { + bytePos += WideCharSize(original[bytePos]); + } + const auto size = bytePos - from; + result.emplace_back(valueBuilder->SubString(input, from, size)); } +} - template <typename TIt, typename TStrIt> - static void SplitToListImpl( - const IValueBuilder* valueBuilder, - const TUnboxedValue& input, - const TStrIt from, - TIt& it, - bool skipEmpty, - TTmpVector& result) { - if (skipEmpty) { - SplitToListImpl(valueBuilder, input, from, it.SkipEmpty(), result); - } else { - SplitToListImpl(valueBuilder, input, from, it, result); - } +template <typename TIt, typename TStrIt> +static void SplitToListImpl( + const IValueBuilder* valueBuilder, + const TUnboxedValue& input, + const TStrIt from, + TIt& it, + bool skipEmpty, + TTmpVector& result) { + if (skipEmpty) { + SplitToListImpl(valueBuilder, input, from, it.SkipEmpty(), result); + } else { + SplitToListImpl(valueBuilder, input, from, it, result); } +} - constexpr char delimeterStringName[] = "DelimeterString"; - constexpr char skipEmptyName[] = "SkipEmpty"; - constexpr char limitName[] = "Limit"; - using TDelimeterStringArg = 
TNamedArg<bool, delimeterStringName>; - using TSkipEmptyArg = TNamedArg<bool, skipEmptyName>; - using TLimitArg = TNamedArg<ui64, limitName>; - - SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSplitToList, TListType<TUtf8>( - TOptional<TUtf8>, - TUtf8, - TDelimeterStringArg, - TSkipEmptyArg, - TLimitArg - ), - 3) { - TTmpVector result; - if (args[0]) { - const bool delimiterString = args[2].GetOrDefault<bool>(true); - const bool skipEmpty = args[3].GetOrDefault<bool>(false); - const auto limit = args[4].GetOrDefault<ui64>(0); - if (delimiterString) { - const std::string_view input(args[0].AsStringRef()); - const std::string_view delimeter(args[1].AsStringRef()); - if (limit) { - auto it = StringSplitter(input).SplitByString(delimeter).Limit(limit + 1); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } else { - auto it = StringSplitter(input).SplitByString(delimeter); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } +constexpr char delimeterStringName[] = "DelimeterString"; +constexpr char skipEmptyName[] = "SkipEmpty"; +constexpr char limitName[] = "Limit"; +using TDelimeterStringArg = TNamedArg<bool, delimeterStringName>; +using TSkipEmptyArg = TNamedArg<bool, skipEmptyName>; +using TLimitArg = TNamedArg<ui64, limitName>; + +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSplitToList, TListType<TUtf8>(TOptional<TUtf8>, + TUtf8, + TDelimeterStringArg, + TSkipEmptyArg, + TLimitArg), + 3) { + TTmpVector result; + if (args[0]) { + const bool delimiterString = args[2].GetOrDefault<bool>(true); + const bool skipEmpty = args[3].GetOrDefault<bool>(false); + const auto limit = args[4].GetOrDefault<ui64>(0); + if (delimiterString) { + const std::string_view input(args[0].AsStringRef()); + const std::string_view delimeter(args[1].AsStringRef()); + if (limit) { + auto it = StringSplitter(input).SplitByString(delimeter).Limit(limit + 1); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } else { + auto it = 
StringSplitter(input).SplitByString(delimeter); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); + } + } else { + const auto& input = UTF8ToUTF32<true>(args[0].AsStringRef()); + const auto& delimeter = UTF8ToUTF32<true>(args[1].AsStringRef()); + if (limit) { + auto it = StringSplitter(input).SplitBySet(delimeter.c_str()).Limit(limit + 1); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); } else { - const auto& input = UTF8ToUTF32<true>(args[0].AsStringRef()); - const auto& delimeter = UTF8ToUTF32<true>(args[1].AsStringRef()); - if (limit) { - auto it = StringSplitter(input).SplitBySet(delimeter.c_str()).Limit(limit + 1); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } else { - auto it = StringSplitter(input).SplitBySet(delimeter.c_str()); - SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); - } + auto it = StringSplitter(input).SplitBySet(delimeter.c_str()); + SplitToListImpl(valueBuilder, args[0], input.cbegin(), it, skipEmpty, result); } } - return valueBuilder->NewList(result.data(), result.size()); } + return valueBuilder->NewList(result.data(), result.size()); +} - SIMPLE_UDF(TJoinFromList, TUtf8(TAutoMap<TListType<TOptional<TUtf8>>>, TUtf8)) { - const auto input = args[0].GetListIterator(); - const std::string_view delimeter(args[1].AsStringRef()); - std::vector<TString> items; +SIMPLE_UDF(TJoinFromList, TUtf8(TAutoMap<TListType<TOptional<TUtf8>>>, TUtf8)) { + const auto input = args[0].GetListIterator(); + const std::string_view delimeter(args[1].AsStringRef()); + std::vector<TString> items; - for (TUnboxedValue current; input.Next(current);) { - if (current) { - items.emplace_back(current.AsStringRef()); - } + for (TUnboxedValue current; input.Next(current);) { + if (current) { + items.emplace_back(current.AsStringRef()); } - - return valueBuilder->NewString(JoinSeq(delimeter, items)); } - SIMPLE_UDF(TToCodePointList, 
TListType<ui32>(TAutoMap<TUtf8>)) { - size_t codePointCount = 0; - const auto& inputRef = args[0].AsStringRef(); - if (!GetNumberOfUTF8Chars(inputRef.Data(), inputRef.Size(), codePointCount)) { - // should not happen but still we have to check return code - ythrow yexception() << "Unable to count code points"; - } + return valueBuilder->NewString(JoinSeq(delimeter, items)); +} - TUnboxedValue* itemsPtr = nullptr; - auto result = valueBuilder->NewArray(codePointCount, itemsPtr); - const unsigned char* current = reinterpret_cast<const unsigned char*>(inputRef.Data()); - const unsigned char* end = current + inputRef.Size(); - wchar32 rune = BROKEN_RUNE; - ui32 codePointIndex = 0; - RECODE_RESULT retcode = RECODE_OK; - while (current < end && RECODE_OK == (retcode = ReadUTF8CharAndAdvance(rune, current, end))) { - if (codePointIndex >= codePointCount) { - // sanity check - ythrow yexception() << "Too big code point index " << codePointIndex << ", expecting only " << codePointCount << " code points"; - } - itemsPtr[codePointIndex++] = TUnboxedValuePod(static_cast<ui32>(rune)); - } +SIMPLE_UDF(TToCodePointList, TListType<ui32>(TAutoMap<TUtf8>)) { + size_t codePointCount = 0; + const auto& inputRef = args[0].AsStringRef(); + if (!GetNumberOfUTF8Chars(inputRef.Data(), inputRef.Size(), codePointCount)) { + // should not happen but still we have to check return code + ythrow yexception() << "Unable to count code points"; + } - if (retcode != RECODE_OK) { - ythrow yexception() << "Malformed UTF-8 string"; - } + TUnboxedValue* itemsPtr = nullptr; + auto result = valueBuilder->NewArray(codePointCount, itemsPtr); + const unsigned char* current = reinterpret_cast<const unsigned char*>(inputRef.Data()); + const unsigned char* end = current + inputRef.Size(); + wchar32 rune = BROKEN_RUNE; + ui32 codePointIndex = 0; + RECODE_RESULT retcode = RECODE_OK; + while (current < end && RECODE_OK == (retcode = ReadUTF8CharAndAdvance(rune, current, end))) { + if (codePointIndex >= 
codePointCount) { + // sanity check + ythrow yexception() << "Too big code point index " << codePointIndex << ", expecting only " << codePointCount << " code points"; + } + itemsPtr[codePointIndex++] = TUnboxedValuePod(static_cast<ui32>(rune)); + } - return result; - } - - SIMPLE_UDF(TFromCodePointList, TUtf8(TAutoMap<TListType<ui32>>)) { - auto input = args[0]; - if (auto elems = input.GetElements()) { - const auto elemCount = input.GetListLength(); - auto bufferSize = WideToUTF8BufferSize(elemCount); - TTempBuf buffer(bufferSize); - auto bufferPtr = buffer.Data(); - auto bufferEnd = buffer.Data() + bufferSize; - for (ui64 i = 0; i != elemCount; ++i) { - const auto& item = elems[i]; - const wchar32 rune = item.Get<ui32>(); - size_t written = 0; - WideToUTF8(&rune, 1, bufferPtr, written); - Y_ENSURE(written <= 4); - bufferPtr += written; - Y_ENSURE(bufferPtr <= bufferEnd); - } - return valueBuilder->NewString(TStringRef(buffer.Data(), bufferPtr - buffer.Data())); - } + if (retcode != RECODE_OK) { + ythrow yexception() << "Malformed UTF-8 string"; + } - std::vector<char, NUdf::TStdAllocatorForUdf<char>> buffer; - buffer.reserve(TUnboxedValuePod::InternalBufferSize); + return result; +} - const auto& iter = input.GetListIterator(); - char runeBuffer[4] = {}; - for (NUdf::TUnboxedValue item; iter.Next(item); ) { +SIMPLE_UDF(TFromCodePointList, TUtf8(TAutoMap<TListType<ui32>>)) { + auto input = args[0]; + if (auto elems = input.GetElements()) { + const auto elemCount = input.GetListLength(); + auto bufferSize = WideToUTF8BufferSize(elemCount); + TTempBuf buffer(bufferSize); + auto bufferPtr = buffer.Data(); + auto bufferEnd = buffer.Data() + bufferSize; + for (ui64 i = 0; i != elemCount; ++i) { + const auto& item = elems[i]; const wchar32 rune = item.Get<ui32>(); size_t written = 0; - WideToUTF8(&rune, 1, runeBuffer, written); + WideToUTF8(&rune, 1, bufferPtr, written); Y_ENSURE(written <= 4); - buffer.insert(buffer.end(), runeBuffer, runeBuffer + written); + bufferPtr 
+= written; + Y_ENSURE(bufferPtr <= bufferEnd); } + return valueBuilder->NewString(TStringRef(buffer.Data(), bufferPtr - buffer.Data())); + } - return valueBuilder->NewString(TStringRef(buffer.data(), buffer.size())); + std::vector<char, NUdf::TStdAllocatorForUdf<char>> buffer; + buffer.reserve(TUnboxedValuePod::InternalBufferSize); + + const auto& iter = input.GetListIterator(); + char runeBuffer[4] = {}; + for (NUdf::TUnboxedValue item; iter.Next(item);) { + const wchar32 rune = item.Get<ui32>(); + size_t written = 0; + WideToUTF8(&rune, 1, runeBuffer, written); + Y_ENSURE(written <= 4); + buffer.insert(buffer.end(), runeBuffer, runeBuffer + written); } + return valueBuilder->NewString(TStringRef(buffer.data(), buffer.size())); +} + #define EXPORTED_UNICODE_BASE_UDF \ - TIsUtf, \ + TIsUtf, \ TGetLength, \ TSubstring, \ TFind, \ @@ -855,4 +856,4 @@ namespace { TIsAlpha, \ TIsAlnum, \ TIsHex -} +} // namespace diff --git a/yql/essentials/udfs/common/unicode_base/lib/ya.make b/yql/essentials/udfs/common/unicode_base/lib/ya.make index 2fda0829667..7ca562aef02 100644 --- a/yql/essentials/udfs/common/unicode_base/lib/ya.make +++ b/yql/essentials/udfs/common/unicode_base/lib/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( unicode_base_udf.cpp ) diff --git a/yql/essentials/udfs/common/unicode_base/ya.make b/yql/essentials/udfs/common/unicode_base/ya.make index 4ec872e2495..0540c0593b5 100644 --- a/yql/essentials/udfs/common/unicode_base/ya.make +++ b/yql/essentials/udfs/common/unicode_base/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(unicode_udf) 37 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( unicode_base.cpp diff --git a/yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp b/yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp index 50a3ee8d1f1..778a3088b7f 100644 --- a/yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp +++ b/yql/essentials/udfs/common/url_base/lib/url_base_udf.cpp @@ -1 +1 @@ -#include "url_base_udf.h"
\ No newline at end of file +#include "url_base_udf.h" diff --git a/yql/essentials/udfs/common/url_base/lib/url_base_udf.h b/yql/essentials/udfs/common/url_base/lib/url_base_udf.h index 04ad1b4e469..676d7a802d1 100644 --- a/yql/essentials/udfs/common/url_base/lib/url_base_udf.h +++ b/yql/essentials/udfs/common/url_base/lib/url_base_udf.h @@ -26,28 +26,27 @@ inline bool PrepareUrl(const std::string_view& keyStr, TUri& parser) { return parser.ParseAbs(keyStr, parseFlags) == TUri::ParsedOK; } -#define ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(udfName, functionName) \ - BEGIN_SIMPLE_ARROW_UDF(udfName, TOptional<char*>(TOptional<char*>)) { \ - EMPTY_RESULT_ON_EMPTY_ARG(0); \ - const std::string_view url(args[0].AsStringRef()); \ - const std::string_view res(functionName(url)); \ - return res.empty() ? TUnboxedValue() : \ - valueBuilder->SubString(args[0], std::distance(url.begin(), res.begin()), res.size()); \ - } \ - struct udfName##KernelExec : public TUnaryKernelExec<udfName##KernelExec> { \ - template <typename TSink> \ - static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { \ - if (!arg) { \ - return sink(TBlockItem()); \ - } \ - const std::string_view url(arg.AsStringRef()); \ - const std::string_view res(functionName(url)); \ - if (res.empty()) { \ - return sink(TBlockItem()); \ - } \ - sink(TBlockItem(TStringRef(res))); \ - } \ - }; \ +#define ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(udfName, functionName) \ + BEGIN_SIMPLE_ARROW_UDF(udfName, TOptional<char*>(TOptional<char*>)) { \ + EMPTY_RESULT_ON_EMPTY_ARG(0); \ + const std::string_view url(args[0].AsStringRef()); \ + const std::string_view res(functionName(url)); \ + return res.empty() ? 
TUnboxedValue() : valueBuilder->SubString(args[0], std::distance(url.begin(), res.begin()), res.size()); \ + } \ + struct udfName##KernelExec: public TUnaryKernelExec<udfName##KernelExec> { \ + template <typename TSink> \ + static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { \ + if (!arg) { \ + return sink(TBlockItem()); \ + } \ + const std::string_view url(arg.AsStringRef()); \ + const std::string_view res(functionName(url)); \ + if (res.empty()) { \ + return sink(TBlockItem()); \ + } \ + sink(TBlockItem(TStringRef(res))); \ + } \ + }; \ END_SIMPLE_ARROW_UDF(udfName, udfName##KernelExec::Do); BEGIN_SIMPLE_ARROW_UDF(TNormalize, TOptional<char*>(TOptional<char*>)) { @@ -55,10 +54,10 @@ BEGIN_SIMPLE_ARROW_UDF(TNormalize, TOptional<char*>(TOptional<char*>)) { TUri url; const bool success = PrepareUrl(args[0].AsStringRef(), url); return success - ? valueBuilder->NewString(url.PrintS(TUri::FlagNoFrag)) - : TUnboxedValue(); + ? valueBuilder->NewString(url.PrintS(TUri::FlagNoFrag)) + : TUnboxedValue(); } -struct TNormalizeKernelExec : public TUnaryKernelExec<TNormalizeKernelExec> { +struct TNormalizeKernelExec: public TUnaryKernelExec<TNormalizeKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { @@ -79,7 +78,7 @@ BEGIN_SIMPLE_STRICT_ARROW_UDF(TGetScheme, char*(TAutoMap<char*>)) { const std::string_view prefix(GetSchemePrefix(url)); return valueBuilder->SubString(args[0], std::distance(url.begin(), prefix.begin()), prefix.size()); } -struct TGetSchemeKernelExec : public TUnaryKernelExec<TGetSchemeKernelExec> { +struct TGetSchemeKernelExec: public TUnaryKernelExec<TGetSchemeKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const std::string_view url(arg.AsStringRef()); @@ -117,12 +116,12 @@ BEGIN_SIMPLE_ARROW_UDF(TGetPort, TOptional<ui64>(TOptional<char*>)) { TStringBuf scheme, host; TString 
lowerUri(args[0].AsStringRef()); std::transform(lowerUri.cbegin(), lowerUri.cbegin() + GetSchemePrefixSize(lowerUri), - lowerUri.begin(), [](unsigned char c){ return std::tolower(c); }); + lowerUri.begin(), [](unsigned char c) { return std::tolower(c); }); return TryGetSchemeHostAndPort(lowerUri, scheme, host, port) && port - ? TUnboxedValuePod(port) - : TUnboxedValuePod(); + ? TUnboxedValuePod(port) + : TUnboxedValuePod(); } -struct TGetPortKernelExec : public TUnaryKernelExec<TGetPortKernelExec> { +struct TGetPortKernelExec: public TUnaryKernelExec<TGetPortKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { @@ -132,7 +131,7 @@ struct TGetPortKernelExec : public TUnaryKernelExec<TGetPortKernelExec> { TStringBuf scheme, host; TString lowerUri(arg.AsStringRef()); std::transform(lowerUri.cbegin(), lowerUri.cbegin() + GetSchemePrefixSize(lowerUri), - lowerUri.begin(), [](unsigned char c){ return std::tolower(c); }); + lowerUri.begin(), [](unsigned char c) { return std::tolower(c); }); if (TryGetSchemeHostAndPort(lowerUri, scheme, host, port) && port) { return sink(TBlockItem(port)); } @@ -147,10 +146,10 @@ BEGIN_SIMPLE_ARROW_UDF(TGetTail, TOptional<char*>(TOptional<char*>)) { TStringBuf host, tail; SplitUrlToHostAndPath(url, host, tail); return tail.StartsWith('/') - ? valueBuilder->NewString(tail) - : valueBuilder->NewString(TString('/').append(tail)); + ? 
valueBuilder->NewString(tail) + : valueBuilder->NewString(TString('/').append(tail)); } -struct TGetTailKernelExec : public TUnaryKernelExec<TGetTailKernelExec> { +struct TGetTailKernelExec: public TUnaryKernelExec<TGetTailKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { @@ -184,7 +183,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetPath, TOptional<char*>(TOptional<char*>)) { return valueBuilder->SubString(args[0], std::distance(url.begin(), cut.begin()), cut.length()); } -struct TGetPathKernelExec : public TUnaryKernelExec<TGetPathKernelExec> { +struct TGetPathKernelExec: public TUnaryKernelExec<TGetPathKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { @@ -211,10 +210,9 @@ BEGIN_SIMPLE_ARROW_UDF(TGetFragment, TOptional<char*>(TOptional<char*>)) { EMPTY_RESULT_ON_EMPTY_ARG(0); const std::string_view url(args[0].AsStringRef()); const auto pos = url.find('#'); - return pos == std::string_view::npos ? TUnboxedValue() : - valueBuilder->SubString(args[0], pos + 1U, url.length() - pos - 1U); + return pos == std::string_view::npos ? 
TUnboxedValue() : valueBuilder->SubString(args[0], pos + 1U, url.length() - pos - 1U); } -struct TGetFragmentKernelExec : public TUnaryKernelExec<TGetFragmentKernelExec> { +struct TGetFragmentKernelExec: public TUnaryKernelExec<TGetFragmentKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { @@ -254,7 +252,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetDomain, TOptional<char*>(TOptional<char*>, ui8)) { const std::pair<ui32, ui32> result = *resultOpt; return valueBuilder->SubString(args[0], result.first, result.second); } -struct TGetDomainKernelExec : public TBinaryKernelExec<TGetDomainKernelExec> { +struct TGetDomainKernelExec: public TBinaryKernelExec<TGetDomainKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { if (!arg1) { @@ -274,7 +272,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetTLD, char*(TAutoMap<char*>)) { const TStringBuf url(args[0].AsStringRef()); return valueBuilder->NewString(GetZone(GetOnlyHost(url))); } -struct TGetTLDKernelExec : public TUnaryKernelExec<TGetTLDKernelExec> { +struct TGetTLDKernelExec: public TUnaryKernelExec<TGetTLDKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const TStringBuf url(arg.AsStringRef()); @@ -289,7 +287,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetDomainLevel, ui64(TAutoMap<char*>)) { StringSplitter(GetOnlyHost(args[0].AsStringRef())).Split('.').AddTo(&parts); return TUnboxedValuePod(ui64(parts.size())); } -struct TGetDomainLevelKernelExec : public TUnaryKernelExec<TGetDomainLevelKernelExec> { +struct TGetDomainLevelKernelExec: public TUnaryKernelExec<TGetDomainLevelKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { std::vector<std::string_view> parts; @@ -358,7 +356,7 @@ BEGIN_SIMPLE_ARROW_UDF(TGetCGIParam, TOptional<char*>(TOptional<char*>, char*)) const 
std::pair<ui32, ui32> result = *resultOpt; return valueBuilder->SubString(args[0], result.first, result.second); } -struct TGetCGIParamKernelExec : public TBinaryKernelExec<TGetCGIParamKernelExec> { +struct TGetCGIParamKernelExec: public TBinaryKernelExec<TGetCGIParamKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg1, TBlockItem arg2, const TSink& sink) { if (!arg1) { @@ -385,7 +383,7 @@ BEGIN_SIMPLE_ARROW_UDF(TCutQueryStringAndFragment, char*(TAutoMap<char*>)) { const auto cut = input.find_first_of("?#"); return std::string_view::npos == cut ? NUdf::TUnboxedValue(args[0]) : valueBuilder->SubString(args[0], 0U, cut); } -struct TCutQueryStringAndFragmentKernelExec : public TUnaryKernelExec<TCutQueryStringAndFragmentKernelExec> { +struct TCutQueryStringAndFragmentKernelExec: public TUnaryKernelExec<TCutQueryStringAndFragmentKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const std::string_view input(arg.AsStringRef()); @@ -405,7 +403,7 @@ BEGIN_SIMPLE_ARROW_UDF(TEncode, TOptional<char*>(TOptional<char*>)) { UrlEscape(url); return input == url ? NUdf::TUnboxedValue(args[0]) : valueBuilder->NewString(url); } -struct TEncodeKernelExec : public TUnaryKernelExec<TEncodeKernelExec> { +struct TEncodeKernelExec: public TUnaryKernelExec<TEncodeKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { @@ -433,7 +431,7 @@ BEGIN_SIMPLE_ARROW_UDF(TDecode, TOptional<char*>(TOptional<char*>)) { UrlUnescape(url); return input == url ? 
NUdf::TUnboxedValue(args[0]) : valueBuilder->NewString(url); } -struct TDecodeKernelExec : public TUnaryKernelExec<TDecodeKernelExec> { +struct TDecodeKernelExec: public TUnaryKernelExec<TDecodeKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { if (!arg) { @@ -455,7 +453,7 @@ BEGIN_SIMPLE_ARROW_UDF(TIsKnownTLD, bool(TAutoMap<char*>)) { Y_UNUSED(valueBuilder); return TUnboxedValuePod(IsTld(args[0].AsStringRef())); } -struct TIsKnownTLDKernelExec : public TUnaryKernelExec<TIsKnownTLDKernelExec> { +struct TIsKnownTLDKernelExec: public TUnaryKernelExec<TIsKnownTLDKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { sink(TBlockItem(static_cast<ui8>(IsTld(arg.AsStringRef())))); @@ -467,7 +465,7 @@ BEGIN_SIMPLE_ARROW_UDF(TIsWellKnownTLD, bool(TAutoMap<char*>)) { Y_UNUSED(valueBuilder); return TUnboxedValuePod(IsVeryGoodTld(args[0].AsStringRef())); } -struct TIsWellKnownTLDKernelExec : public TUnaryKernelExec<TIsWellKnownTLDKernelExec> { +struct TIsWellKnownTLDKernelExec: public TUnaryKernelExec<TIsWellKnownTLDKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { sink(TBlockItem(static_cast<ui8>(IsVeryGoodTld(arg.AsStringRef())))); @@ -475,13 +473,14 @@ struct TIsWellKnownTLDKernelExec : public TUnaryKernelExec<TIsWellKnownTLDKernel }; END_SIMPLE_ARROW_UDF(TIsWellKnownTLD, TIsWellKnownTLDKernelExec::Do); -BEGIN_SIMPLE_ARROW_UDF(THostNameToPunycode, TOptional<char*>(TAutoMap<char*>)) try { +BEGIN_SIMPLE_ARROW_UDF(THostNameToPunycode, TOptional<char*>(TAutoMap<char*>)) +try { const TUtf16String& input = UTF8ToWide(args[0].AsStringRef()); return valueBuilder->NewString(HostNameToPunycode(input)); } catch (TPunycodeError&) { return TUnboxedValue(); } -struct THostNameToPunycodeKernelExec : public TUnaryKernelExec<THostNameToPunycodeKernelExec> { +struct 
THostNameToPunycodeKernelExec: public TUnaryKernelExec<THostNameToPunycodeKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) try { const TUtf16String& input = UTF8ToWide(arg.AsStringRef()); @@ -496,7 +495,7 @@ BEGIN_SIMPLE_ARROW_UDF(TForceHostNameToPunycode, char*(TAutoMap<char*>)) { const TUtf16String& input = UTF8ToWide(args[0].AsStringRef()); return valueBuilder->NewString(ForceHostNameToPunycode(input)); } -struct TForceHostNameToPunycodeKernelExec : public TUnaryKernelExec<TForceHostNameToPunycodeKernelExec> { +struct TForceHostNameToPunycodeKernelExec: public TUnaryKernelExec<TForceHostNameToPunycodeKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const TUtf16String& input = UTF8ToWide(arg.AsStringRef()); @@ -505,14 +504,15 @@ struct TForceHostNameToPunycodeKernelExec : public TUnaryKernelExec<TForceHostNa }; END_SIMPLE_ARROW_UDF(TForceHostNameToPunycode, TForceHostNameToPunycodeKernelExec::Do); -BEGIN_SIMPLE_ARROW_UDF(TPunycodeToHostName, TOptional<char*>(TAutoMap<char*>)) try { +BEGIN_SIMPLE_ARROW_UDF(TPunycodeToHostName, TOptional<char*>(TAutoMap<char*>)) +try { const TStringRef& input = args[0].AsStringRef(); const auto& result = WideToUTF8(PunycodeToHostName(input)); return valueBuilder->NewString(result); } catch (TPunycodeError&) { return TUnboxedValue(); } -struct TPunycodeToHostNameKernelExec : public TUnaryKernelExec<TPunycodeToHostNameKernelExec> { +struct TPunycodeToHostNameKernelExec: public TUnaryKernelExec<TPunycodeToHostNameKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) try { const TStringRef& input = arg.AsStringRef(); @@ -529,7 +529,7 @@ BEGIN_SIMPLE_ARROW_UDF(TForcePunycodeToHostName, char*(TAutoMap<char*>)) { const auto& result = WideToUTF8(ForcePunycodeToHostName(input)); return valueBuilder->NewString(result); } -struct 
TForcePunycodeToHostNameKernelExec : public TUnaryKernelExec<TForcePunycodeToHostNameKernelExec> { +struct TForcePunycodeToHostNameKernelExec: public TUnaryKernelExec<TForcePunycodeToHostNameKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { const TStringRef& input = arg.AsStringRef(); @@ -543,7 +543,7 @@ BEGIN_SIMPLE_ARROW_UDF(TCanBePunycodeHostName, bool(TAutoMap<char*>)) { Y_UNUSED(valueBuilder); return TUnboxedValuePod(CanBePunycodeHostName(args[0].AsStringRef())); } -struct TCanBePunycodeHostNameKernelExec : public TUnaryKernelExec<TCanBePunycodeHostNameKernelExec> { +struct TCanBePunycodeHostNameKernelExec: public TUnaryKernelExec<TCanBePunycodeHostNameKernelExec> { template <typename TSink> static void Process(const IValueBuilder*, TBlockItem arg, const TSink& sink) { sink(TBlockItem(static_cast<ui8>(CanBePunycodeHostName(arg.AsStringRef())))); @@ -551,36 +551,36 @@ struct TCanBePunycodeHostNameKernelExec : public TUnaryKernelExec<TCanBePunycode }; END_SIMPLE_ARROW_UDF(TCanBePunycodeHostName, TCanBePunycodeHostNameKernelExec::Do); -#define EXPORTED_URL_BASE_UDF \ - TNormalize, \ - TParse, \ - TGetScheme, \ - TGetHost, \ - TGetHostPort, \ - TGetSchemeHost, \ - TGetSchemeHostPort, \ - TGetPort, \ - TGetTail, \ - TGetPath, \ - TGetFragment, \ - TGetDomain, \ - TGetTLD, \ - TGetDomainLevel, \ - TGetSignificantDomain, \ - TGetCGIParam, \ - TCutScheme, \ - TCutWWW, \ - TCutWWW2, \ - TCutQueryStringAndFragment, \ - TEncode, \ - TDecode, \ - TIsKnownTLD, \ - TIsWellKnownTLD, \ - THostNameToPunycode, \ - TForceHostNameToPunycode, \ - TPunycodeToHostName, \ - TForcePunycodeToHostName, \ - TCanBePunycodeHostName, \ - TQueryStringToList, \ - TQueryStringToDict, \ - TBuildQueryString +#define EXPORTED_URL_BASE_UDF \ + TNormalize, \ + TParse, \ + TGetScheme, \ + TGetHost, \ + TGetHostPort, \ + TGetSchemeHost, \ + TGetSchemeHostPort, \ + TGetPort, \ + TGetTail, \ + TGetPath, \ + TGetFragment, \ + 
TGetDomain, \ + TGetTLD, \ + TGetDomainLevel, \ + TGetSignificantDomain, \ + TGetCGIParam, \ + TCutScheme, \ + TCutWWW, \ + TCutWWW2, \ + TCutQueryStringAndFragment, \ + TEncode, \ + TDecode, \ + TIsKnownTLD, \ + TIsWellKnownTLD, \ + THostNameToPunycode, \ + TForceHostNameToPunycode, \ + TPunycodeToHostName, \ + TForcePunycodeToHostName, \ + TCanBePunycodeHostName, \ + TQueryStringToList, \ + TQueryStringToDict, \ + TBuildQueryString diff --git a/yql/essentials/udfs/common/url_base/lib/url_parse.cpp b/yql/essentials/udfs/common/url_base/lib/url_parse.cpp index 34485970ee0..c892bf25f6f 100644 --- a/yql/essentials/udfs/common/url_base/lib/url_parse.cpp +++ b/yql/essentials/udfs/common/url_base/lib/url_parse.cpp @@ -1,53 +1,53 @@ #include "url_parse.h" #define FIELD_ADD(name) structBuilder->AddField(#name, optionalStringType, &urlParseIndexes.name); -#define FIELD_FILL(name) \ - if (value.FldIsSet(TUri::Field##name)) { \ +#define FIELD_FILL(name) \ + if (value.FldIsSet(TUri::Field##name)) { \ fields[UrlParseIndexes_.name] = valueBuilder->NewString(value.GetField(TUri::Field##name)); \ } namespace NUrlUdf { - using namespace NUri; - using namespace NKikimr; - using namespace NUdf; +using namespace NUri; +using namespace NKikimr; +using namespace NUdf; - TUnboxedValue TParse::Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - TUri value; - const auto ParseError = value.ParseAbs(args[0].AsStringRef(), ParseFlags_); - TUnboxedValue* fields = nullptr; - const auto result = valueBuilder->NewArray(FieldsCount, fields); - if (ParseError == TUri::ParsedOK) { - FIELD_MAP(FIELD_FILL) - } else { - fields[UrlParseIndexes_.ParseError] = valueBuilder->NewString(TStringBuilder() << ParseError); - } - return result; +TUnboxedValue TParse::Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + TUri value; + const auto ParseError = value.ParseAbs(args[0].AsStringRef(), ParseFlags_); + TUnboxedValue* fields = nullptr; + const 
auto result = valueBuilder->NewArray(FieldsCount, fields); + if (ParseError == TUri::ParsedOK) { + FIELD_MAP(FIELD_FILL) + } else { + fields[UrlParseIndexes_.ParseError] = valueBuilder->NewString(TStringBuilder() << ParseError); } + return result; +} - bool TParse::DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - Y_UNUSED(userType); - if (Name() == name) { - TUrlParseIndexes urlParseIndexes; +bool TParse::DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + Y_UNUSED(userType); + if (Name() == name) { + TUrlParseIndexes urlParseIndexes; - builder.Args(1)->Add<TAutoMap<char*>>(); - const auto optionalStringType = builder.Optional()->Item<char*>().Build(); - const auto structBuilder = builder.Struct(FieldsCount); - structBuilder->AddField("ParseError", optionalStringType, &urlParseIndexes.ParseError); - FIELD_MAP(FIELD_ADD) - builder.Returns(structBuilder->Build()); + builder.Args(1)->Add<TAutoMap<char*>>(); + const auto optionalStringType = builder.Optional()->Item<char*>().Build(); + const auto structBuilder = builder.Struct(FieldsCount); + structBuilder->AddField("ParseError", optionalStringType, &urlParseIndexes.ParseError); + FIELD_MAP(FIELD_ADD) + builder.Returns(structBuilder->Build()); - if (!typesOnly) { - builder.Implementation(new TParse(urlParseIndexes)); - } - return true; - } else { - return false; + if (!typesOnly) { + builder.Implementation(new TParse(urlParseIndexes)); } + return true; + } else { + return false; } } +} // namespace NUrlUdf diff --git a/yql/essentials/udfs/common/url_base/lib/url_parse.h b/yql/essentials/udfs/common/url_base/lib/url_parse.h index e25e79c5196..8dab6ac7f47 100644 --- a/yql/essentials/udfs/common/url_base/lib/url_parse.h +++ b/yql/essentials/udfs/common/url_base/lib/url_parse.h @@ -16,44 +16,44 @@ #define FIELD_INDEXES(name) ui32 name; namespace NUrlUdf { - using namespace NUri; - 
using namespace NKikimr; - using namespace NUdf; - - struct TUrlParseIndexes { - ui32 ParseError; - FIELD_MAP(FIELD_INDEXES) - }; - - class TParse: public TBoxedValue { - public: - TParse(const TUrlParseIndexes& UrlParseIndexes) - : UrlParseIndexes_(UrlParseIndexes) - , ParseFlags_(TUri::FeaturesRecommended) - { - } - - static const TStringRef& Name() { - static auto nameRef = TStringRef("Parse"); - return nameRef; - } - - private: - TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override; - - public: - static bool DeclareSignature( - const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly); - - private: - const TUrlParseIndexes UrlParseIndexes_; - const NUri::TParseFlags ParseFlags_; - - static constexpr ui32 FieldsCount = sizeof(TUrlParseIndexes) / sizeof(ui32); - }; -} +using namespace NUri; +using namespace NKikimr; +using namespace NUdf; + +struct TUrlParseIndexes { + ui32 ParseError; + FIELD_MAP(FIELD_INDEXES) +}; + +class TParse: public TBoxedValue { +public: + TParse(const TUrlParseIndexes& UrlParseIndexes) + : UrlParseIndexes_(UrlParseIndexes) + , ParseFlags_(TUri::FeaturesRecommended) + { + } + + static const TStringRef& Name() { + static auto nameRef = TStringRef("Parse"); + return nameRef; + } + +private: + TUnboxedValue Run( + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; + +public: + static bool DeclareSignature( + const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly); + +private: + const TUrlParseIndexes UrlParseIndexes_; + const NUri::TParseFlags ParseFlags_; + + static constexpr ui32 FieldsCount = sizeof(TUrlParseIndexes) / sizeof(ui32); +}; +} // namespace NUrlUdf diff --git a/yql/essentials/udfs/common/url_base/lib/url_query.cpp b/yql/essentials/udfs/common/url_base/lib/url_query.cpp index 885dc3b16e9..32dc2e34806 100644 --- a/yql/essentials/udfs/common/url_base/lib/url_query.cpp 
+++ b/yql/essentials/udfs/common/url_base/lib/url_query.cpp @@ -7,122 +7,125 @@ #include <library/cpp/string_utils/quote/quote.h> namespace NUrlUdf { - void TQueryStringParse::MakeSignature(IFunctionTypeInfoBuilder& builder, - const TType* retType) - { - builder.Returns(retType).OptionalArgs(4); - auto args = builder.Args(); - args->Add<TAutoMap<TQueryStr>>(); - args->Add<TKeepBlankValuesNArg>(); - args->Add<TStrictNArg>(); - args->Add<TMaxFieldsNArg>(); - args->Add<TSeparatorNArg>().Done(); +void TQueryStringParse::MakeSignature(IFunctionTypeInfoBuilder& builder, + const TType* retType) +{ + builder.Returns(retType).OptionalArgs(4); + auto args = builder.Args(); + args->Add<TAutoMap<TQueryStr>>(); + args->Add<TKeepBlankValuesNArg>(); + args->Add<TStrictNArg>(); + args->Add<TMaxFieldsNArg>(); + args->Add<TSeparatorNArg>().Done(); +} + +std::vector<std::pair<TString, TString>> +TQueryStringParse::RunImpl(const TUnboxedValuePod* args) const { + const std::string_view query(args[0].AsStringRef()); + if (query.empty()) { + return {}; } + const bool keepBlankValues = args[1].GetOrDefault(false); + const bool strict = args[2].GetOrDefault(true); + const ui32 maxFieldCnt = args[3].GetOrDefault(Max<ui32>()); + const std::string_view sep(args[4] ? args[4].AsStringRef() : "&"); - std::vector<std::pair<TString, TString>> - TQueryStringParse::RunImpl(const TUnboxedValuePod* args) const { - const std::string_view query(args[0].AsStringRef()); - if (query.empty()) - return {}; - const bool keepBlankValues = args[1].GetOrDefault(false); - const bool strict = args[2].GetOrDefault(true); - const ui32 maxFieldCnt = args[3].GetOrDefault(Max<ui32>()); - const std::string_view sep(args[4] ? 
args[4].AsStringRef() : "&"); + std::vector<TStringBuf> parts; + StringSplitter(query).SplitByString(sep).Collect(&parts); + if (parts.size() > maxFieldCnt) { + UdfTerminate((TStringBuilder() << Pos_ << "Max number of fields (" << maxFieldCnt + << ") exceeded: got " << parts.size()) + .c_str()); + } - std::vector<TStringBuf> parts; - StringSplitter(query).SplitByString(sep).Collect(&parts); - if (parts.size() > maxFieldCnt) { - UdfTerminate((TStringBuilder() << Pos_ << "Max number of fields (" << maxFieldCnt - << ") exceeded: got " << parts.size()).c_str()); + std::vector<std::pair<TString, TString>> pairs; + for (const TStringBuf& part : parts) { + if (part.empty() && !strict) { + continue; } - - std::vector<std::pair<TString, TString>> pairs; - for (const TStringBuf& part: parts) { - if (part.empty() && !strict) { - continue; - } - TVector<TString> nvPair = StringSplitter(part).Split('=').Limit(2); - if (nvPair.size() != 2) { - if (strict) { - UdfTerminate((TStringBuilder() << Pos_ << "Bad query field: \"" - << nvPair[0] << "\"").c_str()); - } - if (keepBlankValues) { - nvPair.emplace_back(""); - } else { - continue; - } + TVector<TString> nvPair = StringSplitter(part).Split('=').Limit(2); + if (nvPair.size() != 2) { + if (strict) { + UdfTerminate((TStringBuilder() << Pos_ << "Bad query field: \"" + << nvPair[0] << "\"") + .c_str()); } - if (!nvPair[1].empty() || keepBlankValues) { - CGIUnescape(nvPair[0]); - CGIUnescape(nvPair[1]); - pairs.emplace_back(nvPair[0], nvPair[1]); + if (keepBlankValues) { + nvPair.emplace_back(""); + } else { + continue; } } - return pairs; + if (!nvPair[1].empty() || keepBlankValues) { + CGIUnescape(nvPair[0]); + CGIUnescape(nvPair[1]); + pairs.emplace_back(nvPair[0], nvPair[1]); + } } + return pairs; +} - bool TQueryStringToList::DeclareSignature(const TStringRef& name, - TType*, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - if (Name() == name) { - MakeSignature(builder, GetListType(builder)); - if (!typesOnly) { - 
builder.Implementation(new TQueryStringToList(builder.GetSourcePosition())); - } - return true; +bool TQueryStringToList::DeclareSignature(const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + if (Name() == name) { + MakeSignature(builder, GetListType(builder)); + if (!typesOnly) { + builder.Implementation(new TQueryStringToList(builder.GetSourcePosition())); } - return false; + return true; } + return false; +} - TUnboxedValue TQueryStringToList::Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - const auto pairs = RunImpl(args); - std::vector<TUnboxedValue> ret; - for (const auto& nvPair : pairs) { - TUnboxedValue* pair = nullptr; - auto item = valueBuilder->NewArray(2U, pair); - pair[0] = valueBuilder->NewString(nvPair.first); - pair[1] = valueBuilder->NewString(nvPair.second); - ret.push_back(item); - } - return valueBuilder->NewList(ret.data(), ret.size()); +TUnboxedValue TQueryStringToList::Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + const auto pairs = RunImpl(args); + std::vector<TUnboxedValue> ret; + for (const auto& nvPair : pairs) { + TUnboxedValue* pair = nullptr; + auto item = valueBuilder->NewArray(2U, pair); + pair[0] = valueBuilder->NewString(nvPair.first); + pair[1] = valueBuilder->NewString(nvPair.second); + ret.push_back(item); } + return valueBuilder->NewList(ret.data(), ret.size()); +} - bool TQueryStringToDict::DeclareSignature(const TStringRef& name, - TType*, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - if (Name() == name) { - auto dictType = GetDictType(builder); - MakeSignature(builder, dictType); - if (!typesOnly) { - builder.Implementation(new TQueryStringToDict(dictType, - builder.GetSourcePosition())); - } - return true; +bool TQueryStringToDict::DeclareSignature(const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + if (Name() == name) { + auto dictType = GetDictType(builder); + 
MakeSignature(builder, dictType); + if (!typesOnly) { + builder.Implementation(new TQueryStringToDict(dictType, + builder.GetSourcePosition())); } - return false; + return true; } + return false; +} - TUnboxedValue TQueryStringToDict::Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - const auto pairs = RunImpl(args); - auto ret = valueBuilder->NewDict(DictType_, TDictFlags::Hashed | TDictFlags::Multi); - for (const auto& nvPair : pairs) { - ret->Add(valueBuilder->NewString(nvPair.first), - valueBuilder->NewString(nvPair.second)); - } - return ret->Build(); +TUnboxedValue TQueryStringToDict::Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + const auto pairs = RunImpl(args); + auto ret = valueBuilder->NewDict(DictType_, TDictFlags::Hashed | TDictFlags::Multi); + for (const auto& nvPair : pairs) { + ret->Add(valueBuilder->NewString(nvPair.first), + valueBuilder->NewString(nvPair.second)); } + return ret->Build(); +} - TUnboxedValue TBuildQueryString::Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const { - const std::string_view sep(args[1] ? args[1].AsStringRef() : "&"); - TStringBuilder ret; +TUnboxedValue TBuildQueryString::Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const { + const std::string_view sep(args[1] ? 
args[1].AsStringRef() : "&"); + TStringBuilder ret; - switch(FirstArgTypeId_) { + switch (FirstArgTypeId_) { case EFirstArgTypeId::Dict: { TUnboxedValue key, value; const auto dictIt = args[0].GetDictIterator(); @@ -132,8 +135,9 @@ namespace NUrlUdf { const auto listIt = value.GetListIterator(); TUnboxedValue item; while (listIt.Next(item)) { - if (wasItem++) + if (wasItem++) { ret << sep; + } if (item) { ret << keyEscaped << '=' << CGIEscapeRet(item.AsStringRef()); } else { @@ -148,8 +152,9 @@ namespace NUrlUdf { const auto dictIt = args[0].GetDictIterator(); ui64 wasKey = 0; while (dictIt.NextPair(key, value)) { - if (wasKey++) + if (wasKey++) { ret << sep; + } if (value) { ret << CGIEscapeRet(key.AsStringRef()) << '=' << CGIEscapeRet(value.AsStringRef()); @@ -164,8 +169,9 @@ namespace NUrlUdf { TUnboxedValue item; const auto listIt = args[0].GetListIterator(); while (listIt.Next(item)) { - if (wasItem++) + if (wasItem++) { ret << sep; + } TUnboxedValue key = item.GetElement(0), val = item.GetElement(1); if (val) { ret << CGIEscapeRet(key.AsStringRef()) << '=' @@ -178,66 +184,66 @@ namespace NUrlUdf { } default: Y_ABORT("Current first parameter type is not yet implemented"); - } - return valueBuilder->NewString(ret); } + return valueBuilder->NewString(ret); +} - bool TBuildQueryString::DeclareSignature(const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly) { - if (Name() == name) { - if (!userType) { - builder.SetError("Missing user type"); - return true; - } - builder.UserType(userType); - const auto typeHelper = builder.TypeInfoHelper(); - const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); - if (!userTypeInspector || !userTypeInspector.GetElementsCount()) { - builder.SetError("User type is not tuple"); - return true; - } - const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, - userTypeInspector.GetElementType(0)); - if (!argsTypeInspector || !argsTypeInspector.GetElementsCount()) { - 
builder.SetError("Please provide at least one argument"); - return true; - } - const auto firstArgType = argsTypeInspector.GetElementType(0); - EFirstArgTypeId firstArgTypeId = EFirstArgTypeId::None; +bool TBuildQueryString::DeclareSignature(const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly) { + if (Name() == name) { + if (!userType) { + builder.SetError("Missing user type"); + return true; + } + builder.UserType(userType); + const auto typeHelper = builder.TypeInfoHelper(); + const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || !userTypeInspector.GetElementsCount()) { + builder.SetError("User type is not tuple"); + return true; + } + const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, + userTypeInspector.GetElementType(0)); + if (!argsTypeInspector || !argsTypeInspector.GetElementsCount()) { + builder.SetError("Please provide at least one argument"); + return true; + } + const auto firstArgType = argsTypeInspector.GetElementType(0); + EFirstArgTypeId firstArgTypeId = EFirstArgTypeId::None; - if (typeHelper->IsSameType(GetDictType(builder), firstArgType) || - typeHelper->IsSameType(GetDictType(builder, true), firstArgType)) { - firstArgTypeId = EFirstArgTypeId::Dict; - } else if (typeHelper->IsSameType(GetListType(builder), firstArgType) || - typeHelper->IsSameType(GetListType(builder, true), firstArgType) || - typeHelper->GetTypeKind(firstArgType) == ETypeKind::EmptyList) - { - firstArgTypeId = EFirstArgTypeId::List; - } else if (typeHelper->IsSameType(GetFlattenDictType(builder), firstArgType) || - typeHelper->IsSameType(GetFlattenDictType(builder, true), firstArgType) || - typeHelper->GetTypeKind(firstArgType) == ETypeKind::EmptyDict) - { - firstArgTypeId = EFirstArgTypeId::FlattenDict; - } - if (firstArgTypeId != EFirstArgTypeId::None) { - builder.Returns<TQueryStr>().OptionalArgs(1); - auto args = builder.Args(); - 
args->Add(firstArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap); - args->Add<TSeparatorNArg>().Done(); - if (!typesOnly) { - builder.Implementation(new TBuildQueryString(builder.GetSourcePosition(), - firstArgTypeId)); - } - } else { - TStringBuilder sb; - sb << "Unsupported first argument type: "; - TTypePrinter(*typeHelper, firstArgType).Out(sb.Out); - builder.SetError(sb); + if (typeHelper->IsSameType(GetDictType(builder), firstArgType) || + typeHelper->IsSameType(GetDictType(builder, true), firstArgType)) { + firstArgTypeId = EFirstArgTypeId::Dict; + } else if (typeHelper->IsSameType(GetListType(builder), firstArgType) || + typeHelper->IsSameType(GetListType(builder, true), firstArgType) || + typeHelper->GetTypeKind(firstArgType) == ETypeKind::EmptyList) + { + firstArgTypeId = EFirstArgTypeId::List; + } else if (typeHelper->IsSameType(GetFlattenDictType(builder), firstArgType) || + typeHelper->IsSameType(GetFlattenDictType(builder, true), firstArgType) || + typeHelper->GetTypeKind(firstArgType) == ETypeKind::EmptyDict) + { + firstArgTypeId = EFirstArgTypeId::FlattenDict; + } + if (firstArgTypeId != EFirstArgTypeId::None) { + builder.Returns<TQueryStr>().OptionalArgs(1); + auto args = builder.Args(); + args->Add(firstArgType).Flags(ICallablePayload::TArgumentFlags::AutoMap); + args->Add<TSeparatorNArg>().Done(); + if (!typesOnly) { + builder.Implementation(new TBuildQueryString(builder.GetSourcePosition(), + firstArgTypeId)); } - return true; + } else { + TStringBuilder sb; + sb << "Unsupported first argument type: "; + TTypePrinter(*typeHelper, firstArgType).Out(sb.Out); + builder.SetError(sb); } - return false; + return true; } + return false; } +} // namespace NUrlUdf diff --git a/yql/essentials/udfs/common/url_base/lib/url_query.h b/yql/essentials/udfs/common/url_base/lib/url_query.h index 552b8527823..7ba82241896 100644 --- a/yql/essentials/udfs/common/url_base/lib/url_query.h +++ b/yql/essentials/udfs/common/url_base/lib/url_query.h @@ -3,132 
+3,136 @@ #include <yql/essentials/public/udf/udf_helpers.h> namespace NUrlUdf { - using namespace NYql::NUdf; - - struct TQueryStringConv : public TBoxedValue { - protected: - static constexpr char Separator[] = "Separator"; - - using TQueryStr = char*; - using TSeparatorNArg = TNamedArg<TQueryStr, Separator>; - - static inline TType* GetListType(const IFunctionTypeInfoBuilder& builder, - bool optional = false) - { - auto tupleType = optional ? - builder.Tuple()->Add<TQueryStr>().Add(builder.Optional()->Item<TQueryStr>().Build()).Build() - : builder.Tuple()->Add<TQueryStr>().Add<TQueryStr>().Build(); - return builder.List()->Item(tupleType).Build(); - } - - static inline TType* GetDictType(const IFunctionTypeInfoBuilder& builder, - bool optional = false) - { - auto listType = optional ? - builder.List()->Item(builder.Optional()->Item<TQueryStr>().Build()).Build() - : builder.List()->Item<TQueryStr>().Build(); - return builder.Dict()->Key<TQueryStr>().Value(listType).Build(); - } - - static inline TType* GetFlattenDictType(const IFunctionTypeInfoBuilder& builder, - bool optional = false) - { - return optional ? 
- builder.Dict()->Key<TQueryStr>().Value(builder.Optional()->Item<TQueryStr>().Build()).Build() - : builder.Dict()->Key<TQueryStr>().Value<TQueryStr>().Build(); - } - }; - - struct TQueryStringParse: public TQueryStringConv { - explicit TQueryStringParse(TSourcePosition&& pos) : Pos_(std::move(pos)) {} - - protected: - static constexpr char KeepBlankValues[] = "KeepBlankValues"; - static constexpr char Strict[] = "Strict"; - static constexpr char MaxFields[] = "MaxFields"; - - using TKeepBlankValuesNArg = TNamedArg<bool, KeepBlankValues>; - using TStrictNArg = TNamedArg<bool, Strict>; - using TMaxFieldsNArg = TNamedArg<ui32, MaxFields>; - - static void MakeSignature(IFunctionTypeInfoBuilder& builder, const TType* retType); - - std::vector<std::pair<TString, TString>> - RunImpl(const TUnboxedValuePod* args) const; - - private: - TSourcePosition Pos_; - }; - - struct TQueryStringToList : public TQueryStringParse { - explicit TQueryStringToList(TSourcePosition&& pos) - : TQueryStringParse(std::forward<TSourcePosition>(pos)) {} - - static const TStringRef& Name() { - static const auto name = TStringRef::Of("QueryStringToList"); - return name; - } - - static bool DeclareSignature(const TStringRef& name, - TType*, - IFunctionTypeInfoBuilder& builder, - bool typesOnly); - - TUnboxedValue Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override; - }; - - struct TQueryStringToDict : public TQueryStringParse { - explicit TQueryStringToDict(TType* dictType, TSourcePosition&& pos) - : TQueryStringParse(std::move(pos)) - , DictType_(dictType) - {} - - static const TStringRef& Name() { - static const auto name = TStringRef::Of("QueryStringToDict"); - return name; - } - - static bool DeclareSignature(const TStringRef& name, - TType*, - IFunctionTypeInfoBuilder& builder, - bool typesOnly); - - TUnboxedValue Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override; - - private: - TType* DictType_; - }; - - class 
TBuildQueryString : public TQueryStringConv { - TSourcePosition Pos_; - enum class EFirstArgTypeId { - None, - Dict, - FlattenDict, - List, - } FirstArgTypeId_; - - public: - typedef bool TTypeAwareMarker; - - explicit TBuildQueryString(TSourcePosition&& pos, EFirstArgTypeId firstArgTypeId) - : Pos_(std::move(pos)) - , FirstArgTypeId_(firstArgTypeId) - {} - - static const TStringRef& Name() { - static const auto name = TStringRef::Of("BuildQueryString"); - return name; - } - - TUnboxedValue Run(const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override; - - static bool DeclareSignature(const TStringRef& name, - TType* userType, - IFunctionTypeInfoBuilder& builder, - bool typesOnly); - }; -} +using namespace NYql::NUdf; + +struct TQueryStringConv: public TBoxedValue { +protected: + static constexpr char Separator[] = "Separator"; + + using TQueryStr = char*; + using TSeparatorNArg = TNamedArg<TQueryStr, Separator>; + + static inline TType* GetListType(const IFunctionTypeInfoBuilder& builder, + bool optional = false) + { + auto tupleType = optional ? builder.Tuple()->Add<TQueryStr>().Add(builder.Optional()->Item<TQueryStr>().Build()).Build() + : builder.Tuple()->Add<TQueryStr>().Add<TQueryStr>().Build(); + return builder.List()->Item(tupleType).Build(); + } + + static inline TType* GetDictType(const IFunctionTypeInfoBuilder& builder, + bool optional = false) + { + auto listType = optional ? builder.List()->Item(builder.Optional()->Item<TQueryStr>().Build()).Build() + : builder.List()->Item<TQueryStr>().Build(); + return builder.Dict()->Key<TQueryStr>().Value(listType).Build(); + } + + static inline TType* GetFlattenDictType(const IFunctionTypeInfoBuilder& builder, + bool optional = false) + { + return optional ? 
builder.Dict()->Key<TQueryStr>().Value(builder.Optional()->Item<TQueryStr>().Build()).Build() + : builder.Dict()->Key<TQueryStr>().Value<TQueryStr>().Build(); + } +}; + +struct TQueryStringParse: public TQueryStringConv { + explicit TQueryStringParse(TSourcePosition&& pos) + : Pos_(std::move(pos)) + { + } + +protected: + static constexpr char KeepBlankValues[] = "KeepBlankValues"; + static constexpr char Strict[] = "Strict"; + static constexpr char MaxFields[] = "MaxFields"; + + using TKeepBlankValuesNArg = TNamedArg<bool, KeepBlankValues>; + using TStrictNArg = TNamedArg<bool, Strict>; + using TMaxFieldsNArg = TNamedArg<ui32, MaxFields>; + + static void MakeSignature(IFunctionTypeInfoBuilder& builder, const TType* retType); + + std::vector<std::pair<TString, TString>> + RunImpl(const TUnboxedValuePod* args) const; + +private: + TSourcePosition Pos_; +}; + +struct TQueryStringToList: public TQueryStringParse { + explicit TQueryStringToList(TSourcePosition&& pos) + : TQueryStringParse(std::forward<TSourcePosition>(pos)) + { + } + + static const TStringRef& Name() { + static const auto name = TStringRef::Of("QueryStringToList"); + return name; + } + + static bool DeclareSignature(const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly); + + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; +}; + +struct TQueryStringToDict: public TQueryStringParse { + explicit TQueryStringToDict(TType* dictType, TSourcePosition&& pos) + : TQueryStringParse(std::move(pos)) + , DictType_(dictType) + { + } + + static const TStringRef& Name() { + static const auto name = TStringRef::Of("QueryStringToDict"); + return name; + } + + static bool DeclareSignature(const TStringRef& name, + TType*, + IFunctionTypeInfoBuilder& builder, + bool typesOnly); + + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; + +private: + TType* DictType_; +}; + +class 
TBuildQueryString: public TQueryStringConv { + TSourcePosition Pos_; + enum class EFirstArgTypeId { + None, + Dict, + FlattenDict, + List, + } FirstArgTypeId_; + +public: + typedef bool TTypeAwareMarker; + + explicit TBuildQueryString(TSourcePosition&& pos, EFirstArgTypeId firstArgTypeId) + : Pos_(std::move(pos)) + , FirstArgTypeId_(firstArgTypeId) + { + } + + static const TStringRef& Name() { + static const auto name = TStringRef::Of("BuildQueryString"); + return name; + } + + TUnboxedValue Run(const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override; + + static bool DeclareSignature(const TStringRef& name, + TType* userType, + IFunctionTypeInfoBuilder& builder, + bool typesOnly); +}; +} // namespace NUrlUdf diff --git a/yql/essentials/udfs/common/url_base/lib/ya.make b/yql/essentials/udfs/common/url_base/lib/ya.make index 9887842303b..51a55865bd4 100644 --- a/yql/essentials/udfs/common/url_base/lib/ya.make +++ b/yql/essentials/udfs/common/url_base/lib/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( url_base_udf.cpp url_parse.cpp diff --git a/yql/essentials/udfs/common/url_base/url_base.cpp b/yql/essentials/udfs/common/url_base/url_base.cpp index 628abe7a301..ae5516741be 100644 --- a/yql/essentials/udfs/common/url_base/url_base.cpp +++ b/yql/essentials/udfs/common/url_base/url_base.cpp @@ -4,4 +4,3 @@ SIMPLE_MODULE(TUrlModule, EXPORTED_URL_BASE_UDF) REGISTER_MODULES(TUrlModule) - diff --git a/yql/essentials/udfs/common/url_base/ya.make b/yql/essentials/udfs/common/url_base/ya.make index a251b5cf0d2..79c7c9bead6 100644 --- a/yql/essentials/udfs/common/url_base/ya.make +++ b/yql/essentials/udfs/common/url_base/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(url_udf) 37 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( url_base.cpp diff --git a/yql/essentials/udfs/common/vector/vector_udf.cpp b/yql/essentials/udfs/common/vector/vector_udf.cpp index f1afafa7217..dbaf1d05e31 100644 --- a/yql/essentials/udfs/common/vector/vector_udf.cpp 
+++ b/yql/essentials/udfs/common/vector/vector_udf.cpp @@ -15,7 +15,8 @@ private: public: TVector() : Vector_() - {} + { + } TUnboxedValue GetResult(const IValueBuilder* builder) { TUnboxedValue* values = nullptr; @@ -45,14 +46,13 @@ public: }; extern const char VectorResourceName[] = "Vector.VectorResource"; -class TVectorResource: - public TBoxedResource<TVector, VectorResourceName> -{ +class TVectorResource: public TBoxedResource<TVector, VectorResourceName> { public: template <typename... Args> inline TVectorResource(Args&&... args) : TBoxedResource(std::forward<Args>(args)...) - {} + { + } }; TVectorResource* GetVectorResource(const TUnboxedValuePod& arg) { @@ -120,8 +120,7 @@ public: TType* userType, const TStringRef& typeConfig, ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + IFunctionTypeInfoBuilder& builder) const final { Y_UNUSED(typeConfig); try { diff --git a/yql/essentials/udfs/common/vector/ya.make b/yql/essentials/udfs/common/vector/ya.make index a1403f62a61..342065cbaff 100644 --- a/yql/essentials/udfs/common/vector/ya.make +++ b/yql/essentials/udfs/common/vector/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SRCS( vector_udf.cpp ) diff --git a/yql/essentials/udfs/common/yson2/ya.make b/yql/essentials/udfs/common/yson2/ya.make index 36867223141..cfcb7334c29 100644 --- a/yql/essentials/udfs/common/yson2/ya.make +++ b/yql/essentials/udfs/common/yson2/ya.make @@ -5,6 +5,8 @@ YQL_UDF_CONTRIB(yson2_udf) 28 0 ) + + ENABLE(YQL_STYLE_CPP) SRCS( yson2_udf.cpp diff --git a/yql/essentials/udfs/common/yson2/yson2_udf.cpp b/yql/essentials/udfs/common/yson2/yson2_udf.cpp index 721280dd5c5..a6ed9d13002 100644 --- a/yql/essentials/udfs/common/yson2/yson2_udf.cpp +++ b/yql/essentials/udfs/common/yson2/yson2_udf.cpp @@ -30,7 +30,7 @@ using TBoolDictType = TDict<char*, bool>; using TDoubleDictType = TDict<char*, double>; using TStringDictType = TDict<char*, char*>; -enum class EOptions : ui8 { +enum class EOptions: ui8 { 
Strict = 1, AutoConvert = 2 }; @@ -38,8 +38,8 @@ enum class EOptions : ui8 { union TOpts { ui8 Raw = 0; struct { - bool Strict: 1; - bool AutoConvert: 1; + bool Strict : 1; + bool AutoConvert : 1; }; }; @@ -52,7 +52,7 @@ TOpts ParseOptions(TUnboxedValuePod x) { return {}; } -class TOptions : public TBoxedValue { +class TOptions: public TBoxedValue { TUnboxedValue Run(const IValueBuilder*, const TUnboxedValuePod* args) const override { ui8 options = 0; @@ -66,6 +66,7 @@ class TOptions : public TBoxedValue { return TUnboxedValuePod(options); } + public: static const TStringRef& Name() { static auto name = TStringRef::Of("Options"); @@ -99,18 +100,25 @@ public: using TConverterPtr = TUnboxedValuePod (*)(TUnboxedValuePod, const IValueBuilder*, const TSourcePosition& pos); template <TConverterPtr Converter> -class TLazyConveterT : public TManagedBoxedValue { +class TLazyConveterT: public TManagedBoxedValue { public: TLazyConveterT(TUnboxedValue&& original, const IValueBuilder* valueBuilder, const TSourcePosition& pos) - : Original_(std::move(original)), ValueBuilder_(valueBuilder), Pos_(pos) - {} + : Original_(std::move(original)) + , ValueBuilder_(valueBuilder) + , Pos_(pos) + { + } + private: template <bool NoSwap> class TIterator: public TManagedBoxedValue { public: TIterator(TUnboxedValue&& original, const IValueBuilder* valueBuilder, const TSourcePosition& pos) - : Original_(std::move(original)), ValueBuilder_(valueBuilder), Pos_(pos) - {} + : Original_(std::move(original)) + , ValueBuilder_(valueBuilder) + , Pos_(pos) + { + } private: bool Skip() final { @@ -140,7 +148,7 @@ private: } const TUnboxedValue Original_; - const IValueBuilder *const ValueBuilder_; + const IValueBuilder* const ValueBuilder_; const TSourcePosition Pos_; }; @@ -196,11 +204,11 @@ private: } const TUnboxedValue Original_; - const IValueBuilder *const ValueBuilder_; + const IValueBuilder* const ValueBuilder_; const TSourcePosition Pos_; }; -template<bool Strict, bool AutoConvert, 
TConverterPtr Converter = nullptr> +template <bool Strict, bool AutoConvert, TConverterPtr Converter = nullptr> TUnboxedValuePod ConvertToListImpl(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { if (!x) { return valueBuilder->NewEmptyList().Release(); @@ -208,8 +216,9 @@ TUnboxedValuePod ConvertToListImpl(TUnboxedValuePod x, const IValueBuilder* valu switch (GetNodeType(x)) { case ENodeType::List: - if (!x.IsBoxed()) + if (!x.IsBoxed()) { break; + } if constexpr (Converter != nullptr) { if constexpr (Strict || AutoConvert) { return TUnboxedValuePod(new TLazyConveterT<Converter>(x, valueBuilder, pos)); @@ -251,7 +260,7 @@ TUnboxedValuePod ConvertToListImpl(TUnboxedValuePod x, const IValueBuilder* valu return valueBuilder->NewEmptyList().Release(); } -template<bool Strict, bool AutoConvert, TConverterPtr Converter = nullptr> +template <bool Strict, bool AutoConvert, TConverterPtr Converter = nullptr> TUnboxedValuePod ConvertToDictImpl(TUnboxedValuePod x, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { if (!x) { return valueBuilder->NewEmptyList().Release(); @@ -259,8 +268,9 @@ TUnboxedValuePod ConvertToDictImpl(TUnboxedValuePod x, const IValueBuilder* valu switch (GetNodeType(x)) { case ENodeType::Dict: - if (!x.IsBoxed()) + if (!x.IsBoxed()) { break; + } if constexpr (Converter != nullptr) { if constexpr (Strict || AutoConvert) { return TUnboxedValuePod(new TLazyConveterT<Converter>(x, valueBuilder, pos)); @@ -310,8 +320,9 @@ TUnboxedValuePod LookupImpl(TUnboxedValuePod dict, const TUnboxedValuePod key, c if (dict.IsBoxed()) { if (const i32 size = dict.GetListLength()) { if (i32 index; TryFromString(key.AsStringRef(), index) && index < size && index >= -size) { - if (index < 0) + if (index < 0) { index += size; + } if constexpr (Converter != nullptr) { return Converter(dict.Lookup(TUnboxedValuePod(index)).Release(), valueBuilder, pos); } @@ -337,10 +348,11 @@ TUnboxedValuePod YPathImpl(TUnboxedValuePod dict, 
const TUnboxedValuePod key, co for (const auto s : StringSplitter(path.substr(path[1U] == '/' ? 2U : 1U)).Split('/')) { const bool attr = IsNodeType<ENodeType::Attr>(dict); if (const std::string_view subpath = s.Token(); subpath == "@") { - if (attr) + if (attr) { dict = SetNodeType<ENodeType::Dict>(dict); - else + } else { return {}; + } } else { if (attr) { dict = dict.GetVariantItem().Release(); @@ -362,16 +374,17 @@ TUnboxedValuePod YPathImpl(TUnboxedValuePod dict, const TUnboxedValuePod key, co return dict; } -template<bool Strict, bool AutoConvert> +template <bool Strict, bool AutoConvert> TUnboxedValuePod ContainsImpl(TUnboxedValuePod dict, TUnboxedValuePod key, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { switch (GetNodeType(dict)) { case ENodeType::Attr: return ContainsImpl<Strict, AutoConvert>(dict.GetVariantItem().Release(), key, valueBuilder, pos); case ENodeType::Dict: - if (dict.IsBoxed()) + if (dict.IsBoxed()) { return TUnboxedValuePod(dict.Contains(key)); - else + } else { return TUnboxedValuePod(false); + } case ENodeType::List: if (dict.IsBoxed()) { if (const i32 size = dict.GetListLength()) { @@ -382,14 +395,15 @@ TUnboxedValuePod ContainsImpl(TUnboxedValuePod dict, TUnboxedValuePod key, const } return TUnboxedValuePod(false); default: - if constexpr (Strict && !AutoConvert) + if constexpr (Strict && !AutoConvert) { UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Can't check contains on scalar " << TDebugPrinter(dict)).c_str()); - else + } else { return {}; + } } } -template<bool Strict, bool AutoConvert> +template <bool Strict, bool AutoConvert> TUnboxedValuePod GetLengthImpl(TUnboxedValuePod dict, const IValueBuilder* valueBuilder, const TSourcePosition& pos) { switch (GetNodeType(dict)) { case ENodeType::Attr: @@ -399,132 +413,150 @@ TUnboxedValuePod GetLengthImpl(TUnboxedValuePod dict, const IValueBuilder* value case ENodeType::List: return TUnboxedValuePod(dict.IsBoxed() ? 
dict.GetListLength() : ui64(0)); default: - if constexpr (Strict && !AutoConvert) + if constexpr (Strict && !AutoConvert) { UdfTerminate((TStringBuilder() << valueBuilder->WithCalleePosition(pos) << " Can't get container length from scalar " << TDebugPrinter(dict)).c_str()); - else + } else { return {}; + } } } -} +} // namespace SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBool, TOptional<bool>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToBool<true, true> : &ConvertToBool<true, false>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToBool<false, true> : &ConvertToBool<false, false>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64, TOptional<i64>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToIntegral<true, true, i64> : &ConvertToIntegral<true, false, i64>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToIntegral<false, true, i64> : &ConvertToIntegral<false, false, i64>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64, TOptional<ui64>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToIntegral<true, true, ui64> : &ConvertToIntegral<true, false, ui64>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&ConvertToIntegral<false, true, ui64> : &ConvertToIntegral<false, false, ui64>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDouble, TOptional<double>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToFloat<true, true, double> : &ConvertToFloat<true, false, double>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToFloat<false, true, double> : &ConvertToFloat<false, false, double>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToString, TOptional<char*>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToString<true, true, false> : &ConvertToString<true, false, false>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToString<false, true, false> : &ConvertToString<false, false, false>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToList, TListType<TNodeResource>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToListImpl<true, true> : &ConvertToListImpl<true, false>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&ConvertToListImpl<false, true> : &ConvertToListImpl<false, false>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64List, TListType<i64>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToListImpl<true, true, &ConvertToIntegral<true, true, i64>> : &ConvertToListImpl<true, false, &ConvertToIntegral<true, false, i64>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToIntegral<false, true, i64>> : &ConvertToListImpl<false, false, &ConvertToIntegral<false, false, i64>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64List, TListType<ui64>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToListImpl<true, true, &ConvertToIntegral<true, true, ui64>> : &ConvertToListImpl<true, false, &ConvertToIntegral<true, false, ui64>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToIntegral<false, true, ui64>> : &ConvertToListImpl<false, false, &ConvertToIntegral<false, false, ui64>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBoolList, TListType<bool>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? 
&ConvertToListImpl<true, true, &ConvertToBool<true, true>> : &ConvertToListImpl<true, false, &ConvertToBool<true, false>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToBool<false, true>> : &ConvertToListImpl<false, false, &ConvertToBool<false, false>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDoubleList, TListType<double>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToListImpl<true, true, &ConvertToFloat<true, true, double>> : &ConvertToListImpl<true, false, &ConvertToFloat<true, false, double>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToListImpl<false, true, &ConvertToFloat<false, true, double>> : &ConvertToListImpl<false, false, &ConvertToFloat<false, false, double>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToStringList, TListType<char*>(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToListImpl<true, true, &ConvertToString<true, true, false>> : &ConvertToListImpl<true, false, &ConvertToString<true, false, false>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&ConvertToListImpl<false, true, &ConvertToString<false, true, false>> : &ConvertToListImpl<false, false, &ConvertToString<false, false, false>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDict, TDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToDictImpl<true, true> : &ConvertToDictImpl<true, false>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToDictImpl<false, true> : &ConvertToDictImpl<false, false>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToInt64Dict, TInt64DictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToIntegral<true, true, i64>> : &ConvertToDictImpl<true, false, &ConvertToIntegral<true, false, i64>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToDictImpl<false, true, &ConvertToIntegral<false, true, i64>> : &ConvertToDictImpl<false, false, &ConvertToIntegral<false, false, i64>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToUint64Dict, TUint64DictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToIntegral<true, true, ui64>> : &ConvertToDictImpl<true, false, &ConvertToIntegral<true, false, ui64>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&ConvertToDictImpl<false, true, &ConvertToIntegral<false, true, ui64>> : &ConvertToDictImpl<false, false, &ConvertToIntegral<false, false, ui64>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToBoolDict, TBoolDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToBool<true, true>> : &ConvertToDictImpl<true, false, &ConvertToBool<true, false>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToDictImpl<false, true, &ConvertToBool<false, true>> : &ConvertToDictImpl<false, false, &ConvertToBool<false, false>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToDoubleDict, TDoubleDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &ConvertToDictImpl<true, true, &ConvertToFloat<true, true, double>> : &ConvertToDictImpl<true, false, &ConvertToFloat<true, false, double>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToDictImpl<false, true, &ConvertToFloat<false, true, double>> : &ConvertToDictImpl<false, false, &ConvertToFloat<false, false, double>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TConvertToStringDict, TStringDictType(TOptional<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? 
&ConvertToDictImpl<true, true, &ConvertToString<true, true, false>> : &ConvertToDictImpl<true, false, &ConvertToString<true, false, false>>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ConvertToDictImpl<false, true, &ConvertToString<false, true, false>> : &ConvertToDictImpl<false, false, &ConvertToString<false, false, false>>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_STRICT_UDF(TAttributes, TDictType(TAutoMap<TNodeResource>)) { @@ -537,17 +569,19 @@ SIMPLE_STRICT_UDF(TAttributes, TDictType(TAutoMap<TNodeResource>)) { } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TContains, TOptional<bool>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &ContainsImpl<true, true> : &ContainsImpl<true, false>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &ContainsImpl<false, true> : &ContainsImpl<false, false>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TGetLength, TOptional<ui64>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &GetLengthImpl<true, true> : &GetLengthImpl<true, false>)(args[0], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&GetLengthImpl<false, true> : &GetLengthImpl<false, false>)(args[0], valueBuilder, GetPos()); + } } SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TLookup, TOptional<TNodeResource>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { @@ -555,52 +589,59 @@ SIMPLE_STRICT_UDF_WITH_OPTIONAL_ARGS(TLookup, TOptional<TNodeResource>(TAutoMap< } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupBool, TOptional<bool>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &LookupImpl<&ConvertToBool<true, true>> : &LookupImpl<&ConvertToBool<true, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &LookupImpl<&ConvertToBool<false, true>> : &LookupImpl<&ConvertToBool<false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupInt64, TOptional<i64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &LookupImpl<&ConvertToIntegral<true, true, i64>> : &LookupImpl<&ConvertToIntegral<true, false, i64>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &LookupImpl<&ConvertToIntegral<false, true, i64>> : &LookupImpl<&ConvertToIntegral<false, false, i64>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupUint64, TOptional<ui64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? 
&LookupImpl<&ConvertToIntegral<true, true, ui64>> : &LookupImpl<&ConvertToIntegral<true, false, ui64>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &LookupImpl<&ConvertToIntegral<false, true, ui64>> : &LookupImpl<&ConvertToIntegral<false, false, ui64>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupDouble, TOptional<double>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &LookupImpl<&ConvertToFloat<true, true, double>> : &LookupImpl<&ConvertToFloat<true, false, double>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &LookupImpl<&ConvertToFloat<false, true, double>> : &LookupImpl<&ConvertToFloat<false, false, double>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupString, TOptional<char*>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &LookupImpl<&ConvertToString<true, true, false>> : &LookupImpl<&ConvertToString<true, false, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &LookupImpl<&ConvertToString<false, true, false>> : &LookupImpl<&ConvertToString<false, false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupList, TOptional<TListType<TNodeResource>>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? 
&LookupImpl<&ConvertToListImpl<true, true>> : &LookupImpl<&ConvertToListImpl<true, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &LookupImpl<&ConvertToListImpl<false, true>> : &LookupImpl<&ConvertToListImpl<false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TLookupDict, TOptional<TDictType>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &LookupImpl<&ConvertToDictImpl<true, true>> : &LookupImpl<&ConvertToDictImpl<true, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &LookupImpl<&ConvertToDictImpl<false, true>> : &LookupImpl<&ConvertToDictImpl<false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPath, TOptional<TNodeResource>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { @@ -608,52 +649,59 @@ SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPath, TOptional<TNodeResource>(TAutoMap<TNodeRes } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathBool, TOptional<bool>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &YPathImpl<&ConvertToBool<true, true>> : &YPathImpl<&ConvertToBool<true, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&YPathImpl<&ConvertToBool<false, true>> : &YPathImpl<&ConvertToBool<false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathInt64, TOptional<i64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral<true, true, i64>> : &YPathImpl<&ConvertToIntegral<true, false, i64>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral<false, true, i64>> : &YPathImpl<&ConvertToIntegral<false, false, i64>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathUint64, TOptional<ui64>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral<true, true, ui64>> : &YPathImpl<&ConvertToIntegral<true, false, ui64>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &YPathImpl<&ConvertToIntegral<false, true, ui64>> : &YPathImpl<&ConvertToIntegral<false, false, ui64>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathDouble, TOptional<double>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &YPathImpl<&ConvertToFloat<true, true, double>> : &YPathImpl<&ConvertToFloat<true, false, double>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&YPathImpl<&ConvertToFloat<false, true, double>> : &YPathImpl<&ConvertToFloat<false, false, double>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathString, TOptional<char*>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &YPathImpl<&ConvertToString<true, true, false>> : &YPathImpl<&ConvertToString<true, false, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &YPathImpl<&ConvertToString<false, true, false>> : &YPathImpl<&ConvertToString<false, false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathList, TOptional<TListType<TNodeResource>>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &YPathImpl<&ConvertToListImpl<true, true>> : &YPathImpl<&ConvertToListImpl<true, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? &YPathImpl<&ConvertToListImpl<false, true>> : &YPathImpl<&ConvertToListImpl<false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_UDF_WITH_OPTIONAL_ARGS(TYPathDict, TOptional<TDictType>(TAutoMap<TNodeResource>, char*, TOptional<TOptionsResource>), 1) { - if (const auto options = ParseOptions(args[2]); options.Strict) + if (const auto options = ParseOptions(args[2]); options.Strict) { return (options.AutoConvert ? &YPathImpl<&ConvertToDictImpl<true, true>> : &YPathImpl<&ConvertToDictImpl<true, false>>)(args[0], args[1], valueBuilder, GetPos()); - else + } else { return (options.AutoConvert ? 
&YPathImpl<&ConvertToDictImpl<false, true>> : &YPathImpl<&ConvertToDictImpl<false, false>>)(args[0], args[1], valueBuilder, GetPos()); + } } SIMPLE_STRICT_UDF(TSerialize, TYson(TAutoMap<TNodeResource>)) { @@ -672,7 +720,8 @@ constexpr char SkipMapEntity[] = "SkipMapEntity"; constexpr char EncodeUtf8[] = "EncodeUtf8"; constexpr char WriteNanAsString[] = "WriteNanAsString"; -SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSerializeJson, TOptional<TJson>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>, TNamedArg<bool, SkipMapEntity>, TNamedArg<bool, EncodeUtf8>, TNamedArg<bool, WriteNanAsString>), 4) try { +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TSerializeJson, TOptional<TJson>(TAutoMap<TNodeResource>, TOptional<TOptionsResource>, TNamedArg<bool, SkipMapEntity>, TNamedArg<bool, EncodeUtf8>, TNamedArg<bool, WriteNanAsString>), 4) +try { return valueBuilder->NewString(SerializeJsonDom(args[0], args[2].GetOrDefault(false), args[3].GetOrDefault(false), args[4].GetOrDefault(false))); } catch (const std::exception& e) { if (ParseOptions(args[1]).Strict) { @@ -721,7 +770,7 @@ SIMPLE_STRICT_UDF(TWithAttributes, TOptional<TNodeResource>(TAutoMap<TNodeResour } } -template<ENodeType Type> +template <ENodeType Type> TUnboxedValuePod IsTypeImpl(TUnboxedValuePod y) { if (IsNodeType<ENodeType::Attr>(y)) { y = y.GetVariantItem().Release(); @@ -787,11 +836,14 @@ public: typedef bool TTypeAwareMarker; TBase(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape) - : Pos_(pos), TypeHelper_(typeHelper), Shape_(shape) - {} + : Pos_(pos) + , TypeHelper_(typeHelper) + , Shape_(shape) + { + } protected: - template<bool MoreTypesAllowed> + template <bool MoreTypesAllowed> static const TType* CheckType(const ITypeInfoHelper::TPtr typeHelper, const TType* shape) { switch (/* const auto kind = */ typeHelper->GetTypeKind(shape)) { case ETypeKind::Null: @@ -826,31 +878,41 @@ protected: return CheckType<MoreTypesAllowed>(typeHelper, TListTypeInspector(*typeHelper, shape).GetItemType()); case 
ETypeKind::Dict: { const auto dictTypeInspector = TDictTypeInspector(*typeHelper, shape); - if (const auto keyType = dictTypeInspector.GetKeyType(); ETypeKind::Data == typeHelper->GetTypeKind(keyType)) - if (const auto keyId = TDataTypeInspector(*typeHelper, keyType).GetTypeId(); keyId == TDataType<char*>::Id || keyId == TDataType<TUtf8>::Id) + if (const auto keyType = dictTypeInspector.GetKeyType(); ETypeKind::Data == typeHelper->GetTypeKind(keyType)) { + if (const auto keyId = TDataTypeInspector(*typeHelper, keyType).GetTypeId(); keyId == TDataType<char*>::Id || keyId == TDataType<TUtf8>::Id) { return CheckType<MoreTypesAllowed>(typeHelper, dictTypeInspector.GetValueType()); + } + } return shape; } case ETypeKind::Tuple: - if (const auto tupleTypeInspector = TTupleTypeInspector(*typeHelper, shape); auto count = tupleTypeInspector.GetElementsCount()) do - if (const auto bad = CheckType<MoreTypesAllowed>(typeHelper, tupleTypeInspector.GetElementType(--count))) - return bad; - while (count); + if (const auto tupleTypeInspector = TTupleTypeInspector(*typeHelper, shape); auto count = tupleTypeInspector.GetElementsCount()) { + do { + if (const auto bad = CheckType<MoreTypesAllowed>(typeHelper, tupleTypeInspector.GetElementType(--count))) { + return bad; + } + } while (count); + } return nullptr; case ETypeKind::Struct: - if (const auto structTypeInspector = TStructTypeInspector(*typeHelper, shape); auto count = structTypeInspector.GetMembersCount()) do - if (const auto bad = CheckType<MoreTypesAllowed>(typeHelper, structTypeInspector.GetMemberType(--count))) - return bad; - while (count); + if (const auto structTypeInspector = TStructTypeInspector(*typeHelper, shape); auto count = structTypeInspector.GetMembersCount()) { + do { + if (const auto bad = CheckType<MoreTypesAllowed>(typeHelper, structTypeInspector.GetMemberType(--count))) { + return bad; + } + } while (count); + } return nullptr; case ETypeKind::Variant: - if constexpr (MoreTypesAllowed) + if constexpr 
(MoreTypesAllowed) { return CheckType<MoreTypesAllowed>(typeHelper, TVariantTypeInspector(*typeHelper, shape).GetUnderlyingType()); - else + } else { return shape; + } case ETypeKind::Resource: - if (const auto inspector = TResourceTypeInspector(*typeHelper, shape); TStringBuf(inspector.GetTag()) == NodeResourceName) + if (const auto inspector = TResourceTypeInspector(*typeHelper, shape); TStringBuf(inspector.GetTag()) == NodeResourceName) { return nullptr; + } [[fallthrough]]; // AUTOGENERATED_FALLTHROUGH_FIXME default: return shape; @@ -859,13 +921,14 @@ protected: const TSourcePosition Pos_; const ITypeInfoHelper::TPtr TypeHelper_; - const TType *const Shape_; + const TType* const Shape_; }; class TFrom: public TBase { TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { return MakeDom(TypeHelper_.Get(), Shape_, *args, valueBuilder); } + public: static const TStringRef& Name() { static auto name = TStringRef::Of("From"); @@ -874,7 +937,8 @@ public: TFrom(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape) : TBase(pos, typeHelper, shape) - {} + { + } static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { if (Name() == name) { @@ -931,23 +995,24 @@ public: class TConvert: public TBase { TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { - if (const auto options = ParseOptions(args[1]); options.Strict) + if (const auto options = ParseOptions(args[1]); options.Strict) { return (options.AutoConvert ? &PeelDom<true, true> : &PeelDom<true, false>)(TypeHelper_.Get(), Shape_, args[0], valueBuilder, Pos_); - else + } else { return (options.AutoConvert ? 
&PeelDom<false, true> : &PeelDom<false, false>)(TypeHelper_.Get(), Shape_, args[0], valueBuilder, Pos_); + } } public: TConvert(TSourcePosition pos, const ITypeInfoHelper::TPtr typeHelper, const TType* shape) : TBase(pos, typeHelper, shape) - {} + { + } static const TStringRef& Name() { static auto name = TStringRef::Of("ConvertTo"); return name; } - static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { if (Name() == name) { const auto optionsType = builder.Optional()->Item(builder.Resource(OptionsResourceName)).Build(); @@ -1006,19 +1071,23 @@ public: } }; -template<typename TYJson, bool DecodeUtf8 = false> +template <typename TYJson, bool DecodeUtf8 = false> class TParse: public TBoxedValue { public: typedef bool TTypeAwareMarker; + private: const TSourcePosition Pos_; const bool StrictType_; TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final; + public: TParse(TSourcePosition pos, bool strictType) - : Pos_(pos), StrictType_(strictType) - {} + : Pos_(pos) + , StrictType_(strictType) + { + } static const TStringRef& Name(); @@ -1090,7 +1159,7 @@ public: } }; -template<> +template <> TUnboxedValue TParse<TYson, false>::Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const try { return TryParseYsonDom(args[0].AsStringRef(), valueBuilder); } catch (const std::exception& e) { @@ -1100,7 +1169,7 @@ TUnboxedValue TParse<TYson, false>::Run(const IValueBuilder* valueBuilder, const return TUnboxedValuePod(); } -template<> +template <> TUnboxedValue TParse<TJson, false>::Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const try { return TryParseJsonDom(args[0].AsStringRef(), valueBuilder); } catch (const std::exception& e) { @@ -1110,7 +1179,7 @@ TUnboxedValue TParse<TJson, false>::Run(const IValueBuilder* valueBuilder, const return TUnboxedValuePod(); } -template<> +template <> TUnboxedValue TParse<TJson, true>::Run(const 
IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const try { return TryParseJsonDom(args[0].AsStringRef(), valueBuilder, true); } catch (const std::exception& e) { @@ -1120,85 +1189,84 @@ TUnboxedValue TParse<TJson, true>::Run(const IValueBuilder* valueBuilder, const return TUnboxedValuePod(); } -template<> +template <> const TStringRef& TParse<TYson, false>::Name() { static auto yson = TStringRef::Of("Parse"); return yson; } -template<> +template <> const TStringRef& TParse<TJson, false>::Name() { static auto yson = TStringRef::Of("ParseJson"); return yson; } -template<> +template <> const TStringRef& TParse<TJson, true>::Name() { static auto yson = TStringRef::Of("ParseJsonDecodeUtf8"); return yson; } -} +} // namespace // TODO: optimizer that marks UDFs as strict if Yson::Options(false as Strict) is given SIMPLE_MODULE(TYson2Module, - TOptions, - TParse<TYson>, - TParse<TJson>, - TParse<TJson, true>, - TConvert, - TConvertToBool, - TConvertToInt64, - TConvertToUint64, - TConvertToDouble, - TConvertToString, - TConvertToList, - TConvertToBoolList, - TConvertToInt64List, - TConvertToUint64List, - TConvertToDoubleList, - TConvertToStringList, - TConvertToDict, - TConvertToBoolDict, - TConvertToInt64Dict, - TConvertToUint64Dict, - TConvertToDoubleDict, - TConvertToStringDict, - TAttributes, - TContains, - TLookup, - TLookupBool, - TLookupInt64, - TLookupUint64, - TLookupDouble, - TLookupString, - TLookupList, - TLookupDict, - TYPath, - TYPathBool, - TYPathInt64, - TYPathUint64, - TYPathDouble, - TYPathString, - TYPathList, - TYPathDict, - TSerialize, - TSerializeText, - TSerializePretty, - TSerializeJson, - TWithAttributes, - TIsString, - TIsInt64, - TIsUint64, - TIsBool, - TIsDouble, - TIsList, - TIsDict, - TIsEntity, - TFrom, - TGetLength, - TEquals, - TGetHash -); + TOptions, + TParse<TYson>, + TParse<TJson>, + TParse<TJson, true>, + TConvert, + TConvertToBool, + TConvertToInt64, + TConvertToUint64, + TConvertToDouble, + TConvertToString, + 
TConvertToList, + TConvertToBoolList, + TConvertToInt64List, + TConvertToUint64List, + TConvertToDoubleList, + TConvertToStringList, + TConvertToDict, + TConvertToBoolDict, + TConvertToInt64Dict, + TConvertToUint64Dict, + TConvertToDoubleDict, + TConvertToStringDict, + TAttributes, + TContains, + TLookup, + TLookupBool, + TLookupInt64, + TLookupUint64, + TLookupDouble, + TLookupString, + TLookupList, + TLookupDict, + TYPath, + TYPathBool, + TYPathInt64, + TYPathUint64, + TYPathDouble, + TYPathString, + TYPathList, + TYPathDict, + TSerialize, + TSerializeText, + TSerializePretty, + TSerializeJson, + TWithAttributes, + TIsString, + TIsInt64, + TIsUint64, + TIsBool, + TIsDouble, + TIsList, + TIsDict, + TIsEntity, + TFrom, + TGetLength, + TEquals, + TGetHash); REGISTER_MODULES(TYson2Module); diff --git a/yql/essentials/udfs/examples/callables/callables_udf.cpp b/yql/essentials/udfs/examples/callables/callables_udf.cpp index 6d8f1c27d9a..e367c3b990c 100644 --- a/yql/essentials/udfs/examples/callables/callables_udf.cpp +++ b/yql/essentials/udfs/examples/callables/callables_udf.cpp @@ -5,7 +5,6 @@ #include <util/generic/yexception.h> #include <util/string/cast.h> - using namespace NKikimr; using namespace NUdf; @@ -14,8 +13,7 @@ namespace { ////////////////////////////////////////////////////////////////////////////// // TFromString ////////////////////////////////////////////////////////////////////////////// -class TFromString: public TBoxedValue -{ +class TFromString: public TBoxedValue { public: static TStringRef Name() { static auto name = TStringRef::Of("FromString"); @@ -24,9 +22,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { Y_UNUSED(valueBuilder); auto str = args[0].AsStringRef(); int val = FromString<int>(str); @@ -37,8 +34,7 @@ private: 
////////////////////////////////////////////////////////////////////////////// // TSum ////////////////////////////////////////////////////////////////////////////// -class TSum: public TBoxedValue -{ +class TSum: public TBoxedValue { public: static TStringRef Name() { static auto name = TStringRef::Of("Sum"); @@ -47,9 +43,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { int sum = 0; auto it = args[0].GetListIterator(); @@ -65,8 +60,7 @@ private: ////////////////////////////////////////////////////////////////////////////// // TMul ////////////////////////////////////////////////////////////////////////////// -class TMul: public TBoxedValue -{ +class TMul: public TBoxedValue { public: static TStringRef Name() { static auto name = TStringRef::Of("Mul"); @@ -75,9 +69,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { int mul = 1; const auto it = args[0].GetListIterator(); @@ -106,9 +99,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { Y_UNUSED(valueBuilder); auto res = args[0] ? 
args[0].Get<i32>() : 123; return TUnboxedValuePod(res + 1); @@ -127,9 +119,8 @@ public: } TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { Y_UNUSED(valueBuilder); Y_UNUSED(args); return TUnboxedValuePod(new TNamedArgUdf()); @@ -139,14 +130,14 @@ public: ////////////////////////////////////////////////////////////////////////////// // TCallablesModule ////////////////////////////////////////////////////////////////////////////// -class TCallablesModule: public IUdfModule -{ +class TCallablesModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("Callables"); } - void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } void GetAllFunctions(IFunctionsSink& sink) const final { sink.Add(TFromString::Name()); @@ -155,12 +146,11 @@ public: } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { try { Y_UNUSED(userType); Y_UNUSED(typeConfig); @@ -176,25 +166,20 @@ public: if (!typesOnly) { builder.Implementation(new TFromString); } - } - else if (TSum::Name() == name) { + } else if (TSum::Name() == name) { // function signature: // int (ListOf(String), int(*)(String)) // run config: void - builder.Returns<int>().Args()-> - Add(builder.List()->Item<char*>()) - .Add(builder.Callable()->Returns<int>().Arg<char*>()) - .Done(); + builder.Returns<int>().Args()->Add(builder.List()->Item<char*>()).Add(builder.Callable()->Returns<int>().Arg<char*>()).Done(); if (!typesOnly) { builder.Implementation(new TSum); } - } - else if (TMul::Name() == name) { + } else if (TMul::Name() == name) { // function signature: // int 
(ListOf(String), int(*)(String)) // run config: void - using TFuncType = int(*)(char*); + using TFuncType = int (*)(char*); builder.SimpleSignature<int(TListType<char*>, TFuncType)>(); if (!typesOnly) { diff --git a/yql/essentials/udfs/examples/callables/ya.make b/yql/essentials/udfs/examples/callables/ya.make index 63e19657996..67b6cecbb68 100644 --- a/yql/essentials/udfs/examples/callables/ya.make +++ b/yql/essentials/udfs/examples/callables/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(callables_udf) YQL_ABI_VERSION(2 38 0) +ENABLE(YQL_STYLE_CPP) + SRCS( callables_udf.cpp ) diff --git a/yql/essentials/udfs/examples/dicts/dicts_udf.cpp b/yql/essentials/udfs/examples/dicts/dicts_udf.cpp index 69231f01b13..2ce8c509a6e 100644 --- a/yql/essentials/udfs/examples/dicts/dicts_udf.cpp +++ b/yql/essentials/udfs/examples/dicts/dicts_udf.cpp @@ -13,8 +13,7 @@ namespace { ////////////////////////////////////////////////////////////////////////////// // TStrToInt ////////////////////////////////////////////////////////////////////////////// -class TStrToInt: public TBoxedValue -{ +class TStrToInt: public TBoxedValue { public: explicit TStrToInt(TType* dictType) : DictType_(dictType) @@ -28,9 +27,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { auto kind = args[0].AsStringRef(); ui32 flags = 0; @@ -40,18 +38,7 @@ private: flags |= TDictFlags::Sorted; } - return valueBuilder->NewDict(DictType_, flags)-> - Add(valueBuilder->NewString("zero"), TUnboxedValuePod((ui32) 0)) - .Add(valueBuilder->NewString("one"), TUnboxedValuePod((ui32) 1)) - .Add(valueBuilder->NewString("two"), TUnboxedValuePod((ui32) 2)) - .Add(valueBuilder->NewString("three"), TUnboxedValuePod((ui32) 3)) - .Add(valueBuilder->NewString("four"), TUnboxedValuePod((ui32) 4)) - .Add(valueBuilder->NewString("five"), TUnboxedValuePod((ui32) 5)) - 
.Add(valueBuilder->NewString("six"), TUnboxedValuePod((ui32) 6)) - .Add(valueBuilder->NewString("seven"), TUnboxedValuePod((ui32) 7)) - .Add(valueBuilder->NewString("eight"), TUnboxedValuePod((ui32) 8)) - .Add(valueBuilder->NewString("nine"), TUnboxedValuePod((ui32) 9)) - .Build(); + return valueBuilder->NewDict(DictType_, flags)->Add(valueBuilder->NewString("zero"), TUnboxedValuePod((ui32)0)).Add(valueBuilder->NewString("one"), TUnboxedValuePod((ui32)1)).Add(valueBuilder->NewString("two"), TUnboxedValuePod((ui32)2)).Add(valueBuilder->NewString("three"), TUnboxedValuePod((ui32)3)).Add(valueBuilder->NewString("four"), TUnboxedValuePod((ui32)4)).Add(valueBuilder->NewString("five"), TUnboxedValuePod((ui32)5)).Add(valueBuilder->NewString("six"), TUnboxedValuePod((ui32)6)).Add(valueBuilder->NewString("seven"), TUnboxedValuePod((ui32)7)).Add(valueBuilder->NewString("eight"), TUnboxedValuePod((ui32)8)).Add(valueBuilder->NewString("nine"), TUnboxedValuePod((ui32)9)).Build(); } TType* DictType_; @@ -60,26 +47,25 @@ private: ////////////////////////////////////////////////////////////////////////////// // TDictsModule ////////////////////////////////////////////////////////////////////////////// -class TDictsModule: public IUdfModule -{ +class TDictsModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("Dicts"); } - void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } void GetAllFunctions(IFunctionsSink& sink) const final { sink.Add(TStrToInt::Name()); } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { try { Y_UNUSED(userType); Y_UNUSED(typeConfig); diff --git a/yql/essentials/udfs/examples/dicts/ya.make b/yql/essentials/udfs/examples/dicts/ya.make 
index 4dd62e36e9d..f7a7ff40c85 100644 --- a/yql/essentials/udfs/examples/dicts/ya.make +++ b/yql/essentials/udfs/examples/dicts/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(dicts_udf) YQL_ABI_VERSION(2 9 0) +ENABLE(YQL_STYLE_CPP) + SRCS( dicts_udf.cpp ) diff --git a/yql/essentials/udfs/examples/dummylog/dummylog_udf.cpp b/yql/essentials/udfs/examples/dummylog/dummylog_udf.cpp index e3e227f1303..fd1dcb5970b 100644 --- a/yql/essentials/udfs/examples/dummylog/dummylog_udf.cpp +++ b/yql/essentials/udfs/examples/dummylog/dummylog_udf.cpp @@ -10,32 +10,28 @@ using namespace NUdf; namespace { -struct TRecordInfo -{ +struct TRecordInfo { ui32 Key; ui32 Subkey; ui32 Value; static constexpr ui32 FieldsCount = 3U; }; - ////////////////////////////////////////////////////////////////////////////// // TDummyLog ////////////////////////////////////////////////////////////////////////////// -class TDummyLog: public TBoxedValue -{ +class TDummyLog: public TBoxedValue { public: explicit TDummyLog( - const TRecordInfo& fieldIndexes) + const TRecordInfo& fieldIndexes) : RecordInfo_(fieldIndexes) { } private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { auto keyData = args[0].GetElement(RecordInfo_.Key); auto subkeyData = args[0].GetElement(RecordInfo_.Subkey); auto valueData = args[0].GetElement(RecordInfo_.Value); @@ -55,21 +51,20 @@ private: const TRecordInfo RecordInfo_; }; -class TDummyLog2 : public TBoxedValue -{ +class TDummyLog2: public TBoxedValue { public: - class TFactory : public TBoxedValue { + class TFactory: public TBoxedValue { public: TFactory(const TRecordInfo& inputInfo, const TRecordInfo& outputInfo) : InputInfo_(inputInfo) , OutputInfo_(outputInfo) - {} + { + } -private: + private: TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const TUnboxedValuePod* args) const 
override { Y_UNUSED(valueBuilder); return TUnboxedValuePod(new TDummyLog2(args[0], InputInfo_, OutputInfo_)); } @@ -81,8 +76,7 @@ private: explicit TDummyLog2( const TUnboxedValuePod& runConfig, const TRecordInfo& inputInfo, - const TRecordInfo& outputInfo - ) + const TRecordInfo& outputInfo) : Prefix_(runConfig.AsStringRef()) , InputInfo_(inputInfo) , OutputInfo_(outputInfo) @@ -92,8 +86,7 @@ private: private: TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const TUnboxedValuePod* args) const override { auto keyData = args[0].GetElement(InputInfo_.Key); auto valueData = args[0].GetElement(InputInfo_.Value); @@ -115,14 +108,14 @@ private: ////////////////////////////////////////////////////////////////////////////// // TDummyLogModule ////////////////////////////////////////////////////////////////////////////// -class TDummyLogModule: public IUdfModule -{ +class TDummyLogModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("DummyLog"); } - void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } void GetAllFunctions(IFunctionsSink& sink) const final { sink.Add(TStringRef::Of("ReadRecord")); @@ -130,12 +123,11 @@ public: } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { try { Y_UNUSED(userType); Y_UNUSED(typeConfig); @@ -144,11 +136,7 @@ public: if (TStringRef::Of("ReadRecord") == name) { TRecordInfo recordInfo; - auto recordType = builder.Struct(recordInfo.FieldsCount)-> - AddField<char*>("key", &recordInfo.Key) - .AddField<char*>("subkey", &recordInfo.Subkey) - .AddField<char*>("value", &recordInfo.Value) - .Build(); + auto recordType = 
builder.Struct(recordInfo.FieldsCount)->AddField<char*>("key", &recordInfo.Key).AddField<char*>("subkey", &recordInfo.Subkey).AddField<char*>("value", &recordInfo.Value).Build(); builder.Returns(recordType).Args()->Add(recordType).Done(); @@ -162,18 +150,10 @@ public: builder.SetError(TStringRef::Of("Only AAA is valid type config")); } TRecordInfo inputInfo; - auto inputType = builder.Struct(inputInfo.FieldsCount)-> - AddField<char*>("key", &inputInfo.Key) - .AddField<char*>("subkey", &inputInfo.Subkey) - .AddField<char*>("value", &inputInfo.Value) - .Build(); + auto inputType = builder.Struct(inputInfo.FieldsCount)->AddField<char*>("key", &inputInfo.Key).AddField<char*>("subkey", &inputInfo.Subkey).AddField<char*>("value", &inputInfo.Value).Build(); TRecordInfo outputInfo; - auto outputType = builder.Struct(2U)-> - AddField<char*>("key", &outputInfo.Key) - .AddField<char*>("value", &outputInfo.Value) - .Build(); - + auto outputType = builder.Struct(2U)->AddField<char*>("key", &outputInfo.Key).AddField<char*>("value", &outputInfo.Value).Build(); builder.Returns(outputType).Args()->Add(inputType).Done(); builder.RunConfig<char*>(); diff --git a/yql/essentials/udfs/examples/dummylog/ya.make b/yql/essentials/udfs/examples/dummylog/ya.make index d1acd15945b..2a74a5767e9 100644 --- a/yql/essentials/udfs/examples/dummylog/ya.make +++ b/yql/essentials/udfs/examples/dummylog/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(dummylog) YQL_ABI_VERSION(2 9 0) +ENABLE(YQL_STYLE_CPP) + SRCS( dummylog_udf.cpp ) diff --git a/yql/essentials/udfs/examples/linear/linear_udf.cpp b/yql/essentials/udfs/examples/linear/linear_udf.cpp index 006ee817446..8696799a865 100644 --- a/yql/essentials/udfs/examples/linear/linear_udf.cpp +++ b/yql/essentials/udfs/examples/linear/linear_udf.cpp @@ -10,7 +10,7 @@ SIMPLE_UDF(TProducer, TLinear<i32>(i32)) { return TUnboxedValuePod(args[0].Get<i32>()); } -using TExchangeRet = TTuple<TLinear<i32>,i32>; +using TExchangeRet = TTuple<TLinear<i32>, i32>; 
SIMPLE_UDF(TExchange, TExchangeRet(TLinear<i32>, i32)) { TUnboxedValue* items; TUnboxedValue ret = valueBuilder->NewArray(2, items); @@ -19,7 +19,7 @@ SIMPLE_UDF(TExchange, TExchangeRet(TLinear<i32>, i32)) { return ret; } -class TUnsafeConsumer : public TBoxedValue { +class TUnsafeConsumer: public TBoxedValue { public: typedef bool TTypeAwareMarker; @@ -78,8 +78,7 @@ public: } return true; - } - else { + } else { return false; } } @@ -87,6 +86,6 @@ public: SIMPLE_MODULE(TLinearModule, TProducer, TUnsafeConsumer, TExchange) -} +} // namespace REGISTER_MODULES(TLinearModule) diff --git a/yql/essentials/udfs/examples/linear/ya.make b/yql/essentials/udfs/examples/linear/ya.make index 961bc48b82e..fb38acaf8f6 100644 --- a/yql/essentials/udfs/examples/linear/ya.make +++ b/yql/essentials/udfs/examples/linear/ya.make @@ -1,6 +1,8 @@ YQL_UDF(linear_udf) YQL_ABI_VERSION(2 44 0) +ENABLE(YQL_STYLE_CPP) + SRCS( linear_udf.cpp ) diff --git a/yql/essentials/udfs/examples/lists/lists_udf.cpp b/yql/essentials/udfs/examples/lists/lists_udf.cpp index 35dbb57dfa3..e310cde3e3b 100644 --- a/yql/essentials/udfs/examples/lists/lists_udf.cpp +++ b/yql/essentials/udfs/examples/lists/lists_udf.cpp @@ -15,8 +15,7 @@ namespace { ////////////////////////////////////////////////////////////////////////////// // TNumbersList ////////////////////////////////////////////////////////////////////////////// -class TNumbers: public TBoxedValue -{ +class TNumbers: public TBoxedValue { public: static TStringRef Name() { static auto name = TStringRef::Of("Numbers"); @@ -25,9 +24,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { const auto appendPrepend = args[0].AsStringRef(); const auto count = args[1].Get<ui32>(); std::vector<TUnboxedValue> list(count); @@ -36,8 +34,7 @@ private: for (auto it = list.begin(); list.end() != it; ++it) { *it = 
TUnboxedValuePod(i++); } - } - else if (TStringRef::Of("Prepend") == appendPrepend) { + } else if (TStringRef::Of("Prepend") == appendPrepend) { for (auto it = list.rbegin(); list.rend() != it; ++it) { *it = TUnboxedValuePod(i++); } @@ -50,8 +47,7 @@ private: ////////////////////////////////////////////////////////////////////////////// // TExtend ////////////////////////////////////////////////////////////////////////////// -class TExtend: public TBoxedValue -{ +class TExtend: public TBoxedValue { public: static TStringRef Name() { static auto name = TStringRef::Of("Extend"); @@ -60,9 +56,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { std::array<TUnboxedValue, 2U> list = {{TUnboxedValuePod(args[0]), TUnboxedValuePod(args[1])}}; return valueBuilder->NewList(list.data(), list.size()); } @@ -71,14 +66,14 @@ private: ////////////////////////////////////////////////////////////////////////////// // TListsModule ////////////////////////////////////////////////////////////////////////////// -class TListsModule: public IUdfModule -{ +class TListsModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("Lists"); } - void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } void GetAllFunctions(IFunctionsSink& sink) const final { sink.Add(TNumbers::Name()); @@ -86,12 +81,11 @@ public: } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { try { Y_UNUSED(userType); Y_UNUSED(typeConfig); @@ -107,14 +101,16 @@ public: if (!typesOnly) { builder.Implementation(new TNumbers); } - } - else if 
(TExtend::Name() == name) { + } else if (TExtend::Name() == name) { // function signature: // List<ui32> Numbers(List<ui32>, List<ui32>) // runConfig: void auto listType = builder.List()->Item<ui32>().Build(); builder.Returns(listType) - .Args()->Add(listType).Add(listType).Done(); + .Args() + ->Add(listType) + .Add(listType) + .Done(); if (!typesOnly) { builder.Implementation(new TExtend); diff --git a/yql/essentials/udfs/examples/lists/ya.make b/yql/essentials/udfs/examples/lists/ya.make index 3fa002c7800..dac2ee92dd9 100644 --- a/yql/essentials/udfs/examples/lists/ya.make +++ b/yql/essentials/udfs/examples/lists/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(lists_udf) YQL_ABI_VERSION(2 9 0) +ENABLE(YQL_STYLE_CPP) + SRCS( lists_udf.cpp ) diff --git a/yql/essentials/udfs/examples/structs/structs_udf.cpp b/yql/essentials/udfs/examples/structs/structs_udf.cpp index 38ea115eaf6..3d8e89c4ff6 100644 --- a/yql/essentials/udfs/examples/structs/structs_udf.cpp +++ b/yql/essentials/udfs/examples/structs/structs_udf.cpp @@ -11,8 +11,7 @@ using namespace NUdf; namespace { -struct TPersonInfo -{ +struct TPersonInfo { ui32 FirstName = 0; ui32 LastName = 0; ui32 Age = 0; @@ -26,8 +25,7 @@ struct TPersonInfo ////////////////////////////////////////////////////////////////////////////// // TPersonMember ////////////////////////////////////////////////////////////////////////////// -class TPersonMember: public TBoxedValue -{ +class TPersonMember: public TBoxedValue { public: explicit TPersonMember(ui32 memberIndex) : MemberIndex_(memberIndex) @@ -36,9 +34,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { Y_UNUSED(valueBuilder); return args[0].GetElement(MemberIndex_); } @@ -49,8 +46,7 @@ private: ////////////////////////////////////////////////////////////////////////////// // TNewPerson 
////////////////////////////////////////////////////////////////////////////// -class TNewPerson: public TBoxedValue -{ +class TNewPerson: public TBoxedValue { public: explicit TNewPerson(const TPersonInfo& personIndexes) : Info_(personIndexes) @@ -59,9 +55,8 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { TUnboxedValue name, surname, age; if (Info_.RemapKSV) { name = args->GetElement(Info_.Key); @@ -88,14 +83,14 @@ private: ////////////////////////////////////////////////////////////////////////////// // TPersonModule ////////////////////////////////////////////////////////////////////////////// -class TPersonModule: public IUdfModule -{ +class TPersonModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("Person"); } - void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } void GetAllFunctions(IFunctionsSink& sink) const final { sink.Add(TStringRef::Of("FirstName")); @@ -105,22 +100,17 @@ public: } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { Y_UNUSED(userType); try { bool typesOnly = (flags & TFlags::TypesOnly); TPersonInfo personInfo; - auto personType = builder.Struct(personInfo.FieldsCount)-> - AddField<char*>("FirstName", &personInfo.FirstName) - .AddField<char*>("LastName", &personInfo.LastName) - .AddField<ui32>("Age", &personInfo.Age) - .Build(); + auto personType = builder.Struct(personInfo.FieldsCount)->AddField<char*>("FirstName", &personInfo.FirstName).AddField<char*>("LastName", &personInfo.LastName).AddField<ui32>("Age", &personInfo.Age).Build(); if 
(TStringRef::Of("FirstName") == name) { // function signature: String FirstName(PersonStruct p) @@ -130,8 +120,7 @@ public: if (!typesOnly) { builder.Implementation(new TPersonMember(personInfo.FirstName)); } - } - else if (TStringRef::Of("LastName") == name) { + } else if (TStringRef::Of("LastName") == name) { // function signature: String LastName(PersonStruct p) // runConfig: void builder.Returns<char*>().Args()->Add(personType).Done(); @@ -139,8 +128,7 @@ public: if (!typesOnly) { builder.Implementation(new TPersonMember(personInfo.LastName)); } - } - else if (TStringRef::Of("Age") == name) { + } else if (TStringRef::Of("Age") == name) { // function signature: ui32 Age(PersonStruct p) // runConfig: void builder.Returns<ui32>().Args()->Add(personType).Done(); @@ -148,19 +136,14 @@ public: if (!typesOnly) { builder.Implementation(new TPersonMember(personInfo.Age)); } - } - else if (TStringRef::Of("New") == name) { + } else if (TStringRef::Of("New") == name) { // function signature: // PersonStruct New(String firstName, String lastName, ui32 age) // runConfig: void builder.Returns(personType); if (TStringRef::Of("RemapKSV") == typeConfig) { personInfo.RemapKSV = true; - auto inputType = builder.Struct(personInfo.FieldsCount)-> - AddField<char*>("key", &personInfo.Key) - .AddField<char*>("subkey", &personInfo.Subkey) - .AddField<char*>("value", &personInfo.Value) - .Build(); + auto inputType = builder.Struct(personInfo.FieldsCount)->AddField<char*>("key", &personInfo.Key).AddField<char*>("subkey", &personInfo.Subkey).AddField<char*>("value", &personInfo.Value).Build(); builder.Args()->Add(inputType); } else { builder.Args()->Add<char*>().Add<char*>().Add<ui32>(); diff --git a/yql/essentials/udfs/examples/structs/ya.make b/yql/essentials/udfs/examples/structs/ya.make index 2339cf0b272..00dacb1545a 100644 --- a/yql/essentials/udfs/examples/structs/ya.make +++ b/yql/essentials/udfs/examples/structs/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(structs_udf) YQL_ABI_VERSION(2 
9 0) +ENABLE(YQL_STYLE_CPP) + SRCS( structs_udf.cpp ) diff --git a/yql/essentials/udfs/examples/tagged/tagged_udf.cpp b/yql/essentials/udfs/examples/tagged/tagged_udf.cpp index 0f85c3e968e..29e2c51540d 100644 --- a/yql/essentials/udfs/examples/tagged/tagged_udf.cpp +++ b/yql/essentials/udfs/examples/tagged/tagged_udf.cpp @@ -4,100 +4,100 @@ using namespace NKikimr; using namespace NUdf; namespace { - extern const char TagFoo[] = "foo"; - extern const char TagBar[] = "bar"; - extern const char TagBaz[] = "baz"; - using TTaggedFoo = TTagged<i32, TagFoo>; - using TTaggedBar = TTagged<i32, TagBar>; - using TTaggedBaz = TTagged<i32, TagBaz>; - - SIMPLE_UDF(TExample, TTaggedBaz(TTaggedFoo, TTaggedBar)) { - Y_UNUSED(valueBuilder); - const auto input1 = args[0].Get<i32>(); - const auto input2 = args[1].Get<i32>(); - return TUnboxedValuePod(input1 + input2); +extern const char TagFoo[] = "foo"; +extern const char TagBar[] = "bar"; +extern const char TagBaz[] = "baz"; +using TTaggedFoo = TTagged<i32, TagFoo>; +using TTaggedBar = TTagged<i32, TagBar>; +using TTaggedBaz = TTagged<i32, TagBaz>; + +SIMPLE_UDF(TExample, TTaggedBaz(TTaggedFoo, TTaggedBar)) { + Y_UNUSED(valueBuilder); + const auto input1 = args[0].Get<i32>(); + const auto input2 = args[1].Get<i32>(); + return TUnboxedValuePod(input1 + input2); +} + +class TGenericTag: public TBoxedValue { +public: + typedef bool TTypeAwareMarker; + + TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { + auto tagStr = valueBuilder->NewString(Tag_); + return valueBuilder->ConcatStrings(args[0], static_cast<const TUnboxedValuePod&>(tagStr)); } - class TGenericTag : public TBoxedValue { - public: - typedef bool TTypeAwareMarker; + static const TStringRef& Name() { + static auto name = TStringRef::Of("GenericTag"); + return name; + } - TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { - auto tagStr = valueBuilder->NewString(Tag_); - return 
valueBuilder->ConcatStrings(args[0], static_cast<const TUnboxedValuePod&>(tagStr)); - } + TGenericTag(TStringRef tag) + : Tag_(tag) + { + } - static const TStringRef& Name() { - static auto name = TStringRef::Of("GenericTag"); - return name; - } + static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { + if (Name() == name) { + if (!userType) { + builder.SetError("Missing user type."); + return true; + } + + builder.UserType(userType); + const auto typeHelper = builder.TypeInfoHelper(); + const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); + if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) { + builder.SetError("Invalid user type."); + return true; + } - TGenericTag(TStringRef tag) - : Tag_(tag) - {} - - static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { - if (Name() == name) { - if (!userType) { - builder.SetError("Missing user type."); - return true; - } - - builder.UserType(userType); - const auto typeHelper = builder.TypeInfoHelper(); - const auto userTypeInspector = TTupleTypeInspector(*typeHelper, userType); - if (!userTypeInspector || userTypeInspector.GetElementsCount() < 1) { - builder.SetError("Invalid user type."); - return true; - } - - const auto argsTypeTuple = userTypeInspector.GetElementType(0); - const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple); - if (!argsTypeInspector) { - builder.SetError("Invalid user type - expected tuple."); - return true; - } - - if (const auto argsCount = argsTypeInspector.GetElementsCount(); argsCount != 1) { - ::TStringBuilder sb; - sb << "Invalid user type - expected one argument, got: " << argsCount; - builder.SetError(sb); - return true; - } - - const auto inputType = argsTypeInspector.GetElementType(0); - const auto tagged = TTaggedTypeInspector(*typeHelper, inputType); - if (!tagged) { - ::TStringBuilder sb; - 
sb << "Expected tagged string"; - builder.SetError(sb); - return true; - } - - const auto data = TDataTypeInspector(*typeHelper, tagged.GetBaseType()); - if (!data || data.GetTypeId() != TDataType<const char*>::Id) { - ::TStringBuilder sb; - sb << "Expected tagged string"; - builder.SetError(sb); - return true; - } - - builder.Args()->Add(inputType).Done().Returns(inputType); - if (!typesOnly) { - builder.Implementation(new TGenericTag(tagged.GetTag())); - } + const auto argsTypeTuple = userTypeInspector.GetElementType(0); + const auto argsTypeInspector = TTupleTypeInspector(*typeHelper, argsTypeTuple); + if (!argsTypeInspector) { + builder.SetError("Invalid user type - expected tuple."); return true; } - else { - return false; + + if (const auto argsCount = argsTypeInspector.GetElementsCount(); argsCount != 1) { + ::TStringBuilder sb; + sb << "Invalid user type - expected one argument, got: " << argsCount; + builder.SetError(sb); + return true; + } + + const auto inputType = argsTypeInspector.GetElementType(0); + const auto tagged = TTaggedTypeInspector(*typeHelper, inputType); + if (!tagged) { + ::TStringBuilder sb; + sb << "Expected tagged string"; + builder.SetError(sb); + return true; + } + + const auto data = TDataTypeInspector(*typeHelper, tagged.GetBaseType()); + if (!data || data.GetTypeId() != TDataType<const char*>::Id) { + ::TStringBuilder sb; + sb << "Expected tagged string"; + builder.SetError(sb); + return true; + } + + builder.Args()->Add(inputType).Done().Returns(inputType); + if (!typesOnly) { + builder.Implementation(new TGenericTag(tagged.GetTag())); } + return true; + } else { + return false; } - private: - TStringRef Tag_; - }; + } +private: + TStringRef Tag_; +}; - SIMPLE_MODULE(TTaggedModule, TExample, TGenericTag) -} +SIMPLE_MODULE(TTaggedModule, TExample, TGenericTag) +} // namespace REGISTER_MODULES(TTaggedModule) diff --git a/yql/essentials/udfs/examples/tagged/ya.make b/yql/essentials/udfs/examples/tagged/ya.make index 
2afb4f4d42e..7209cbfbe56 100644 --- a/yql/essentials/udfs/examples/tagged/ya.make +++ b/yql/essentials/udfs/examples/tagged/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(tagged_udf) YQL_ABI_VERSION(2 21 0) +ENABLE(YQL_STYLE_CPP) + SRCS( tagged_udf.cpp ) diff --git a/yql/essentials/udfs/examples/type_inspection/type_inspection_udf.cpp b/yql/essentials/udfs/examples/type_inspection/type_inspection_udf.cpp index b4b3e4709e1..07ad4f724fd 100644 --- a/yql/essentials/udfs/examples/type_inspection/type_inspection_udf.cpp +++ b/yql/essentials/udfs/examples/type_inspection/type_inspection_udf.cpp @@ -6,7 +6,6 @@ #include <util/generic/yexception.h> - using namespace NKikimr; using namespace NUdf; @@ -15,8 +14,7 @@ namespace { ////////////////////////////////////////////////////////////////////////////// // TZip ////////////////////////////////////////////////////////////////////////////// -class TZip: public TBoxedValue -{ +class TZip: public TBoxedValue { public: static TStringRef Name() { static auto name = TStringRef::Of("Zip"); @@ -25,15 +23,15 @@ public: private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const override { const auto it1 = args[0].GetListIterator(); const auto it2 = args[1].GetListIterator(); std::vector<TUnboxedValue> list; - if (args[0].HasFastListLength() && args[1].HasFastListLength()) + if (args[0].HasFastListLength() && args[1].HasFastListLength()) { list.reserve(std::min(args[0].GetListLength(), args[1].GetListLength())); + } for (TUnboxedValue one, two, *items = nullptr; it1.Next(one) && it2.Next(two);) { auto tuple = valueBuilder->NewArray(2U, items); items[0] = std::move(one); @@ -48,8 +46,7 @@ private: ////////////////////////////////////////////////////////////////////////////// // TFold ////////////////////////////////////////////////////////////////////////////// -class TFold : public TBoxedValue -{ +class TFold: 
public TBoxedValue { public: static TStringRef Name() { static auto name = TStringRef::Of("Fold"); @@ -59,8 +56,7 @@ public: private: TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const TUnboxedValuePod* args) const override { const auto it = args[0].GetListIterator(); TUnboxedValue state = TUnboxedValuePod(args[1]); auto func = args[2]; @@ -76,10 +72,9 @@ private: ////////////////////////////////////////////////////////////////////////////// // TInterleave ////////////////////////////////////////////////////////////////////////////// -class TInterleave : public TBoxedValue -{ +class TInterleave: public TBoxedValue { public: - class TValue : public TBoxedValue { + class TValue: public TBoxedValue { public: TValue(const IValueBuilder* valueBuilder, const TUnboxedValuePod& left, const TUnboxedValuePod& right) : ValueBuilder_(valueBuilder) @@ -121,8 +116,7 @@ public: private: TUnboxedValue Run( const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const override - { + const TUnboxedValuePod* args) const override { return TUnboxedValuePod(new TValue(valueBuilder, args[0], args[1])); } }; @@ -130,14 +124,14 @@ private: ////////////////////////////////////////////////////////////////////////////// // TTypeInspectionModule ////////////////////////////////////////////////////////////////////////////// -class TTypeInspectionModule: public IUdfModule -{ +class TTypeInspectionModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("TypeInspection"); } - void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } void GetAllFunctions(IFunctionsSink& sink) const final { sink.Add(TZip::Name())->SetTypeAwareness(); @@ -146,12 +140,11 @@ public: } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - IFunctionTypeInfoBuilder& builder) const final - { + const TStringRef& name, + TType* 
userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final { try { Y_UNUSED(typeConfig); diff --git a/yql/essentials/udfs/examples/type_inspection/ya.make b/yql/essentials/udfs/examples/type_inspection/ya.make index 7ce6c1b26dc..bbe8e26e0ae 100644 --- a/yql/essentials/udfs/examples/type_inspection/ya.make +++ b/yql/essentials/udfs/examples/type_inspection/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(type_inspection_udf) YQL_ABI_VERSION(2 9 0) +ENABLE(YQL_STYLE_CPP) + SRCS( type_inspection_udf.cpp ) diff --git a/yql/essentials/udfs/language/yql/ya.make b/yql/essentials/udfs/language/yql/ya.make index e0c16d6f5da..b9e673a7538 100644 --- a/yql/essentials/udfs/language/yql/ya.make +++ b/yql/essentials/udfs/language/yql/ya.make @@ -8,6 +8,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + SUBSCRIBER(g:yql) SRCS( diff --git a/yql/essentials/udfs/language/yql/yql_language_udf.cpp b/yql/essentials/udfs/language/yql/yql_language_udf.cpp index 8749f044053..d410538a481 100644 --- a/yql/essentials/udfs/language/yql/yql_language_udf.cpp +++ b/yql/essentials/udfs/language/yql/yql_language_udf.cpp @@ -19,12 +19,12 @@ using namespace NSQLTranslation; using namespace NSQLTranslationV1; using namespace NSQLv1Generated; -class TRuleFreqTranslation : public TSqlTranslation -{ +class TRuleFreqTranslation: public TSqlTranslation { public: TRuleFreqTranslation(TContext& ctx) : TSqlTranslation(ctx, ctx.Settings.Mode) - {} + { + } }; class TRuleFreqVisitor { @@ -75,7 +75,6 @@ public: continue; } - Freqs_[std::make_pair(fullName, fieldFullName)] += 1; } @@ -106,49 +105,49 @@ private: void VisitHint(const TRule_table_hint& msg, const TString& parent) { switch (msg.Alt_case()) { - case TRule_table_hint::kAltTableHint1: { - const auto& alt = msg.GetAlt_table_hint1(); - const TString id = Id(alt.GetRule_an_id_hint1(), Translation_); - Freqs_[std::make_pair(parent, id)] += 1; - break; - } - case TRule_table_hint::kAltTableHint2: { - const auto& alt = 
msg.GetAlt_table_hint2(); - Freqs_[std::make_pair(parent, alt.GetToken1().GetValue())] += 1; - break; - } - case TRule_table_hint::kAltTableHint3: { - const auto& alt = msg.GetAlt_table_hint3(); - Freqs_[std::make_pair(parent, alt.GetToken1().GetValue())] += 1; - break; - } - case TRule_table_hint::kAltTableHint4: { - const auto& alt = msg.GetAlt_table_hint4(); - Freqs_[std::make_pair(parent, alt.GetToken1().GetValue())] += 1; - break; - } - case TRule_table_hint::ALT_NOT_SET: - return; + case TRule_table_hint::kAltTableHint1: { + const auto& alt = msg.GetAlt_table_hint1(); + const TString id = Id(alt.GetRule_an_id_hint1(), Translation_); + Freqs_[std::make_pair(parent, id)] += 1; + break; + } + case TRule_table_hint::kAltTableHint2: { + const auto& alt = msg.GetAlt_table_hint2(); + Freqs_[std::make_pair(parent, alt.GetToken1().GetValue())] += 1; + break; + } + case TRule_table_hint::kAltTableHint3: { + const auto& alt = msg.GetAlt_table_hint3(); + Freqs_[std::make_pair(parent, alt.GetToken1().GetValue())] += 1; + break; + } + case TRule_table_hint::kAltTableHint4: { + const auto& alt = msg.GetAlt_table_hint4(); + Freqs_[std::make_pair(parent, alt.GetToken1().GetValue())] += 1; + break; + } + case TRule_table_hint::ALT_NOT_SET: + return; } } void VisitHints(const TRule_table_hints& msg, const TString& parent) { auto& block = msg.GetBlock2(); switch (block.Alt_case()) { - case TRule_table_hints::TBlock2::kAlt1: { - VisitHint(block.GetAlt1().GetRule_table_hint1(), parent); - break; - } - case TRule_table_hints::TBlock2::kAlt2: { - VisitHint(block.GetAlt2().GetRule_table_hint2(), parent); - for (const auto& x : block.GetAlt2().GetBlock3()) { - VisitHint(x.GetRule_table_hint2(), parent); + case TRule_table_hints::TBlock2::kAlt1: { + VisitHint(block.GetAlt1().GetRule_table_hint1(), parent); + break; } + case TRule_table_hints::TBlock2::kAlt2: { + VisitHint(block.GetAlt2().GetRule_table_hint2(), parent); + for (const auto& x : block.GetAlt2().GetBlock3()) { + 
VisitHint(x.GetRule_table_hint2(), parent); + } - break; - } - case TRule_table_hints::TBlock2::ALT_NOT_SET: - return; + break; + } + case TRule_table_hints::TBlock2::ALT_NOT_SET: + return; } } @@ -167,7 +166,7 @@ private: } } - template<typename TUnaryCasualExprRule> + template <typename TUnaryCasualExprRule> void VisitUnaryCasualSubexpr(const TUnaryCasualExprRule& msg) { const auto& block = msg.GetBlock1(); TString func; @@ -249,14 +248,14 @@ private: const auto& alt = msg.GetAlt_atom_expr7(); module = Id(alt.GetRule_an_id_or_type1(), Translation_); switch (alt.GetBlock3().Alt_case()) { - case TRule_atom_expr::TAlt7::TBlock3::kAlt1: - func = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), Translation_); - break; - case TRule_atom_expr::TAlt7::TBlock3::kAlt2: { - return false; - } - case TRule_atom_expr::TAlt7::TBlock3::ALT_NOT_SET: - Y_ABORT("Unsigned number: you should change implementation according to grammar changes"); + case TRule_atom_expr::TAlt7::TBlock3::kAlt1: + func = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), Translation_); + break; + case TRule_atom_expr::TAlt7::TBlock3::kAlt2: { + return false; + } + case TRule_atom_expr::TAlt7::TBlock3::ALT_NOT_SET: + Y_ABORT("Unsigned number: you should change implementation according to grammar changes"); } return true; @@ -270,14 +269,14 @@ private: const auto& alt = msg.GetAlt_in_atom_expr6(); module = Id(alt.GetRule_an_id_or_type1(), Translation_); switch (alt.GetBlock3().Alt_case()) { - case TRule_in_atom_expr::TAlt6::TBlock3::kAlt1: - func = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), Translation_); - break; - case TRule_in_atom_expr::TAlt6::TBlock3::kAlt2: { - return false; - } - case TRule_in_atom_expr::TAlt6::TBlock3::ALT_NOT_SET: - Y_ABORT("You should change implementation according to grammar changes"); + case TRule_in_atom_expr::TAlt6::TBlock3::kAlt1: + func = Id(alt.GetBlock3().GetAlt1().GetRule_id_or_type1(), Translation_); + break; + case 
TRule_in_atom_expr::TAlt6::TBlock3::kAlt2: { + return false; + } + case TRule_in_atom_expr::TAlt6::TBlock3::ALT_NOT_SET: + Y_ABORT("You should change implementation according to grammar changes"); } return true; @@ -403,7 +402,8 @@ SIMPLE_UDF(TRuleFreq, TOptional<TRuleFreqResult>(TAutoMap<char*>)) { } } -SIMPLE_UDF(TTestSyntax, TOptional<char*>(TAutoMap<char*>)) try { +SIMPLE_UDF(TTestSyntax, TOptional<char*>(TAutoMap<char*>)) +try { const TString query(args[0].AsStringRef()); google::protobuf::Arena arena; @@ -425,10 +425,8 @@ SIMPLE_UDF(TTestSyntax, TOptional<char*>(TAutoMap<char*>)) try { } SIMPLE_MODULE(TYqlLangModule, - TObfuscate, - TRuleFreq, - TTestSyntax -); + TObfuscate, + TRuleFreq, + TTestSyntax); REGISTER_MODULES(TYqlLangModule); - diff --git a/yql/essentials/udfs/logs/dsv/dsv_udf.cpp b/yql/essentials/udfs/logs/dsv/dsv_udf.cpp index 421e36a0100..8d66253a32b 100644 --- a/yql/essentials/udfs/logs/dsv/dsv_udf.cpp +++ b/yql/essentials/udfs/logs/dsv/dsv_udf.cpp @@ -11,15 +11,13 @@ using namespace NUdf; namespace { -struct TKsvIndexes -{ +struct TKsvIndexes { ui32 Key; ui32 Subkey; ui32 Value; }; -struct TResultIndexes -{ +struct TResultIndexes { TType* DictType; ui32 Key; @@ -40,28 +38,26 @@ void ParseDsv(const TUnboxedValuePod& value, const auto from = std::distance(input.begin(), part.begin()); builder->Add( valueBuilder->SubString(value, from, pos), - valueBuilder->SubString(value, from + pos + 1U, part.length() - pos - 1U) - ); + valueBuilder->SubString(value, from + pos + 1U, part.length() - pos - 1U)); } } } -class TDsvReadRecord: public TBoxedValue -{ +class TDsvReadRecord: public TBoxedValue { public: - class TFactory : public TBoxedValue { + class TFactory: public TBoxedValue { public: TFactory(const TResultIndexes& fieldIndexes, const TKsvIndexes& ksvIndexes) - : ResultIndexes_(fieldIndexes) - , KsvIndexes_(ksvIndexes) + : ResultIndexes_(fieldIndexes) + , KsvIndexes_(ksvIndexes) { } + private: TUnboxedValue Run( const IValueBuilder* valueBuilder, 
- const TUnboxedValuePod* args) const final try - { + const TUnboxedValuePod* args) const final try { const auto optRunConfig = args[0]; TUnboxedValue separator; if (optRunConfig && !optRunConfig.AsStringRef().Empty()) { @@ -71,8 +67,7 @@ public: } return TUnboxedValuePod(new TDsvReadRecord(separator, ResultIndexes_, KsvIndexes_)); - } - catch (const std::exception& e) { + } catch (const std::exception& e) { UdfTerminate(e.what()); } @@ -88,16 +83,15 @@ public: , KsvIndexes_(ksvIndexes) { } + private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { auto keyData = args[0].GetElement(KsvIndexes_.Key); auto subkeyData = args[0].GetElement(KsvIndexes_.Subkey); auto valueData = args[0].GetElement(KsvIndexes_.Value); - auto dict = valueBuilder->NewDict(ResultIndexes_.DictType, 0); ParseDsv(valueData, Separator_.AsStringRef(), valueBuilder, dict.Get()); @@ -108,8 +102,7 @@ private: items[ResultIndexes_.Subkey] = subkeyData; items[ResultIndexes_.Dict] = dict->Build(); return result; - } - catch (const std::exception& e) { + } catch (const std::exception& e) { UdfTerminate(e.what()); } @@ -118,48 +111,46 @@ private: const TKsvIndexes KsvIndexes_; }; -class TDsvParse: public TBoxedValue -{ +class TDsvParse: public TBoxedValue { public: explicit TDsvParse(TType* dictType) : DictType_(dictType) - {} + { + } + private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try - { - const std::string_view separator = args[1] ? - std::string_view(args[1].AsStringRef()): - std::string_view("\t"); + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { + const std::string_view separator = args[1] ? 
std::string_view(args[1].AsStringRef()) : std::string_view("\t"); auto dict = valueBuilder->NewDict(DictType_, 0); ParseDsv(args[0], separator, valueBuilder, dict.Get()); return dict->Build(); - } - catch (const std::exception& e) { + } catch (const std::exception& e) { UdfTerminate(e.what()); } const TType* DictType_; }; -#define TYPE_TO_STRING(type) \ -case TDataType<type>::Id: part += ToString(member.Get<type>()); break; +#define TYPE_TO_STRING(type) \ + case TDataType<type>::Id: \ + part += ToString(member.Get<type>()); \ + break; -class TDsvSerialize: public TBoxedValue -{ +class TDsvSerialize: public TBoxedValue { public: explicit TDsvSerialize(const TVector<TDataTypeId>& typeIds, TStructTypeInspector* structInspector) : TypeIds_(typeIds) , StructInspector_(structInspector) - {} + { + } private: TUnboxedValue Run( - const IValueBuilder* valueBuilder, - const TUnboxedValuePod* args) const final try - { + const IValueBuilder* valueBuilder, + const TUnboxedValuePod* args) const final try { TVector<TString> result; if (const ui32 structSize = StructInspector_->GetMembersCount()) { result.reserve(structSize); @@ -179,14 +170,12 @@ private: default: part += member.AsStringRef(); break; - } result.emplace_back(std::move(part)); } } return valueBuilder->NewString(JoinStrings(result, "\t")); - } - catch (const std::exception& e) { + } catch (const std::exception& e) { UdfTerminate(e.what()); } @@ -194,14 +183,14 @@ private: THolder<TStructTypeInspector> StructInspector_; }; -class TDsvModule: public IUdfModule -{ +class TDsvModule: public IUdfModule { public: TStringRef Name() const { return TStringRef::Of("Dsv"); } - void CleanupOnTerminate() const final {} + void CleanupOnTerminate() const final { + } void GetAllFunctions(IFunctionsSink& sink) const final { sink.Add(TStringRef::Of("ReadRecord")); @@ -210,39 +199,36 @@ public: } void BuildFunctionTypeInfo( - const TStringRef& name, - TType* userType, - const TStringRef& typeConfig, - ui32 flags, - 
IFunctionTypeInfoBuilder& builder) const final try - { + const TStringRef& name, + TType* userType, + const TStringRef& typeConfig, + ui32 flags, + IFunctionTypeInfoBuilder& builder) const final try { Y_UNUSED(typeConfig); bool typesOnly = (flags & TFlags::TypesOnly); if (TStringRef::Of("ReadRecord") == name) { TKsvIndexes ksvIndexes; - auto recordType = builder.Struct(3U)-> - AddField<char*>("key", &ksvIndexes.Key) - .AddField<char*>("subkey", &ksvIndexes.Subkey) - .AddField<char*>("value", &ksvIndexes.Value) - .Build(); + auto recordType = builder.Struct(3U)->AddField<char*>("key", &ksvIndexes.Key).AddField<char*>("subkey", &ksvIndexes.Subkey).AddField<char*>("value", &ksvIndexes.Value).Build(); TResultIndexes resultIndexes; resultIndexes.DictType = builder.Dict()->Key<char*>().Value<char*>().Build(); const auto structType = builder.Struct(resultIndexes.FieldsCount) - ->AddField<char*>("key", &resultIndexes.Key) - .AddField<char*>("subkey", &resultIndexes.Subkey) - .AddField("dict", resultIndexes.DictType, &resultIndexes.Dict) - .Build(); + ->AddField<char*>("key", &resultIndexes.Key) + .AddField<char*>("subkey", &resultIndexes.Subkey) + .AddField("dict", resultIndexes.DictType, &resultIndexes.Dict) + .Build(); builder.Returns(structType) - .Args()->Add(recordType).Done() - .RunConfig<TOptional<char*>>(); + .Args() + ->Add(recordType) + .Done() + .RunConfig<TOptional<char*>>(); if (!typesOnly) { builder.Implementation(new TDsvReadRecord::TFactory( - resultIndexes, ksvIndexes)); + resultIndexes, ksvIndexes)); } builder.IsStrict(); } else if (TStringRef::Of("Parse") == name) { @@ -250,8 +236,12 @@ public: auto dictType = builder.Dict()->Key<char*>().Value<char*>().Build(); builder.Returns(dictType) - .Args()->Add<char*>().Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(optionalStringType).Done() - .OptionalArgs(1); + .Args() + ->Add<char*>() + .Flags(ICallablePayload::TArgumentFlags::AutoMap) + .Add(optionalStringType) + .Done() + .OptionalArgs(1); if 
(!typesOnly) { builder.Implementation(new TDsvParse(dictType)); @@ -304,7 +294,6 @@ public: builder.Implementation(new TDsvSerialize(typeIds, structInspector.Release())); } builder.IsStrict(); - } } catch (const std::exception& e) { builder.SetError(CurrentExceptionMessage()); diff --git a/yql/essentials/udfs/logs/dsv/ya.make b/yql/essentials/udfs/logs/dsv/ya.make index 34e29294233..54b7f5c6d0c 100644 --- a/yql/essentials/udfs/logs/dsv/ya.make +++ b/yql/essentials/udfs/logs/dsv/ya.make @@ -6,6 +6,8 @@ YQL_ABI_VERSION( 0 ) +ENABLE(YQL_STYLE_CPP) + PEERDIR( library/cpp/deprecated/split ) diff --git a/yql/essentials/udfs/test/simple/simple_udf.cpp b/yql/essentials/udfs/test/simple/simple_udf.cpp index 5ae03a7dd66..cca46ab4190 100644 --- a/yql/essentials/udfs/test/simple/simple_udf.cpp +++ b/yql/essentials/udfs/test/simple/simple_udf.cpp @@ -12,7 +12,7 @@ namespace { SIMPLE_UDF(TCrash, ui64(char*)) { Y_UNUSED(valueBuilder); Y_UNUSED(args); - int *ptr = nullptr; + int* ptr = nullptr; *ptr = 1; return TUnboxedValuePod(0); } @@ -59,15 +59,17 @@ SIMPLE_UDF(TEcho, char*(TOptional<char*>)) { } } -SIMPLE_UDF_WITH_OPTIONAL_ARGS(TEchoWithPrefix, char*(char*,TOptional<char*>), 1) { - if (!args[1]) +SIMPLE_UDF_WITH_OPTIONAL_ARGS(TEchoWithPrefix, char*(char*, TOptional<char*>), 1) { + if (!args[1]) { return TUnboxedValuePod(args[0]); + } return valueBuilder->ConcatStrings(args[1], args[0]); } SIMPLE_UDF_RUN(TEchoWithRunPrefix, char*(char*), TOptional<char*>) { - if (!RunConfig) + if (!RunConfig) { return TUnboxedValuePod(args[0]); + } return valueBuilder->PrependString(RunConfig.AsStringRef(), args[0]); } @@ -102,11 +104,11 @@ using TComplexReturnTypeSignature = TDict<char*, ui32>(char*); SIMPLE_UDF(TComplexReturnType, TComplexReturnTypeSignature) { const TStringBuf s = args[0].AsStringRef(); THashMap<TString, ui32> stat; - for(auto c: s) { - ++stat[TString{c}]; + for (auto c : s) { + ++stat[TString{c}]; } auto dictBuilder = valueBuilder->NewDict(ReturnType_, 0); - for(const auto& 
[k, v]: stat) { + for (const auto& [k, v] : stat) { dictBuilder->Add(valueBuilder->NewString(k), TUnboxedValuePod{v}); } return dictBuilder->Build(); @@ -127,50 +129,40 @@ SIMPLE_UDF_WITH_OPTIONAL_ARGS(TNamedArgs, char*(ui32, TOptional<ui32>, TNamedC, return valueBuilder->NewString(res); } -UDF(TIncrement, builder.Args(2)-> - Add<ui32>().Name("Arg1").Flags(ICallablePayload::TArgumentFlags::AutoMap) - .Add(builder.SimpleType<TOptional<ui32>>()).Name("Arg2") - .Done().Returns<ui32>().OptionalArgs(1);) { +UDF(TIncrement, builder.Args(2)->Add<ui32>().Name("Arg1").Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(builder.SimpleType<TOptional<ui32>>()).Name("Arg2").Done().Returns<ui32>().OptionalArgs(1);) { Y_UNUSED(valueBuilder); return TUnboxedValuePod(args[0].Get<ui32>() + args[1].GetOrDefault<ui32>(1)); } -UDF(TIncrementOpt, builder.Args(2)-> - Add<ui32>().Name("Arg1").Flags(ICallablePayload::TArgumentFlags::AutoMap) - .Add(builder.SimpleType<TOptional<ui32>>()).Name("Arg2") - .Done().Returns(builder.SimpleType<TOptional<ui32>>()).OptionalArgs(1);) { +UDF(TIncrementOpt, builder.Args(2)->Add<ui32>().Name("Arg1").Flags(ICallablePayload::TArgumentFlags::AutoMap).Add(builder.SimpleType<TOptional<ui32>>()).Name("Arg2").Done().Returns(builder.SimpleType<TOptional<ui32>>()).OptionalArgs(1);) { Y_UNUSED(valueBuilder); if (const ui32 by = args[1].GetOrDefault<ui32>(0)) { return TUnboxedValuePod(args[0].Get<ui32>() + by); - } - else { + } else { return TUnboxedValuePod(); } } UDF_IMPL(TIncrementWithCounters, - builder.Args(1)->Add<ui32>().Done().Returns<ui32>(); - , - mutable ::NKikimr::NUdf::TCounter Counter_; - mutable ::NKikimr::NUdf::TScopedProbe Scope_; - , - Counter_ = builder.GetCounter("IncrementWithCounters_Calls", true); - Scope_ = builder.GetScopedProbe("IncrementWithCounters_Time"); - , - "" - , - "" - , - void -) { + builder.Args(1)->Add<ui32>().Done().Returns<ui32>(); + , + mutable ::NKikimr::NUdf::TCounter Counter_; + mutable ::NKikimr::NUdf::TScopedProbe 
Scope_; + , + Counter_ = builder.GetCounter("IncrementWithCounters_Calls", true); + Scope_ = builder.GetScopedProbe("IncrementWithCounters_Time"); + , + "", + "", + void) { Y_UNUSED(valueBuilder); Counter_.Inc(); - with_lock(Scope_) { + with_lock (Scope_) { return TUnboxedValuePod(args[0].Get<ui32>() + 1); } } -class TGenericAsStruct : public TBoxedValue { +class TGenericAsStruct: public TBoxedValue { public: typedef bool TTypeAwareMarker; @@ -190,7 +182,8 @@ public: TGenericAsStruct(size_t argc) : Argc_(argc) - {} + { + } static bool DeclareSignature(const TStringRef& name, TType* userType, IFunctionTypeInfoBuilder& builder, bool typesOnly) { if (Name() == name) { @@ -230,25 +223,26 @@ public: builder.Implementation(new TGenericAsStruct(argsCount)); } return true; - } - else { + } else { return false; } } + private: const size_t Argc_; }; -class TLogging : public TBoxedValue { +class TLogging: public TBoxedValue { public: TLogging(TLoggerPtr logger, TLogComponentId component) : Logger_(logger) , Component_(component) - {} + { + } TUnboxedValue Run(const IValueBuilder* valueBuilder, const TUnboxedValuePod* args) const final { Y_UNUSED(valueBuilder); - auto level = Min(args[0].Get<ui32>(),static_cast<ui32>(ELogLevel::Trace)); + auto level = Min(args[0].Get<ui32>(), static_cast<ui32>(ELogLevel::Trace)); Logger_->Log(Component_, (ELogLevel)level, args[1].AsStringRef()); return TUnboxedValue::Void(); } @@ -273,8 +267,7 @@ public: builder.Implementation(new TLogging(logger, component)); } return true; - } - else { + } else { return false; } } @@ -285,27 +278,26 @@ private: }; SIMPLE_MODULE(TSimpleUdfModule, - TCrash, - TException, - TReturnNull, - TReturnVoid, - TReturnEmpty, - TReturnBrokenInt, - TEcho, - TEchoWithPrefix, - TEchoWithRunPrefix, - TConst, - TConcat, - TRepeat, - TSleep, - TComplexReturnType, - TNamedArgs, - TIncrement, - TIncrementOpt, - TIncrementWithCounters, - TGenericAsStruct, - TLogging - ) + TCrash, + TException, + TReturnNull, + TReturnVoid, + 
TReturnEmpty, + TReturnBrokenInt, + TEcho, + TEchoWithPrefix, + TEchoWithRunPrefix, + TConst, + TConcat, + TRepeat, + TSleep, + TComplexReturnType, + TNamedArgs, + TIncrement, + TIncrementOpt, + TIncrementWithCounters, + TGenericAsStruct, + TLogging) } // namespace diff --git a/yql/essentials/udfs/test/simple/ya.make b/yql/essentials/udfs/test/simple/ya.make index 1d045c9ef98..5bef9c94891 100644 --- a/yql/essentials/udfs/test/simple/ya.make +++ b/yql/essentials/udfs/test/simple/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(simple_udf) YQL_ABI_VERSION(2 42 0) +ENABLE(YQL_STYLE_CPP) + SRCS( simple_udf.cpp ) diff --git a/yql/essentials/udfs/test/test_import/test_import_udf.cpp b/yql/essentials/udfs/test/test_import/test_import_udf.cpp index 158194b792e..7cee1a0fa8e 100644 --- a/yql/essentials/udfs/test/test_import/test_import_udf.cpp +++ b/yql/essentials/udfs/test/test_import/test_import_udf.cpp @@ -24,9 +24,8 @@ SIMPLE_UDF(TRepeat, char*(char*, ui64)) { } SIMPLE_MODULE(TTestImportUdfModule, - TConcat, - TRepeat - ) + TConcat, + TRepeat) } // namespace diff --git a/yql/essentials/udfs/test/test_import/ya.make b/yql/essentials/udfs/test/test_import/ya.make index 1adb06149f1..f6ccf6c65f2 100644 --- a/yql/essentials/udfs/test/test_import/ya.make +++ b/yql/essentials/udfs/test/test_import/ya.make @@ -1,6 +1,8 @@ YQL_UDF_CONTRIB(test_import_udf) YQL_ABI_VERSION(2 9 0) +ENABLE(YQL_STYLE_CPP) + SRCS( test_import_udf.cpp ) |