diff options
author | vvvv <vvvv@ydb.tech> | 2023-01-26 11:22:12 +0300 |
---|---|---|
committer | vvvv <vvvv@ydb.tech> | 2023-01-26 11:22:12 +0300 |
commit | 9d77780846b868202ab49891e643fd54b35fac3e (patch) | |
tree | 4e8cfec799475a5de9c6daeb00534cffce770e36 | |
parent | aa2d3064f9f20bb26e88c7bce3845c5e91d735aa (diff) | |
download | ydb-9d77780846b868202ab49891e643fd54b35fac3e.tar.gz |
Url::CutWWW
-rw-r--r-- | ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h | 1 | ||||
-rw-r--r-- | ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h | 42 |
2 files changed, 40 insertions, 3 deletions
diff --git a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h index 5d90cc1380d..27e2840215c 100644 --- a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h +++ b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h @@ -32,7 +32,6 @@ public: , TypeInfoHelper_(typeInfoHelper) { Readers_.resize(ArgTypes_.size()); - Y_UNUSED(OutputType_); } IBlockReader& GetReader(ui32 index) { diff --git a/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h b/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h index b2c74915d97..5093f8c1540 100644 --- a/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h +++ b/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h @@ -232,7 +232,7 @@ SIMPLE_UDF(TCutScheme, TOptional<char*>(TOptional<char*>)) { valueBuilder->SubString(args[0], std::distance(url.begin(), cut.begin()), cut.length()); } -SIMPLE_UDF(TCutWWW, TOptional<char*>(TOptional<char*>)) { +BEGIN_SIMPLE_ARROW_UDF(TCutWWW, TOptional<char*>(TOptional<char*>)) { EMPTY_RESULT_ON_EMPTY_ARG(0); const std::string_view url(args[0].AsStringRef()); const std::string_view cut(CutWWWPrefix(url)); @@ -240,7 +240,26 @@ SIMPLE_UDF(TCutWWW, TOptional<char*>(TOptional<char*>)) { valueBuilder->SubString(args[0], std::distance(url.begin(), cut.begin()), cut.length()); } -SIMPLE_UDF(TCutWWW2, TOptional<char*>(TOptional<char*>)) { +struct TCutWWWKernelExec : public TUnaryKernelExec<TCutWWWKernelExec> { + template <typename TSink> + static void Process(TBlockItem arg, const TSink& sink) { + if (!arg) { + return sink(TBlockItem()); + } + + const std::string_view url(arg.AsStringRef()); + const std::string_view cut(CutWWWPrefix(url)); + if (cut.empty()) { + return sink(TBlockItem()); + } + + sink(TBlockItem(TStringRef(cut))); + } +}; + +END_SIMPLE_ARROW_UDF(TCutWWW, TCutWWWKernelExec::Do); + +BEGIN_SIMPLE_ARROW_UDF(TCutWWW2, TOptional<char*>(TOptional<char*>)) { EMPTY_RESULT_ON_EMPTY_ARG(0); const std::string_view url(args[0].AsStringRef()); const std::string_view cut(CutWWWNumberedPrefix(url)); @@ -248,6 +267,25 @@ SIMPLE_UDF(TCutWWW2, TOptional<char*>(TOptional<char*>)) { valueBuilder->SubString(args[0], std::distance(url.begin(), cut.begin()), cut.length()); } +struct TCutWWW2KernelExec : public TUnaryKernelExec<TCutWWW2KernelExec> { + template <typename TSink> + static void Process(TBlockItem arg, const TSink& sink) { + if (!arg) { + return sink(TBlockItem()); + } + + const std::string_view url(arg.AsStringRef()); + const std::string_view cut(CutWWWNumberedPrefix(url)); + if (cut.empty()) { + return sink(TBlockItem()); + } + + sink(TBlockItem(TStringRef(cut))); + } +}; + +END_SIMPLE_ARROW_UDF(TCutWWW2, TCutWWW2KernelExec::Do); + SIMPLE_UDF(TCutQueryStringAndFragment, char*(TAutoMap<char*>)) { const std::string_view input(args[0].AsStringRef()); const auto cut = input.find_first_of("?#"); |