aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: vvvv <vvvv@ydb.tech> 2023-01-26 11:22:12 +0300
committer: vvvv <vvvv@ydb.tech> 2023-01-26 11:22:12 +0300
commit: 9d77780846b868202ab49891e643fd54b35fac3e (patch)
tree: 4e8cfec799475a5de9c6daeb00534cffce770e36
parent: aa2d3064f9f20bb26e88c7bce3845c5e91d735aa (diff)
download: ydb-9d77780846b868202ab49891e643fd54b35fac3e.tar.gz
Url::CutWWW
-rw-r--r-- ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h | 1
-rw-r--r-- ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h | 42
2 files changed, 40 insertions, 3 deletions
diff --git a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h
index 5d90cc1380d..27e2840215c 100644
--- a/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h
+++ b/ydb/library/yql/public/udf/arrow/udf_arrow_helpers.h
@@ -32,7 +32,6 @@ public:
, TypeInfoHelper_(typeInfoHelper)
{
Readers_.resize(ArgTypes_.size());
- Y_UNUSED(OutputType_);
}
IBlockReader& GetReader(ui32 index) {
diff --git a/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h b/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h
index b2c74915d97..5093f8c1540 100644
--- a/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h
+++ b/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h
@@ -232,7 +232,7 @@ SIMPLE_UDF(TCutScheme, TOptional<char*>(TOptional<char*>)) {
valueBuilder->SubString(args[0], std::distance(url.begin(), cut.begin()), cut.length());
}
-SIMPLE_UDF(TCutWWW, TOptional<char*>(TOptional<char*>)) {
+BEGIN_SIMPLE_ARROW_UDF(TCutWWW, TOptional<char*>(TOptional<char*>)) {
EMPTY_RESULT_ON_EMPTY_ARG(0);
const std::string_view url(args[0].AsStringRef());
const std::string_view cut(CutWWWPrefix(url));
@@ -240,7 +240,26 @@ SIMPLE_UDF(TCutWWW, TOptional<char*>(TOptional<char*>)) {
valueBuilder->SubString(args[0], std::distance(url.begin(), cut.begin()), cut.length());
}
-SIMPLE_UDF(TCutWWW2, TOptional<char*>(TOptional<char*>)) {
+struct TCutWWWKernelExec : public TUnaryKernelExec<TCutWWWKernelExec> { // Per-item kernel for TCutWWW; its Do member is passed to END_SIMPLE_ARROW_UDF below.
+ template <typename TSink>
+ static void Process(TBlockItem arg, const TSink& sink) { // arg: one input item; sink: callable invoked exactly once with the result item on every path.
+ if (!arg) { // Falsy input (presumably an empty optional - confirm) yields an empty item.
+ return sink(TBlockItem());
+ }
+
+ const std::string_view url(arg.AsStringRef()); // View over the argument's string contents.
+ const std::string_view cut(CutWWWPrefix(url)); // CutWWWPrefix is defined elsewhere; presumably drops a leading "www." - confirm.
+ if (cut.empty()) { // Nothing left after cutting: emit an empty item rather than an empty string.
+ return sink(TBlockItem());
+ }
+
+ sink(TBlockItem(TStringRef(cut))); // NOTE(review): cut is only a view; assumes the sink copies it or the input outlives the result - confirm.
+ }
+};
+
+END_SIMPLE_ARROW_UDF(TCutWWW, TCutWWWKernelExec::Do);
+
+BEGIN_SIMPLE_ARROW_UDF(TCutWWW2, TOptional<char*>(TOptional<char*>)) {
EMPTY_RESULT_ON_EMPTY_ARG(0);
const std::string_view url(args[0].AsStringRef());
const std::string_view cut(CutWWWNumberedPrefix(url));
@@ -248,6 +267,25 @@ SIMPLE_UDF(TCutWWW2, TOptional<char*>(TOptional<char*>)) {
valueBuilder->SubString(args[0], std::distance(url.begin(), cut.begin()), cut.length());
}
+struct TCutWWW2KernelExec : public TUnaryKernelExec<TCutWWW2KernelExec> { // Per-item kernel for TCutWWW2; its Do member is passed to END_SIMPLE_ARROW_UDF below.
+ template <typename TSink>
+ static void Process(TBlockItem arg, const TSink& sink) { // arg: one input item; sink: callable invoked exactly once with the result item on every path.
+ if (!arg) { // Falsy input (presumably an empty optional - confirm) yields an empty item.
+ return sink(TBlockItem());
+ }
+
+ const std::string_view url(arg.AsStringRef()); // View over the argument's string contents.
+ const std::string_view cut(CutWWWNumberedPrefix(url)); // Helper defined elsewhere; presumably also drops numbered prefixes such as "www2." - confirm.
+ if (cut.empty()) { // Nothing left after cutting: emit an empty item rather than an empty string.
+ return sink(TBlockItem());
+ }
+
+ sink(TBlockItem(TStringRef(cut))); // NOTE(review): cut is only a view; assumes the sink copies it or the input outlives the result - confirm.
+ }
+};
+
+END_SIMPLE_ARROW_UDF(TCutWWW2, TCutWWW2KernelExec::Do);
+
SIMPLE_UDF(TCutQueryStringAndFragment, char*(TAutoMap<char*>)) {
const std::string_view input(args[0].AsStringRef());
const auto cut = input.find_first_of("?#");