aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorepbugaev <epbugaev@yandex-team.com>2023-03-02 16:48:33 +0300
committerepbugaev <epbugaev@yandex-team.com>2023-03-02 16:48:33 +0300
commit46106987bf9ad142884f9f94b1326ded3e96f96c (patch)
treef2a662b7708d269f0475ac72fe8cea37643638d3
parent753d7fc890774ea3fcaec6eaf6d448ac2910e363 (diff)
downloadydb-46106987bf9ad142884f9f94b1326ded3e96f96c.tar.gz
New block base_url udfs
Add 4 new block base_url UDFs: Encode, Decode, GetTail, GetPath
-rw-r--r--ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h79
1 files changed, 75 insertions, 4 deletions
diff --git a/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h b/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h
index 8e3de1aca1..8479fcbcbc 100644
--- a/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h
+++ b/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h
@@ -98,7 +98,7 @@ SIMPLE_UDF(TGetPort, TOptional<ui64>(TOptional<char*>)) {
: TUnboxedValuePod();
}
-SIMPLE_UDF(TGetTail, TOptional<char*>(TOptional<char*>)) {
+BEGIN_SIMPLE_ARROW_UDF(TGetTail, TOptional<char*>(TOptional<char*>)) {
EMPTY_RESULT_ON_EMPTY_ARG(0);
const TStringBuf url(args[0].AsStringRef());
TStringBuf host, tail;
@@ -107,8 +107,24 @@ SIMPLE_UDF(TGetTail, TOptional<char*>(TOptional<char*>)) {
? valueBuilder->NewString(tail)
: valueBuilder->NewString(TString('/').append(tail));
}
+struct TGetTailKernelExec : public TUnaryKernelExec<TGetTailKernelExec> { // Per-item block kernel for TGetTail; follows the same steps as the scalar TGetTail body above.
+ template <typename TSink>
+ static void Process(TBlockItem arg, const TSink& sink) { // arg: one input item; sink: callable that receives the single result item.
+ if (!arg) { // Falsy item: forward a default-constructed TBlockItem and stop.
+ return sink(TBlockItem());
+ }
+ const TStringBuf url(arg.AsStringRef());
+ TStringBuf host, tail;
+ SplitUrlToHostAndPath(url, host, tail); // Fills host and tail from url (helper defined outside this view); host is not used afterwards.
+ if (tail.StartsWith('/')) { // tail already begins with '/': emit it unchanged.
+ return sink(TBlockItem(TStringRef(tail)));
+ }
+ sink(TBlockItem(TStringRef(TString('/').append(tail)))); // Otherwise emit '/' + tail. The temporary TString lives only until this statement ends, so this assumes sink consumes the bytes during the call - TODO confirm.
+ }
+};
+END_SIMPLE_ARROW_UDF(TGetTail, TGetTailKernelExec::Do);
-SIMPLE_UDF(TGetPath, TOptional<char*>(TOptional<char*>)) {
+BEGIN_SIMPLE_ARROW_UDF(TGetPath, TOptional<char*>(TOptional<char*>)) {
EMPTY_RESULT_ON_EMPTY_ARG(0);
const std::string_view url(args[0].AsStringRef());
std::string_view cut(CutSchemePrefix(url));
@@ -125,6 +141,28 @@ SIMPLE_UDF(TGetPath, TOptional<char*>(TOptional<char*>)) {
return valueBuilder->SubString(args[0], std::distance(url.begin(), cut.begin()), cut.length());
}
+struct TGetPathKernelExec : public TUnaryKernelExec<TGetPathKernelExec> { // Per-item block kernel for TGetPath: emits the part of the input from the first '/' (searched after CutSchemePrefix) up to the first '?' or '#'.
+ template <typename TSink>
+ static void Process(TBlockItem arg, const TSink& sink) { // arg: one input item; sink: callable that receives the single result item.
+ if (!arg) { // Falsy item: forward a default-constructed TBlockItem and stop.
+ return sink(TBlockItem());
+ }
+ const std::string_view url(arg.AsStringRef());
+ std::string_view cut(CutSchemePrefix(url)); // Presumably a sub-view of url with the scheme removed (helper defined outside this view) - TODO confirm.
+ const auto s = cut.find('/');
+ if (s == std::string_view::npos) { // No '/' found after the scheme prefix: the result is the literal "/".
+ return sink(TBlockItem(TStringRef("/")));
+ }
+
+ cut.remove_prefix(s); // Drop everything before the first '/', so cut now starts with '/'.
+ const auto end = cut.find_first_of("?#");
+ if (std::string_view::npos != end) { // Cut off everything from the first '?' or '#' onwards.
+ cut.remove_suffix(cut.size() - end);
+ }
+ sink(TBlockItem(TStringRef(cut))); // Emits the trimmed view; no new string is built in this function.
+ }
+};
+END_SIMPLE_ARROW_UDF(TGetPath, TGetPathKernelExec::Do);
SIMPLE_UDF(TGetFragment, TOptional<char*>(TOptional<char*>)) {
EMPTY_RESULT_ON_EMPTY_ARG(0);
@@ -229,7 +267,7 @@ SIMPLE_UDF(TCutQueryStringAndFragment, char*(TAutoMap<char*>)) {
return std::string_view::npos == cut ? NUdf::TUnboxedValue(args[0]) : valueBuilder->SubString(args[0], 0U, cut);
}
-SIMPLE_UDF(TEncode, TOptional<char*>(TOptional<char*>)) {
+BEGIN_SIMPLE_ARROW_UDF(TEncode, TOptional<char*>(TOptional<char*>)) {
EMPTY_RESULT_ON_EMPTY_ARG(0);
const std::string_view input(args[0].AsStringRef());
if (input.empty()) {
@@ -239,8 +277,24 @@ SIMPLE_UDF(TEncode, TOptional<char*>(TOptional<char*>)) {
UrlEscape(url);
return input == url ? NUdf::TUnboxedValue(args[0]) : valueBuilder->NewString(url);
}
+struct TEncodeKernelExec : public TUnaryKernelExec<TEncodeKernelExec> { // Per-item block kernel for TEncode: emits a copy of the input after UrlEscape has been applied to it.
+ template <typename TSink>
+ static void Process(TBlockItem arg, const TSink& sink) { // arg: one input item; sink: callable that receives the single result item.
+ if (!arg) { // Falsy item: forward a default-constructed TBlockItem and stop.
+ return sink(TBlockItem());
+ }
+ const std::string_view input(arg.AsStringRef());
+ if (input.empty()) { // An empty string is treated like a falsy item: a default-constructed TBlockItem is emitted.
+ return sink(TBlockItem());
+ }
+ TString url(input); // Work on an owned copy; UrlEscape is called on it and its return value (if any) is not used.
+ UrlEscape(url);
+ sink(TBlockItem(TStringRef(url))); // Always emits the copy; the scalar TEncode instead returns args[0] when escaping changed nothing.
+ }
+};
+END_SIMPLE_ARROW_UDF(TEncode, TEncodeKernelExec::Do);
-SIMPLE_UDF(TDecode, TOptional<char*>(TOptional<char*>)) {
+BEGIN_SIMPLE_ARROW_UDF(TDecode, TOptional<char*>(TOptional<char*>)) {
EMPTY_RESULT_ON_EMPTY_ARG(0);
const std::string_view input(args[0].AsStringRef());
if (input.empty()) {
@@ -251,6 +305,23 @@ SIMPLE_UDF(TDecode, TOptional<char*>(TOptional<char*>)) {
UrlUnescape(url);
return input == url ? NUdf::TUnboxedValue(args[0]) : valueBuilder->NewString(url);
}
+struct TDecodeKernelExec : public TUnaryKernelExec<TDecodeKernelExec> { // Per-item block kernel for TDecode: emits a copy of the input with '+' turned into ' ' and UrlUnescape applied.
+ template <typename TSink>
+ static void Process(TBlockItem arg, const TSink& sink) { // arg: one input item; sink: callable that receives the single result item.
+ if (!arg) { // Falsy item: forward a default-constructed TBlockItem and stop.
+ return sink(TBlockItem());
+ }
+ const std::string_view input(arg.AsStringRef());
+ if (input.empty()) { // An empty string is treated like a falsy item: a default-constructed TBlockItem is emitted.
+ return sink(TBlockItem());
+ }
+ TString url(input); // Work on an owned copy so the input bytes are left untouched.
+ SubstGlobal(url, '+', ' '); // Replace '+' with a space before unescaping.
+ UrlUnescape(url);
+ sink(TBlockItem(TStringRef(url))); // Always emits the copy; the scalar TDecode instead returns args[0] when decoding changed nothing.
+ }
+};
+END_SIMPLE_ARROW_UDF(TDecode, TDecodeKernelExec::Do);
SIMPLE_UDF(TIsKnownTLD, bool(TAutoMap<char*>)) {
Y_UNUSED(valueBuilder);