about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: epbugaev <epbugaev@yandex-team.com> 2023-02-22 18:04:53 +0300
committer: epbugaev <epbugaev@yandex-team.com> 2023-02-22 18:04:53 +0300
commit: cb72ea5e70afde62df86253b424ae15fd5200c21 (patch)
tree: 7d66960bd26b61eee18eb65f3e9e5e4053c48c8a
parent: 65963701e9f4d71fb82dd81e0987021612296adb (diff)
download: ydb-cb72ea5e70afde62df86253b424ae15fd5200c21.tar.gz
YQL block cut scheme udf
Refactor url block functions and add block CutScheme
-rw-r--r-- ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h 114
1 files changed, 28 insertions, 86 deletions
diff --git a/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h b/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h
index 5093f8c154..ddb38c38ce 100644
--- a/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h
+++ b/ydb/library/yql/udfs/common/url_base/lib/url_base_udf.h
@@ -26,6 +26,30 @@ inline bool PrepareUrl(const std::string_view& keyStr, TUri& parser) {
return parser.ParseAbs(keyStr, parseFlags) == TUri::ParsedOK;
}
+#define ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(udfName, functionName) \
+ BEGIN_SIMPLE_ARROW_UDF(udfName, TOptional<char*>(TOptional<char*>)) { \
+ EMPTY_RESULT_ON_EMPTY_ARG(0); \
+ const std::string_view url(args[0].AsStringRef()); \
+ const std::string_view res(functionName(url)); \
+ return res.empty() ? TUnboxedValue() : \
+ valueBuilder->SubString(args[0], std::distance(url.begin(), res.begin()), res.size()); \
+ } \
+ struct udfName##KernelExec : public TUnaryKernelExec<udfName##KernelExec> { \
+ template <typename TSink> \
+ static void Process(TBlockItem arg, const TSink& sink) { \
+ if (!arg) { \
+ return sink(TBlockItem()); \
+ } \
+ const std::string_view url(arg.AsStringRef()); \
+ const std::string_view res(functionName(url)); \
+ if (res.empty()) { \
+ return sink(TBlockItem()); \
+ } \
+ sink(TBlockItem(TStringRef(res))); \
+ } \
+ }; \
+ END_SIMPLE_ARROW_UDF(udfName, udfName##KernelExec::Do);
+
SIMPLE_UDF(TNormalize, TOptional<char*>(TOptional<char*>)) {
EMPTY_RESULT_ON_EMPTY_ARG(0);
TUri url;
@@ -41,32 +65,7 @@ SIMPLE_UDF(TGetScheme, char*(TAutoMap<char*>)) {
return valueBuilder->SubString(args[0], std::distance(url.begin(), prefix.begin()), prefix.size());
}
-BEGIN_SIMPLE_ARROW_UDF(TGetHost, TOptional<char*>(TOptional<char*>)) {
- EMPTY_RESULT_ON_EMPTY_ARG(0);
- const std::string_view url(args[0].AsStringRef());
- const std::string_view host(GetOnlyHost(url));
- return host.empty() ? TUnboxedValue() :
- valueBuilder->SubString(args[0], std::distance(url.begin(), host.begin()), host.size());
-}
-
-struct TGetHostKernelExec : public TUnaryKernelExec<TGetHostKernelExec> {
- template <typename TSink>
- static void Process(TBlockItem arg, const TSink& sink) {
- if (!arg) {
- return sink(TBlockItem());
- }
-
- const std::string_view url(arg.AsStringRef());
- const std::string_view host(GetOnlyHost(url));
- if (host.empty()) {
- return sink(TBlockItem());
- }
-
- sink(TBlockItem(TStringRef(host)));
- }
-};
-
-END_SIMPLE_ARROW_UDF(TGetHost, TGetHostKernelExec::Do);
+ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TGetHost, GetOnlyHost)
SIMPLE_UDF(TGetHostPort, TOptional<char*>(TOptional<char*>)) {
EMPTY_RESULT_ON_EMPTY_ARG(0);
@@ -224,67 +223,11 @@ SIMPLE_UDF(TGetCGIParam, TOptional<char*>(TOptional<char*>, char*)) {
return TUnboxedValue();
}
-SIMPLE_UDF(TCutScheme, TOptional<char*>(TOptional<char*>)) {
- EMPTY_RESULT_ON_EMPTY_ARG(0);
- const std::string_view url(args[0].AsStringRef());
- const std::string_view cut(CutSchemePrefix(url));
- return cut.empty() ? TUnboxedValue() :
- valueBuilder->SubString(args[0], std::distance(url.begin(), cut.begin()), cut.length());
-}
-
-BEGIN_SIMPLE_ARROW_UDF(TCutWWW, TOptional<char*>(TOptional<char*>)) {
- EMPTY_RESULT_ON_EMPTY_ARG(0);
- const std::string_view url(args[0].AsStringRef());
- const std::string_view cut(CutWWWPrefix(url));
- return cut.empty() ? TUnboxedValue() :
- valueBuilder->SubString(args[0], std::distance(url.begin(), cut.begin()), cut.length());
-}
-
-struct TCutWWWKernelExec : public TUnaryKernelExec<TCutWWWKernelExec> {
- template <typename TSink>
- static void Process(TBlockItem arg, const TSink& sink) {
- if (!arg) {
- return sink(TBlockItem());
- }
-
- const std::string_view url(arg.AsStringRef());
- const std::string_view cut(CutWWWPrefix(url));
- if (cut.empty()) {
- return sink(TBlockItem());
- }
+ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TCutScheme, CutSchemePrefix)
- sink(TBlockItem(TStringRef(cut)));
- }
-};
+ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TCutWWW, CutWWWPrefix)
-END_SIMPLE_ARROW_UDF(TCutWWW, TCutWWWKernelExec::Do);
-
-BEGIN_SIMPLE_ARROW_UDF(TCutWWW2, TOptional<char*>(TOptional<char*>)) {
- EMPTY_RESULT_ON_EMPTY_ARG(0);
- const std::string_view url(args[0].AsStringRef());
- const std::string_view cut(CutWWWNumberedPrefix(url));
- return cut.empty() ? TUnboxedValue() :
- valueBuilder->SubString(args[0], std::distance(url.begin(), cut.begin()), cut.length());
-}
-
-struct TCutWWW2KernelExec : public TUnaryKernelExec<TCutWWW2KernelExec> {
- template <typename TSink>
- static void Process(TBlockItem arg, const TSink& sink) {
- if (!arg) {
- return sink(TBlockItem());
- }
-
- const std::string_view url(arg.AsStringRef());
- const std::string_view cut(CutWWWNumberedPrefix(url));
- if (cut.empty()) {
- return sink(TBlockItem());
- }
-
- sink(TBlockItem(TStringRef(cut)));
- }
-};
-
-END_SIMPLE_ARROW_UDF(TCutWWW2, TCutWWW2KernelExec::Do);
+ARROW_UDF_SINGLE_STRING_FUNCTION_FOR_URL(TCutWWW2, CutWWWNumberedPrefix)
SIMPLE_UDF(TCutQueryStringAndFragment, char*(TAutoMap<char*>)) {
const std::string_view input(args[0].AsStringRef());
@@ -389,4 +332,3 @@ SIMPLE_UDF(TCanBePunycodeHostName, bool(TAutoMap<char*>)) {
TQueryStringToList, \
TQueryStringToDict, \
TBuildQueryString
-