aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: amnosov <amnosov@yandex-team.com>, 2022-10-21 13:42:26 +0300
committer: amnosov <amnosov@yandex-team.com>, 2022-10-21 13:42:26 +0300
commit: 3f1c6722c965cb3b5406ab0fded6f99a42384206 (patch)
tree: 4a614495a751851b05119f2395415d00c72bfe9d
parent: 2dd428087d1848b5c885915e96d0a9a3191371e6 (diff)
download: ydb-3f1c6722c965cb3b5406ab0fded6f99a42384206.tar.gz
Unicode::Strip
Added the Unicode::Strip UDF and tests for it.
-rw-r--r-- ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h | 24
1 files changed, 23 insertions, 1 deletions
diff --git a/ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h b/ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h
index 1506456f500..947748dbf58 100644
--- a/ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h
+++ b/ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h
@@ -13,12 +13,27 @@
#include <util/string/split.h>
#include <util/string/subst.h>
#include <util/charset/wide.h>
+#include <util/string/strip.h>
+#include <util/charset/unidata.h>
using namespace NYql;
using namespace NUdf;
using namespace NUnicode;
namespace {
+
+ template <class It>
+ struct TIsUnicodeSpaceAdapter {
+ bool operator()(const It& it) const noexcept {
+ return IsSpace(*it);
+ }
+ };
+
+ template <class It>
+ TIsUnicodeSpaceAdapter<It> IsUnicodeSpaceAdapter(It) {
+ return {};
+ }
+
#define NORMALIZE_UDF_MAP(XX) \
XX(Normalize, NFC) \
XX(NormalizeNFD, NFD) \
@@ -428,6 +443,12 @@ namespace {
return *args;
}
+ SIMPLE_UDF(TStrip, TUtf8(TAutoMap<TUtf8>)) {
+ const TUtf32String input = UTF8ToUTF32<true>(args[0].AsStringRef());
+ const auto& result = StripString(input, IsUnicodeSpaceAdapter(input.begin()));
+ return valueBuilder->NewString(WideToUTF8(result));
+ }
+
#define REGISTER_NORMALIZE_UDF(name, mode) T##name,
#define EXPORTED_UNICODE_BASE_UDF \
NORMALIZE_UDF_MAP(REGISTER_NORMALIZE_UDF) \
@@ -452,5 +473,6 @@ namespace {
TToUpper, \
TToTitle, \
TToUint64, \
- TTryToUint64
+ TTryToUint64, \
+ TStrip
}