diff options
author | amnosov <amnosov@yandex-team.com> | 2022-10-21 13:42:26 +0300 |
---|---|---|
committer | amnosov <amnosov@yandex-team.com> | 2022-10-21 13:42:26 +0300 |
commit | 3f1c6722c965cb3b5406ab0fded6f99a42384206 (patch) | |
tree | 4a614495a751851b05119f2395415d00c72bfe9d | |
parent | 2dd428087d1848b5c885915e96d0a9a3191371e6 (diff) | |
download | ydb-3f1c6722c965cb3b5406ab0fded6f99a42384206.tar.gz |
Unicode::Strip
Add the Unicode::Strip UDF and tests for it.
-rw-r--r-- | ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h | 24 |
1 file changed, 23 insertions, 1 deletion
```diff
diff --git a/ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h b/ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h
index 1506456f500..947748dbf58 100644
--- a/ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h
+++ b/ydb/library/yql/udfs/common/unicode_base/lib/unicode_base_udf.h
@@ -13,12 +13,27 @@
 #include <util/string/split.h>
 #include <util/string/subst.h>
 #include <util/charset/wide.h>
+#include <util/string/strip.h>
+#include <util/charset/unidata.h>
 
 using namespace NYql;
 using namespace NUdf;
 using namespace NUnicode;
 
 namespace {
+
+    template <class It>
+    struct TIsUnicodeSpaceAdapter {
+        bool operator()(const It& it) const noexcept {
+            return IsSpace(*it);
+        }
+    };
+
+    template <class It>
+    TIsUnicodeSpaceAdapter<It> IsUnicodeSpaceAdapter(It) {
+        return {};
+    }
+
 #define NORMALIZE_UDF_MAP(XX) \
     XX(Normalize, NFC) \
     XX(NormalizeNFD, NFD) \
@@ -428,6 +443,12 @@ namespace {
         return *args;
     }
 
+    SIMPLE_UDF(TStrip, TUtf8(TAutoMap<TUtf8>)) {
+        const TUtf32String input = UTF8ToUTF32<true>(args[0].AsStringRef());
+        const auto& result = StripString(input, IsUnicodeSpaceAdapter(input.begin()));
+        return valueBuilder->NewString(WideToUTF8(result));
+    }
+
 #define REGISTER_NORMALIZE_UDF(name, mode) T##name,
 #define EXPORTED_UNICODE_BASE_UDF \
     NORMALIZE_UDF_MAP(REGISTER_NORMALIZE_UDF) \
@@ -452,5 +473,6 @@ namespace {
     TToUpper, \
     TToTitle, \
     TToUint64, \
-    TTryToUint64
+    TTryToUint64, \
+    TStrip
 }
```