aboutsummaryrefslogtreecommitdiffstats
path: root/util
diff options
context:
space:
mode:
author Oleg Sidorkin <osidorkin@gmail.com> 2022-04-16 13:43:06 +0300
committer Oleg Sidorkin <osidorkin@gmail.com> 2022-04-16 13:43:06 +0300
commit c1e51b5d64d252145765d1529a71271ee4d33812 (patch)
tree aee1d5a24b2035a857fdf47ac4788b2278a088f5 /util
parent 09be10fcbbf04de7c78405d5bf7c18117a1b339f (diff)
download ydb-c1e51b5d64d252145765d1529a71271ee4d33812.tar.gz
Add Collapse variant for utf-32 strings
ref:5897581c4d2be8d8775525221d2d910c04d788b8
Diffstat (limited to 'util')
-rw-r--r-- util/string/strip.cpp 4
-rw-r--r-- util/string/strip.h 16
-rw-r--r-- util/string/strip_ut.cpp 17
3 files changed, 32 insertions, 5 deletions
diff --git a/util/string/strip.cpp b/util/string/strip.cpp
index c921571cf0..1cf5e33096 100644
--- a/util/string/strip.cpp
+++ b/util/string/strip.cpp
@@ -3,10 +3,6 @@
#include <util/string/reverse.h>
-bool Collapse(const TString& from, TString& to, size_t maxLen) {
- return CollapseImpl<TString, bool (*)(unsigned char)>(from, to, maxLen, IsAsciiSpace);
-}
-
void CollapseText(const TString& from, TString& to, size_t maxLen) {
Collapse(from, to, maxLen);
StripInPlace(to);
diff --git a/util/string/strip.h b/util/string/strip.h
index d5ef6da96d..a65d3fe069 100644
--- a/util/string/strip.h
+++ b/util/string/strip.h
@@ -230,13 +230,27 @@ bool CollapseImpl(const TStringType& from, TStringType& to, size_t maxLen, const
return false;
}
-bool Collapse(const TString& from, TString& to, size_t maxLen = 0);
+template <class TStringType, class TWhitespaceFunc>
+std::enable_if_t<std::is_invocable_v<TWhitespaceFunc, typename TStringType::value_type>, bool> Collapse(
+ const TStringType& from, TStringType& to, TWhitespaceFunc isWhitespace, size_t maxLen = 0)
+{
+ return CollapseImpl(from, to, maxLen, isWhitespace);
+}
+
+inline bool Collapse(const TString& from, TString& to, size_t maxLen = 0) {
+ return Collapse(from, to, IsAsciiSpace<typename TString::value_type>, maxLen);
+}
/// Replaces several consecutive space symbols with one (processing is limited to maxLen bytes)
inline TString& CollapseInPlace(TString& s, size_t maxLen = 0) {
Collapse(s, s, maxLen);
return s;
}
+template <class TStringType, class TWhitespaceFunc>
+inline TStringType& CollapseInPlace(TStringType& s, TWhitespaceFunc isWhitespace, size_t maxLen = 0) {
+ Collapse(s, s, isWhitespace, maxLen);
+ return s;
+}
/// Replaces several consecutive space symbols with one (processing is limited to maxLen bytes)
inline TString Collapse(const TString& s, size_t maxLen = 0) Y_WARN_UNUSED_RESULT;
diff --git a/util/string/strip_ut.cpp b/util/string/strip_ut.cpp
index d1029d1498..df4f9bc57d 100644
--- a/util/string/strip_ut.cpp
+++ b/util/string/strip_ut.cpp
@@ -107,6 +107,23 @@ Y_UNIT_TEST_SUITE(TStripStringTest) {
u"abc");
}
+ Y_UNIT_TEST(TestCollapseUtf32) {
+ TUtf32String s;
+ Collapse(UTF8ToUTF32<true>(" 123 456 "), s, IsWhitespace);
+ UNIT_ASSERT(s == UTF8ToUTF32<true>(" 123 456 "));
+ Collapse(UTF8ToUTF32<true>(" 123 456 "), s, IsWhitespace, 10);
+ UNIT_ASSERT(s == UTF8ToUTF32<true>(" 123 456 "));
+
+ s = UTF8ToUTF32<true>(" a b c ");
+ TUtf32String s2 = s;
+ CollapseInPlace(s2, IsWhitespace);
+
+ UNIT_ASSERT(s == s2);
+#ifndef TSTRING_IS_STD_STRING
+ UNIT_ASSERT(s.c_str() == s2.c_str()); // Collapse() does not change the string at all
+#endif
+ }
+
Y_UNIT_TEST(TestCollapse) {
TString s;
Collapse(TString(" 123 456 "), s);