diff options
author | arcadia-devtools <arcadia-devtools@yandex-team.ru> | 2022-02-17 12:04:09 +0300 |
---|---|---|
committer | arcadia-devtools <arcadia-devtools@yandex-team.ru> | 2022-02-17 12:04:09 +0300 |
commit | 2c8e314f8fff8633fe2cf026badfbf6180845ae0 (patch) | |
tree | c3b650d13934ec1315e3660d60fd2275f09b03a7 /library/cpp/case_insensitive_string | |
parent | a49ae9d891c35087b242c854f69880fd9fecbddd (diff) | |
download | ydb-2c8e314f8fff8633fe2cf026badfbf6180845ae0.tar.gz |
intermediate changes
ref:d5f945ecdc1f5af1ad57e12787c6b8ed1a9f0f12
Diffstat (limited to 'library/cpp/case_insensitive_string')
7 files changed, 239 insertions, 0 deletions
diff --git a/library/cpp/case_insensitive_string/case_insensitive_char_traits.cpp b/library/cpp/case_insensitive_string/case_insensitive_char_traits.cpp
new file mode 100644
index 0000000000..14e6d1d51f
--- /dev/null
+++ b/library/cpp/case_insensitive_string/case_insensitive_char_traits.cpp
@@ -0,0 +1,41 @@
+#include "case_insensitive_char_traits.h"
+#include "case_insensitive_string.h"
+
+#include <util/string/escape.h>
+
+// Three-way, case-insensitive comparison of the first n characters of s1 and s2.
+// Returns -1, 0 or 1; characters are folded with to_upper() before being compared.
+int TCaseInsensitiveCharTraits::compare(const char* s1, const char* s2, std::size_t n) {
+    for (; n != 0; --n, ++s1, ++s2) {
+        // Fold each character once instead of once per comparison.
+        const auto u1 = to_upper(*s1);
+        const auto u2 = to_upper(*s2);
+        if (u1 < u2) {
+            return -1;
+        }
+        if (u1 > u2) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+// Returns a pointer to the first of the n characters at s that equals a
+// ignoring case, or nullptr when there is no such character.
+const char* TCaseInsensitiveCharTraits::find(const char* s, std::size_t n, char a) {
+    const auto ua = to_upper(a);
+    for (; n != 0; --n, ++s) {
+        if (to_upper(*s) == ua) {
+            return s;
+        }
+    }
+    return nullptr;
+}
+
+// Escapes str with the (data, size) overload of EscapeC and builds a
+// TCaseInsensitiveString from the escaped characters.
+TCaseInsensitiveString EscapeC(const TCaseInsensitiveString& str) {
+    const auto result = EscapeC(str.data(), str.size());
+    return {result.data(), result.size()};
+}
+
diff --git a/library/cpp/case_insensitive_string/case_insensitive_char_traits.h b/library/cpp/case_insensitive_string/case_insensitive_char_traits.h
new file mode 100644
index 0000000000..2717893c10
--- /dev/null
+++ b/library/cpp/case_insensitive_string/case_insensitive_char_traits.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include <contrib/libs/libc_compat/string.h>
+
+#include <cctype>
+#include <string>
+
+// Character traits for char whose eq/lt/compare/find ignore case (characters are
+// folded with std::toupper). assign/copy/length/move come from std::char_traits<char>.
+struct TCaseInsensitiveCharTraits : private std::char_traits<char> {
+    static bool eq(char c1, char c2) {
+        return to_upper(c1) == to_upper(c2);
+    }
+
+    static bool lt(char c1, char c2) {
+        return to_upper(c1) < to_upper(c2);
+    }
+
+    // Case-insensitive three-way comparison of the first n characters: -1, 0 or 1.
+    static int compare(const char* s1, const char* s2, std::size_t n);
+
+    // First of the n characters at s equal to a ignoring case, or nullptr.
+    static const char* find(const char* s, std::size_t n, char a);
+
+    using std::char_traits<char>::assign;
+    using std::char_traits<char>::char_type;
+    using std::char_traits<char>::copy;
+    using std::char_traits<char>::length;
+    using std::char_traits<char>::move;
+
+private:
+    // std::toupper requires a value representable as unsigned char, hence the cast
+    // on the way in. The result stays unsigned so that lt() and compare() order
+    // bytes the way std::char_traits<char>::lt does, regardless of whether plain
+    // char is signed on the target platform.
+    static unsigned char to_upper(char ch) {
+        return static_cast<unsigned char>(std::toupper(static_cast<unsigned char>(ch)));
+    }
+};
diff --git a/library/cpp/case_insensitive_string/case_insensitive_string.cpp b/library/cpp/case_insensitive_string/case_insensitive_string.cpp
new file mode 100644
index 0000000000..16c0f5ff7a
--- /dev/null
+++ b/library/cpp/case_insensitive_string/case_insensitive_string.cpp
@@ -0,0 +1,29 @@
+#include "case_insensitive_string.h"
+
+#include <library/cpp/digest/murmur/murmur.h>
+
+#include <cctype>
+
+// Feeds the lower-cased characters of str, one at a time, into a murmur hash,
+// so the result does not depend on the case of the input characters.
+size_t THash<TCaseInsensitiveStringBuf>::operator()(TCaseInsensitiveStringBuf str) const noexcept {
+    TMurmurHash2A<size_t> hash;
+    for (size_t i = 0; i < str.size(); ++i) {
+        // std::tolower is undefined for negative arguments other than EOF, so go
+        // through unsigned char first (plain char may be signed).
+        const char lower = static_cast<char>(std::tolower(static_cast<unsigned char>(str[i])));
+        hash.Update(&lower, 1);
+    }
+    return hash.Value();
+}
+
+// Stream output writes the stored characters as they are, without case folding.
+template <>
+void Out<TCaseInsensitiveString>(IOutputStream& o, const TCaseInsensitiveString& p) {
+    o.Write(p.data(), p.size());
+}
+
+template <>
+void Out<TCaseInsensitiveStringBuf>(IOutputStream& o, const TCaseInsensitiveStringBuf& p) {
+    o.Write(p.data(), p.size());
+}
diff --git a/library/cpp/case_insensitive_string/case_insensitive_string.h b/library/cpp/case_insensitive_string/case_insensitive_string.h
new file mode 100644
index 0000000000..443de3e5f9
--- /dev/null
+++ b/library/cpp/case_insensitive_string/case_insensitive_string.h
@@ -0,0 +1,39 @@
+#pragma once
+
+#include "case_insensitive_char_traits.h"
+
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+#include <util/generic/hash.h>
+#include <util/string/split.h>
+
+// String and string-buffer types parameterised with TCaseInsensitiveCharTraits.
+using TCaseInsensitiveString = TBasicString<char, TCaseInsensitiveCharTraits>;
+using TCaseInsensitiveStringBuf = TBasicStringBuf<char, TCaseInsensitiveCharTraits>;
+
+// Hash for the case-insensitive types; defined in case_insensitive_string.cpp.
+template <>
+struct THash<TCaseInsensitiveStringBuf> {
+    size_t operator()(TCaseInsensitiveStringBuf str) const noexcept;
+};
+
+template <>
+struct THash<TCaseInsensitiveString> : THash<TCaseInsensitiveStringBuf> {};
+
+namespace NStringSplitPrivate {
+
+    template<>
+    struct TStringBufOfImpl<TCaseInsensitiveStringBuf> {
+        /*
+         * WARN:
+         * StringSplitter does not use TCharTraits properly.
+         * Splitting such strings is explicitly disabled.
+         */
+        // using type = TCaseInsensitiveStringBuf;
+    };
+
+    template<>
+    struct TStringBufOfImpl<TCaseInsensitiveString> : TStringBufOfImpl<TCaseInsensitiveStringBuf> {
+    };
+
+} // namespace NStringSplitPrivate
diff --git a/library/cpp/case_insensitive_string/case_insensitive_string_ut.cpp b/library/cpp/case_insensitive_string/case_insensitive_string_ut.cpp
new file mode 100644
index 0000000000..49f9c59c95
--- /dev/null
+++ b/library/cpp/case_insensitive_string/case_insensitive_string_ut.cpp
@@ -0,0 +1,65 @@
+#include "case_insensitive_string.h"
+
+#include <util/generic/string_ut.h>
+
+class TCaseInsensitiveStringTest : public TTestBase, private TStringTestImpl<TCaseInsensitiveString, TTestData<char>> {
+public:
+    UNIT_TEST_SUITE(TCaseInsensitiveStringTest);
+    UNIT_TEST(TestOperators);
+    UNIT_TEST(TestOperatorsCI);
+
+    UNIT_TEST_SUITE_END();
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TCaseInsensitiveStringTest);
+
+Y_UNIT_TEST_SUITE(TCaseInsensitiveStringTestEx) {
+    Y_UNIT_TEST(BasicTString) {
+        TCaseInsensitiveString foo("foo");
+        TCaseInsensitiveString FOO("FOO");
+        TCaseInsensitiveString Bar("Bar");
+        TCaseInsensitiveString bAR("bAR");
+
+        UNIT_ASSERT_EQUAL(foo, FOO);
+        UNIT_ASSERT_EQUAL(Bar, bAR);
+
+        constexpr TCaseInsensitiveStringBuf foobar("foobar");
+        UNIT_ASSERT(foobar.StartsWith(foo));
+        UNIT_ASSERT(foobar.StartsWith(FOO));
+        UNIT_ASSERT(foobar.EndsWith(Bar));
+        UNIT_ASSERT(foobar.EndsWith(bAR));
+        UNIT_ASSERT(foobar.Contains(FOO));
+        UNIT_ASSERT(foobar.Contains(Bar));
+    }
+
+    Y_UNIT_TEST(BasicStdString) {
+        using TCaseInsensitiveStdString = std::basic_string<char, TCaseInsensitiveCharTraits>;
+        using TCaseInsensitiveStringView = std::basic_string_view<char, TCaseInsensitiveCharTraits>;
+
+        TCaseInsensitiveStdString foo("foo");
+        TCaseInsensitiveStdString FOO("FOO");
+        TCaseInsensitiveStdString Bar("Bar");
+        TCaseInsensitiveStdString bAR("bAR");
+
+        UNIT_ASSERT_EQUAL(foo, FOO);
+        UNIT_ASSERT_EQUAL(Bar, bAR);
+
+        constexpr TCaseInsensitiveStringView foobar("foobar");
+        UNIT_ASSERT(foobar.starts_with(foo));
+        UNIT_ASSERT(foobar.starts_with(FOO));
+        UNIT_ASSERT(foobar.ends_with(Bar));
+        UNIT_ASSERT(foobar.ends_with(bAR));
+        //TODO: test contains after C++23
+    }
+
+/*
+    Y_UNIT_TEST(TestSplit) {
+        TCaseInsensitiveStringBuf input("splitAmeAbro");
+        TVector<TCaseInsensitiveStringBuf> expected{"split", "me", "bro"};
+
+        TVector<TCaseInsensitiveStringBuf> split = StringSplitter(input).Split('a');
+
+        UNIT_ASSERT_VALUES_EQUAL(split, expected);
+    }
+*/
+}
diff --git a/library/cpp/case_insensitive_string/ut/ya.make b/library/cpp/case_insensitive_string/ut/ya.make
new file mode 100644
index 0000000000..b209d4571e
--- /dev/null
+++ b/library/cpp/case_insensitive_string/ut/ya.make
@@ -0,0 +1,9 @@
+OWNER(eeight)
+
+UNITTEST_FOR(library/cpp/case_insensitive_string)
+
+SRCS(
+    case_insensitive_string_ut.cpp
+)
+
+END()
diff --git a/library/cpp/case_insensitive_string/ya.make b/library/cpp/case_insensitive_string/ya.make
new file mode 100644
index 0000000000..dfca415e6b
--- /dev/null
+++ b/library/cpp/case_insensitive_string/ya.make
@@ -0,0 +1,17 @@
+OWNER(eeight)
+
+LIBRARY()
+
+SRCS(
+    case_insensitive_char_traits.cpp
+    case_insensitive_string.cpp
+)
+
+PEERDIR(
+    contrib/libs/libc_compat
+    library/cpp/digest/murmur
+)
+
+END()
+
+RECURSE_FOR_TESTS(ut)