about | summary | refs | log | tree | commit | diff | stats
path: root/library/cpp/case_insensitive_string
diff options
context:
space:
mode:
author arcadia-devtools <arcadia-devtools@yandex-team.ru> 2022-02-17 12:04:09 +0300
committer arcadia-devtools <arcadia-devtools@yandex-team.ru> 2022-02-17 12:04:09 +0300
commit 2c8e314f8fff8633fe2cf026badfbf6180845ae0 (patch)
tree c3b650d13934ec1315e3660d60fd2275f09b03a7 /library/cpp/case_insensitive_string
parent a49ae9d891c35087b242c854f69880fd9fecbddd (diff)
download ydb-2c8e314f8fff8633fe2cf026badfbf6180845ae0.tar.gz
intermediate changes
ref:d5f945ecdc1f5af1ad57e12787c6b8ed1a9f0f12
Diffstat (limited to 'library/cpp/case_insensitive_string')
-rw-r--r-- library/cpp/case_insensitive_string/case_insensitive_char_traits.cpp 34
-rw-r--r-- library/cpp/case_insensitive_string/case_insensitive_char_traits.h 30
-rw-r--r-- library/cpp/case_insensitive_string/case_insensitive_string.cpp 22
-rw-r--r-- library/cpp/case_insensitive_string/case_insensitive_string.h 37
-rw-r--r-- library/cpp/case_insensitive_string/case_insensitive_string_ut.cpp 65
-rw-r--r-- library/cpp/case_insensitive_string/ut/ya.make 9
-rw-r--r-- library/cpp/case_insensitive_string/ya.make 17
7 files changed, 214 insertions, 0 deletions
diff --git a/library/cpp/case_insensitive_string/case_insensitive_char_traits.cpp b/library/cpp/case_insensitive_string/case_insensitive_char_traits.cpp
new file mode 100644
index 0000000000..14e6d1d51f
--- /dev/null
+++ b/library/cpp/case_insensitive_string/case_insensitive_char_traits.cpp
@@ -0,0 +1,34 @@
+#include "case_insensitive_char_traits.h"
+#include "case_insensitive_string.h"
+
+#include <util/string/escape.h>
+
+int TCaseInsensitiveCharTraits::compare(const char* s1, const char* s2, std::size_t n) {
+    // Three-way, case-insensitive comparison of the first n chars of s1 and s2.
+    for (std::size_t pos = 0; pos != n; ++pos) {
+        const char lhs = to_upper(s1[pos]);
+        const char rhs = to_upper(s2[pos]);
+        if (lhs != rhs) {
+            // The first differing upper-cased pair decides the result.
+            return lhs < rhs ? -1 : 1;
+        }
+    }
+    // No difference within the first n positions (also the n == 0 case).
+    return 0;
+}
+
+const char* TCaseInsensitiveCharTraits::find(const char* s, std::size_t n, char a) {
+    // First of the n chars at s that equals a ignoring case, or nullptr if none.
+    const char needle = to_upper(a);
+    for (std::size_t pos = 0; pos != n; ++pos) {
+        if (to_upper(s[pos]) == needle)
+            return s + pos;
+    }
+    return nullptr;
+}
+
+TCaseInsensitiveString EscapeC(const TCaseInsensitiveString& str) { // EscapeC overload taking and returning a case-insensitive string.
+ const auto result = EscapeC(str.data(), str.size()); // delegates to the (pointer, size) overload, presumably the one from <util/string/escape.h>
+ return {result.data(), result.size()}; // rebuilds a TCaseInsensitiveString from the escaped bytes
+}
+
diff --git a/library/cpp/case_insensitive_string/case_insensitive_char_traits.h b/library/cpp/case_insensitive_string/case_insensitive_char_traits.h
new file mode 100644
index 0000000000..2717893c10
--- /dev/null
+++ b/library/cpp/case_insensitive_string/case_insensitive_char_traits.h
@@ -0,0 +1,30 @@
+#pragma once
+
+#include <contrib/libs/libc_compat/string.h>
+
+#include <string>
+
+struct TCaseInsensitiveCharTraits : private std::char_traits<char> { // Char traits whose comparisons upper-case both operands first.
+ static bool eq(char c1, char c2) { // Equality ignoring case.
+ return to_upper(c1) == to_upper(c2);
+ }
+
+ static bool lt(char c1, char c2) { // Ordering ignoring case; the upper-cased values are compared as plain `char`.
+ return to_upper(c1) < to_upper(c2);
+ }
+
+ static int compare(const char* s1, const char* s2, std::size_t n); // Only declared here; defined out of line.
+
+ static const char* find(const char* s, std::size_t n, char a); // Only declared here; defined out of line.
+
+ using std::char_traits<char>::assign; // The base is inherited privately, so each reused member is re-exported explicitly.
+ using std::char_traits<char>::char_type;
+ using std::char_traits<char>::copy;
+ using std::char_traits<char>::length;
+ using std::char_traits<char>::move;
+
+private:
+ static char to_upper(char ch) { // Upper-cases a single char; shared by eq and lt above.
+ return std::toupper((unsigned char)ch); // cast first: std::toupper requires a value representable as unsigned char (or EOF)
+ }
+};
diff --git a/library/cpp/case_insensitive_string/case_insensitive_string.cpp b/library/cpp/case_insensitive_string/case_insensitive_string.cpp
new file mode 100644
index 0000000000..16c0f5ff7a
--- /dev/null
+++ b/library/cpp/case_insensitive_string/case_insensitive_string.cpp
@@ -0,0 +1,22 @@
+#include "case_insensitive_string.h"
+
+#include <library/cpp/digest/murmur/murmur.h>
+
+size_t THash<TCaseInsensitiveStringBuf>::operator()(TCaseInsensitiveStringBuf str) const noexcept { // Hashes the lower-cased bytes of str.
+    TMurmurHash2A<size_t> hash; // fed one lower-cased byte at a time, so inputs differing only in letter case hash alike
+    for (size_t i = 0; i < str.size(); ++i) {
+        char lower = std::tolower(static_cast<unsigned char>(str[i])); // cast first: a negative char is undefined behavior for std::tolower
+        hash.Update(&lower, 1);
+    }
+    return hash.Value();
+}
+
+template <>
+void Out<TCaseInsensitiveString>(IOutputStream& o, const TCaseInsensitiveString& p) { // Out specialization for the owning case-insensitive string.
+ o.Write(p.data(), p.size()); // writes the stored bytes as they are; no case conversion is applied
+}
+
+template <>
+void Out<TCaseInsensitiveStringBuf>(IOutputStream& o, const TCaseInsensitiveStringBuf& p) { // Out specialization for the non-owning case-insensitive string buf.
+ o.Write(p.data(), p.size()); // writes the referenced bytes as they are; no case conversion is applied
+}
diff --git a/library/cpp/case_insensitive_string/case_insensitive_string.h b/library/cpp/case_insensitive_string/case_insensitive_string.h
new file mode 100644
index 0000000000..443de3e5f9
--- /dev/null
+++ b/library/cpp/case_insensitive_string/case_insensitive_string.h
@@ -0,0 +1,37 @@
+#pragma once
+
+#include "case_insensitive_char_traits.h"
+
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+#include <util/generic/hash.h>
+#include <util/string/split.h>
+
+using TCaseInsensitiveString = TBasicString<char, TCaseInsensitiveCharTraits>;
+using TCaseInsensitiveStringBuf = TBasicStringBuf<char, TCaseInsensitiveCharTraits>;
+
+template <>
+struct THash<TCaseInsensitiveStringBuf> { // THash specialization for case-insensitive string bufs.
+ size_t operator()(TCaseInsensitiveStringBuf str) const noexcept; // defined in case_insensitive_string.cpp, where each char is lower-cased before hashing
+};
+
+template <>
+struct THash<TCaseInsensitiveString> : THash<TCaseInsensitiveStringBuf> {}; // the owning string type inherits the string-buf hasher unchanged
+
+namespace NStringSplitPrivate { // TStringBufOfImpl is presumably declared in <util/string/split.h> — confirm
+
+ template<>
+ struct TStringBufOfImpl<TCaseInsensitiveStringBuf> { // intentionally left without a `type` member
+ /*
+ * WARN:
+ * StringSplitter does not use TCharTraits properly.
+ * Splitting such strings is explicitly disabled.
+ */
+ // using type = TCaseInsensitiveStringBuf;
+ };
+
+ template<>
+ struct TStringBufOfImpl<TCaseInsensitiveString> : TStringBufOfImpl<TCaseInsensitiveStringBuf> { // inherits the same empty specialization
+ };
+
+} // namespace NStringSplitPrivate
diff --git a/library/cpp/case_insensitive_string/case_insensitive_string_ut.cpp b/library/cpp/case_insensitive_string/case_insensitive_string_ut.cpp
new file mode 100644
index 0000000000..49f9c59c95
--- /dev/null
+++ b/library/cpp/case_insensitive_string/case_insensitive_string_ut.cpp
@@ -0,0 +1,65 @@
+#include "case_insensitive_string.h"
+
+#include <util/generic/string_ut.h>
+
+class TCaseInsensitiveStringTest : public TTestBase, private TStringTestImpl<TCaseInsensitiveString, TTestData<char>> { // Instantiates TStringTestImpl for TCaseInsensitiveString.
+public:
+ UNIT_TEST_SUITE(TCaseInsensitiveStringTest);
+ UNIT_TEST(TestOperators); // not defined in this class; presumably inherited from TStringTestImpl — confirm
+ UNIT_TEST(TestOperatorsCI); // not defined in this class; presumably inherited from TStringTestImpl — confirm
+
+ UNIT_TEST_SUITE_END();
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TCaseInsensitiveStringTest);
+
+Y_UNIT_TEST_SUITE(TCaseInsensitiveStringTestEx) { // Case-insensitivity checks for TBasicString-based and std-based string types.
+ Y_UNIT_TEST(BasicTString) { // TCaseInsensitiveString and TCaseInsensitiveStringBuf
+ TCaseInsensitiveString foo("foo");
+ TCaseInsensitiveString FOO("FOO");
+ TCaseInsensitiveString Bar("Bar");
+ TCaseInsensitiveString bAR("bAR");
+
+ UNIT_ASSERT_EQUAL(foo, FOO); // equality ignores letter case
+ UNIT_ASSERT_EQUAL(Bar, bAR);
+
+ constexpr TCaseInsensitiveStringBuf foobar("foobar");
+ UNIT_ASSERT(foobar.StartsWith(foo)); // prefix, suffix and substring checks ignore letter case as well
+ UNIT_ASSERT(foobar.StartsWith(FOO));
+ UNIT_ASSERT(foobar.EndsWith(Bar));
+ UNIT_ASSERT(foobar.EndsWith(bAR));
+ UNIT_ASSERT(foobar.Contains(FOO));
+ UNIT_ASSERT(foobar.Contains(Bar));
+ }
+
+ Y_UNIT_TEST(BasicStdString) { // the same traits plugged into std::basic_string and std::basic_string_view
+ using TCaseInsensitiveStdString = std::basic_string<char, TCaseInsensitiveCharTraits>;
+ using TCaseInsensitiveStringView = std::basic_string_view<char, TCaseInsensitiveCharTraits>;
+
+ TCaseInsensitiveStdString foo("foo");
+ TCaseInsensitiveStdString FOO("FOO");
+ TCaseInsensitiveStdString Bar("Bar");
+ TCaseInsensitiveStdString bAR("bAR");
+
+ UNIT_ASSERT_EQUAL(foo, FOO); // equality ignores letter case
+ UNIT_ASSERT_EQUAL(Bar, bAR);
+
+ constexpr TCaseInsensitiveStringView foobar("foobar");
+ UNIT_ASSERT(foobar.starts_with(foo)); // prefix and suffix checks ignore letter case
+ UNIT_ASSERT(foobar.starts_with(FOO));
+ UNIT_ASSERT(foobar.ends_with(Bar));
+ UNIT_ASSERT(foobar.ends_with(bAR));
+ // TODO: test contains() after C++23
+ }
+
+/* Disabled: case_insensitive_string.h explicitly turns off StringSplitter support for these string types.
+ Y_UNIT_TEST(TestSplit) {
+ TCaseInsensitiveStringBuf input("splitAmeAbro");
+ TVector<TCaseInsensitiveStringBuf> expected{"split", "me", "bro"};
+
+ TVector<TCaseInsensitiveStringBuf> split = StringSplitter(input).Split('a');
+
+ UNIT_ASSERT_VALUES_EQUAL(split, expected);
+ }
+*/
+}
diff --git a/library/cpp/case_insensitive_string/ut/ya.make b/library/cpp/case_insensitive_string/ut/ya.make
new file mode 100644
index 0000000000..b209d4571e
--- /dev/null
+++ b/library/cpp/case_insensitive_string/ut/ya.make
@@ -0,0 +1,9 @@
+OWNER(eeight)
+
+UNITTEST_FOR(library/cpp/case_insensitive_string) # presumably declares a unit-test module for the library at this path — confirm against ya.make docs
+
+SRCS(
+ case_insensitive_string_ut.cpp # NOTE(review): this commit adds the file in library/cpp/case_insensitive_string/, not in ut/ — confirm path resolution
+)
+
+END()
diff --git a/library/cpp/case_insensitive_string/ya.make b/library/cpp/case_insensitive_string/ya.make
new file mode 100644
index 0000000000..dfca415e6b
--- /dev/null
+++ b/library/cpp/case_insensitive_string/ya.make
@@ -0,0 +1,17 @@
+OWNER(eeight)
+
+LIBRARY()
+
+SRCS(
+ case_insensitive_char_traits.cpp
+ case_insensitive_string.cpp
+)
+
+PEERDIR(
+ contrib/libs/libc_compat # case_insensitive_char_traits.h includes <contrib/libs/libc_compat/string.h>
+ library/cpp/digest/murmur # case_insensitive_string.cpp includes murmur.h for TMurmurHash2A
+)
+
+END()
+
+RECURSE_FOR_TESTS(ut) # ut/ holds the ya.make for the unit tests