aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/case_insensitive_string/case_insensitive_string.cpp
diff options
context:
space:
mode:
author: vadim-xd <vadim-xd@yandex-team.com> 2024-06-09 14:29:51 +0300
committer: vadim-xd <vadim-xd@yandex-team.com> 2024-06-09 14:38:13 +0300
commit: 22d59c45d8f17195622bd9e5bfa9259c50b1a732 (patch)
tree: 508002f84f703be6d6f92443827d1a4e255d457f /library/cpp/case_insensitive_string/case_insensitive_string.cpp
parent: afd4899380eea1c70e2a68714b5da1c9919ccdbd (diff)
download: ydb-22d59c45d8f17195622bd9e5bfa9259c50b1a732.tar.gz
Add TCaseInsensitiveAsciiString
Follow-up for rXXXXXX - further optimize ASCII-only case-insensitive strings 1fca7889a074a191eadce12247bdd6dd18b75ab2
Diffstat (limited to 'library/cpp/case_insensitive_string/case_insensitive_string.cpp')
-rw-r--r-- library/cpp/case_insensitive_string/case_insensitive_string.cpp 75
1 files changed, 52 insertions, 23 deletions
diff --git a/library/cpp/case_insensitive_string/case_insensitive_string.cpp b/library/cpp/case_insensitive_string/case_insensitive_string.cpp
index dce0ff4af8..25fedd36eb 100644
--- a/library/cpp/case_insensitive_string/case_insensitive_string.cpp
+++ b/library/cpp/case_insensitive_string/case_insensitive_string.cpp
@@ -2,35 +2,64 @@
#include <library/cpp/digest/murmur/murmur.h>
+#include <util/string/escape.h>
+
#include <array>
-static size_t HashTail(TMurmurHash2A<size_t>& hash, const char* data, size_t size) {
- for (size_t i = 0; i < size; ++i) {
- char lower = std::tolower(data[i]);
- hash.Update(&lower, 1);
- }
- return hash.Value();
-}
+namespace {
+ template <auto ToLower>
+ struct TCaseInsensitiveHash {
+ static size_t HashTail(TMurmurHash2A<size_t>& hash, const char* data, size_t size) noexcept {
+ for (size_t i = 0; i < size; ++i) {
+ char lower = ToLower(data[i]);
+ hash.Update(&lower, 1);
+ }
+ return hash.Value();
+ }
-size_t THash<TCaseInsensitiveStringBuf>::operator()(TCaseInsensitiveStringBuf str) const noexcept {
- TMurmurHash2A<size_t> hash;
- std::array<char, sizeof(size_t)> buf;
- size_t headSize = str.size() - str.size() % buf.size();
- for (size_t i = 0; i < headSize; i += buf.size()) {
- for (size_t j = 0; j < buf.size(); ++j) {
- buf[j] = std::tolower(str[i + j]);
+ static size_t ComputeHash(const char* s, size_t n) noexcept {
+ TMurmurHash2A<size_t> hash;
+ std::array<char, sizeof(size_t)> buf;
+ size_t headSize = n - n % buf.size();
+ for (size_t i = 0; i < headSize; i += buf.size()) {
+ for (size_t j = 0; j < buf.size(); ++j) {
+ buf[j] = ToLower(s[i + j]);
+ }
+ hash.Update(buf.data(), buf.size());
+ }
+ return HashTail(hash, s + headSize, n - headSize);
}
- hash.Update(buf.data(), buf.size());
- }
- return HashTail(hash, str.data() + headSize, str.size() - headSize);
+ };
}
-template <>
-void Out<TCaseInsensitiveString>(IOutputStream& o, const TCaseInsensitiveString& p) {
- o.Write(p.data(), p.size());
+size_t CaseInsensitiveStringHash(const char* s, size_t n) noexcept {
+ return TCaseInsensitiveHash<static_cast<int(*)(int)>(std::tolower)>::ComputeHash(s, n);
}
-template <>
-void Out<TCaseInsensitiveStringBuf>(IOutputStream& o, const TCaseInsensitiveStringBuf& p) {
- o.Write(p.data(), p.size());
+size_t CaseInsensitiveAsciiStringHash(const char* s, size_t n) noexcept {
+ return TCaseInsensitiveHash<static_cast<char(*)(char)>(AsciiToLower)>::ComputeHash(s, n);
}
+
+#define Y_DEFINE_STRING_OUT(type) \
+ template <> \
+ void Out<type>(IOutputStream& o, const type& p) { \
+ o.Write(p.data(), p.size()); \
+ }
+
+Y_DEFINE_STRING_OUT(TCaseInsensitiveString);
+Y_DEFINE_STRING_OUT(TCaseInsensitiveStringBuf);
+Y_DEFINE_STRING_OUT(TCaseInsensitiveAsciiString);
+Y_DEFINE_STRING_OUT(TCaseInsensitiveAsciiStringBuf);
+
+#undef Y_DEFINE_STRING_OUT
+
+#define Y_DEFINE_STRING_ESCAPE(type) \
+ type EscapeC(const type& str) { \
+ const auto result = EscapeC(str.data(), str.size()); \
+ return {result.data(), result.size()}; \
+ }
+
+Y_DEFINE_STRING_ESCAPE(TCaseInsensitiveString);
+Y_DEFINE_STRING_ESCAPE(TCaseInsensitiveAsciiString);
+
+#undef Y_DEFINE_STRING_ESCAPE