diff options
author | vadim-xd <vadim-xd@yandex-team.com> | 2024-06-09 14:29:51 +0300 |
---|---|---|
committer | vadim-xd <vadim-xd@yandex-team.com> | 2024-06-09 14:38:13 +0300 |
commit | 22d59c45d8f17195622bd9e5bfa9259c50b1a732 (patch) | |
tree | 508002f84f703be6d6f92443827d1a4e255d457f /library/cpp/case_insensitive_string/case_insensitive_string.cpp | |
parent | afd4899380eea1c70e2a68714b5da1c9919ccdbd (diff) | |
download | ydb-22d59c45d8f17195622bd9e5bfa9259c50b1a732.tar.gz |
Add TCaseInsensitiveAsciiString
Follow-up for rXXXXXX - further optimize ASCII-only case-insensitive strings
1fca7889a074a191eadce12247bdd6dd18b75ab2
Diffstat (limited to 'library/cpp/case_insensitive_string/case_insensitive_string.cpp')
-rw-r--r-- | library/cpp/case_insensitive_string/case_insensitive_string.cpp | 75 |
1 files changed, 52 insertions, 23 deletions
diff --git a/library/cpp/case_insensitive_string/case_insensitive_string.cpp b/library/cpp/case_insensitive_string/case_insensitive_string.cpp index dce0ff4af8..25fedd36eb 100644 --- a/library/cpp/case_insensitive_string/case_insensitive_string.cpp +++ b/library/cpp/case_insensitive_string/case_insensitive_string.cpp @@ -2,35 +2,64 @@ #include <library/cpp/digest/murmur/murmur.h> +#include <util/string/escape.h> + #include <array> -static size_t HashTail(TMurmurHash2A<size_t>& hash, const char* data, size_t size) { - for (size_t i = 0; i < size; ++i) { - char lower = std::tolower(data[i]); - hash.Update(&lower, 1); - } - return hash.Value(); -} +namespace { + template <auto ToLower> + struct TCaseInsensitiveHash { + static size_t HashTail(TMurmurHash2A<size_t>& hash, const char* data, size_t size) noexcept { + for (size_t i = 0; i < size; ++i) { + char lower = ToLower(data[i]); + hash.Update(&lower, 1); + } + return hash.Value(); + } -size_t THash<TCaseInsensitiveStringBuf>::operator()(TCaseInsensitiveStringBuf str) const noexcept { - TMurmurHash2A<size_t> hash; - std::array<char, sizeof(size_t)> buf; - size_t headSize = str.size() - str.size() % buf.size(); - for (size_t i = 0; i < headSize; i += buf.size()) { - for (size_t j = 0; j < buf.size(); ++j) { - buf[j] = std::tolower(str[i + j]); + static size_t ComputeHash(const char* s, size_t n) noexcept { + TMurmurHash2A<size_t> hash; + std::array<char, sizeof(size_t)> buf; + size_t headSize = n - n % buf.size(); + for (size_t i = 0; i < headSize; i += buf.size()) { + for (size_t j = 0; j < buf.size(); ++j) { + buf[j] = ToLower(s[i + j]); + } + hash.Update(buf.data(), buf.size()); + } + return HashTail(hash, s + headSize, n - headSize); } - hash.Update(buf.data(), buf.size()); - } - return HashTail(hash, str.data() + headSize, str.size() - headSize); + }; } -template <> -void Out<TCaseInsensitiveString>(IOutputStream& o, const TCaseInsensitiveString& p) { - o.Write(p.data(), p.size()); +size_t 
CaseInsensitiveStringHash(const char* s, size_t n) noexcept { + return TCaseInsensitiveHash<static_cast<int(*)(int)>(std::tolower)>::ComputeHash(s, n); } -template <> -void Out<TCaseInsensitiveStringBuf>(IOutputStream& o, const TCaseInsensitiveStringBuf& p) { - o.Write(p.data(), p.size()); +size_t CaseInsensitiveAsciiStringHash(const char* s, size_t n) noexcept { + return TCaseInsensitiveHash<static_cast<char(*)(char)>(AsciiToLower)>::ComputeHash(s, n); } + +#define Y_DEFINE_STRING_OUT(type) \ + template <> \ + void Out<type>(IOutputStream& o, const type& p) { \ + o.Write(p.data(), p.size()); \ + } + +Y_DEFINE_STRING_OUT(TCaseInsensitiveString); +Y_DEFINE_STRING_OUT(TCaseInsensitiveStringBuf); +Y_DEFINE_STRING_OUT(TCaseInsensitiveAsciiString); +Y_DEFINE_STRING_OUT(TCaseInsensitiveAsciiStringBuf); + +#undef Y_DEFINE_STRING_OUT + +#define Y_DEFINE_STRING_ESCAPE(type) \ + type EscapeC(const type& str) { \ + const auto result = EscapeC(str.data(), str.size()); \ + return {result.data(), result.size()}; \ + } + +Y_DEFINE_STRING_ESCAPE(TCaseInsensitiveString); +Y_DEFINE_STRING_ESCAPE(TCaseInsensitiveAsciiString); + +#undef Y_DEFINE_STRING_ESCAPE |