diff options
author | vadim-xd <vadim-xd@yandex-team.com> | 2024-06-03 23:26:15 +0300 |
---|---|---|
committer | vadim-xd <vadim-xd@yandex-team.com> | 2024-06-03 23:35:19 +0300 |
commit | ca99fc2f20163d67dbab313a7fdf6589d83ba220 (patch) | |
tree | 4fcb2ee7466822ee8650cfaf684ab44c3c8dd5e8 /library/cpp/case_insensitive_string/case_insensitive_string.cpp | |
parent | 7d8657f7553a8c96975b883afc7f2b42bc558616 (diff) | |
download | ydb-ca99fc2f20163d67dbab313a7fdf6589d83ba220.tar.gz |
Optimize hashing for case-insensitive strings
6e07ea929418b1fae4257a2af37aa0ed5799f22a
Diffstat (limited to 'library/cpp/case_insensitive_string/case_insensitive_string.cpp')
-rw-r--r-- | library/cpp/case_insensitive_string/case_insensitive_string.cpp | 22 |
1 files changed, 18 insertions, 4 deletions
```diff
diff --git a/library/cpp/case_insensitive_string/case_insensitive_string.cpp b/library/cpp/case_insensitive_string/case_insensitive_string.cpp
index 16c0f5ff7a..dce0ff4af8 100644
--- a/library/cpp/case_insensitive_string/case_insensitive_string.cpp
+++ b/library/cpp/case_insensitive_string/case_insensitive_string.cpp
@@ -2,15 +2,29 @@
 
 #include <library/cpp/digest/murmur/murmur.h>
 
-size_t THash<TCaseInsensitiveStringBuf>::operator()(TCaseInsensitiveStringBuf str) const noexcept {
-    TMurmurHash2A<size_t> hash;
-    for (size_t i = 0; i < str.size(); ++i) {
-        char lower = std::tolower(str[i]);
+#include <array>
+
+static size_t HashTail(TMurmurHash2A<size_t>& hash, const char* data, size_t size) {
+    for (size_t i = 0; i < size; ++i) {
+        char lower = std::tolower(data[i]);
         hash.Update(&lower, 1);
     }
     return hash.Value();
 }
 
+size_t THash<TCaseInsensitiveStringBuf>::operator()(TCaseInsensitiveStringBuf str) const noexcept {
+    TMurmurHash2A<size_t> hash;
+    std::array<char, sizeof(size_t)> buf;
+    size_t headSize = str.size() - str.size() % buf.size();
+    for (size_t i = 0; i < headSize; i += buf.size()) {
+        for (size_t j = 0; j < buf.size(); ++j) {
+            buf[j] = std::tolower(str[i + j]);
+        }
+        hash.Update(buf.data(), buf.size());
+    }
+    return HashTail(hash, str.data() + headSize, str.size() - headSize);
+}
+
 template <>
 void Out<TCaseInsensitiveString>(IOutputStream& o, const TCaseInsensitiveString& p) {
     o.Write(p.data(), p.size());
```