diff options
author | vadim-xd <vadim-xd@yandex-team.com> | 2024-06-03 23:26:15 +0300 |
---|---|---|
committer | vadim-xd <vadim-xd@yandex-team.com> | 2024-06-03 23:35:19 +0300 |
commit | ca99fc2f20163d67dbab313a7fdf6589d83ba220 (patch) | |
tree | 4fcb2ee7466822ee8650cfaf684ab44c3c8dd5e8 /library/cpp/case_insensitive_string/ut_gtest | |
parent | 7d8657f7553a8c96975b883afc7f2b42bc558616 (diff) | |
download | ydb-ca99fc2f20163d67dbab313a7fdf6589d83ba220.tar.gz |
Optimize hashing for case-insensitive strings
6e07ea929418b1fae4257a2af37aa0ed5799f22a
Diffstat (limited to 'library/cpp/case_insensitive_string/ut_gtest')
-rw-r--r-- | library/cpp/case_insensitive_string/ut_gtest/case_insensitive_string_hash.cpp | 36 | ||||
-rw-r--r-- | library/cpp/case_insensitive_string/ut_gtest/ya.make | 12 |
2 files changed, 48 insertions, 0 deletions
diff --git a/library/cpp/case_insensitive_string/ut_gtest/case_insensitive_string_hash.cpp b/library/cpp/case_insensitive_string/ut_gtest/case_insensitive_string_hash.cpp new file mode 100644 index 0000000000..893494c96d --- /dev/null +++ b/library/cpp/case_insensitive_string/ut_gtest/case_insensitive_string_hash.cpp @@ -0,0 +1,36 @@ +#include <library/cpp/case_insensitive_string/case_insensitive_string.h> +#include <library/cpp/digest/murmur/murmur.h> +#include <library/cpp/testing/gtest/gtest.h> + +#include <util/generic/hash_table.h> +#include <util/random/random.h> + +TEST(CaseInsensitiveString, Hash) { + size_t h1 = ComputeHash(TCaseInsensitiveString("some long string...")); + size_t h2 = ComputeHash(TCaseInsensitiveString("Some Long String...")); + EXPECT_EQ(h1, h2); + size_t otherHash = ComputeHash(TCaseInsensitiveString("other long string...")); + EXPECT_NE(h1, otherHash); +} + +namespace { + size_t NaiveCaseInsensitiveHash(TCaseInsensitiveStringBuf str) noexcept { + TMurmurHash2A<size_t> hash; + for (size_t i = 0; i < str.size(); ++i) { + char lower = std::tolower(str[i]); + hash.Update(&lower, 1); + } + return hash.Value(); + } +} + +TEST(CaseInsensitiveString, HashValues) { + SetRandomSeed(123); + for (size_t n = 0; n < 64; ++n) { + TCaseInsensitiveString s; + for (size_t i = 0; i < n; ++i) { + s.push_back(RandomNumber<unsigned char>()); + } + EXPECT_EQ(ComputeHash(s), NaiveCaseInsensitiveHash(s)) << "Hashes for \"" << s << "\" differ"; + } +} diff --git a/library/cpp/case_insensitive_string/ut_gtest/ya.make b/library/cpp/case_insensitive_string/ut_gtest/ya.make new file mode 100644 index 0000000000..2dbb44d87b --- /dev/null +++ b/library/cpp/case_insensitive_string/ut_gtest/ya.make @@ -0,0 +1,12 @@ +GTEST() + +SRCS( + case_insensitive_string_hash.cpp +) + +PEERDIR( + library/cpp/case_insensitive_string + library/cpp/digest/murmur +) + +END() |