aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/case_insensitive_string/ut_gtest
diff options
context:
space:
mode:
authorvadim-xd <vadim-xd@yandex-team.com>2024-06-03 23:26:15 +0300
committervadim-xd <vadim-xd@yandex-team.com>2024-06-03 23:35:19 +0300
commitca99fc2f20163d67dbab313a7fdf6589d83ba220 (patch)
tree4fcb2ee7466822ee8650cfaf684ab44c3c8dd5e8 /library/cpp/case_insensitive_string/ut_gtest
parent7d8657f7553a8c96975b883afc7f2b42bc558616 (diff)
downloadydb-ca99fc2f20163d67dbab313a7fdf6589d83ba220.tar.gz
Optimize hashing for case-insensitive strings
6e07ea929418b1fae4257a2af37aa0ed5799f22a
Diffstat (limited to 'library/cpp/case_insensitive_string/ut_gtest')
-rw-r--r--library/cpp/case_insensitive_string/ut_gtest/case_insensitive_string_hash.cpp36
-rw-r--r--library/cpp/case_insensitive_string/ut_gtest/ya.make12
2 files changed, 48 insertions, 0 deletions
diff --git a/library/cpp/case_insensitive_string/ut_gtest/case_insensitive_string_hash.cpp b/library/cpp/case_insensitive_string/ut_gtest/case_insensitive_string_hash.cpp
new file mode 100644
index 0000000000..893494c96d
--- /dev/null
+++ b/library/cpp/case_insensitive_string/ut_gtest/case_insensitive_string_hash.cpp
@@ -0,0 +1,36 @@
+#include <library/cpp/case_insensitive_string/case_insensitive_string.h>
+#include <library/cpp/digest/murmur/murmur.h>
+#include <library/cpp/testing/gtest/gtest.h>
+
+#include <util/generic/hash_table.h>
+#include <util/random/random.h>
+
+TEST(CaseInsensitiveString, Hash) {
+ size_t h1 = ComputeHash(TCaseInsensitiveString("some long string..."));
+ size_t h2 = ComputeHash(TCaseInsensitiveString("Some Long String..."));
+ EXPECT_EQ(h1, h2);
+ size_t otherHash = ComputeHash(TCaseInsensitiveString("other long string..."));
+ EXPECT_NE(h1, otherHash);
+}
+
+namespace {
+ size_t NaiveCaseInsensitiveHash(TCaseInsensitiveStringBuf str) noexcept {
+ TMurmurHash2A<size_t> hash;
+ for (size_t i = 0; i < str.size(); ++i) {
+ char lower = std::tolower(str[i]);
+ hash.Update(&lower, 1);
+ }
+ return hash.Value();
+ }
+}
+
+TEST(CaseInsensitiveString, HashValues) {
+ SetRandomSeed(123);
+ for (size_t n = 0; n < 64; ++n) {
+ TCaseInsensitiveString s;
+ for (size_t i = 0; i < n; ++i) {
+ s.push_back(RandomNumber<unsigned char>());
+ }
+ EXPECT_EQ(ComputeHash(s), NaiveCaseInsensitiveHash(s)) << "Hashes for \"" << s << "\" differ";
+ }
+}
diff --git a/library/cpp/case_insensitive_string/ut_gtest/ya.make b/library/cpp/case_insensitive_string/ut_gtest/ya.make
new file mode 100644
index 0000000000..2dbb44d87b
--- /dev/null
+++ b/library/cpp/case_insensitive_string/ut_gtest/ya.make
@@ -0,0 +1,12 @@
+GTEST()
+
+SRCS(
+ case_insensitive_string_hash.cpp
+)
+
+PEERDIR(
+ library/cpp/case_insensitive_string
+ library/cpp/digest/murmur
+)
+
+END()