diff options
author | thegeorg <thegeorg@yandex-team.com> | 2022-08-22 11:44:57 +0300 |
---|---|---|
committer | thegeorg <thegeorg@yandex-team.com> | 2022-08-22 11:44:57 +0300 |
commit | 34a4f487c8a1ffe58fc9cf16cc577e24852b5955 (patch) | |
tree | 5cb336f7069d20ac36e97a8e2d27b31c6d855dc4 /contrib/restricted/abseil-cpp/absl/container | |
parent | 8ec5fa9d0aadfa2720716e2675016036b745e016 (diff) | |
download | ydb-34a4f487c8a1ffe58fc9cf16cc577e24852b5955.tar.gz |
Update contrib/restricted/abseil-cpp to 20220623.0
Diffstat (limited to 'contrib/restricted/abseil-cpp/absl/container')
12 files changed, 739 insertions, 358 deletions
diff --git a/contrib/restricted/abseil-cpp/absl/container/fixed_array.h b/contrib/restricted/abseil-cpp/absl/container/fixed_array.h index 839ba0bc16..2aefae3bde 100644 --- a/contrib/restricted/abseil-cpp/absl/container/fixed_array.h +++ b/contrib/restricted/abseil-cpp/absl/container/fixed_array.h @@ -489,12 +489,14 @@ class FixedArray { Storage storage_; }; +#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL template <typename T, size_t N, typename A> constexpr size_t FixedArray<T, N, A>::kInlineBytesDefault; template <typename T, size_t N, typename A> constexpr typename FixedArray<T, N, A>::size_type FixedArray<T, N, A>::inline_elements; +#endif template <typename T, size_t N, typename A> void FixedArray<T, N, A>::NonEmptyInlinedStorage::AnnotateConstruct( diff --git a/contrib/restricted/abseil-cpp/absl/container/flat_hash_map.h b/contrib/restricted/abseil-cpp/absl/container/flat_hash_map.h index 74def0df0e..e6bdbd9e4f 100644 --- a/contrib/restricted/abseil-cpp/absl/container/flat_hash_map.h +++ b/contrib/restricted/abseil-cpp/absl/container/flat_hash_map.h @@ -36,6 +36,7 @@ #include <utility> #include "absl/algorithm/container.h" +#include "absl/base/macros.h" #include "absl/container/internal/container_memory.h" #include "absl/container/internal/hash_function_defaults.h" // IWYU pragma: export #include "absl/container/internal/raw_hash_map.h" // IWYU pragma: export @@ -75,6 +76,10 @@ struct FlatHashMapPolicy; // absl/hash/hash.h for information on extending Abseil hashing to user-defined // types. // +// Using `absl::flat_hash_map` at interface boundaries in dynamically loaded +// libraries (e.g. .dll, .so) is unsupported due to way `absl::Hash` values may +// be randomized across dynamically loaded libraries. +// // NOTE: A `flat_hash_map` stores its value types directly inside its // implementation array to avoid memory indirection. Because a `flat_hash_map` // is designed to move data when rehashed, map values will not retain pointer @@ -356,8 +361,8 @@ class flat_hash_map : public absl::container_internal::raw_hash_map< // `flat_hash_map`. // // iterator try_emplace(const_iterator hint, - // const init_type& k, Args&&... args): - // iterator try_emplace(const_iterator hint, init_type&& k, Args&&... args): + // const key_type& k, Args&&... args): + // iterator try_emplace(const_iterator hint, key_type&& k, Args&&... args): // // Inserts (via copy or move) the element of the specified key into the // `flat_hash_map` using the position of `hint` as a non-binding suggestion @@ -541,10 +546,12 @@ class flat_hash_map : public absl::container_internal::raw_hash_map< // erase_if(flat_hash_map<>, Pred) // // Erases all elements that satisfy the predicate `pred` from the container `c`. +// Returns the number of erased elements. template <typename K, typename V, typename H, typename E, typename A, typename Predicate> -void erase_if(flat_hash_map<K, V, H, E, A>& c, Predicate pred) { - container_internal::EraseIf(pred, &c); +typename flat_hash_map<K, V, H, E, A>::size_type erase_if( + flat_hash_map<K, V, H, E, A>& c, Predicate pred) { + return container_internal::EraseIf(pred, &c); } namespace container_internal { diff --git a/contrib/restricted/abseil-cpp/absl/container/flat_hash_set.h b/contrib/restricted/abseil-cpp/absl/container/flat_hash_set.h index 6b89da6571..4938c703b7 100644 --- a/contrib/restricted/abseil-cpp/absl/container/flat_hash_set.h +++ b/contrib/restricted/abseil-cpp/absl/container/flat_hash_set.h @@ -67,11 +67,15 @@ struct FlatHashSetPolicy; // // By default, `flat_hash_set` uses the `absl::Hash` hashing framework. All // fundamental and Abseil types that support the `absl::Hash` framework have a -// compatible equality operator for comparing insertions into `flat_hash_map`. +// compatible equality operator for comparing insertions into `flat_hash_set`. // If your type is not yet supported by the `absl::Hash` framework, see // absl/hash/hash.h for information on extending Abseil hashing to user-defined // types. // +// Using `absl::flat_hash_set` at interface boundaries in dynamically loaded +// libraries (e.g. .dll, .so) is unsupported due to way `absl::Hash` values may +// be randomized across dynamically loaded libraries. +// // NOTE: A `flat_hash_set` stores its keys directly inside its implementation // array to avoid memory indirection. Because a `flat_hash_set` is designed to // move data when rehashed, set keys will not retain pointer stability. If you @@ -106,7 +110,7 @@ class flat_hash_set public: // Constructors and Assignment Operators // - // A flat_hash_set supports the same overload set as `std::unordered_map` + // A flat_hash_set supports the same overload set as `std::unordered_set` // for construction and assignment: // // * Default constructor @@ -173,7 +177,7 @@ class flat_hash_set // available within the `flat_hash_set`. // // NOTE: this member function is particular to `absl::flat_hash_set` and is - // not provided in the `std::unordered_map` API. + // not provided in the `std::unordered_set` API. using Base::capacity; // flat_hash_set::empty() @@ -332,7 +336,7 @@ class flat_hash_set // flat_hash_set::swap(flat_hash_set& other) // // Exchanges the contents of this `flat_hash_set` with those of the `other` - // flat hash map, avoiding invocation of any move, copy, or swap operations on + // flat hash set, avoiding invocation of any move, copy, or swap operations on // individual elements. // // All iterators and references on the `flat_hash_set` remain valid, excepting @@ -340,7 +344,7 @@ class flat_hash_set // // `swap()` requires that the flat hash set's hashing and key equivalence // functions be Swappable, and are exchaged using unqualified calls to - // non-member `swap()`. If the map's allocator has + // non-member `swap()`. If the set's allocator has // `std::allocator_traits<allocator_type>::propagate_on_container_swap::value` // set to `true`, the allocators are also exchanged using an unqualified call // to non-member `swap()`; otherwise, the allocators are not swapped. @@ -395,14 +399,14 @@ class flat_hash_set // flat_hash_set::bucket_count() // // Returns the number of "buckets" within the `flat_hash_set`. Note that - // because a flat hash map contains all elements within its internal storage, + // because a flat hash set contains all elements within its internal storage, // this value simply equals the current capacity of the `flat_hash_set`. using Base::bucket_count; // flat_hash_set::load_factor() // // Returns the current load factor of the `flat_hash_set` (the average number - // of slots occupied with a value within the hash map). + // of slots occupied with a value within the hash set). using Base::load_factor; // flat_hash_set::max_load_factor() @@ -443,9 +447,11 @@ class flat_hash_set // erase_if(flat_hash_set<>, Pred) // // Erases all elements that satisfy the predicate `pred` from the container `c`. +// Returns the number of erased elements. template <typename T, typename H, typename E, typename A, typename Predicate> -void erase_if(flat_hash_set<T, H, E, A>& c, Predicate pred) { - container_internal::EraseIf(pred, &c); +typename flat_hash_set<T, H, E, A>::size_type erase_if( + flat_hash_set<T, H, E, A>& c, Predicate pred) { + return container_internal::EraseIf(pred, &c); } namespace container_internal { diff --git a/contrib/restricted/abseil-cpp/absl/container/inlined_vector.h b/contrib/restricted/abseil-cpp/absl/container/inlined_vector.h index df9e09917d..bc1c4a776c 100644 --- a/contrib/restricted/abseil-cpp/absl/container/inlined_vector.h +++ b/contrib/restricted/abseil-cpp/absl/container/inlined_vector.h @@ -36,7 +36,6 @@ #define ABSL_CONTAINER_INLINED_VECTOR_H_ #include <algorithm> -#include <cassert> #include <cstddef> #include <cstdlib> #include <cstring> @@ -152,7 +151,7 @@ class InlinedVector { const allocator_type& allocator = allocator_type()) : storage_(allocator) { storage_.Initialize(IteratorValueAdapter<A, ForwardIterator>(first), - std::distance(first, last)); + static_cast<size_t>(std::distance(first, last))); } // Creates an inlined vector with elements constructed from the provided input @@ -233,8 +232,8 @@ class InlinedVector { // specified allocator is also `noexcept`. InlinedVector( InlinedVector&& other, - const allocator_type& allocator) - noexcept(absl::allocator_is_nothrow<allocator_type>::value) + const allocator_type& + allocator) noexcept(absl::allocator_is_nothrow<allocator_type>::value) : storage_(allocator) { if (IsMemcpyOk<A>::value) { storage_.MemcpyFrom(other.storage_); @@ -486,8 +485,8 @@ class InlinedVector { InlinedVector& operator=(InlinedVector&& other) { if (ABSL_PREDICT_TRUE(this != std::addressof(other))) { if (IsMemcpyOk<A>::value || other.storage_.GetIsAllocated()) { - inlined_vector_internal::DestroyElements<A>(storage_.GetAllocator(), - data(), size()); + inlined_vector_internal::DestroyAdapter<A>::DestroyElements( + storage_.GetAllocator(), data(), size()); storage_.DeallocateIfAllocated(); storage_.MemcpyFrom(other.storage_); @@ -523,7 +522,7 @@ class InlinedVector { EnableIfAtLeastForwardIterator<ForwardIterator> = 0> void assign(ForwardIterator first, ForwardIterator last) { storage_.Assign(IteratorValueAdapter<A, ForwardIterator>(first), - std::distance(first, last)); + static_cast<size_t>(std::distance(first, last))); } // Overload of `InlinedVector::assign(...)` to replace the contents of the @@ -586,8 +585,20 @@ class InlinedVector { if (ABSL_PREDICT_TRUE(n != 0)) { value_type dealias = v; + // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=102329#c2 + // It appears that GCC thinks that since `pos` is a const pointer and may + // point to uninitialized memory at this point, a warning should be + // issued. But `pos` is actually only used to compute an array index to + // write to. +#if !defined(__clang__) && defined(__GNUC__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" +#endif return storage_.Insert(pos, CopyValueAdapter<A>(std::addressof(dealias)), n); +#if !defined(__clang__) && defined(__GNUC__) +#pragma GCC diagnostic pop +#endif } else { return const_cast<iterator>(pos); } @@ -721,8 +732,8 @@ class InlinedVector { // Destroys all elements in the inlined vector, setting the size to `0` and // deallocating any held memory. void clear() noexcept { - inlined_vector_internal::DestroyElements<A>(storage_.GetAllocator(), data(), - size()); + inlined_vector_internal::DestroyAdapter<A>::DestroyElements( + storage_.GetAllocator(), data(), size()); storage_.DeallocateIfAllocated(); storage_.SetInlinedSize(0); diff --git a/contrib/restricted/abseil-cpp/absl/container/internal/common.h b/contrib/restricted/abseil-cpp/absl/container/internal/common.h index 030e9d4ab0..416d9aa32e 100644 --- a/contrib/restricted/abseil-cpp/absl/container/internal/common.h +++ b/contrib/restricted/abseil-cpp/absl/container/internal/common.h @@ -84,10 +84,11 @@ class node_handle_base { PolicyTraits::transfer(alloc(), slot(), s); } - struct move_tag_t {}; - node_handle_base(move_tag_t, const allocator_type& a, slot_type* s) + struct construct_tag_t {}; + template <typename... Args> + node_handle_base(construct_tag_t, const allocator_type& a, Args&&... args) : alloc_(a) { - PolicyTraits::construct(alloc(), slot(), s); + PolicyTraits::construct(alloc(), slot(), std::forward<Args>(args)...); } void destroy() { @@ -186,8 +187,8 @@ struct CommonAccess { } template <typename T, typename... Args> - static T Move(Args&&... args) { - return T(typename T::move_tag_t{}, std::forward<Args>(args)...); + static T Construct(Args&&... args) { + return T(typename T::construct_tag_t{}, std::forward<Args>(args)...); } }; diff --git a/contrib/restricted/abseil-cpp/absl/container/internal/container_memory.h b/contrib/restricted/abseil-cpp/absl/container/internal/container_memory.h index e67529ecb6..00e9f6d703 100644 --- a/contrib/restricted/abseil-cpp/absl/container/internal/container_memory.h +++ b/contrib/restricted/abseil-cpp/absl/container/internal/container_memory.h @@ -174,7 +174,7 @@ decltype(std::declval<F>()(std::declval<T>())) WithConstructed( // // 2. auto a = PairArgs(args...); // std::pair<F, S> p(std::piecewise_construct, -// std::move(p.first), std::move(p.second)); +// std::move(a.first), std::move(a.second)); inline std::pair<std::tuple<>, std::tuple<>> PairArgs() { return {}; } template <class F, class S> std::pair<std::tuple<F&&>, std::tuple<S&&>> PairArgs(F&& f, S&& s) { @@ -402,6 +402,15 @@ struct map_slot_policy { } } + // Construct this slot by copying from another slot. + template <class Allocator> + static void construct(Allocator* alloc, slot_type* slot, + const slot_type* other) { + emplace(slot); + absl::allocator_traits<Allocator>::construct(*alloc, &slot->value, + other->value); + } + template <class Allocator> static void destroy(Allocator* alloc, slot_type* slot) { if (kMutableKeys::value) { @@ -424,33 +433,6 @@ struct map_slot_policy { } destroy(alloc, old_slot); } - - template <class Allocator> - static void swap(Allocator* alloc, slot_type* a, slot_type* b) { - if (kMutableKeys::value) { - using std::swap; - swap(a->mutable_value, b->mutable_value); - } else { - value_type tmp = std::move(a->value); - absl::allocator_traits<Allocator>::destroy(*alloc, &a->value); - absl::allocator_traits<Allocator>::construct(*alloc, &a->value, - std::move(b->value)); - absl::allocator_traits<Allocator>::destroy(*alloc, &b->value); - absl::allocator_traits<Allocator>::construct(*alloc, &b->value, - std::move(tmp)); - } - } - - template <class Allocator> - static void move(Allocator* alloc, slot_type* src, slot_type* dest) { - if (kMutableKeys::value) { - dest->mutable_value = std::move(src->mutable_value); - } else { - absl::allocator_traits<Allocator>::destroy(*alloc, &dest->value); - absl::allocator_traits<Allocator>::construct(*alloc, &dest->value, - std::move(src->value)); - } - } }; } // namespace container_internal diff --git a/contrib/restricted/abseil-cpp/absl/container/internal/hashtablez_sampler.cc b/contrib/restricted/abseil-cpp/absl/container/internal/hashtablez_sampler.cc index 40cce0479e..efc1be5866 100644 --- a/contrib/restricted/abseil-cpp/absl/container/internal/hashtablez_sampler.cc +++ b/contrib/restricted/abseil-cpp/absl/container/internal/hashtablez_sampler.cc @@ -21,33 +21,43 @@ #include <limits> #include "absl/base/attributes.h" -#include "absl/container/internal/have_sse.h" +#include "absl/base/config.h" #include "absl/debugging/stacktrace.h" #include "absl/memory/memory.h" #include "absl/profiling/internal/exponential_biased.h" #include "absl/profiling/internal/sample_recorder.h" #include "absl/synchronization/mutex.h" +#include "absl/utility/utility.h" namespace absl { ABSL_NAMESPACE_BEGIN namespace container_internal { + +#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL constexpr int HashtablezInfo::kMaxStackDepth; +#endif namespace { ABSL_CONST_INIT std::atomic<bool> g_hashtablez_enabled{ false }; ABSL_CONST_INIT std::atomic<int32_t> g_hashtablez_sample_parameter{1 << 10}; +std::atomic<HashtablezConfigListener> g_hashtablez_config_listener{nullptr}; #if defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE) ABSL_PER_THREAD_TLS_KEYWORD absl::profiling_internal::ExponentialBiased g_exponential_biased_generator; #endif +void TriggerHashtablezConfigListener() { + auto* listener = g_hashtablez_config_listener.load(std::memory_order_acquire); + if (listener != nullptr) listener(); +} + } // namespace #if defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE) -ABSL_PER_THREAD_TLS_KEYWORD int64_t global_next_sample = 0; +ABSL_PER_THREAD_TLS_KEYWORD SamplingState global_next_sample = {0, 0}; #endif // defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE) HashtablezSampler& GlobalHashtablezSampler() { @@ -55,13 +65,11 @@ HashtablezSampler& GlobalHashtablezSampler() { return *sampler; } -// TODO(bradleybear): The comments at this constructors declaration say that the -// fields are not initialized, but this definition does initialize the fields. -// Something needs to be cleaned up. -HashtablezInfo::HashtablezInfo() { PrepareForSampling(); } +HashtablezInfo::HashtablezInfo() = default; HashtablezInfo::~HashtablezInfo() = default; -void HashtablezInfo::PrepareForSampling() { +void HashtablezInfo::PrepareForSampling(int64_t stride, + size_t inline_element_size_value) { capacity.store(0, std::memory_order_relaxed); size.store(0, std::memory_order_relaxed); num_erases.store(0, std::memory_order_relaxed); @@ -74,11 +82,13 @@ void HashtablezInfo::PrepareForSampling() { max_reserve.store(0, std::memory_order_relaxed); create_time = absl::Now(); + weight = stride; // The inliner makes hardcoded skip_count difficult (especially when combined // with LTO). We use the ability to exclude stacks by regex when encoding // instead. depth = absl::GetStackTrace(stack, HashtablezInfo::kMaxStackDepth, /* skip_count= */ 0); + inline_element_size = inline_element_size_value; } static bool ShouldForceSampling() { @@ -101,23 +111,32 @@ static bool ShouldForceSampling() { return state == kForce; } -HashtablezInfo* SampleSlow(int64_t* next_sample, size_t inline_element_size) { +HashtablezInfo* SampleSlow(SamplingState& next_sample, + size_t inline_element_size) { if (ABSL_PREDICT_FALSE(ShouldForceSampling())) { - *next_sample = 1; - HashtablezInfo* result = GlobalHashtablezSampler().Register(); - result->inline_element_size = inline_element_size; + next_sample.next_sample = 1; + const int64_t old_stride = exchange(next_sample.sample_stride, 1); + HashtablezInfo* result = + GlobalHashtablezSampler().Register(old_stride, inline_element_size); return result; } #if !defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE) - *next_sample = std::numeric_limits<int64_t>::max(); + next_sample = { + std::numeric_limits<int64_t>::max(), + std::numeric_limits<int64_t>::max(), + }; return nullptr; #else - bool first = *next_sample < 0; - *next_sample = g_exponential_biased_generator.GetStride( + bool first = next_sample.next_sample < 0; + + const int64_t next_stride = g_exponential_biased_generator.GetStride( g_hashtablez_sample_parameter.load(std::memory_order_relaxed)); + + next_sample.next_sample = next_stride; + const int64_t old_stride = exchange(next_sample.sample_stride, next_stride); // Small values of interval are equivalent to just sampling next time. - ABSL_ASSERT(*next_sample >= 1); + ABSL_ASSERT(next_stride >= 1); // g_hashtablez_enabled can be dynamically flipped, we need to set a threshold // low enough that we will start sampling in a reasonable time, so we just use @@ -127,13 +146,11 @@ HashtablezInfo* SampleSlow(int64_t* next_sample, size_t inline_element_size) { // We will only be negative on our first count, so we should just retry in // that case. if (first) { - if (ABSL_PREDICT_TRUE(--*next_sample > 0)) return nullptr; + if (ABSL_PREDICT_TRUE(--next_sample.next_sample > 0)) return nullptr; return SampleSlow(next_sample, inline_element_size); } - HashtablezInfo* result = GlobalHashtablezSampler().Register(); - result->inline_element_size = inline_element_size; - return result; + return GlobalHashtablezSampler().Register(old_stride, inline_element_size); #endif } @@ -146,7 +163,7 @@ void RecordInsertSlow(HashtablezInfo* info, size_t hash, // SwissTables probe in groups of 16, so scale this to count items probes and // not offset from desired. size_t probe_length = distance_from_desired; -#if ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSE2 +#ifdef ABSL_INTERNAL_HAVE_SSE2 probe_length /= 16; #else probe_length /= 8; @@ -163,11 +180,33 @@ void RecordInsertSlow(HashtablezInfo* info, size_t hash, info->size.fetch_add(1, std::memory_order_relaxed); } +void SetHashtablezConfigListener(HashtablezConfigListener l) { + g_hashtablez_config_listener.store(l, std::memory_order_release); +} + +bool IsHashtablezEnabled() { + return g_hashtablez_enabled.load(std::memory_order_acquire); +} + void SetHashtablezEnabled(bool enabled) { + SetHashtablezEnabledInternal(enabled); + TriggerHashtablezConfigListener(); +} + +void SetHashtablezEnabledInternal(bool enabled) { g_hashtablez_enabled.store(enabled, std::memory_order_release); } +int32_t GetHashtablezSampleParameter() { + return g_hashtablez_sample_parameter.load(std::memory_order_acquire); +} + void SetHashtablezSampleParameter(int32_t rate) { + SetHashtablezSampleParameterInternal(rate); + TriggerHashtablezConfigListener(); +} + +void SetHashtablezSampleParameterInternal(int32_t rate) { if (rate > 0) { g_hashtablez_sample_parameter.store(rate, std::memory_order_release); } else { @@ -176,7 +215,16 @@ void SetHashtablezSampleParameter(int32_t rate) { } } +int32_t GetHashtablezMaxSamples() { + return GlobalHashtablezSampler().GetMaxSamples(); +} + void SetHashtablezMaxSamples(int32_t max) { + SetHashtablezMaxSamplesInternal(max); + TriggerHashtablezConfigListener(); +} + +void SetHashtablezMaxSamplesInternal(int32_t max) { if (max > 0) { GlobalHashtablezSampler().SetMaxSamples(max); } else { diff --git a/contrib/restricted/abseil-cpp/absl/container/internal/hashtablez_sampler.h b/contrib/restricted/abseil-cpp/absl/container/internal/hashtablez_sampler.h index 91fcdb34a3..d4016d8a9f 100644 --- a/contrib/restricted/abseil-cpp/absl/container/internal/hashtablez_sampler.h +++ b/contrib/restricted/abseil-cpp/absl/container/internal/hashtablez_sampler.h @@ -44,9 +44,9 @@ #include <memory> #include <vector> +#include "absl/base/config.h" #include "absl/base/internal/per_thread_tls.h" #include "absl/base/optimization.h" -#include "absl/container/internal/have_sse.h" #include "absl/profiling/internal/sample_recorder.h" #include "absl/synchronization/mutex.h" #include "absl/utility/utility.h" @@ -67,7 +67,8 @@ struct HashtablezInfo : public profiling_internal::Sample<HashtablezInfo> { // Puts the object into a clean state, fills in the logically `const` members, // blocking for any readers that are currently sampling the object. - void PrepareForSampling() ABSL_EXCLUSIVE_LOCKS_REQUIRED(init_mu); + void PrepareForSampling(int64_t stride, size_t inline_element_size_value) + ABSL_EXCLUSIVE_LOCKS_REQUIRED(init_mu); // These fields are mutated by the various Record* APIs and need to be // thread-safe. @@ -84,18 +85,18 @@ struct HashtablezInfo : public profiling_internal::Sample<HashtablezInfo> { // All of the fields below are set by `PrepareForSampling`, they must not be // mutated in `Record*` functions. They are logically `const` in that sense. - // These are guarded by init_mu, but that is not externalized to clients, who - // can only read them during `HashtablezSampler::Iterate` which will hold the - // lock. + // These are guarded by init_mu, but that is not externalized to clients, + // which can read them only during `SampleRecorder::Iterate` which will hold + // the lock. static constexpr int kMaxStackDepth = 64; absl::Time create_time; int32_t depth; void* stack[kMaxStackDepth]; - size_t inline_element_size; + size_t inline_element_size; // How big is the slot? }; inline void RecordRehashSlow(HashtablezInfo* info, size_t total_probe_length) { -#if ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSE2 +#ifdef ABSL_INTERNAL_HAVE_SSE2 total_probe_length /= 16; #else total_probe_length /= 8; @@ -144,7 +145,15 @@ inline void RecordEraseSlow(HashtablezInfo* info) { std::memory_order_relaxed); } -HashtablezInfo* SampleSlow(int64_t* next_sample, size_t inline_element_size); +struct SamplingState { + int64_t next_sample; + // When we make a sampling decision, we record that distance so we can weight + // each sample. + int64_t sample_stride; +}; + +HashtablezInfo* SampleSlow(SamplingState& next_sample, + size_t inline_element_size); void UnsampleSlow(HashtablezInfo* info); #if defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE) @@ -234,7 +243,7 @@ class HashtablezInfoHandle { #endif // defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE) #if defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE) -extern ABSL_PER_THREAD_TLS_KEYWORD int64_t global_next_sample; +extern ABSL_PER_THREAD_TLS_KEYWORD SamplingState global_next_sample; #endif // defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE) // Returns an RAII sampling handle that manages registration and unregistation @@ -242,11 +251,11 @@ extern ABSL_PER_THREAD_TLS_KEYWORD int64_t global_next_sample; inline HashtablezInfoHandle Sample( size_t inline_element_size ABSL_ATTRIBUTE_UNUSED) { #if defined(ABSL_INTERNAL_HASHTABLEZ_SAMPLE) - if (ABSL_PREDICT_TRUE(--global_next_sample > 0)) { + if (ABSL_PREDICT_TRUE(--global_next_sample.next_sample > 0)) { return HashtablezInfoHandle(nullptr); } return HashtablezInfoHandle( - SampleSlow(&global_next_sample, inline_element_size)); + SampleSlow(global_next_sample, inline_element_size)); #else return HashtablezInfoHandle(nullptr); #endif // !ABSL_PER_THREAD_TLS @@ -258,14 +267,23 @@ using HashtablezSampler = // Returns a global Sampler. HashtablezSampler& GlobalHashtablezSampler(); +using HashtablezConfigListener = void (*)(); +void SetHashtablezConfigListener(HashtablezConfigListener l); + // Enables or disables sampling for Swiss tables. +bool IsHashtablezEnabled(); void SetHashtablezEnabled(bool enabled); +void SetHashtablezEnabledInternal(bool enabled); // Sets the rate at which Swiss tables will be sampled. +int32_t GetHashtablezSampleParameter(); void SetHashtablezSampleParameter(int32_t rate); +void SetHashtablezSampleParameterInternal(int32_t rate); // Sets a soft max for the number of samples that will be kept. +int32_t GetHashtablezMaxSamples(); void SetHashtablezMaxSamples(int32_t max); +void SetHashtablezMaxSamplesInternal(int32_t max); // Configuration override. // This allows process-wide sampling without depending on order of diff --git a/contrib/restricted/abseil-cpp/absl/container/internal/have_sse.h b/contrib/restricted/abseil-cpp/absl/container/internal/have_sse.h deleted file mode 100644 index e75e1a16d3..0000000000 --- a/contrib/restricted/abseil-cpp/absl/container/internal/have_sse.h +++ /dev/null @@ -1,50 +0,0 @@ -// Copyright 2018 The Abseil Authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// Shared config probing for SSE instructions used in Swiss tables. -#ifndef ABSL_CONTAINER_INTERNAL_HAVE_SSE_H_ -#define ABSL_CONTAINER_INTERNAL_HAVE_SSE_H_ - -#ifndef ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSE2 -#if defined(__SSE2__) || \ - (defined(_MSC_VER) && \ - (defined(_M_X64) || (defined(_M_IX86) && _M_IX86_FP >= 2))) -#define ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSE2 1 -#else -#define ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSE2 0 -#endif -#endif - -#ifndef ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSSE3 -#ifdef __SSSE3__ -#define ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSSE3 1 -#else -#define ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSSE3 0 -#endif -#endif - -#if ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSSE3 && \ - !ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSE2 -#error "Bad configuration!" -#endif - -#if ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSE2 -#include <emmintrin.h> -#endif - -#if ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSSE3 -#include <tmmintrin.h> -#endif - -#endif // ABSL_CONTAINER_INTERNAL_HAVE_SSE_H_ diff --git a/contrib/restricted/abseil-cpp/absl/container/internal/inlined_vector.h b/contrib/restricted/abseil-cpp/absl/container/internal/inlined_vector.h index 1d7d6cda72..54c92a0106 100644 --- a/contrib/restricted/abseil-cpp/absl/container/internal/inlined_vector.h +++ b/contrib/restricted/abseil-cpp/absl/container/internal/inlined_vector.h @@ -40,7 +40,6 @@ namespace inlined_vector_internal { #if !defined(__clang__) && defined(__GNUC__) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Warray-bounds" -#pragma GCC diagnostic ignored "-Wmaybe-uninitialized" #endif template <typename A> @@ -94,16 +93,30 @@ struct TypeIdentity { template <typename T> using NoTypeDeduction = typename TypeIdentity<T>::type; +template <typename A, bool IsTriviallyDestructible = + absl::is_trivially_destructible<ValueType<A>>::value> +struct DestroyAdapter; + template <typename A> -void DestroyElements(NoTypeDeduction<A>& allocator, Pointer<A> destroy_first, - SizeType<A> destroy_size) { - if (destroy_first != nullptr) { +struct DestroyAdapter<A, /* IsTriviallyDestructible */ false> { + static void DestroyElements(A& allocator, Pointer<A> destroy_first, + SizeType<A> destroy_size) { for (SizeType<A> i = destroy_size; i != 0;) { --i; AllocatorTraits<A>::destroy(allocator, destroy_first + i); } } -} +}; + +template <typename A> +struct DestroyAdapter<A, /* IsTriviallyDestructible */ true> { + static void DestroyElements(A& allocator, Pointer<A> destroy_first, + SizeType<A> destroy_size) { + static_cast<void>(allocator); + static_cast<void>(destroy_first); + static_cast<void>(destroy_size); + } +}; template <typename A> struct Allocation { @@ -133,7 +146,7 @@ void ConstructElements(NoTypeDeduction<A>& allocator, for (SizeType<A> i = 0; i < construct_size; ++i) { ABSL_INTERNAL_TRY { values.ConstructNext(allocator, construct_first + i); } ABSL_INTERNAL_CATCH_ANY { - DestroyElements<A>(allocator, construct_first, i); + DestroyAdapter<A>::DestroyElements(allocator, construct_first, i); ABSL_INTERNAL_RETHROW; } } @@ -253,7 +266,7 @@ class ConstructionTransaction { ~ConstructionTransaction() { if (DidConstruct()) { - DestroyElements<A>(GetAllocator(), GetData(), GetSize()); + DestroyAdapter<A>::DestroyElements(GetAllocator(), GetData(), GetSize()); } } @@ -297,10 +310,10 @@ class Storage { // Storage Constructors and Destructor // --------------------------------------------------------------------------- - Storage() : metadata_(A(), /* size and is_allocated */ 0) {} + Storage() : metadata_(A(), /* size and is_allocated */ 0u) {} explicit Storage(const A& allocator) - : metadata_(allocator, /* size and is_allocated */ 0) {} + : metadata_(allocator, /* size and is_allocated */ 0u) {} ~Storage() { if (GetSizeAndIsAllocated() == 0) { @@ -416,7 +429,7 @@ class Storage { } void SubtractSize(SizeType<A> count) { - assert(count <= GetSize()); + ABSL_HARDENING_ASSERT(count <= GetSize()); GetSizeAndIsAllocated() -= count << static_cast<SizeType<A>>(1); } @@ -427,7 +440,8 @@ class Storage { } void MemcpyFrom(const Storage& other_storage) { - assert(IsMemcpyOk<A>::value || other_storage.GetIsAllocated()); + ABSL_HARDENING_ASSERT(IsMemcpyOk<A>::value || + other_storage.GetIsAllocated()); GetSizeAndIsAllocated() = other_storage.GetSizeAndIsAllocated(); data_ = other_storage.data_; @@ -469,14 +483,14 @@ class Storage { template <typename T, size_t N, typename A> void Storage<T, N, A>::DestroyContents() { Pointer<A> data = GetIsAllocated() ? GetAllocatedData() : GetInlinedData(); - DestroyElements<A>(GetAllocator(), data, GetSize()); + DestroyAdapter<A>::DestroyElements(GetAllocator(), data, GetSize()); DeallocateIfAllocated(); } template <typename T, size_t N, typename A> void Storage<T, N, A>::InitFrom(const Storage& other) { const SizeType<A> n = other.GetSize(); - assert(n > 0); // Empty sources handled handled in caller. + ABSL_HARDENING_ASSERT(n > 0); // Empty sources handled handled in caller. ConstPointer<A> src; Pointer<A> dst; if (!other.GetIsAllocated()) { @@ -508,8 +522,8 @@ template <typename ValueAdapter> auto Storage<T, N, A>::Initialize(ValueAdapter values, SizeType<A> new_size) -> void { // Only callable from constructors! - assert(!GetIsAllocated()); - assert(GetSize() == 0); + ABSL_HARDENING_ASSERT(!GetIsAllocated()); + ABSL_HARDENING_ASSERT(GetSize() == 0); Pointer<A> construct_data; if (new_size > GetInlinedCapacity()) { @@ -566,7 +580,8 @@ auto Storage<T, N, A>::Assign(ValueAdapter values, SizeType<A> new_size) ConstructElements<A>(GetAllocator(), construct_loop.data(), values, construct_loop.size()); - DestroyElements<A>(GetAllocator(), destroy_loop.data(), destroy_loop.size()); + DestroyAdapter<A>::DestroyElements(GetAllocator(), destroy_loop.data(), + destroy_loop.size()); if (allocation_tx.DidAllocate()) { DeallocateIfAllocated(); @@ -587,7 +602,7 @@ auto Storage<T, N, A>::Resize(ValueAdapter values, SizeType<A> new_size) A& alloc = GetAllocator(); if (new_size <= size) { // Destroy extra old elements. - DestroyElements<A>(alloc, base + new_size, size - new_size); + DestroyAdapter<A>::DestroyElements(alloc, base + new_size, size - new_size); } else if (new_size <= storage_view.capacity) { // Construct new elements in place. ConstructElements<A>(alloc, base + size, values, new_size - size); @@ -595,7 +610,7 @@ auto Storage<T, N, A>::Resize(ValueAdapter values, SizeType<A> new_size) // Steps: // a. Allocate new backing store. // b. Construct new elements in new backing store. - // c. Move existing elements from old backing store to now. + // c. Move existing elements from old backing store to new backing store. // d. Destroy all elements in old backing store. // Use transactional wrappers for the first two steps so we can roll // back if necessary due to exceptions. @@ -611,7 +626,7 @@ auto Storage<T, N, A>::Resize(ValueAdapter values, SizeType<A> new_size) (MoveIterator<A>(base))); ConstructElements<A>(alloc, new_data, move_values, size); - DestroyElements<A>(alloc, base, size); + DestroyAdapter<A>::DestroyElements(alloc, base, size); std::move(construction_tx).Commit(); DeallocateIfAllocated(); SetAllocation(std::move(allocation_tx).Release()); @@ -650,7 +665,8 @@ auto Storage<T, N, A>::Insert(ConstIterator<A> pos, ValueAdapter values, ConstructElements<A>(GetAllocator(), new_data + insert_end_index, move_values, storage_view.size - insert_index); - DestroyElements<A>(GetAllocator(), storage_view.data, storage_view.size); + DestroyAdapter<A>::DestroyElements(GetAllocator(), storage_view.data, + storage_view.size); std::move(construction_tx).Commit(); std::move(move_construction_tx).Commit(); @@ -753,7 +769,8 @@ auto Storage<T, N, A>::EmplaceBackSlow(Args&&... args) -> Reference<A> { ABSL_INTERNAL_RETHROW; } // Destroy elements in old backing store. - DestroyElements<A>(GetAllocator(), storage_view.data, storage_view.size); + DestroyAdapter<A>::DestroyElements(GetAllocator(), storage_view.data, + storage_view.size); DeallocateIfAllocated(); SetAllocation(std::move(allocation_tx).Release()); @@ -778,9 +795,9 @@ auto Storage<T, N, A>::Erase(ConstIterator<A> from, ConstIterator<A> to) AssignElements<A>(storage_view.data + erase_index, move_values, storage_view.size - erase_end_index); - DestroyElements<A>(GetAllocator(), - storage_view.data + (storage_view.size - erase_size), - erase_size); + DestroyAdapter<A>::DestroyElements( + GetAllocator(), storage_view.data + (storage_view.size - erase_size), + erase_size); SubtractSize(erase_size); return Iterator<A>(storage_view.data + erase_index); @@ -804,7 +821,8 @@ auto Storage<T, N, A>::Reserve(SizeType<A> requested_capacity) -> void { ConstructElements<A>(GetAllocator(), new_data, move_values, storage_view.size); - DestroyElements<A>(GetAllocator(), storage_view.data, storage_view.size); + DestroyAdapter<A>::DestroyElements(GetAllocator(), storage_view.data, + storage_view.size); DeallocateIfAllocated(); SetAllocation(std::move(allocation_tx).Release()); @@ -814,7 +832,7 @@ auto Storage<T, N, A>::Reserve(SizeType<A> requested_capacity) -> void { template <typename T, size_t N, typename A> auto Storage<T, N, A>::ShrinkToFit() -> void { // May only be called on allocated instances! - assert(GetIsAllocated()); + ABSL_HARDENING_ASSERT(GetIsAllocated()); StorageView<A> storage_view{GetAllocatedData(), GetSize(), GetAllocatedCapacity()}; @@ -847,7 +865,8 @@ auto Storage<T, N, A>::ShrinkToFit() -> void { ABSL_INTERNAL_RETHROW; } - DestroyElements<A>(GetAllocator(), storage_view.data, storage_view.size); + DestroyAdapter<A>::DestroyElements(GetAllocator(), storage_view.data, + storage_view.size); MallocAdapter<A>::Deallocate(GetAllocator(), storage_view.data, storage_view.capacity); @@ -862,7 +881,7 @@ auto Storage<T, N, A>::ShrinkToFit() -> void { template <typename T, size_t N, typename A> auto Storage<T, N, A>::Swap(Storage* other_storage_ptr) -> void { using std::swap; - assert(this != other_storage_ptr); + ABSL_HARDENING_ASSERT(this != other_storage_ptr); if (GetIsAllocated() && other_storage_ptr->GetIsAllocated()) { swap(data_.allocated, other_storage_ptr->data_.allocated); @@ -883,9 +902,10 @@ auto Storage<T, N, A>::Swap(Storage* other_storage_ptr) -> void { move_values, large_ptr->GetSize() - small_ptr->GetSize()); - DestroyElements<A>(large_ptr->GetAllocator(), - large_ptr->GetInlinedData() + small_ptr->GetSize(), - large_ptr->GetSize() - small_ptr->GetSize()); + DestroyAdapter<A>::DestroyElements( + large_ptr->GetAllocator(), + large_ptr->GetInlinedData() + small_ptr->GetSize(), + large_ptr->GetSize() - small_ptr->GetSize()); } else { Storage* allocated_ptr = this; Storage* inlined_ptr = other_storage_ptr; @@ -904,23 +924,24 @@ auto Storage<T, N, A>::Swap(Storage* other_storage_ptr) -> void { inlined_ptr->GetSize()); } ABSL_INTERNAL_CATCH_ANY { - allocated_ptr->SetAllocation( - {allocated_storage_view.data, allocated_storage_view.capacity}); + allocated_ptr->SetAllocation(Allocation<A>{ + allocated_storage_view.data, allocated_storage_view.capacity}); ABSL_INTERNAL_RETHROW; } - DestroyElements<A>(inlined_ptr->GetAllocator(), - inlined_ptr->GetInlinedData(), inlined_ptr->GetSize()); + DestroyAdapter<A>::DestroyElements(inlined_ptr->GetAllocator(), + inlined_ptr->GetInlinedData(), + inlined_ptr->GetSize()); - inlined_ptr->SetAllocation( - {allocated_storage_view.data, allocated_storage_view.capacity}); + inlined_ptr->SetAllocation(Allocation<A>{allocated_storage_view.data, + allocated_storage_view.capacity}); } swap(GetSizeAndIsAllocated(), other_storage_ptr->GetSizeAndIsAllocated()); swap(GetAllocator(), other_storage_ptr->GetAllocator()); } -// End ignore "array-bounds" and "maybe-uninitialized" +// End ignore "array-bounds" #if !defined(__clang__) && defined(__GNUC__) #pragma GCC diagnostic pop #endif diff --git a/contrib/restricted/abseil-cpp/absl/container/internal/raw_hash_set.cc b/contrib/restricted/abseil-cpp/absl/container/internal/raw_hash_set.cc index 687bcb8a4d..c63a2e02d1 100644 --- a/contrib/restricted/abseil-cpp/absl/container/internal/raw_hash_set.cc +++ b/contrib/restricted/abseil-cpp/absl/container/internal/raw_hash_set.cc @@ -23,13 +23,17 @@ namespace absl { ABSL_NAMESPACE_BEGIN namespace container_internal { +// A single block of empty control bytes for tables without any slots allocated. +// This enables removing a branch in the hot path of find(). alignas(16) ABSL_CONST_INIT ABSL_DLL const ctrl_t kEmptyGroup[16] = { ctrl_t::kSentinel, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty, ctrl_t::kEmpty}; +#ifdef ABSL_INTERNAL_NEED_REDUNDANT_CONSTEXPR_DECL constexpr size_t Group::kWidth; +#endif // Returns "random" seed. inline size_t RandomSeed() { diff --git a/contrib/restricted/abseil-cpp/absl/container/internal/raw_hash_set.h b/contrib/restricted/abseil-cpp/absl/container/internal/raw_hash_set.h index 12682b3532..ea912f8305 100644 --- a/contrib/restricted/abseil-cpp/absl/container/internal/raw_hash_set.h +++ b/contrib/restricted/abseil-cpp/absl/container/internal/raw_hash_set.h @@ -53,51 +53,121 @@ // // IMPLEMENTATION DETAILS // -// The table stores elements inline in a slot array. In addition to the slot -// array the table maintains some control state per slot. The extra state is one -// byte per slot and stores empty or deleted marks, or alternatively 7 bits from -// the hash of an occupied slot. The table is split into logical groups of -// slots, like so: +// # Table Layout +// +// A raw_hash_set's backing array consists of control bytes followed by slots +// that may or may not contain objects. +// +// The layout of the backing array, for `capacity` slots, is thus, as a +// pseudo-struct: +// +// struct BackingArray { +// // Control bytes for the "real" slots. +// ctrl_t ctrl[capacity]; +// // Always `ctrl_t::kSentinel`. This is used by iterators to find when to +// // stop and serves no other purpose. +// ctrl_t sentinel; +// // A copy of the first `kWidth - 1` elements of `ctrl`. This is used so +// // that if a probe sequence picks a value near the end of `ctrl`, +// // `Group` will have valid control bytes to look at. +// ctrl_t clones[kWidth - 1]; +// // The actual slot data. +// slot_type slots[capacity]; +// }; +// +// The length of this array is computed by `AllocSize()` below. +// +// Control bytes (`ctrl_t`) are bytes (collected into groups of a +// platform-specific size) that define the state of the corresponding slot in +// the slot array. Group manipulation is tightly optimized to be as efficient +// as possible: SSE and friends on x86, clever bit operations on other arches. // // Group 1 Group 2 Group 3 // +---------------+---------------+---------------+ // | | | | | | | | | | | | | | | | | | | | | | | | | // +---------------+---------------+---------------+ // -// On lookup the hash is split into two parts: -// - H2: 7 bits (those stored in the control bytes) -// - H1: the rest of the bits -// The groups are probed using H1. For each group the slots are matched to H2 in -// parallel. Because H2 is 7 bits (128 states) and the number of slots per group -// is low (8 or 16) in almost all cases a match in H2 is also a lookup hit. +// Each control byte is either a special value for empty slots, deleted slots +// (sometimes called *tombstones*), and a special end-of-table marker used by +// iterators, or, if occupied, seven bits (H2) from the hash of the value in the +// corresponding slot. +// +// Storing control bytes in a separate array also has beneficial cache effects, +// since more logical slots will fit into a cache line. +// +// # Hashing +// +// We compute two separate hashes, `H1` and `H2`, from the hash of an object. +// `H1(hash(x))` is an index into `slots`, and essentially the starting point +// for the probe sequence. `H2(hash(x))` is a 7-bit value used to filter out +// objects that cannot possibly be the one we are looking for. +// +// # Table operations. // -// On insert, once the right group is found (as in lookup), its slots are -// filled in order. +// The key operations are `insert`, `find`, and `erase`. // -// On erase a slot is cleared. In case the group did not have any empty slots -// before the erase, the erased slot is marked as deleted. +// Since `insert` and `erase` are implemented in terms of `find`, we describe +// `find` first. To `find` a value `x`, we compute `hash(x)`. From +// `H1(hash(x))` and the capacity, we construct a `probe_seq` that visits every +// group of slots in some interesting order. // -// Groups without empty slots (but maybe with deleted slots) extend the probe -// sequence. The probing algorithm is quadratic. Given N the number of groups, -// the probing function for the i'th probe is: +// We now walk through these indices. At each index, we select the entire group +// starting with that index and extract potential candidates: occupied slots +// with a control byte equal to `H2(hash(x))`. If we find an empty slot in the +// group, we stop and return an error. Each candidate slot `y` is compared with +// `x`; if `x == y`, we are done and return `&y`; otherwise we contine to the +// next probe index. Tombstones effectively behave like full slots that never +// match the value we're looking for. // -// P(0) = H1 % N +// The `H2` bits ensure when we compare a slot to an object with `==`, we are +// likely to have actually found the object. That is, the chance is low that +// `==` is called and returns `false`. Thus, when we search for an object, we +// are unlikely to call `==` many times. This likelyhood can be analyzed as +// follows (assuming that H2 is a random enough hash function). // -// P(i) = (P(i - 1) + i) % N +// Let's assume that there are `k` "wrong" objects that must be examined in a +// probe sequence. For example, when doing a `find` on an object that is in the +// table, `k` is the number of objects between the start of the probe sequence +// and the final found object (not including the final found object). The +// expected number of objects with an H2 match is then `k/128`. Measurements +// and analysis indicate that even at high load factors, `k` is less than 32, +// meaning that the number of "false positive" comparisons we must perform is +// less than 1/8 per `find`. + +// `insert` is implemented in terms of `unchecked_insert`, which inserts a +// value presumed to not be in the table (violating this requirement will cause +// the table to behave erratically). Given `x` and its hash `hash(x)`, to insert +// it, we construct a `probe_seq` once again, and use it to find the first +// group with an unoccupied (empty *or* deleted) slot. We place `x` into the +// first such slot in the group and mark it as full with `x`'s H2. // -// This probing function guarantees that after N probes, all the groups of the -// table will be probed exactly once. +// To `insert`, we compose `unchecked_insert` with `find`. We compute `h(x)` and +// perform a `find` to see if it's already present; if it is, we're done. If +// it's not, we may decide the table is getting overcrowded (i.e. the load +// factor is greater than 7/8 for big tables; `is_small()` tables use a max load +// factor of 1); in this case, we allocate a bigger array, `unchecked_insert` +// each element of the table into the new array (we know that no insertion here +// will insert an already-present value), and discard the old backing array. At +// this point, we may `unchecked_insert` the value `x`. // -// The control state and slot array are stored contiguously in a shared heap -// allocation. The layout of this allocation is: `capacity()` control bytes, -// one sentinel control byte, `Group::kWidth - 1` cloned control bytes, -// <possible padding>, `capacity()` slots. The sentinel control byte is used in -// iteration so we know when we reach the end of the table. The cloned control -// bytes at the end of the table are cloned from the beginning of the table so -// groups that begin near the end of the table can see a full group. In cases in -// which there are more than `capacity()` cloned control bytes, the extra bytes -// are `kEmpty`, and these ensure that we always see at least one empty slot and -// can stop an unsuccessful search. +// Below, `unchecked_insert` is partly implemented by `prepare_insert`, which +// presents a viable, initialized slot pointee to the caller. +// +// `erase` is implemented in terms of `erase_at`, which takes an index to a +// slot. Given an offset, we simply create a tombstone and destroy its contents. +// If we can prove that the slot would not appear in a probe sequence, we can +// make the slot as empty, instead. We can prove this by observing that if a +// group has any empty slots, it has never been full (assuming we never create +// an empty slot in a group with no empties, which this heuristic guarantees we +// never do) and find would stop at this group anyways (since it does not probe +// beyond groups with empties). +// +// `erase` is `erase_at` composed with `find`: if we +// have a value `x`, we can perform a `find`, and then `erase_at` the resulting +// slot. +// +// To iterate, we simply traverse the array, skipping empty and deleted slots +// and stopping when we hit a `kSentinel`. #ifndef ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_ #define ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_ @@ -113,7 +183,9 @@ #include <type_traits> #include <utility> +#include "absl/base/config.h" #include "absl/base/internal/endian.h" +#include "absl/base/internal/prefetch.h" #include "absl/base/optimization.h" #include "absl/base/port.h" #include "absl/container/internal/common.h" @@ -122,12 +194,27 @@ #include "absl/container/internal/hash_policy_traits.h" #include "absl/container/internal/hashtable_debug_hooks.h" #include "absl/container/internal/hashtablez_sampler.h" -#include "absl/container/internal/have_sse.h" #include "absl/memory/memory.h" #include "absl/meta/type_traits.h" #include "absl/numeric/bits.h" #include "absl/utility/utility.h" +#ifdef ABSL_INTERNAL_HAVE_SSE2 +#include <emmintrin.h> +#endif + +#ifdef ABSL_INTERNAL_HAVE_SSSE3 +#include <tmmintrin.h> +#endif + +#ifdef _MSC_VER +#include <intrin.h> +#endif + +#ifdef ABSL_INTERNAL_HAVE_ARM_NEON +#include <arm_neon.h> +#endif + namespace absl { ABSL_NAMESPACE_BEGIN namespace container_internal { @@ -142,14 +229,40 @@ template <typename AllocType> void SwapAlloc(AllocType& /*lhs*/, AllocType& /*rhs*/, std::false_type /* propagate_on_container_swap */) {} +// The state for a probe sequence. +// +// Currently, the sequence is a triangular progression of the form +// +// p(i) := Width * (i^2 + i)/2 + hash (mod mask + 1) +// +// The use of `Width` ensures that each probe step does not overlap groups; +// the sequence effectively outputs the addresses of *groups* (although not +// necessarily aligned to any boundary). The `Group` machinery allows us +// to check an entire group with minimal branching. +// +// Wrapping around at `mask + 1` is important, but not for the obvious reason. +// As described above, the first few entries of the control byte array +// are mirrored at the end of the array, which `Group` will find and use +// for selecting candidates. However, when those candidates' slots are +// actually inspected, there are no corresponding slots for the cloned bytes, +// so we need to make sure we've treated those offsets as "wrapping around". +// +// It turns out that this probe sequence visits every group exactly once if the +// number of groups is a power of two, since (i^2+i)/2 is a bijection in +// Z/(2^m). See https://en.wikipedia.org/wiki/Quadratic_probing template <size_t Width> class probe_seq { public: + // Creates a new probe sequence using `hash` as the initial value of the + // sequence and `mask` (usually the capacity of the table) as the mask to + // apply to each value in the progression. probe_seq(size_t hash, size_t mask) { assert(((mask + 1) & mask) == 0 && "not a mask"); mask_ = mask; offset_ = hash & mask_; } + + // The offset within the table, i.e., the value `p(i)` above. size_t offset() const { return offset_; } size_t offset(size_t i) const { return (offset_ + i) & mask_; } @@ -158,7 +271,7 @@ class probe_seq { offset_ += index_; offset_ &= mask_; } - // 0-based probe index. The i-th probe in the probe sequence. + // 0-based probe index, a multiple of `Width`. size_t index() const { return index_; } private: @@ -182,9 +295,9 @@ struct IsDecomposable : std::false_type {}; template <class Policy, class Hash, class Eq, class... Ts> struct IsDecomposable< - absl::void_t<decltype( - Policy::apply(RequireUsableKey<typename Policy::key_type, Hash, Eq>(), - std::declval<Ts>()...))>, + absl::void_t<decltype(Policy::apply( + RequireUsableKey<typename Policy::key_type, Hash, Eq>(), + std::declval<Ts>()...))>, Policy, Hash, Eq, Ts...> : std::true_type {}; // TODO(alkis): Switch to std::is_nothrow_swappable when gcc/clang supports it. @@ -200,57 +313,84 @@ constexpr bool IsNoThrowSwappable(std::false_type /* is_swappable */) { template <typename T> uint32_t TrailingZeros(T x) { - ABSL_INTERNAL_ASSUME(x != 0); - return countr_zero(x); + ABSL_ASSUME(x != 0); + return static_cast<uint32_t>(countr_zero(x)); } -// An abstraction over a bitmask. It provides an easy way to iterate through the -// indexes of the set bits of a bitmask. When Shift=0 (platforms with SSE), -// this is a true bitmask. On non-SSE, platforms the arithematic used to -// emulate the SSE behavior works in bytes (Shift=3) and leaves each bytes as -// either 0x00 or 0x80. +// An abstract bitmask, such as that emitted by a SIMD instruction. // -// For example: -// for (int i : BitMask<uint32_t, 16>(0x5)) -> yields 0, 2 -// for (int i : BitMask<uint64_t, 8, 3>(0x0000000080800000)) -> yields 2, 3 +// Specifically, this type implements a simple bitset whose representation is +// controlled by `SignificantBits` and `Shift`. `SignificantBits` is the number +// of abstract bits in the bitset, while `Shift` is the log-base-two of the +// width of an abstract bit in the representation. +// This mask provides operations for any number of real bits set in an abstract +// bit. To add iteration on top of that, implementation must guarantee no more +// than one real bit is set in an abstract bit. template <class T, int SignificantBits, int Shift = 0> -class BitMask { - static_assert(std::is_unsigned<T>::value, ""); - static_assert(Shift == 0 || Shift == 3, ""); - +class NonIterableBitMask { public: - // These are useful for unit tests (gunit). - using value_type = int; - using iterator = BitMask; - using const_iterator = BitMask; + explicit NonIterableBitMask(T mask) : mask_(mask) {} - explicit BitMask(T mask) : mask_(mask) {} - BitMask& operator++() { - mask_ &= (mask_ - 1); - return *this; - } - explicit operator bool() const { return mask_ != 0; } - int operator*() const { return LowestBitSet(); } + explicit operator bool() const { return this->mask_ != 0; } + + // Returns the index of the lowest *abstract* bit set in `self`. uint32_t LowestBitSet() const { return container_internal::TrailingZeros(mask_) >> Shift; } + + // Returns the index of the highest *abstract* bit set in `self`. uint32_t HighestBitSet() const { return static_cast<uint32_t>((bit_width(mask_) - 1) >> Shift); } - BitMask begin() const { return *this; } - BitMask end() const { return BitMask(0); } - + // Return the number of trailing zero *abstract* bits. uint32_t TrailingZeros() const { return container_internal::TrailingZeros(mask_) >> Shift; } + // Return the number of leading zero *abstract* bits. uint32_t LeadingZeros() const { constexpr int total_significant_bits = SignificantBits << Shift; constexpr int extra_bits = sizeof(T) * 8 - total_significant_bits; - return countl_zero(mask_ << extra_bits) >> Shift; + return static_cast<uint32_t>(countl_zero(mask_ << extra_bits)) >> Shift; } + T mask_; +}; + +// Mask that can be iterable +// +// For example, when `SignificantBits` is 16 and `Shift` is zero, this is just +// an ordinary 16-bit bitset occupying the low 16 bits of `mask`. When +// `SignificantBits` is 8 and `Shift` is 3, abstract bits are represented as +// the bytes `0x00` and `0x80`, and it occupies all 64 bits of the bitmask. +// +// For example: +// for (int i : BitMask<uint32_t, 16>(0b101)) -> yields 0, 2 +// for (int i : BitMask<uint64_t, 8, 3>(0x0000000080800000)) -> yields 2, 3 +template <class T, int SignificantBits, int Shift = 0> +class BitMask : public NonIterableBitMask<T, SignificantBits, Shift> { + using Base = NonIterableBitMask<T, SignificantBits, Shift>; + static_assert(std::is_unsigned<T>::value, ""); + static_assert(Shift == 0 || Shift == 3, ""); + + public: + explicit BitMask(T mask) : Base(mask) {} + // BitMask is an iterator over the indices of its abstract bits. + using value_type = int; + using iterator = BitMask; + using const_iterator = BitMask; + + BitMask& operator++() { + this->mask_ &= (this->mask_ - 1); + return *this; + } + + uint32_t operator*() const { return Base::LowestBitSet(); } + + BitMask begin() const { return *this; } + BitMask end() const { return BitMask(0); } + private: friend bool operator==(const BitMask& a, const BitMask& b) { return a.mask_ == b.mask_; @@ -258,15 +398,27 @@ class BitMask { friend bool operator!=(const BitMask& a, const BitMask& b) { return a.mask_ != b.mask_; } - - T mask_; }; using h2_t = uint8_t; // The values here are selected for maximum performance. See the static asserts -// below for details. We use an enum class so that when strict aliasing is -// enabled, the compiler knows ctrl_t doesn't alias other types. +// below for details. + +// A `ctrl_t` is a single control byte, which can have one of four +// states: empty, deleted, full (which has an associated seven-bit h2_t value) +// and the sentinel. They have the following bit patterns: +// +// empty: 1 0 0 0 0 0 0 0 +// deleted: 1 1 1 1 1 1 1 0 +// full: 0 h h h h h h h // h represents the hash bits. +// sentinel: 1 1 1 1 1 1 1 1 +// +// These values are specifically tuned for SSE-flavored SIMD. +// The static_asserts below detail the source of these choices. +// +// We use an enum class so that when strict aliasing is enabled, the compiler +// knows ctrl_t doesn't alias other types. enum class ctrl_t : int8_t { kEmpty = -128, // 0b10000000 kDeleted = -2, // 0b11111110 @@ -294,15 +446,17 @@ static_assert( static_cast<int8_t>(ctrl_t::kSentinel) & 0x7F) != 0, "ctrl_t::kEmpty and ctrl_t::kDeleted must share an unset bit that is not " "shared by ctrl_t::kSentinel to make the scalar test for " - "MatchEmptyOrDeleted() efficient"); + "MaskEmptyOrDeleted() efficient"); static_assert(ctrl_t::kDeleted == static_cast<ctrl_t>(-2), "ctrl_t::kDeleted must be -2 to make the implementation of " "ConvertSpecialToEmptyAndFullToDeleted efficient"); -// A single block of empty control bytes for tables without any slots allocated. -// This enables removing a branch in the hot path of find(). ABSL_DLL extern const ctrl_t kEmptyGroup[16]; + +// Returns a pointer to a control byte group that can be used by empty tables. inline ctrl_t* EmptyGroup() { + // Const must be cast away here; no uses of this function will actually write + // to it, because it is only used for empty tables. return const_cast<ctrl_t*>(kEmptyGroup); } @@ -310,28 +464,61 @@ inline ctrl_t* EmptyGroup() { // randomize insertion order within groups. bool ShouldInsertBackwards(size_t hash, const ctrl_t* ctrl); -// Returns a hash seed. +// Returns a per-table, hash salt, which changes on resize. This gets mixed into +// H1 to randomize iteration order per-table. // // The seed consists of the ctrl_ pointer, which adds enough entropy to ensure // non-determinism of iteration order in most cases. -inline size_t HashSeed(const ctrl_t* ctrl) { +inline size_t PerTableSalt(const ctrl_t* ctrl) { // The low bits of the pointer have little or no entropy because of // alignment. We shift the pointer to try to use higher entropy bits. A // good number seems to be 12 bits, because that aligns with page size. return reinterpret_cast<uintptr_t>(ctrl) >> 12; } - +// Extracts the H1 portion of a hash: 57 bits mixed with a per-table salt. inline size_t H1(size_t hash, const ctrl_t* ctrl) { - return (hash >> 7) ^ HashSeed(ctrl); + return (hash >> 7) ^ PerTableSalt(ctrl); } + +// Extracts the H2 portion of a hash: the 7 bits not used for H1. +// +// These are used as an occupied control byte. inline h2_t H2(size_t hash) { return hash & 0x7F; } +// Helpers for checking the state of a control byte. inline bool IsEmpty(ctrl_t c) { return c == ctrl_t::kEmpty; } inline bool IsFull(ctrl_t c) { return c >= static_cast<ctrl_t>(0); } inline bool IsDeleted(ctrl_t c) { return c == ctrl_t::kDeleted; } inline bool IsEmptyOrDeleted(ctrl_t c) { return c < ctrl_t::kSentinel; } -#if ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSE2 +#ifdef ABSL_INTERNAL_HAVE_SSE2 +// Quick reference guide for intrinsics used below: +// +// * __m128i: An XMM (128-bit) word. +// +// * _mm_setzero_si128: Returns a zero vector. +// * _mm_set1_epi8: Returns a vector with the same i8 in each lane. +// +// * _mm_subs_epi8: Saturating-subtracts two i8 vectors. +// * _mm_and_si128: Ands two i128s together. +// * _mm_or_si128: Ors two i128s together. +// * _mm_andnot_si128: And-nots two i128s together. +// +// * _mm_cmpeq_epi8: Component-wise compares two i8 vectors for equality, +// filling each lane with 0x00 or 0xff. +// * _mm_cmpgt_epi8: Same as above, but using > rather than ==. +// +// * _mm_loadu_si128: Performs an unaligned load of an i128. +// * _mm_storeu_si128: Performs an unaligned store of an i128. +// +// * _mm_sign_epi8: Retains, negates, or zeroes each i8 lane of the first +// argument if the corresponding lane of the second +// argument is positive, negative, or zero, respectively. +// * _mm_movemask_epi8: Selects the sign bit out of each i8 lane and produces a +// bitmask consisting of those bits. +// * _mm_shuffle_epi8: Selects i8s from the first argument, using the low +// four bits of each i8 lane in the second argument as +// indices. // https://github.com/abseil/abseil-cpp/issues/209 // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=87853 @@ -360,30 +547,32 @@ struct GroupSse2Impl { BitMask<uint32_t, kWidth> Match(h2_t hash) const { auto match = _mm_set1_epi8(hash); return BitMask<uint32_t, kWidth>( - _mm_movemask_epi8(_mm_cmpeq_epi8(match, ctrl))); + static_cast<uint32_t>(_mm_movemask_epi8(_mm_cmpeq_epi8(match, ctrl)))); } // Returns a bitmask representing the positions of empty slots. - BitMask<uint32_t, kWidth> MatchEmpty() const { -#if ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSSE3 + NonIterableBitMask<uint32_t, kWidth> MaskEmpty() const { +#ifdef ABSL_INTERNAL_HAVE_SSSE3 // This only works because ctrl_t::kEmpty is -128. - return BitMask<uint32_t, kWidth>( - _mm_movemask_epi8(_mm_sign_epi8(ctrl, ctrl))); + return NonIterableBitMask<uint32_t, kWidth>( + static_cast<uint32_t>(_mm_movemask_epi8(_mm_sign_epi8(ctrl, ctrl)))); #else - return Match(static_cast<h2_t>(ctrl_t::kEmpty)); + auto match = _mm_set1_epi8(static_cast<h2_t>(ctrl_t::kEmpty)); + return NonIterableBitMask<uint32_t, kWidth>( + static_cast<uint32_t>(_mm_movemask_epi8(_mm_cmpeq_epi8(match, ctrl)))); #endif } // Returns a bitmask representing the positions of empty or deleted slots. - BitMask<uint32_t, kWidth> MatchEmptyOrDeleted() const { - auto special = _mm_set1_epi8(static_cast<int8_t>(ctrl_t::kSentinel)); - return BitMask<uint32_t, kWidth>( - _mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl))); + NonIterableBitMask<uint32_t, kWidth> MaskEmptyOrDeleted() const { + auto special = _mm_set1_epi8(static_cast<uint8_t>(ctrl_t::kSentinel)); + return NonIterableBitMask<uint32_t, kWidth>(static_cast<uint32_t>( + _mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl)))); } // Returns the number of trailing empty or deleted elements in the group. uint32_t CountLeadingEmptyOrDeleted() const { - auto special = _mm_set1_epi8(static_cast<int8_t>(ctrl_t::kSentinel)); + auto special = _mm_set1_epi8(static_cast<uint8_t>(ctrl_t::kSentinel)); return TrailingZeros(static_cast<uint32_t>( _mm_movemask_epi8(_mm_cmpgt_epi8_fixed(special, ctrl)) + 1)); } @@ -391,7 +580,7 @@ struct GroupSse2Impl { void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const { auto msbs = _mm_set1_epi8(static_cast<char>(-128)); auto x126 = _mm_set1_epi8(126); -#if ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSSE3 +#ifdef ABSL_INTERNAL_HAVE_SSSE3 auto res = _mm_or_si128(_mm_shuffle_epi8(x126, ctrl), msbs); #else auto zero = _mm_setzero_si128(); @@ -405,6 +594,63 @@ struct GroupSse2Impl { }; #endif // ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSE2 +#if defined(ABSL_INTERNAL_HAVE_ARM_NEON) && defined(ABSL_IS_LITTLE_ENDIAN) +struct GroupAArch64Impl { + static constexpr size_t kWidth = 8; + + explicit GroupAArch64Impl(const ctrl_t* pos) { + ctrl = vld1_u8(reinterpret_cast<const uint8_t*>(pos)); + } + + BitMask<uint64_t, kWidth, 3> Match(h2_t hash) const { + uint8x8_t dup = vdup_n_u8(hash); + auto mask = vceq_u8(ctrl, dup); + constexpr uint64_t msbs = 0x8080808080808080ULL; + return BitMask<uint64_t, kWidth, 3>( + vget_lane_u64(vreinterpret_u64_u8(mask), 0) & msbs); + } + + NonIterableBitMask<uint64_t, kWidth, 3> MaskEmpty() const { + uint64_t mask = + vget_lane_u64(vreinterpret_u64_u8( + vceq_s8(vdup_n_s8(static_cast<h2_t>(ctrl_t::kEmpty)), + vreinterpret_s8_u8(ctrl))), + 0); + return NonIterableBitMask<uint64_t, kWidth, 3>(mask); + } + + NonIterableBitMask<uint64_t, kWidth, 3> MaskEmptyOrDeleted() const { + uint64_t mask = + vget_lane_u64(vreinterpret_u64_u8(vcgt_s8( + vdup_n_s8(static_cast<int8_t>(ctrl_t::kSentinel)), + vreinterpret_s8_u8(ctrl))), + 0); + return NonIterableBitMask<uint64_t, kWidth, 3>(mask); + } + + uint32_t CountLeadingEmptyOrDeleted() const { + uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(ctrl), 0); + // ctrl | ~(ctrl >> 7) will have the lowest bit set to zero for kEmpty and + // kDeleted. We lower all other bits and count number of trailing zeros. + // Clang and GCC optimize countr_zero to rbit+clz without any check for 0, + // so we should be fine. + constexpr uint64_t bits = 0x0101010101010101ULL; + return countr_zero((mask | ~(mask >> 7)) & bits) >> 3; + } + + void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const { + uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(ctrl), 0); + constexpr uint64_t msbs = 0x8080808080808080ULL; + constexpr uint64_t lsbs = 0x0101010101010101ULL; + auto x = mask & msbs; + auto res = (~x + (x >> 7)) & ~lsbs; + little_endian::Store64(dst, res); + } + + uint8x8_t ctrl; +}; +#endif // ABSL_INTERNAL_HAVE_ARM_NEON && ABSL_IS_LITTLE_ENDIAN + struct GroupPortableImpl { static constexpr size_t kWidth = 8; @@ -431,19 +677,23 @@ struct GroupPortableImpl { return BitMask<uint64_t, kWidth, 3>((x - lsbs) & ~x & msbs); } - BitMask<uint64_t, kWidth, 3> MatchEmpty() const { + NonIterableBitMask<uint64_t, kWidth, 3> MaskEmpty() const { constexpr uint64_t msbs = 0x8080808080808080ULL; - return BitMask<uint64_t, kWidth, 3>((ctrl & (~ctrl << 6)) & msbs); + return NonIterableBitMask<uint64_t, kWidth, 3>((ctrl & (~ctrl << 6)) & + msbs); } - BitMask<uint64_t, kWidth, 3> MatchEmptyOrDeleted() const { + NonIterableBitMask<uint64_t, kWidth, 3> MaskEmptyOrDeleted() const { constexpr uint64_t msbs = 0x8080808080808080ULL; - return BitMask<uint64_t, kWidth, 3>((ctrl & (~ctrl << 7)) & msbs); + return NonIterableBitMask<uint64_t, kWidth, 3>((ctrl & (~ctrl << 7)) & + msbs); } uint32_t CountLeadingEmptyOrDeleted() const { - constexpr uint64_t gaps = 0x00FEFEFEFEFEFEFEULL; - return (TrailingZeros(((~ctrl & (ctrl >> 7)) | gaps) + 1) + 7) >> 3; + // ctrl | ~(ctrl >> 7) will have the lowest bit set to zero for kEmpty and + // kDeleted. We lower all other bits and count number of trailing zeros. + constexpr uint64_t bits = 0x0101010101010101ULL; + return countr_zero((ctrl | ~(ctrl >> 7)) & bits) >> 3; } void ConvertSpecialToEmptyAndFullToDeleted(ctrl_t* dst) const { @@ -457,32 +707,40 @@ struct GroupPortableImpl { uint64_t ctrl; }; -#if ABSL_INTERNAL_RAW_HASH_SET_HAVE_SSE2 +#ifdef ABSL_INTERNAL_HAVE_SSE2 using Group = GroupSse2Impl; +#elif defined(ABSL_INTERNAL_HAVE_ARM_NEON) && defined(ABSL_IS_LITTLE_ENDIAN) +using Group = GroupAArch64Impl; #else using Group = GroupPortableImpl; #endif -// The number of cloned control bytes that we copy from the beginning to the -// end of the control bytes array. +// Returns he number of "cloned control bytes". +// +// This is the number of control bytes that are present both at the beginning +// of the control byte array and at the end, such that we can create a +// `Group::kWidth`-width probe window starting from any control byte. constexpr size_t NumClonedBytes() { return Group::kWidth - 1; } template <class Policy, class Hash, class Eq, class Alloc> class raw_hash_set; +// Returns whether `n` is a valid capacity (i.e., number of slots). +// +// A valid capacity is a non-zero integer `2^m - 1`. inline bool IsValidCapacity(size_t n) { return ((n + 1) & n) == 0 && n > 0; } +// Applies the following mapping to every byte in the control array: +// * kDeleted -> kEmpty +// * kEmpty -> kEmpty +// * _ -> kDeleted // PRECONDITION: // IsValidCapacity(capacity) // ctrl[capacity] == ctrl_t::kSentinel // ctrl[i] != ctrl_t::kSentinel for all i < capacity -// Applies mapping for every byte in ctrl: -// DELETED -> EMPTY -// EMPTY -> EMPTY -// FULL -> DELETED void ConvertDeletedToEmptyAndFullToDeleted(ctrl_t* ctrl, size_t capacity); -// Rounds up the capacity to the next power of 2 minus 1, with a minimum of 1. +// Converts `n` into the next valid capacity, per `IsValidCapacity`. inline size_t NormalizeCapacity(size_t n) { return n ? ~size_t{} >> countl_zero(n) : 1; } @@ -495,8 +753,8 @@ inline size_t NormalizeCapacity(size_t n) { // never need to probe (the whole table fits in one group) so we don't need a // load factor less than 1. -// Given `capacity` of the table, returns the size (i.e. number of full slots) -// at which we should grow the capacity. +// Given `capacity`, applies the load factor; i.e., it returns the maximum +// number of values we should put into the table before a resizing rehash. inline size_t CapacityToGrowth(size_t capacity) { assert(IsValidCapacity(capacity)); // `capacity*7/8` @@ -506,8 +764,12 @@ inline size_t CapacityToGrowth(size_t capacity) { } return capacity - capacity / 8; } -// From desired "growth" to a lowerbound of the necessary capacity. -// Might not be a valid one and requires NormalizeCapacity(). + +// Given `growth`, "unapplies" the load factor to find how large the capacity +// should be to stay within the load factor. +// +// This might not be a valid capacity and `NormalizeCapacity()` should be +// called on this. inline size_t GrowthToLowerboundCapacity(size_t growth) { // `growth*8/7` if (Group::kWidth == 8 && growth == 7) { @@ -533,16 +795,15 @@ size_t SelectBucketCountForIterRange(InputIter first, InputIter last, return 0; } -inline void AssertIsFull(ctrl_t* ctrl) { - ABSL_HARDENING_ASSERT((ctrl != nullptr && IsFull(*ctrl)) && - "Invalid operation on iterator. The element might have " - "been erased, or the table might have rehashed."); -} +#define ABSL_INTERNAL_ASSERT_IS_FULL(ctrl, msg) \ + ABSL_HARDENING_ASSERT((ctrl != nullptr && IsFull(*ctrl)) && msg) inline void AssertIsValid(ctrl_t* ctrl) { - ABSL_HARDENING_ASSERT((ctrl == nullptr || IsFull(*ctrl)) && - "Invalid operation on iterator. The element might have " - "been erased, or the table might have rehashed."); + ABSL_HARDENING_ASSERT( + (ctrl == nullptr || IsFull(*ctrl)) && + "Invalid operation on iterator. The element might have " + "been erased, the table might have rehashed, or this may " + "be an end() iterator."); } struct FindInfo { @@ -550,44 +811,40 @@ struct FindInfo { size_t probe_length; }; -// The representation of the object has two modes: -// - small: For capacities < kWidth-1 -// - large: For the rest. +// Whether a table is "small". A small table fits entirely into a probing +// group, i.e., has a capacity < `Group::kWidth`. // -// Differences: -// - In small mode we are able to use the whole capacity. The extra control -// bytes give us at least one "empty" control byte to stop the iteration. -// This is important to make 1 a valid capacity. +// In small mode we are able to use the whole capacity. The extra control +// bytes give us at least one "empty" control byte to stop the iteration. +// This is important to make 1 a valid capacity. // -// - In small mode only the first `capacity()` control bytes after the -// sentinel are valid. The rest contain dummy ctrl_t::kEmpty values that do not -// represent a real slot. This is important to take into account on -// find_first_non_full(), where we never try ShouldInsertBackwards() for -// small tables. +// In small mode only the first `capacity` control bytes after the sentinel +// are valid. The rest contain dummy ctrl_t::kEmpty values that do not +// represent a real slot. This is important to take into account on +// `find_first_non_full()`, where we never try +// `ShouldInsertBackwards()` for small tables. inline bool is_small(size_t capacity) { return capacity < Group::kWidth - 1; } +// Begins a probing operation on `ctrl`, using `hash`. inline probe_seq<Group::kWidth> probe(const ctrl_t* ctrl, size_t hash, size_t capacity) { return probe_seq<Group::kWidth>(H1(hash, ctrl), capacity); } -// Probes the raw_hash_set with the probe sequence for hash and returns the -// pointer to the first empty or deleted slot. -// NOTE: this function must work with tables having both ctrl_t::kEmpty and -// ctrl_t::kDeleted in one group. Such tables appears during -// drop_deletes_without_resize. +// Probes an array of control bits using a probe sequence derived from `hash`, +// and returns the offset corresponding to the first deleted or empty slot. // -// This function is very useful when insertions happen and: -// - the input is already a set -// - there are enough slots -// - the element with the hash is not in the table +// Behavior when the entire table is full is undefined. +// +// NOTE: this function must work with tables having both empty and deleted +// slots in the same group. Such tables appear during `erase()`. template <typename = void> inline FindInfo find_first_non_full(const ctrl_t* ctrl, size_t hash, size_t capacity) { auto seq = probe(ctrl, hash, capacity); while (true) { Group g{ctrl + seq.offset()}; - auto mask = g.MatchEmptyOrDeleted(); + auto mask = g.MaskEmptyOrDeleted(); if (mask) { #if !defined(NDEBUG) // We want to add entropy even when ASLR is not enabled. @@ -610,7 +867,8 @@ inline FindInfo find_first_non_full(const ctrl_t* ctrl, size_t hash, // corresponding translation unit. extern template FindInfo find_first_non_full(const ctrl_t*, size_t, size_t); -// Reset all ctrl bytes back to ctrl_t::kEmpty, except the sentinel. +// Sets `ctrl` to `{kEmpty, kSentinel, ..., kEmpty}`, marking the entire +// array as marked as empty. inline void ResetCtrl(size_t capacity, ctrl_t* ctrl, const void* slot, size_t slot_size) { std::memset(ctrl, static_cast<int8_t>(ctrl_t::kEmpty), @@ -619,8 +877,10 @@ inline void ResetCtrl(size_t capacity, ctrl_t* ctrl, const void* slot, SanitizerPoisonMemoryRegion(slot, slot_size * capacity); } -// Sets the control byte, and if `i < NumClonedBytes()`, set the cloned byte -// at the end too. +// Sets `ctrl[i]` to `h`. +// +// Unlike setting it directly, this function will perform bounds checks and +// mirror the value to the cloned tail if necessary. inline void SetCtrl(size_t i, ctrl_t h, size_t capacity, ctrl_t* ctrl, const void* slot, size_t slot_size) { assert(i < capacity); @@ -636,25 +896,28 @@ inline void SetCtrl(size_t i, ctrl_t h, size_t capacity, ctrl_t* ctrl, ctrl[((i - NumClonedBytes()) & capacity) + (NumClonedBytes() & capacity)] = h; } +// Overload for setting to an occupied `h2_t` rather than a special `ctrl_t`. inline void SetCtrl(size_t i, h2_t h, size_t capacity, ctrl_t* ctrl, const void* slot, size_t slot_size) { SetCtrl(i, static_cast<ctrl_t>(h), capacity, ctrl, slot, slot_size); } -// The allocated block consists of `capacity + 1 + NumClonedBytes()` control -// bytes followed by `capacity` slots, which must be aligned to `slot_align`. -// SlotOffset returns the offset of the slots into the allocated block. +// Given the capacity of a table, computes the offset (from the start of the +// backing allocation) at which the slots begin. inline size_t SlotOffset(size_t capacity, size_t slot_align) { assert(IsValidCapacity(capacity)); const size_t num_control_bytes = capacity + 1 + NumClonedBytes(); return (num_control_bytes + slot_align - 1) & (~slot_align + 1); } -// Returns the size of the allocated block. See also above comment. +// Given the capacity of a table, computes the total size of the backing +// array. inline size_t AllocSize(size_t capacity, size_t slot_size, size_t slot_align) { return SlotOffset(capacity, slot_align) + capacity * slot_size; } +// A SwissTable. +// // Policy: a policy defines how to perform different operations on // the slots of the hashtable (see hash_policy_traits.h for the full interface // of policy). @@ -769,16 +1032,22 @@ class raw_hash_set { // PRECONDITION: not an end() iterator. reference operator*() const { - AssertIsFull(ctrl_); + ABSL_INTERNAL_ASSERT_IS_FULL(ctrl_, + "operator*() called on invalid iterator."); return PolicyTraits::element(slot_); } // PRECONDITION: not an end() iterator. - pointer operator->() const { return &operator*(); } + pointer operator->() const { + ABSL_INTERNAL_ASSERT_IS_FULL(ctrl_, + "operator-> called on invalid iterator."); + return &operator*(); + } // PRECONDITION: not an end() iterator. iterator& operator++() { - AssertIsFull(ctrl_); + ABSL_INTERNAL_ASSERT_IS_FULL(ctrl_, + "operator++ called on invalid iterator."); ++ctrl_; ++slot_; skip_empty_or_deleted(); @@ -804,9 +1073,13 @@ class raw_hash_set { iterator(ctrl_t* ctrl, slot_type* slot) : ctrl_(ctrl), slot_(slot) { // This assumption helps the compiler know that any non-end iterator is // not equal to any end iterator. - ABSL_INTERNAL_ASSUME(ctrl != nullptr); + ABSL_ASSUME(ctrl != nullptr); } + // Fixes up `ctrl_` to point to a full by advancing it and `slot_` until + // they reach one. + // + // If a sentinel is reached, we null both of them out instead. void skip_empty_or_deleted() { while (IsEmptyOrDeleted(*ctrl_)) { uint32_t shift = Group{ctrl_}.CountLeadingEmptyOrDeleted(); @@ -1103,8 +1376,7 @@ class raw_hash_set { // m.insert(std::make_pair("abc", 42)); // TODO(cheshire): A type alias T2 is introduced as a workaround for the nvcc // bug. - template <class T, RequiresInsertable<T> = 0, - class T2 = T, + template <class T, RequiresInsertable<T> = 0, class T2 = T, typename std::enable_if<IsDecomposable<T2>::value, int>::type = 0, T* = nullptr> std::pair<iterator, bool> insert(T&& value) { @@ -1324,7 +1596,8 @@ class raw_hash_set { // This overload is necessary because otherwise erase<K>(const K&) would be // a better match if non-const iterator is passed as an argument. void erase(iterator it) { - AssertIsFull(it.ctrl_); + ABSL_INTERNAL_ASSERT_IS_FULL(it.ctrl_, + "erase() called on invalid iterator."); PolicyTraits::destroy(&alloc_ref(), it.slot_); erase_meta_only(it); } @@ -1358,7 +1631,8 @@ class raw_hash_set { } node_type extract(const_iterator position) { - AssertIsFull(position.inner_.ctrl_); + ABSL_INTERNAL_ASSERT_IS_FULL(position.inner_.ctrl_, + "extract() called on invalid iterator."); auto node = CommonAccess::Transfer<node_type>(alloc_ref(), position.inner_.slot_); erase_meta_only(position); @@ -1445,12 +1719,13 @@ class raw_hash_set { template <class K = key_type> void prefetch(const key_arg<K>& key) const { (void)key; -#if defined(__GNUC__) + // Avoid probing if we won't be able to prefetch the addresses received. +#ifdef ABSL_INTERNAL_HAVE_PREFETCH prefetch_heap_block(); auto seq = probe(ctrl_, hash_ref()(key), capacity_); - __builtin_prefetch(static_cast<const void*>(ctrl_ + seq.offset())); - __builtin_prefetch(static_cast<const void*>(slots_ + seq.offset())); -#endif // __GNUC__ + base_internal::PrefetchT0(ctrl_ + seq.offset()); + base_internal::PrefetchT0(slots_ + seq.offset()); +#endif // ABSL_INTERNAL_HAVE_PREFETCH } // The API of find() has two extensions. @@ -1465,13 +1740,13 @@ class raw_hash_set { auto seq = probe(ctrl_, hash, capacity_); while (true) { Group g{ctrl_ + seq.offset()}; - for (int i : g.Match(H2(hash))) { + for (uint32_t i : g.Match(H2(hash))) { if (ABSL_PREDICT_TRUE(PolicyTraits::apply( EqualElement<K>{key, eq_ref()}, PolicyTraits::element(slots_ + seq.offset(i))))) return iterator_at(seq.offset(i)); } - if (ABSL_PREDICT_TRUE(g.MatchEmpty())) return end(); + if (ABSL_PREDICT_TRUE(g.MaskEmpty())) return end(); seq.next(); assert(seq.index() <= capacity_ && "full table!"); } @@ -1538,6 +1813,14 @@ class raw_hash_set { return !(a == b); } + template <typename H> + friend typename std::enable_if<H::template is_hashable<value_type>::value, + H>::type + AbslHashValue(H h, const raw_hash_set& s) { + return H::combine(H::combine_unordered(std::move(h), s.begin(), s.end()), + s.size()); + } + friend void swap(raw_hash_set& a, raw_hash_set& b) noexcept(noexcept(a.swap(b))) { a.swap(b); @@ -1603,17 +1886,17 @@ class raw_hash_set { slot_type&& slot; }; - // "erases" the object from the container, except that it doesn't actually - // destroy the object. It only updates all the metadata of the class. - // This can be used in conjunction with Policy::transfer to move the object to - // another place. + // Erases, but does not destroy, the value pointed to by `it`. + // + // This merely updates the pertinent control byte. This can be used in + // conjunction with Policy::transfer to move the object to another place. void erase_meta_only(const_iterator it) { assert(IsFull(*it.inner_.ctrl_) && "erasing a dangling iterator"); --size_; - const size_t index = it.inner_.ctrl_ - ctrl_; + const size_t index = static_cast<size_t>(it.inner_.ctrl_ - ctrl_); const size_t index_before = (index - Group::kWidth) & capacity_; - const auto empty_after = Group(it.inner_.ctrl_).MatchEmpty(); - const auto empty_before = Group(ctrl_ + index_before).MatchEmpty(); + const auto empty_after = Group(it.inner_.ctrl_).MaskEmpty(); + const auto empty_before = Group(ctrl_ + index_before).MaskEmpty(); // We count how many consecutive non empties we have to the right and to the // left of `it`. If the sum is >= kWidth then there is at least one probe @@ -1629,6 +1912,11 @@ class raw_hash_set { infoz().RecordErase(); } + // Allocates a backing array for `self` and initializes its control bytes. + // This reads `capacity_` and updates all other fields based on the result of + // the allocation. + // + // This does not free the currently held array; `capacity_` must be nonzero. void initialize_slots() { assert(capacity_); // Folks with custom allocators often make unwarranted assumptions about the @@ -1657,6 +1945,10 @@ class raw_hash_set { infoz().RecordStorageChanged(size_, capacity_); } + // Destroys all slots in the backing array, frees the backing array, and + // clears all top-level book-keeping data. + // + // This essentially implements `map = raw_hash_set();`. void destroy_slots() { if (!capacity_) return; for (size_t i = 0; i != capacity_; ++i) { @@ -1707,6 +1999,9 @@ class raw_hash_set { infoz().RecordRehash(total_probe_length); } + // Prunes control bytes to remove as many tombstones as possible. + // + // See the comment on `rehash_and_grow_if_necessary()`. void drop_deletes_without_resize() ABSL_ATTRIBUTE_NOINLINE { assert(IsValidCapacity(capacity_)); assert(!is_small(capacity_)); @@ -1773,6 +2068,11 @@ class raw_hash_set { infoz().RecordRehash(total_probe_length); } + // Called whenever the table *might* need to conditionally grow. + // + // This function is an optimization opportunity to perform a rehash even when + // growth is unnecessary, because vacating tombstones is beneficial for + // performance in the long-run. void rehash_and_grow_if_necessary() { if (capacity_ == 0) { resize(1); @@ -1832,12 +2132,12 @@ class raw_hash_set { auto seq = probe(ctrl_, hash, capacity_); while (true) { Group g{ctrl_ + seq.offset()}; - for (int i : g.Match(H2(hash))) { + for (uint32_t i : g.Match(H2(hash))) { if (ABSL_PREDICT_TRUE(PolicyTraits::element(slots_ + seq.offset(i)) == elem)) return true; } - if (ABSL_PREDICT_TRUE(g.MatchEmpty())) return false; + if (ABSL_PREDICT_TRUE(g.MaskEmpty())) return false; seq.next(); assert(seq.index() <= capacity_ && "full table!"); } @@ -1857,6 +2157,9 @@ class raw_hash_set { } protected: + // Attempts to find `key` in the table; if it isn't found, returns a slot that + // the value can be inserted into, with the control byte already set to + // `key`'s H2. template <class K> std::pair<size_t, bool> find_or_prepare_insert(const K& key) { prefetch_heap_block(); @@ -1864,19 +2167,23 @@ class raw_hash_set { auto seq = probe(ctrl_, hash, capacity_); while (true) { Group g{ctrl_ + seq.offset()}; - for (int i : g.Match(H2(hash))) { + for (uint32_t i : g.Match(H2(hash))) { if (ABSL_PREDICT_TRUE(PolicyTraits::apply( EqualElement<K>{key, eq_ref()}, PolicyTraits::element(slots_ + seq.offset(i))))) return {seq.offset(i), false}; } - if (ABSL_PREDICT_TRUE(g.MatchEmpty())) break; + if (ABSL_PREDICT_TRUE(g.MaskEmpty())) break; seq.next(); assert(seq.index() <= capacity_ && "full table!"); } return {prepare_insert(hash), true}; } + // Given the hash of a value not currently in the table, finds the next + // viable slot index to insert it at. + // + // REQUIRES: At least one non-full slot available. size_t prepare_insert(size_t hash) ABSL_ATTRIBUTE_NOINLINE { auto target = find_first_non_full(ctrl_, hash, capacity_); if (ABSL_PREDICT_FALSE(growth_left() == 0 && @@ -1920,15 +2227,23 @@ class raw_hash_set { growth_left() = CapacityToGrowth(capacity()) - size_; } + // The number of slots we can still fill without needing to rehash. + // + // This is stored separately due to tombstones: we do not include tombstones + // in the growth capacity, because we'd like to rehash when the table is + // otherwise filled with tombstones: otherwise, probe sequences might get + // unacceptably long without triggering a rehash. Callers can also force a + // rehash via the standard `rehash(0)`, which will recompute this value as a + // side-effect. + // + // See `CapacityToGrowth()`. size_t& growth_left() { return settings_.template get<0>(); } + // Prefetch the heap-allocated memory region to resolve potential TLB misses. + // This is intended to overlap with execution of calculating the hash for a + // key. void prefetch_heap_block() const { - // Prefetch the heap-allocated memory region to resolve potential TLB - // misses. This is intended to overlap with execution of calculating the - // hash for a key. -#if defined(__GNUC__) - __builtin_prefetch(static_cast<const void*>(ctrl_), 0, 1); -#endif // __GNUC__ + base_internal::PrefetchT2(ctrl_); } HashtablezInfoHandle& infoz() { return settings_.template get<1>(); } @@ -1945,20 +2260,33 @@ class raw_hash_set { // TODO(alkis): Investigate removing some of these fields: // - ctrl/slots can be derived from each other // - size can be moved into the slot array - ctrl_t* ctrl_ = EmptyGroup(); // [(capacity + 1 + NumClonedBytes()) * ctrl_t] - slot_type* slots_ = nullptr; // [capacity * slot_type] - size_t size_ = 0; // number of full slots - size_t capacity_ = 0; // total number of slots + + // The control bytes (and, also, a pointer to the base of the backing array). + // + // This contains `capacity_ + 1 + NumClonedBytes()` entries, even + // when the table is empty (hence EmptyGroup). + ctrl_t* ctrl_ = EmptyGroup(); + // The beginning of the slots, located at `SlotOffset()` bytes after + // `ctrl_`. May be null for empty tables. + slot_type* slots_ = nullptr; + + // The number of filled slots. + size_t size_ = 0; + + // The total number of available slots. + size_t capacity_ = 0; absl::container_internal::CompressedTuple<size_t /* growth_left */, HashtablezInfoHandle, hasher, key_equal, allocator_type> - settings_{0, HashtablezInfoHandle{}, hasher{}, key_equal{}, + settings_{0u, HashtablezInfoHandle{}, hasher{}, key_equal{}, allocator_type{}}; }; // Erases all elements that satisfy the predicate `pred` from the container `c`. template <typename P, typename H, typename E, typename A, typename Predicate> -void EraseIf(Predicate& pred, raw_hash_set<P, H, E, A>* c) { +typename raw_hash_set<P, H, E, A>::size_type EraseIf( + Predicate& pred, raw_hash_set<P, H, E, A>* c) { + const auto initial_size = c->size(); for (auto it = c->begin(), last = c->end(); it != last;) { if (pred(*it)) { c->erase(it++); @@ -1966,6 +2294,7 @@ void EraseIf(Predicate& pred, raw_hash_set<P, H, E, A>* c) { ++it; } } + return initial_size - c->size(); } namespace hashtable_debug_internal { @@ -1981,7 +2310,7 @@ struct HashtableDebugAccess<Set, absl::void_t<typename Set::raw_hash_set>> { auto seq = probe(set.ctrl_, hash, set.capacity_); while (true) { container_internal::Group g{set.ctrl_ + seq.offset()}; - for (int i : g.Match(container_internal::H2(hash))) { + for (uint32_t i : g.Match(container_internal::H2(hash))) { if (Traits::apply( typename Set::template EqualElement<typename Set::key_type>{ key, set.eq_ref()}, @@ -1989,7 +2318,7 @@ struct HashtableDebugAccess<Set, absl::void_t<typename Set::raw_hash_set>> { return num_probes; ++num_probes; } - if (g.MatchEmpty()) return num_probes; + if (g.MaskEmpty()) return num_probes; seq.next(); ++num_probes; } @@ -2031,4 +2360,6 @@ struct HashtableDebugAccess<Set, absl::void_t<typename Set::raw_hash_set>> { ABSL_NAMESPACE_END } // namespace absl +#undef ABSL_INTERNAL_ASSERT_IS_FULL + #endif // ABSL_CONTAINER_INTERNAL_RAW_HASH_SET_H_ |