diff options
author | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <[email protected]> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /contrib/restricted/aws/aws-c-common/source/hash_table.c |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'contrib/restricted/aws/aws-c-common/source/hash_table.c')
-rw-r--r-- | contrib/restricted/aws/aws-c-common/source/hash_table.c | 1100 |
1 file changed, 1100 insertions, 0 deletions
diff --git a/contrib/restricted/aws/aws-c-common/source/hash_table.c b/contrib/restricted/aws/aws-c-common/source/hash_table.c new file mode 100644 index 00000000000..a8125a2df11 --- /dev/null +++ b/contrib/restricted/aws/aws-c-common/source/hash_table.c @@ -0,0 +1,1100 @@ +/** + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0. + */ + +/* For more information on how the RH hash works and in particular how we do + * deletions, see: + * http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/ + */ + +#include <aws/common/hash_table.h> +#include <aws/common/math.h> +#include <aws/common/private/hash_table_impl.h> +#include <aws/common/string.h> + +#include <limits.h> +#include <stdio.h> +#include <stdlib.h> + +/* Include lookup3.c so we can (potentially) inline it and make use of the mix() + * macro. */ +#include <aws/common/private/lookup3.inl> + +static void s_suppress_unused_lookup3_func_warnings(void) { + /* We avoid making changes to lookup3 if we can avoid it, but since it has functions + * we're not using, reference them somewhere to suppress the unused function warning. + */ + (void)hashword; + (void)hashword2; + (void)hashlittle; + (void)hashbig; +} + +/** + * Calculate the hash for the given key. + * Ensures a reasonable semantics for null keys. + * Ensures that no object ever hashes to 0, which is the sentinal value for an empty hash element. + */ +static uint64_t s_hash_for(struct hash_table_state *state, const void *key) { + AWS_PRECONDITION(hash_table_state_is_valid(state)); + s_suppress_unused_lookup3_func_warnings(); + + if (key == NULL) { + /* The best answer */ + return 42; + } + + uint64_t hash_code = state->hash_fn(key); + if (!hash_code) { + hash_code = 1; + } + AWS_RETURN_WITH_POSTCONDITION(hash_code, hash_code != 0); +} + +/** + * Check equality of two objects, with a reasonable semantics for null. 
+ */ +static bool s_safe_eq_check(aws_hash_callback_eq_fn *equals_fn, const void *a, const void *b) { + /* Short circuit if the pointers are the same */ + if (a == b) { + return true; + } + /* If one but not both are null, the objects are not equal */ + if (a == NULL || b == NULL) { + return false; + } + /* If both are non-null, call the underlying equals fn */ + return equals_fn(a, b); +} + +/** + * Check equality of two hash keys, with a reasonable semantics for null keys. + */ +static bool s_hash_keys_eq(struct hash_table_state *state, const void *a, const void *b) { + AWS_PRECONDITION(hash_table_state_is_valid(state)); + bool rval = s_safe_eq_check(state->equals_fn, a, b); + AWS_RETURN_WITH_POSTCONDITION(rval, hash_table_state_is_valid(state)); +} + +static size_t s_index_for(struct hash_table_state *map, struct hash_table_entry *entry) { + AWS_PRECONDITION(hash_table_state_is_valid(map)); + size_t index = entry - map->slots; + AWS_RETURN_WITH_POSTCONDITION(index, index < map->size && hash_table_state_is_valid(map)); +} + +#if 0 +/* Useful debugging code for anyone working on this in the future */ +static uint64_t s_distance(struct hash_table_state *state, int index) { + return (index - state->slots[index].hash_code) & state->mask; +} + +void hash_dump(struct aws_hash_table *tbl) { + struct hash_table_state *state = tbl->p_impl; + + printf("Dumping hash table contents:\n"); + + for (int i = 0; i < state->size; i++) { + printf("%7d: ", i); + struct hash_table_entry *e = &state->slots[i]; + if (!e->hash_code) { + printf("EMPTY\n"); + } else { + printf("k: %p v: %p hash_code: %lld displacement: %lld\n", + e->element.key, e->element.value, e->hash_code, + (i - e->hash_code) & state->mask); + } + } +} +#endif + +#if 0 +/* Not currently exposed as an API. Should we have something like this? 
Useful for benchmarks */ +AWS_COMMON_API +void aws_hash_table_print_stats(struct aws_hash_table *table) { + struct hash_table_state *state = table->p_impl; + uint64_t total_disp = 0; + uint64_t max_disp = 0; + + printf("\n=== Hash table statistics ===\n"); + printf("Table size: %zu/%zu (max load %zu, remaining %zu)\n", state->entry_count, state->size, state->max_load, state->max_load - state->entry_count); + printf("Load factor: %02.2lf%% (max %02.2lf%%)\n", + 100.0 * ((double)state->entry_count / (double)state->size), + state->max_load_factor); + + for (size_t i = 0; i < state->size; i++) { + if (state->slots[i].hash_code) { + int displacement = distance(state, i); + total_disp += displacement; + if (displacement > max_disp) { + max_disp = displacement; + } + } + } + + size_t *disp_counts = calloc(sizeof(*disp_counts), max_disp + 1); + + for (size_t i = 0; i < state->size; i++) { + if (state->slots[i].hash_code) { + disp_counts[distance(state, i)]++; + } + } + + uint64_t median = 0; + uint64_t passed = 0; + for (uint64_t i = 0; i <= max_disp && passed < total_disp / 2; i++) { + median = i; + passed += disp_counts[i]; + } + + printf("Displacement statistics: Avg %02.2lf max %llu median %llu\n", (double)total_disp / (double)state->entry_count, max_disp, median); + for (uint64_t i = 0; i <= max_disp; i++) { + printf("Displacement %2lld: %zu entries\n", i, disp_counts[i]); + } + free(disp_counts); + printf("\n"); +} +#endif + +size_t aws_hash_table_get_entry_count(const struct aws_hash_table *map) { + struct hash_table_state *state = map->p_impl; + return state->entry_count; +} + +/* Given a header template, allocates space for a hash table of the appropriate + * size, and copies the state header into this allocated memory, which is + * returned. 
+ */ +static struct hash_table_state *s_alloc_state(const struct hash_table_state *template) { + size_t required_bytes; + if (hash_table_state_required_bytes(template->size, &required_bytes)) { + return NULL; + } + + /* An empty slot has hashcode 0. So this marks all slots as empty */ + struct hash_table_state *state = aws_mem_calloc(template->alloc, 1, required_bytes); + + if (state == NULL) { + return state; + } + + *state = *template; + return state; +} + +/* Computes the correct size and max_load based on a requested size. */ +static int s_update_template_size(struct hash_table_state *template, size_t expected_elements) { + size_t min_size = expected_elements; + + if (min_size < 2) { + min_size = 2; + } + + /* size is always a power of 2 */ + size_t size; + if (aws_round_up_to_power_of_two(min_size, &size)) { + return AWS_OP_ERR; + } + + /* Update the template once we've calculated everything successfully */ + template->size = size; + template->max_load = (size_t)(template->max_load_factor * (double)template->size); + /* Ensure that there is always at least one empty slot in the hash table */ + if (template->max_load >= size) { + template->max_load = size - 1; + } + + /* Since size is a power of 2: (index & (size - 1)) == (index % size) */ + template->mask = size - 1; + + return AWS_OP_SUCCESS; +} + +int aws_hash_table_init( + struct aws_hash_table *map, + struct aws_allocator *alloc, + size_t size, + aws_hash_fn *hash_fn, + aws_hash_callback_eq_fn *equals_fn, + aws_hash_callback_destroy_fn *destroy_key_fn, + aws_hash_callback_destroy_fn *destroy_value_fn) { + AWS_PRECONDITION(map != NULL); + AWS_PRECONDITION(alloc != NULL); + AWS_PRECONDITION(hash_fn != NULL); + AWS_PRECONDITION(equals_fn != NULL); + + struct hash_table_state template; + template.hash_fn = hash_fn; + template.equals_fn = equals_fn; + template.destroy_key_fn = destroy_key_fn; + template.destroy_value_fn = destroy_value_fn; + template.alloc = alloc; + + template.entry_count = 0; + 
template.max_load_factor = 0.95; /* TODO - make configurable? */ + + if (s_update_template_size(&template, size)) { + return AWS_OP_ERR; + } + map->p_impl = s_alloc_state(&template); + + if (!map->p_impl) { + return AWS_OP_ERR; + } + + AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); +} + +void aws_hash_table_clean_up(struct aws_hash_table *map) { + AWS_PRECONDITION(map != NULL); + AWS_PRECONDITION( + map->p_impl == NULL || aws_hash_table_is_valid(map), + "Input aws_hash_table [map] must be valid or hash_table_state pointer [map->p_impl] must be NULL, in case " + "aws_hash_table_clean_up was called twice."); + struct hash_table_state *state = map->p_impl; + + /* Ensure that we're idempotent */ + if (!state) { + return; + } + + aws_hash_table_clear(map); + aws_mem_release(map->p_impl->alloc, map->p_impl); + + map->p_impl = NULL; + AWS_POSTCONDITION(map->p_impl == NULL); +} + +void aws_hash_table_swap(struct aws_hash_table *AWS_RESTRICT a, struct aws_hash_table *AWS_RESTRICT b) { + AWS_PRECONDITION(a != b); + struct aws_hash_table tmp = *a; + *a = *b; + *b = tmp; +} + +void aws_hash_table_move(struct aws_hash_table *AWS_RESTRICT to, struct aws_hash_table *AWS_RESTRICT from) { + AWS_PRECONDITION(to != NULL); + AWS_PRECONDITION(from != NULL); + AWS_PRECONDITION(to != from); + AWS_PRECONDITION(aws_hash_table_is_valid(from)); + + *to = *from; + AWS_ZERO_STRUCT(*from); + AWS_POSTCONDITION(aws_hash_table_is_valid(to)); +} + +/* Tries to find where the requested key is or where it should go if put. + * Returns AWS_ERROR_SUCCESS if the item existed (leaving it in *entry), + * or AWS_ERROR_HASHTBL_ITEM_NOT_FOUND if it did not (putting its destination + * in *entry). Note that this does not take care of displacing whatever was in + * that entry before. + * + * probe_idx is set to the probe index of the entry found. 
+ */ + +static int s_find_entry1( + struct hash_table_state *state, + uint64_t hash_code, + const void *key, + struct hash_table_entry **p_entry, + size_t *p_probe_idx); + +/* Inlined fast path: Check the first slot, only. */ +/* TODO: Force inlining? */ +static int inline s_find_entry( + struct hash_table_state *state, + uint64_t hash_code, + const void *key, + struct hash_table_entry **p_entry, + size_t *p_probe_idx) { + struct hash_table_entry *entry = &state->slots[hash_code & state->mask]; + + if (entry->hash_code == 0) { + if (p_probe_idx) { + *p_probe_idx = 0; + } + *p_entry = entry; + return AWS_ERROR_HASHTBL_ITEM_NOT_FOUND; + } + + if (entry->hash_code == hash_code && s_hash_keys_eq(state, key, entry->element.key)) { + if (p_probe_idx) { + *p_probe_idx = 0; + } + *p_entry = entry; + return AWS_OP_SUCCESS; + } + + return s_find_entry1(state, hash_code, key, p_entry, p_probe_idx); +} + +static int s_find_entry1( + struct hash_table_state *state, + uint64_t hash_code, + const void *key, + struct hash_table_entry **p_entry, + size_t *p_probe_idx) { + size_t probe_idx = 1; + /* If we find a deleted entry, we record that index and return it as our probe index (i.e. we'll keep searching to + * see if it already exists, but if not we'll overwrite the deleted entry). + */ + + int rv; + struct hash_table_entry *entry; + /* This loop is guaranteed to terminate because entry_probe is bounded above by state->mask (i.e. state->size - 1). 
+ * Since probe_idx increments every loop iteration, it will become larger than entry_probe after at most state->size + * transitions and the loop will exit (if it hasn't already) + */ + while (1) { +#ifdef CBMC +# pragma CPROVER check push +# pragma CPROVER check disable "unsigned-overflow" +#endif + uint64_t index = (hash_code + probe_idx) & state->mask; +#ifdef CBMC +# pragma CPROVER check pop +#endif + entry = &state->slots[index]; + if (!entry->hash_code) { + rv = AWS_ERROR_HASHTBL_ITEM_NOT_FOUND; + break; + } + + if (entry->hash_code == hash_code && s_hash_keys_eq(state, key, entry->element.key)) { + rv = AWS_ERROR_SUCCESS; + break; + } + +#ifdef CBMC +# pragma CPROVER check push +# pragma CPROVER check disable "unsigned-overflow" +#endif + uint64_t entry_probe = (index - entry->hash_code) & state->mask; +#ifdef CBMC +# pragma CPROVER check pop +#endif + + if (entry_probe < probe_idx) { + /* We now know that our target entry cannot exist; if it did exist, + * it would be at the current location as it has a higher probe + * length than the entry we are examining and thus would have + * preempted that item + */ + rv = AWS_ERROR_HASHTBL_ITEM_NOT_FOUND; + break; + } + + probe_idx++; + } + + *p_entry = entry; + if (p_probe_idx) { + *p_probe_idx = probe_idx; + } + + return rv; +} + +int aws_hash_table_find(const struct aws_hash_table *map, const void *key, struct aws_hash_element **p_elem) { + AWS_PRECONDITION(aws_hash_table_is_valid(map)); + AWS_PRECONDITION(AWS_OBJECT_PTR_IS_WRITABLE(p_elem), "Input aws_hash_element pointer [p_elem] must be writable."); + + struct hash_table_state *state = map->p_impl; + uint64_t hash_code = s_hash_for(state, key); + struct hash_table_entry *entry; + + int rv = s_find_entry(state, hash_code, key, &entry, NULL); + + if (rv == AWS_ERROR_SUCCESS) { + *p_elem = &entry->element; + } else { + *p_elem = NULL; + } + AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); +} + +/** + * Attempts to find a home for the given entry. 
+ * If the entry was empty (i.e. hash-code of 0), then the function does nothing and returns NULL + * Otherwise, it emplaces the item, and returns a pointer to the newly emplaced entry. + * This function is only called after the hash-table has been expanded to fit the new element, + * so it should never fail. + */ +static struct hash_table_entry *s_emplace_item( + struct hash_table_state *state, + struct hash_table_entry entry, + size_t probe_idx) { + AWS_PRECONDITION(hash_table_state_is_valid(state)); + + if (entry.hash_code == 0) { + AWS_RETURN_WITH_POSTCONDITION(NULL, hash_table_state_is_valid(state)); + } + + struct hash_table_entry *rval = NULL; + + /* Since a valid hash_table has at least one empty element, this loop will always terminate in at most linear time + */ + while (entry.hash_code != 0) { +#ifdef CBMC +# pragma CPROVER check push +# pragma CPROVER check disable "unsigned-overflow" +#endif + size_t index = (size_t)(entry.hash_code + probe_idx) & state->mask; +#ifdef CBMC +# pragma CPROVER check pop +#endif + struct hash_table_entry *victim = &state->slots[index]; + +#ifdef CBMC +# pragma CPROVER check push +# pragma CPROVER check disable "unsigned-overflow" +#endif + size_t victim_probe_idx = (size_t)(index - victim->hash_code) & state->mask; +#ifdef CBMC +# pragma CPROVER check pop +#endif + + if (!victim->hash_code || victim_probe_idx < probe_idx) { + /* The first thing we emplace is the entry itself. 
A pointer to its location becomes the rval */ + if (!rval) { + rval = victim; + } + + struct hash_table_entry tmp = *victim; + *victim = entry; + entry = tmp; + + probe_idx = victim_probe_idx + 1; + } else { + probe_idx++; + } + } + + AWS_RETURN_WITH_POSTCONDITION( + rval, + hash_table_state_is_valid(state) && rval >= &state->slots[0] && rval < &state->slots[state->size], + "Output hash_table_entry pointer [rval] must point in the slots of [state]."); +} + +static int s_expand_table(struct aws_hash_table *map) { + struct hash_table_state *old_state = map->p_impl; + struct hash_table_state template = *old_state; + + size_t new_size; + if (aws_mul_size_checked(template.size, 2, &new_size)) { + return AWS_OP_ERR; + } + + if (s_update_template_size(&template, new_size)) { + return AWS_OP_ERR; + } + + struct hash_table_state *new_state = s_alloc_state(&template); + if (!new_state) { + return AWS_OP_ERR; + } + + for (size_t i = 0; i < old_state->size; i++) { + struct hash_table_entry entry = old_state->slots[i]; + if (entry.hash_code) { + /* We can directly emplace since we know we won't put the same item twice */ + s_emplace_item(new_state, entry, 0); + } + } + + map->p_impl = new_state; + aws_mem_release(new_state->alloc, old_state); + + return AWS_OP_SUCCESS; +} + +int aws_hash_table_create( + struct aws_hash_table *map, + const void *key, + struct aws_hash_element **p_elem, + int *was_created) { + + struct hash_table_state *state = map->p_impl; + uint64_t hash_code = s_hash_for(state, key); + struct hash_table_entry *entry; + size_t probe_idx; + int ignored; + if (!was_created) { + was_created = &ignored; + } + + int rv = s_find_entry(state, hash_code, key, &entry, &probe_idx); + + if (rv == AWS_ERROR_SUCCESS) { + if (p_elem) { + *p_elem = &entry->element; + } + *was_created = 0; + return AWS_OP_SUCCESS; + } + + /* Okay, we need to add an entry. Check the load factor first. 
*/ + size_t incr_entry_count; + if (aws_add_size_checked(state->entry_count, 1, &incr_entry_count)) { + return AWS_OP_ERR; + } + if (incr_entry_count > state->max_load) { + rv = s_expand_table(map); + if (rv != AWS_OP_SUCCESS) { + /* Any error was already raised in expand_table */ + return rv; + } + state = map->p_impl; + /* If we expanded the table, we need to discard the probe index returned from find_entry, + * as it's likely that we can find a more desirable slot. If we don't, then later gets will + * terminate before reaching our probe index. + + * n.b. currently we ignore this probe_idx subsequently, but leaving + this here so we don't + * forget when we optimize later. */ + probe_idx = 0; + } + + state->entry_count++; + struct hash_table_entry new_entry; + new_entry.element.key = key; + new_entry.element.value = NULL; + new_entry.hash_code = hash_code; + + entry = s_emplace_item(state, new_entry, probe_idx); + + if (p_elem) { + *p_elem = &entry->element; + } + + *was_created = 1; + + return AWS_OP_SUCCESS; +} + +AWS_COMMON_API +int aws_hash_table_put(struct aws_hash_table *map, const void *key, void *value, int *was_created) { + struct aws_hash_element *p_elem; + int was_created_fallback; + + if (!was_created) { + was_created = &was_created_fallback; + } + + if (aws_hash_table_create(map, key, &p_elem, was_created)) { + return AWS_OP_ERR; + } + + /* + * aws_hash_table_create might resize the table, which results in map->p_impl changing. + * It is therefore important to wait to read p_impl until after we return. + */ + struct hash_table_state *state = map->p_impl; + + if (!*was_created) { + if (p_elem->key != key && state->destroy_key_fn) { + state->destroy_key_fn((void *)p_elem->key); + } + + if (state->destroy_value_fn) { + state->destroy_value_fn((void *)p_elem->value); + } + } + + p_elem->key = key; + p_elem->value = value; + + return AWS_OP_SUCCESS; +} + +/* Clears an entry. Does _not_ invoke destructor callbacks. 
+ * Returns the last slot touched (note that if we wrap, we'll report an index + * lower than the original entry's index) + */ +static size_t s_remove_entry(struct hash_table_state *state, struct hash_table_entry *entry) { + AWS_PRECONDITION(hash_table_state_is_valid(state)); + AWS_PRECONDITION(state->entry_count > 0); + AWS_PRECONDITION( + entry >= &state->slots[0] && entry < &state->slots[state->size], + "Input hash_table_entry [entry] pointer must point in the available slots."); + state->entry_count--; + + /* Shift subsequent entries back until we find an entry that belongs at its + * current position. This is important to ensure that subsequent searches + * don't terminate at the removed element. + */ + size_t index = s_index_for(state, entry); + /* There is always at least one empty slot in the hash table, so this loop always terminates */ + while (1) { + size_t next_index = (index + 1) & state->mask; + + /* If we hit an empty slot, stop */ + if (!state->slots[next_index].hash_code) { + break; + } + /* If the next slot is at the start of the probe sequence, stop. + * We know that nothing with an earlier home slot is after this; + * otherwise this index-zero entry would have been evicted from its + * home. 
+ */ + if ((state->slots[next_index].hash_code & state->mask) == next_index) { + break; + } + + /* Okay, shift this one back */ + state->slots[index] = state->slots[next_index]; + index = next_index; + } + + /* Clear the entry we shifted out of */ + AWS_ZERO_STRUCT(state->slots[index]); + AWS_RETURN_WITH_POSTCONDITION(index, hash_table_state_is_valid(state) && index <= state->size); +} + +int aws_hash_table_remove( + struct aws_hash_table *map, + const void *key, + struct aws_hash_element *p_value, + int *was_present) { + AWS_PRECONDITION(aws_hash_table_is_valid(map)); + AWS_PRECONDITION( + p_value == NULL || AWS_OBJECT_PTR_IS_WRITABLE(p_value), "Input pointer [p_value] must be NULL or writable."); + AWS_PRECONDITION( + was_present == NULL || AWS_OBJECT_PTR_IS_WRITABLE(was_present), + "Input pointer [was_present] must be NULL or writable."); + + struct hash_table_state *state = map->p_impl; + uint64_t hash_code = s_hash_for(state, key); + struct hash_table_entry *entry; + int ignored; + + if (!was_present) { + was_present = &ignored; + } + + int rv = s_find_entry(state, hash_code, key, &entry, NULL); + + if (rv != AWS_ERROR_SUCCESS) { + *was_present = 0; + AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); + } + + *was_present = 1; + + if (p_value) { + *p_value = entry->element; + } else { + if (state->destroy_key_fn) { + state->destroy_key_fn((void *)entry->element.key); + } + if (state->destroy_value_fn) { + state->destroy_value_fn(entry->element.value); + } + } + s_remove_entry(state, entry); + + AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); +} + +int aws_hash_table_remove_element(struct aws_hash_table *map, struct aws_hash_element *p_value) { + AWS_PRECONDITION(aws_hash_table_is_valid(map)); + AWS_PRECONDITION(p_value != NULL); + + struct hash_table_state *state = map->p_impl; + struct hash_table_entry *entry = AWS_CONTAINER_OF(p_value, struct hash_table_entry, element); + + s_remove_entry(state, entry); + + 
AWS_SUCCEED_WITH_POSTCONDITION(aws_hash_table_is_valid(map)); +} + +int aws_hash_table_foreach( + struct aws_hash_table *map, + int (*callback)(void *context, struct aws_hash_element *pElement), + void *context) { + + for (struct aws_hash_iter iter = aws_hash_iter_begin(map); !aws_hash_iter_done(&iter); aws_hash_iter_next(&iter)) { + int rv = callback(context, &iter.element); + + if (rv & AWS_COMMON_HASH_TABLE_ITER_DELETE) { + aws_hash_iter_delete(&iter, false); + } + + if (!(rv & AWS_COMMON_HASH_TABLE_ITER_CONTINUE)) { + break; + } + } + + return AWS_OP_SUCCESS; +} + +bool aws_hash_table_eq( + const struct aws_hash_table *a, + const struct aws_hash_table *b, + aws_hash_callback_eq_fn *value_eq) { + AWS_PRECONDITION(aws_hash_table_is_valid(a)); + AWS_PRECONDITION(aws_hash_table_is_valid(b)); + AWS_PRECONDITION(value_eq != NULL); + + if (aws_hash_table_get_entry_count(a) != aws_hash_table_get_entry_count(b)) { + AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b)); + } + + /* + * Now that we have established that the two tables have the same number of + * entries, we can simply iterate one and compare against the same key in + * the other. 
+ */ + for (size_t i = 0; i < a->p_impl->size; ++i) { + const struct hash_table_entry *const a_entry = &a->p_impl->slots[i]; + if (a_entry->hash_code == 0) { + continue; + } + + struct aws_hash_element *b_element = NULL; + + aws_hash_table_find(b, a_entry->element.key, &b_element); + + if (!b_element) { + /* Key is present in A only */ + AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b)); + } + + if (!s_safe_eq_check(value_eq, a_entry->element.value, b_element->value)) { + AWS_RETURN_WITH_POSTCONDITION(false, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b)); + } + } + AWS_RETURN_WITH_POSTCONDITION(true, aws_hash_table_is_valid(a) && aws_hash_table_is_valid(b)); +} + +/** + * Given an iterator, and a start slot, find the next available filled slot if it exists + * Otherwise, return an iter that will return true for aws_hash_iter_done(). + * Note that aws_hash_iter_is_valid() need not hold on entry to the function, since + * it can be called on a partially constructed iter from aws_hash_iter_begin(). + * + * Note that calling this on an iterator which is "done" is idempotent: it will return another + * iterator which is "done". 
+ */ +static inline void s_get_next_element(struct aws_hash_iter *iter, size_t start_slot) { + AWS_PRECONDITION(iter != NULL); + AWS_PRECONDITION(aws_hash_table_is_valid(iter->map)); + struct hash_table_state *state = iter->map->p_impl; + size_t limit = iter->limit; + + for (size_t i = start_slot; i < limit; i++) { + struct hash_table_entry *entry = &state->slots[i]; + + if (entry->hash_code) { + iter->element = entry->element; + iter->slot = i; + iter->status = AWS_HASH_ITER_STATUS_READY_FOR_USE; + return; + } + } + iter->element.key = NULL; + iter->element.value = NULL; + iter->slot = iter->limit; + iter->status = AWS_HASH_ITER_STATUS_DONE; + AWS_POSTCONDITION(aws_hash_iter_is_valid(iter)); +} + +struct aws_hash_iter aws_hash_iter_begin(const struct aws_hash_table *map) { + AWS_PRECONDITION(aws_hash_table_is_valid(map)); + struct hash_table_state *state = map->p_impl; + struct aws_hash_iter iter; + AWS_ZERO_STRUCT(iter); + iter.map = map; + iter.limit = state->size; + s_get_next_element(&iter, 0); + AWS_RETURN_WITH_POSTCONDITION( + iter, + aws_hash_iter_is_valid(&iter) && + (iter.status == AWS_HASH_ITER_STATUS_DONE || iter.status == AWS_HASH_ITER_STATUS_READY_FOR_USE), + "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE."); +} + +bool aws_hash_iter_done(const struct aws_hash_iter *iter) { + AWS_PRECONDITION(aws_hash_iter_is_valid(iter)); + AWS_PRECONDITION( + iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE, + "Input aws_hash_iter [iter] must either be done, or ready to use."); + /* + * SIZE_MAX is a valid (non-terminal) value for iter->slot in the event that + * we delete slot 0. See comments in aws_hash_iter_delete. + * + * As such we must use == rather than >= here. 
+ */ + bool rval = (iter->slot == iter->limit); + AWS_POSTCONDITION( + iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE, + "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE."); + AWS_POSTCONDITION( + rval == (iter->status == AWS_HASH_ITER_STATUS_DONE), + "Output bool [rval] must be true if and only if the status of [iter] is DONE."); + AWS_POSTCONDITION(aws_hash_iter_is_valid(iter)); + return rval; +} + +void aws_hash_iter_next(struct aws_hash_iter *iter) { + AWS_PRECONDITION(aws_hash_iter_is_valid(iter)); +#ifdef CBMC +# pragma CPROVER check push +# pragma CPROVER check disable "unsigned-overflow" +#endif + s_get_next_element(iter, iter->slot + 1); +#ifdef CBMC +# pragma CPROVER check pop +#endif + AWS_POSTCONDITION( + iter->status == AWS_HASH_ITER_STATUS_DONE || iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE, + "The status of output aws_hash_iter [iter] must either be DONE or READY_FOR_USE."); + AWS_POSTCONDITION(aws_hash_iter_is_valid(iter)); +} + +void aws_hash_iter_delete(struct aws_hash_iter *iter, bool destroy_contents) { + AWS_PRECONDITION( + iter->status == AWS_HASH_ITER_STATUS_READY_FOR_USE, "Input aws_hash_iter [iter] must be ready for use."); + AWS_PRECONDITION(aws_hash_iter_is_valid(iter)); + AWS_PRECONDITION( + iter->map->p_impl->entry_count > 0, + "The hash_table_state pointed by input [iter] must contain at least one entry."); + + struct hash_table_state *state = iter->map->p_impl; + if (destroy_contents) { + if (state->destroy_key_fn) { + state->destroy_key_fn((void *)iter->element.key); + } + if (state->destroy_value_fn) { + state->destroy_value_fn(iter->element.value); + } + } + + size_t last_index = s_remove_entry(state, &state->slots[iter->slot]); + + /* If we shifted elements that are not part of the window we intend to iterate + * over, it means we shifted an element that we already visited into the + * iter->limit - 1 position. 
To avoid double iteration, we'll now reduce the + * limit to compensate. + * + * Note that last_index cannot equal iter->slot, because slots[iter->slot] + * is empty before we start walking the table. + */ + if (last_index < iter->slot || last_index >= iter->limit) { + iter->limit--; + } + + /* + * After removing this entry, the next entry might be in the same slot, or + * in some later slot, or we might have no further entries. + * + * We also expect that the caller will call aws_hash_iter_done and aws_hash_iter_next + * after this delete call. This gets a bit tricky if we just deleted the value + * in slot 0, and a new value has shifted in. + * + * To deal with this, we'll just step back one slot, and let _next start iteration + * at our current slot. Note that if we just deleted slot 0, this will result in + * underflowing to SIZE_MAX; we have to take care in aws_hash_iter_done to avoid + * treating this as an end-of-iteration condition. + */ +#ifdef CBMC +# pragma CPROVER check push +# pragma CPROVER check disable "unsigned-overflow" +#endif + iter->slot--; +#ifdef CBMC +# pragma CPROVER check pop +#endif + iter->status = AWS_HASH_ITER_STATUS_DELETE_CALLED; + AWS_POSTCONDITION( + iter->status == AWS_HASH_ITER_STATUS_DELETE_CALLED, + "The status of output aws_hash_iter [iter] must be DELETE_CALLED."); + AWS_POSTCONDITION(aws_hash_iter_is_valid(iter)); +} + +void aws_hash_table_clear(struct aws_hash_table *map) { + AWS_PRECONDITION(aws_hash_table_is_valid(map)); + struct hash_table_state *state = map->p_impl; + + /* Check that we have at least one destructor before iterating over the table */ + if (state->destroy_key_fn || state->destroy_value_fn) { + for (size_t i = 0; i < state->size; ++i) { + struct hash_table_entry *entry = &state->slots[i]; + if (!entry->hash_code) { + continue; + } + if (state->destroy_key_fn) { + state->destroy_key_fn((void *)entry->element.key); + } + if (state->destroy_value_fn) { + state->destroy_value_fn(entry->element.value); + } + } 
+ } + /* Since hash code 0 represents an empty slot we can just zero out the + * entire table. */ + memset(state->slots, 0, sizeof(*state->slots) * state->size); + + state->entry_count = 0; + AWS_POSTCONDITION(aws_hash_table_is_valid(map)); +} + +uint64_t aws_hash_c_string(const void *item) { + AWS_PRECONDITION(aws_c_string_is_valid(item)); + const char *str = item; + + /* first digits of pi in hex */ + uint32_t b = 0x3243F6A8, c = 0x885A308D; + hashlittle2(str, strlen(str), &c, &b); + + return ((uint64_t)b << 32) | c; +} + +uint64_t aws_hash_string(const void *item) { + AWS_PRECONDITION(aws_string_is_valid(item)); + const struct aws_string *str = item; + + /* first digits of pi in hex */ + uint32_t b = 0x3243F6A8, c = 0x885A308D; + hashlittle2(aws_string_bytes(str), str->len, &c, &b); + AWS_RETURN_WITH_POSTCONDITION(((uint64_t)b << 32) | c, aws_string_is_valid(str)); +} + +uint64_t aws_hash_byte_cursor_ptr(const void *item) { + AWS_PRECONDITION(aws_byte_cursor_is_valid(item)); + const struct aws_byte_cursor *cur = item; + + /* first digits of pi in hex */ + uint32_t b = 0x3243F6A8, c = 0x885A308D; + hashlittle2(cur->ptr, cur->len, &c, &b); + AWS_RETURN_WITH_POSTCONDITION(((uint64_t)b << 32) | c, aws_byte_cursor_is_valid(cur)); +} + +uint64_t aws_hash_ptr(const void *item) { + /* Since the numeric value of the pointer is considered, not the memory behind it, 0 is an acceptable value */ + /* first digits of e in hex + * 2.b7e 1516 28ae d2a6 */ + uint32_t b = 0x2b7e1516, c = 0x28aed2a6; + + hashlittle2(&item, sizeof(item), &c, &b); + + return ((uint64_t)b << 32) | c; +} + +uint64_t aws_hash_combine(uint64_t item1, uint64_t item2) { + uint32_t b = item2 & 0xFFFFFFFF; /* LSB */ + uint32_t c = item2 >> 32; /* MSB */ + + hashlittle2(&item1, sizeof(item1), &c, &b); + return ((uint64_t)b << 32) | c; +} + +bool aws_hash_callback_c_str_eq(const void *a, const void *b) { + AWS_PRECONDITION(aws_c_string_is_valid(a)); + AWS_PRECONDITION(aws_c_string_is_valid(b)); + bool rval = 
!strcmp(a, b); + AWS_RETURN_WITH_POSTCONDITION(rval, aws_c_string_is_valid(a) && aws_c_string_is_valid(b)); +} + +bool aws_hash_callback_string_eq(const void *a, const void *b) { + AWS_PRECONDITION(aws_string_is_valid(a)); + AWS_PRECONDITION(aws_string_is_valid(b)); + bool rval = aws_string_eq(a, b); + AWS_RETURN_WITH_POSTCONDITION(rval, aws_c_string_is_valid(a) && aws_c_string_is_valid(b)); +} + +void aws_hash_callback_string_destroy(void *a) { + AWS_PRECONDITION(aws_string_is_valid(a)); + aws_string_destroy(a); +} + +bool aws_ptr_eq(const void *a, const void *b) { + return a == b; +} + +/** + * Best-effort check of hash_table_state data-structure invariants + * Some invariants, such as that the number of entries is actually the + * same as the entry_count field, would require a loop to check + */ +bool aws_hash_table_is_valid(const struct aws_hash_table *map) { + return map && map->p_impl && hash_table_state_is_valid(map->p_impl); +} + +/** + * Best-effort check of hash_table_state data-structure invariants + * Some invariants, such as that the number of entries is actually the + * same as the entry_count field, would require a loop to check + */ +bool hash_table_state_is_valid(const struct hash_table_state *map) { + if (!map) { + return false; + } + bool hash_fn_nonnull = (map->hash_fn != NULL); + bool equals_fn_nonnull = (map->equals_fn != NULL); + /*destroy_key_fn and destroy_value_fn are both allowed to be NULL*/ + bool alloc_nonnull = (map->alloc != NULL); + bool size_at_least_two = (map->size >= 2); + bool size_is_power_of_two = aws_is_power_of_two(map->size); + bool entry_count = (map->entry_count <= map->max_load); + bool max_load = (map->max_load < map->size); + bool mask_is_correct = (map->mask == (map->size - 1)); + bool max_load_factor_bounded = map->max_load_factor == 0.95; //(map->max_load_factor < 1.0); + bool slots_allocated = AWS_MEM_IS_WRITABLE(&map->slots[0], sizeof(map->slots[0]) * map->size); + + return hash_fn_nonnull && equals_fn_nonnull && 
alloc_nonnull && size_at_least_two && size_is_power_of_two && + entry_count && max_load && mask_is_correct && max_load_factor_bounded && slots_allocated; +} + +/** + * Given a pointer to a hash_iter, checks that it is well-formed, with all data-structure invariants. + */ +bool aws_hash_iter_is_valid(const struct aws_hash_iter *iter) { + if (!iter) { + return false; + } + if (!iter->map) { + return false; + } + if (!aws_hash_table_is_valid(iter->map)) { + return false; + } + if (iter->limit > iter->map->p_impl->size) { + return false; + } + + switch (iter->status) { + case AWS_HASH_ITER_STATUS_DONE: + /* Done iff slot == limit */ + return iter->slot == iter->limit; + case AWS_HASH_ITER_STATUS_DELETE_CALLED: + /* iter->slot can underflow to SIZE_MAX after a delete + * see the comments for aws_hash_iter_delete() */ + return iter->slot <= iter->limit || iter->slot == SIZE_MAX; + case AWS_HASH_ITER_STATUS_READY_FOR_USE: + /* A slot must point to a valid location (i.e. hash_code != 0) */ + return iter->slot < iter->limit && iter->map->p_impl->slots[iter->slot].hash_code != 0; + } + /* Invalid status code */ + return false; +} + +/** + * Determine the total number of bytes needed for a hash-table with + * "size" slots. If the result would overflow a size_t, return + * AWS_OP_ERR; otherwise, return AWS_OP_SUCCESS with the result in + * "required_bytes". + */ +int hash_table_state_required_bytes(size_t size, size_t *required_bytes) { + + size_t elemsize; + if (aws_mul_size_checked(size, sizeof(struct hash_table_entry), &elemsize)) { + return AWS_OP_ERR; + } + + if (aws_add_size_checked(elemsize, sizeof(struct hash_table_state), required_bytes)) { + return AWS_OP_ERR; + } + + return AWS_OP_SUCCESS; +} |