aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/croaring/src/roaring.c
diff options
context:
space:
mode:
authorvitalyisaev <vitalyisaev@ydb.tech>2023-11-14 09:58:56 +0300
committervitalyisaev <vitalyisaev@ydb.tech>2023-11-14 10:20:20 +0300
commitc2b2dfd9827a400a8495e172a56343462e3ceb82 (patch)
treecd4e4f597d01bede4c82dffeb2d780d0a9046bd0 /contrib/libs/croaring/src/roaring.c
parentd4ae8f119e67808cb0cf776ba6e0cf95296f2df7 (diff)
downloadydb-c2b2dfd9827a400a8495e172a56343462e3ceb82.tar.gz
YQ Connector: move tests from yql to ydb (OSS)
Перенос папки с тестами на Коннектор из папки yql в папку ydb (синхронизируется с github).
Diffstat (limited to 'contrib/libs/croaring/src/roaring.c')
-rw-r--r--contrib/libs/croaring/src/roaring.c3544
1 files changed, 3544 insertions, 0 deletions
diff --git a/contrib/libs/croaring/src/roaring.c b/contrib/libs/croaring/src/roaring.c
new file mode 100644
index 0000000000..c863aa5319
--- /dev/null
+++ b/contrib/libs/croaring/src/roaring.c
@@ -0,0 +1,3544 @@
+#include <assert.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <roaring/roaring.h>
+#include <roaring/roaring_array.h>
+
+#include <roaring/containers/containers.h>
+#include <roaring/bitset_util.h>
+#include <roaring/array_util.h>
+
+#ifdef __cplusplus
+using namespace ::roaring::internal;
+
+extern "C" { namespace roaring { namespace api {
+#endif
+
+#define CROARING_SERIALIZATION_ARRAY_UINT32 1
+#define CROARING_SERIALIZATION_CONTAINER 2
+
+extern inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r);
+extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t* r);
+extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t* r, bool cow);
+extern inline roaring_bitmap_t *roaring_bitmap_create(void);
+extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min, uint64_t max);
+extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r, uint64_t min, uint64_t max);
+
+static inline bool is_cow(const roaring_bitmap_t *r) {
+ return r->high_low_container.flags & ROARING_FLAG_COW;
+}
+static inline bool is_frozen(const roaring_bitmap_t *r) {
+ return r->high_low_container.flags & ROARING_FLAG_FROZEN;
+}
+
+// this is like roaring_bitmap_add, but it populates pointer arguments in such a
+// way
+// that we can recover the container touched, which, in turn can be used to
+// accelerate some functions (when you repeatedly need to add to the same
+// container)
+static inline container_t *containerptr_roaring_bitmap_add(
+ roaring_bitmap_t *r, uint32_t val,
+ uint8_t *type, int *index
+){
+ roaring_array_t *ra = &r->high_low_container;
+
+ uint16_t hb = val >> 16;
+ const int i = ra_get_index(ra, hb);
+ if (i >= 0) {
+ ra_unshare_container_at_index(ra, i);
+ container_t *c = ra_get_container_at_index(ra, i, type);
+ uint8_t new_type = *type;
+ container_t *c2 = container_add(c, val & 0xFFFF, *type, &new_type);
+ *index = i;
+ if (c2 != c) {
+ container_free(c, *type);
+ ra_set_container_at_index(ra, i, c2, new_type);
+ *type = new_type;
+ return c2;
+ } else {
+ return c;
+ }
+ } else {
+ array_container_t *new_ac = array_container_create();
+ container_t *c = container_add(new_ac, val & 0xFFFF,
+ ARRAY_CONTAINER_TYPE, type);
+ // we could just assume that it stays an array container
+ ra_insert_new_key_value_at(ra, -i - 1, hb, c, *type);
+ *index = -i - 1;
+ return c;
+ }
+}
+
+roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) {
+ roaring_bitmap_t *ans =
+ (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
+ if (!ans) {
+ return NULL;
+ }
+ bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap);
+ if (!is_ok) {
+ roaring_free(ans);
+ return NULL;
+ }
+ return ans;
+}
+
+bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) {
+ return ra_init_with_capacity(&r->high_low_container, cap);
+}
+
+static inline void add_bulk_impl(roaring_bitmap_t *r,
+ roaring_bulk_context_t *context,
+ uint32_t val) {
+ uint16_t key = val >> 16;
+ if (context->container == NULL || context->key != key) {
+ uint8_t typecode;
+ int idx;
+ context->container = containerptr_roaring_bitmap_add(
+ r, val, &typecode, &idx);
+ context->typecode = typecode;
+ context->idx = idx;
+ context->key = key;
+ } else {
+ // no need to seek the container, it is at hand
+ // because we already have the container at hand, we can do the
+ // insertion directly, bypassing the roaring_bitmap_add call
+ uint8_t new_typecode;
+ container_t *container2 = container_add(
+ context->container, val & 0xFFFF, context->typecode, &new_typecode);
+ if (container2 != context->container) {
+ // rare instance when we need to change the container type
+ container_free(context->container, context->typecode);
+ ra_set_container_at_index(&r->high_low_container, context->idx,
+ container2, new_typecode);
+ context->typecode = new_typecode;
+ context->container = container2;
+ }
+ }
+}
+
+void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
+ const uint32_t *vals) {
+ uint32_t val;
+ const uint32_t *start = vals;
+ const uint32_t *end = vals + n_args;
+ const uint32_t *current_val = start;
+
+ if (n_args == 0) {
+ return;
+ }
+
+ uint8_t typecode;
+ int idx;
+ container_t *container;
+ val = *current_val;
+ container = containerptr_roaring_bitmap_add(r, val, &typecode, &idx);
+ roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16), typecode};
+
+ for (; current_val != end; current_val++) {
+ memcpy(&val, current_val, sizeof(val));
+ add_bulk_impl(r, &context, val);
+ }
+}
+
+void roaring_bitmap_add_bulk(roaring_bitmap_t *r,
+ roaring_bulk_context_t *context, uint32_t val) {
+ add_bulk_impl(r, context, val);
+}
+
+bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r,
+ roaring_bulk_context_t *context,
+ uint32_t val)
+{
+ uint16_t key = val >> 16;
+ if (context->container == NULL || context->key != key) {
+ int32_t start_idx = -1;
+ if (context->container != NULL && context->key < key) {
+ start_idx = context->idx;
+ }
+ int idx = ra_advance_until(&r->high_low_container, key, start_idx);
+ if (idx == ra_get_size(&r->high_low_container)) {
+ return false;
+ }
+ uint8_t typecode;
+ context->container = ra_get_container_at_index(&r->high_low_container, idx, &typecode);
+ context->typecode = typecode;
+ context->idx = idx;
+ context->key = ra_get_key_at_index(&r->high_low_container, idx);
+ // ra_advance_until finds the next key >= the target, we found a later container.
+ if (context->key != key) {
+ return false;
+ }
+ }
+ // context is now set up
+ return container_contains(context->container, val & 0xFFFF, context->typecode);
+}
+
+roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) {
+ roaring_bitmap_t *answer = roaring_bitmap_create();
+ roaring_bitmap_add_many(answer, n_args, vals);
+ return answer;
+}
+
+roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) {
+ // todo: could be greatly optimized but we do not expect this call to ever
+ // include long lists
+ roaring_bitmap_t *answer = roaring_bitmap_create();
+ roaring_bulk_context_t context = {0};
+ va_list ap;
+ va_start(ap, n_args);
+ for (size_t i = 0; i < n_args; i++) {
+ uint32_t val = va_arg(ap, uint32_t);
+ roaring_bitmap_add_bulk(answer, &context, val);
+ }
+ va_end(ap);
+ return answer;
+}
+
+static inline uint32_t minimum_uint32(uint32_t a, uint32_t b) {
+ return (a < b) ? a : b;
+}
+
+static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) {
+ return (a < b) ? a : b;
+}
+
+roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
+ uint32_t step) {
+ if(max >= UINT64_C(0x100000000)) {
+ max = UINT64_C(0x100000000);
+ }
+ if (step == 0) return NULL;
+ if (max <= min) return NULL;
+ roaring_bitmap_t *answer = roaring_bitmap_create();
+ if (step >= (1 << 16)) {
+ for (uint32_t value = (uint32_t)min; value < max; value += step) {
+ roaring_bitmap_add(answer, value);
+ }
+ return answer;
+ }
+ uint64_t min_tmp = min;
+ do {
+ uint32_t key = (uint32_t)min_tmp >> 16;
+ uint32_t container_min = min_tmp & 0xFFFF;
+ uint32_t container_max = (uint32_t)minimum_uint64(max - (key << 16), 1 << 16);
+ uint8_t type;
+ container_t *container = container_from_range(&type, container_min,
+ container_max, (uint16_t)step);
+ ra_append(&answer->high_low_container, key, container, type);
+ uint32_t gap = container_max - container_min + step - 1;
+ min_tmp += gap - (gap % step);
+ } while (min_tmp < max);
+ // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step
+ return answer;
+}
+
+void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) {
+ if (min > max) {
+ return;
+ }
+
+ roaring_array_t *ra = &r->high_low_container;
+
+ uint32_t min_key = min >> 16;
+ uint32_t max_key = max >> 16;
+
+ int32_t num_required_containers = max_key - min_key + 1;
+ int32_t suffix_length = count_greater(ra->keys, ra->size, max_key);
+ int32_t prefix_length = count_less(ra->keys, ra->size - suffix_length,
+ min_key);
+ int32_t common_length = ra->size - prefix_length - suffix_length;
+
+ if (num_required_containers > common_length) {
+ ra_shift_tail(ra, suffix_length,
+ num_required_containers - common_length);
+ }
+
+ int32_t src = prefix_length + common_length - 1;
+ int32_t dst = ra->size - suffix_length - 1;
+ for (uint32_t key = max_key; key != min_key-1; key--) { // beware of min_key==0
+ uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0;
+ uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff;
+ container_t* new_container;
+ uint8_t new_type;
+
+ if (src >= 0 && ra->keys[src] == key) {
+ ra_unshare_container_at_index(ra, src);
+ new_container = container_add_range(ra->containers[src],
+ ra->typecodes[src],
+ container_min, container_max,
+ &new_type);
+ if (new_container != ra->containers[src]) {
+ container_free(ra->containers[src],
+ ra->typecodes[src]);
+ }
+ src--;
+ } else {
+ new_container = container_from_range(&new_type, container_min,
+ container_max+1, 1);
+ }
+ ra_replace_key_and_container_at_index(ra, dst, key, new_container,
+ new_type);
+ dst--;
+ }
+}
+
+void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min, uint32_t max) {
+ if (min > max) {
+ return;
+ }
+
+ roaring_array_t *ra = &r->high_low_container;
+
+ uint32_t min_key = min >> 16;
+ uint32_t max_key = max >> 16;
+
+ int32_t src = count_less(ra->keys, ra->size, min_key);
+ int32_t dst = src;
+ while (src < ra->size && ra->keys[src] <= max_key) {
+ uint32_t container_min = (min_key == ra->keys[src]) ? (min & 0xffff) : 0;
+ uint32_t container_max = (max_key == ra->keys[src]) ? (max & 0xffff) : 0xffff;
+ ra_unshare_container_at_index(ra, src);
+ container_t *new_container;
+ uint8_t new_type;
+ new_container = container_remove_range(ra->containers[src],
+ ra->typecodes[src],
+ container_min, container_max,
+ &new_type);
+ if (new_container != ra->containers[src]) {
+ container_free(ra->containers[src],
+ ra->typecodes[src]);
+ }
+ if (new_container) {
+ ra_replace_key_and_container_at_index(ra, dst, ra->keys[src],
+ new_container, new_type);
+ dst++;
+ }
+ src++;
+ }
+ if (src > dst) {
+ ra_shift_tail(ra, ra->size - src, dst - src);
+ }
+}
+
+void roaring_bitmap_printf(const roaring_bitmap_t *r) {
+ const roaring_array_t *ra = &r->high_low_container;
+
+ printf("{");
+ for (int i = 0; i < ra->size; ++i) {
+ container_printf_as_uint32_array(ra->containers[i], ra->typecodes[i],
+ ((uint32_t)ra->keys[i]) << 16);
+
+ if (i + 1 < ra->size) {
+ printf(",");
+ }
+ }
+ printf("}");
+}
+
+void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) {
+ const roaring_array_t *ra = &r->high_low_container;
+
+ printf("{");
+ for (int i = 0; i < ra->size; ++i) {
+ printf("%d: %s (%d)", ra->keys[i],
+ get_full_container_name(ra->containers[i], ra->typecodes[i]),
+ container_get_cardinality(ra->containers[i], ra->typecodes[i]));
+ if (ra->typecodes[i] == SHARED_CONTAINER_TYPE) {
+ printf("(shared count = %" PRIu32 " )",
+ croaring_refcount_get(
+ &(CAST_shared(ra->containers[i])->counter)));
+ }
+
+ if (i + 1 < ra->size) {
+ printf(", ");
+ }
+ }
+ printf("}");
+}
+
+typedef struct min_max_sum_s {
+ uint32_t min;
+ uint32_t max;
+ uint64_t sum;
+} min_max_sum_t;
+
+static bool min_max_sum_fnc(uint32_t value, void *param) {
+ min_max_sum_t *mms = (min_max_sum_t *)param;
+ if (value > mms->max) mms->max = value;
+ if (value < mms->min) mms->min = value;
+ mms->sum += value;
+ return true; // we always process all data points
+}
+
+/**
+* (For advanced users.)
+* Collect statistics about the bitmap
+*/
+void roaring_bitmap_statistics(const roaring_bitmap_t *r,
+ roaring_statistics_t *stat) {
+ const roaring_array_t *ra = &r->high_low_container;
+
+ memset(stat, 0, sizeof(*stat));
+ stat->n_containers = ra->size;
+ stat->cardinality = roaring_bitmap_get_cardinality(r);
+ min_max_sum_t mms;
+ mms.min = UINT32_C(0xFFFFFFFF);
+ mms.max = UINT32_C(0);
+ mms.sum = 0;
+ roaring_iterate(r, &min_max_sum_fnc, &mms);
+ stat->min_value = mms.min;
+ stat->max_value = mms.max;
+ stat->sum_value = mms.sum;
+
+ for (int i = 0; i < ra->size; ++i) {
+ uint8_t truetype =
+ get_container_type(ra->containers[i], ra->typecodes[i]);
+ uint32_t card =
+ container_get_cardinality(ra->containers[i], ra->typecodes[i]);
+ uint32_t sbytes =
+ container_size_in_bytes(ra->containers[i], ra->typecodes[i]);
+ switch (truetype) {
+ case BITSET_CONTAINER_TYPE:
+ stat->n_bitset_containers++;
+ stat->n_values_bitset_containers += card;
+ stat->n_bytes_bitset_containers += sbytes;
+ break;
+ case ARRAY_CONTAINER_TYPE:
+ stat->n_array_containers++;
+ stat->n_values_array_containers += card;
+ stat->n_bytes_array_containers += sbytes;
+ break;
+ case RUN_CONTAINER_TYPE:
+ stat->n_run_containers++;
+ stat->n_values_run_containers += card;
+ stat->n_bytes_run_containers += sbytes;
+ break;
+ default:
+ assert(false);
+ roaring_unreachable;
+ }
+ }
+}
+
+/*
+ * Checks that:
+ * - Array containers are sorted and contain no duplicates
+ * - Range containers are sorted and contain no overlapping ranges
+ * - Roaring containers are sorted by key and there are no duplicate keys
+ * - The correct container type is use for each container (e.g. bitmaps aren't used for small containers)
+ */
+bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r, const char **reason) {
+ const char *reason_local;
+ if (reason == NULL) {
+ // Always allow assigning through *reason
+ reason = &reason_local;
+ }
+ *reason = NULL;
+ const roaring_array_t *ra = &r->high_low_container;
+ if (ra->size < 0) {
+ *reason = "negative size";
+ return false;
+ }
+ if (ra->allocation_size < 0) {
+ *reason = "negative allocation size";
+ return false;
+ }
+ if (ra->size > ra->allocation_size) {
+ *reason = "more containers than allocated space";
+ return false;
+ }
+ if (ra->flags & ~(ROARING_FLAG_COW | ROARING_FLAG_FROZEN)) {
+ *reason = "invalid flags";
+ return false;
+ }
+ if (ra->size == 0) {
+ return true;
+ }
+
+ if (ra->keys == NULL) {
+ *reason = "keys is NULL";
+ return false;
+ }
+ if (ra->typecodes == NULL) {
+ *reason = "typecodes is NULL";
+ return false;
+ }
+ if (ra->containers == NULL) {
+ *reason = "containers is NULL";
+ return false;
+ }
+
+ uint32_t prev_key = ra->keys[0];
+ for (int32_t i = 1; i < ra->size; ++i) {
+ if (ra->keys[i] <= prev_key) {
+ *reason = "keys not strictly increasing";
+ return false;
+ }
+ prev_key = ra->keys[i];
+ }
+
+ for (int32_t i = 0; i < ra->size; ++i) {
+ if (!container_internal_validate(ra->containers[i], ra->typecodes[i], reason)) {
+ // reason should already be set
+ if (*reason == NULL) {
+ *reason = "container failed to validate but no reason given";
+ }
+ return false;
+ }
+ }
+
+ return true;
+}
+
+roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
+ roaring_bitmap_t *ans =
+ (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
+ if (!ans) {
+ return NULL;
+ }
+ if (!ra_init_with_capacity( // allocation of list of containers can fail
+ &ans->high_low_container, r->high_low_container.size)
+ ){
+ roaring_free(ans);
+ return NULL;
+ }
+ if (!ra_overwrite( // memory allocation of individual containers may fail
+ &r->high_low_container, &ans->high_low_container, is_cow(r))
+ ){
+ roaring_bitmap_free(ans); // overwrite should leave in freeable state
+ return NULL;
+ }
+ roaring_bitmap_set_copy_on_write(ans, is_cow(r));
+ return ans;
+}
+
+bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
+ const roaring_bitmap_t *src) {
+ roaring_bitmap_set_copy_on_write(dest, is_cow(src));
+ return ra_overwrite(&src->high_low_container, &dest->high_low_container,
+ is_cow(src));
+}
+
+void roaring_bitmap_free(const roaring_bitmap_t *r) {
+ if(r == NULL) { return; }
+ if (!is_frozen(r)) {
+ ra_clear((roaring_array_t*)&r->high_low_container);
+ }
+ roaring_free((roaring_bitmap_t*)r);
+}
+
+void roaring_bitmap_clear(roaring_bitmap_t *r) {
+ ra_reset(&r->high_low_container);
+}
+
+void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) {
+ roaring_array_t *ra = &r->high_low_container;
+
+ const uint16_t hb = val >> 16;
+ const int i = ra_get_index(ra, hb);
+ uint8_t typecode;
+ if (i >= 0) {
+ ra_unshare_container_at_index(ra, i);
+ container_t *container =
+ ra_get_container_at_index(ra, i, &typecode);
+ uint8_t newtypecode = typecode;
+ container_t *container2 =
+ container_add(container, val & 0xFFFF, typecode, &newtypecode);
+ if (container2 != container) {
+ container_free(container, typecode);
+ ra_set_container_at_index(&r->high_low_container, i, container2,
+ newtypecode);
+ }
+ } else {
+ array_container_t *newac = array_container_create();
+ container_t *container = container_add(newac, val & 0xFFFF,
+ ARRAY_CONTAINER_TYPE, &typecode);
+ // we could just assume that it stays an array container
+ ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
+ container, typecode);
+ }
+}
+
+bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) {
+ const uint16_t hb = val >> 16;
+ const int i = ra_get_index(&r->high_low_container, hb);
+ uint8_t typecode;
+ bool result = false;
+ if (i >= 0) {
+ ra_unshare_container_at_index(&r->high_low_container, i);
+ container_t *container =
+ ra_get_container_at_index(&r->high_low_container, i, &typecode);
+
+ const int oldCardinality =
+ container_get_cardinality(container, typecode);
+
+ uint8_t newtypecode = typecode;
+ container_t *container2 =
+ container_add(container, val & 0xFFFF, typecode, &newtypecode);
+ if (container2 != container) {
+ container_free(container, typecode);
+ ra_set_container_at_index(&r->high_low_container, i, container2,
+ newtypecode);
+ result = true;
+ } else {
+ const int newCardinality =
+ container_get_cardinality(container, newtypecode);
+
+ result = oldCardinality != newCardinality;
+ }
+ } else {
+ array_container_t *newac = array_container_create();
+ container_t *container = container_add(newac, val & 0xFFFF,
+ ARRAY_CONTAINER_TYPE, &typecode);
+ // we could just assume that it stays an array container
+ ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
+ container, typecode);
+ result = true;
+ }
+
+ return result;
+}
+
+void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) {
+ const uint16_t hb = val >> 16;
+ const int i = ra_get_index(&r->high_low_container, hb);
+ uint8_t typecode;
+ if (i >= 0) {
+ ra_unshare_container_at_index(&r->high_low_container, i);
+ container_t *container =
+ ra_get_container_at_index(&r->high_low_container, i, &typecode);
+ uint8_t newtypecode = typecode;
+ container_t *container2 =
+ container_remove(container, val & 0xFFFF, typecode, &newtypecode);
+ if (container2 != container) {
+ container_free(container, typecode);
+ ra_set_container_at_index(&r->high_low_container, i, container2,
+ newtypecode);
+ }
+ if (container_get_cardinality(container2, newtypecode) != 0) {
+ ra_set_container_at_index(&r->high_low_container, i, container2,
+ newtypecode);
+ } else {
+ ra_remove_at_index_and_free(&r->high_low_container, i);
+ }
+ }
+}
+
+bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) {
+ const uint16_t hb = val >> 16;
+ const int i = ra_get_index(&r->high_low_container, hb);
+ uint8_t typecode;
+ bool result = false;
+ if (i >= 0) {
+ ra_unshare_container_at_index(&r->high_low_container, i);
+ container_t *container =
+ ra_get_container_at_index(&r->high_low_container, i, &typecode);
+
+ const int oldCardinality =
+ container_get_cardinality(container, typecode);
+
+ uint8_t newtypecode = typecode;
+ container_t *container2 =
+ container_remove(container, val & 0xFFFF, typecode, &newtypecode);
+ if (container2 != container) {
+ container_free(container, typecode);
+ ra_set_container_at_index(&r->high_low_container, i, container2,
+ newtypecode);
+ }
+
+ const int newCardinality =
+ container_get_cardinality(container2, newtypecode);
+
+ if (newCardinality != 0) {
+ ra_set_container_at_index(&r->high_low_container, i, container2,
+ newtypecode);
+ } else {
+ ra_remove_at_index_and_free(&r->high_low_container, i);
+ }
+
+ result = oldCardinality != newCardinality;
+ }
+ return result;
+}
+
+void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
+ const uint32_t *vals) {
+ if (n_args == 0 || r->high_low_container.size == 0) {
+ return;
+ }
+ int32_t pos = -1; // position of the container used in the previous iteration
+ for (size_t i = 0; i < n_args; i++) {
+ uint16_t key = (uint16_t)(vals[i] >> 16);
+ if (pos < 0 || key != r->high_low_container.keys[pos]) {
+ pos = ra_get_index(&r->high_low_container, key);
+ }
+ if (pos >= 0) {
+ uint8_t new_typecode;
+ container_t *new_container;
+ new_container = container_remove(r->high_low_container.containers[pos],
+ vals[i] & 0xffff,
+ r->high_low_container.typecodes[pos],
+ &new_typecode);
+ if (new_container != r->high_low_container.containers[pos]) {
+ container_free(r->high_low_container.containers[pos],
+ r->high_low_container.typecodes[pos]);
+ ra_replace_key_and_container_at_index(&r->high_low_container,
+ pos, key, new_container,
+ new_typecode);
+ }
+ if (!container_nonzero_cardinality(new_container, new_typecode)) {
+ container_free(new_container, new_typecode);
+ ra_remove_at_index(&r->high_low_container, pos);
+ pos = -1;
+ }
+ }
+ }
+}
+
+// there should be some SIMD optimizations possible here
+roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ uint32_t neededcap = length1 > length2 ? length2 : length1;
+ roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap);
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+
+ int pos1 = 0, pos2 = 0;
+
+ while (pos1 < length1 && pos2 < length2) {
+ const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ if (s1 == s2) {
+ uint8_t type1, type2;
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_and(c1, type1, c2, type2, &result_type);
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ } else {
+ container_free(c, result_type); // otherwise: memory leak!
+ }
+ ++pos1;
+ ++pos2;
+ } else if (s1 < s2) { // s1 < s2
+ pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
+ } else { // s1 > s2
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
+ }
+ }
+ return answer;
+}
+
+/**
+ * Compute the union of 'number' bitmaps.
+ */
+roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
+ const roaring_bitmap_t **x) {
+ if (number == 0) {
+ return roaring_bitmap_create();
+ }
+ if (number == 1) {
+ return roaring_bitmap_copy(x[0]);
+ }
+ roaring_bitmap_t *answer =
+ roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION);
+ for (size_t i = 2; i < number; i++) {
+ roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION);
+ }
+ roaring_bitmap_repair_after_lazy(answer);
+ return answer;
+}
+
+/**
+ * Compute the xor of 'number' bitmaps.
+ */
+roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
+ const roaring_bitmap_t **x) {
+ if (number == 0) {
+ return roaring_bitmap_create();
+ }
+ if (number == 1) {
+ return roaring_bitmap_copy(x[0]);
+ }
+ roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]);
+ for (size_t i = 2; i < number; i++) {
+ roaring_bitmap_lazy_xor_inplace(answer, x[i]);
+ }
+ roaring_bitmap_repair_after_lazy(answer);
+ return answer;
+}
+
+// inplace and (modifies its first argument).
+void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ if (x1 == x2) return;
+ int pos1 = 0, pos2 = 0, intersection_size = 0;
+ const int length1 = ra_get_size(&x1->high_low_container);
+ const int length2 = ra_get_size(&x2->high_low_container);
+
+ // any skipped-over or newly emptied containers in x1
+ // have to be freed.
+ while (pos1 < length1 && pos2 < length2) {
+ const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ if (s1 == s2) {
+ uint8_t type1, type2, result_type;
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+
+ // We do the computation "in place" only when c1 is not a shared container.
+ // Rationale: using a shared container safely with in place computation would
+ // require making a copy and then doing the computation in place which is likely
+ // less efficient than avoiding in place entirely and always generating a new
+ // container.
+ container_t *c =
+ (type1 == SHARED_CONTAINER_TYPE)
+ ? container_and(c1, type1, c2, type2, &result_type)
+ : container_iand(c1, type1, c2, type2, &result_type);
+
+ if (c != c1) { // in this instance a new container was created, and
+ // we need to free the old one
+ container_free(c1, type1);
+ }
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_replace_key_and_container_at_index(&x1->high_low_container,
+ intersection_size, s1, c,
+ result_type);
+ intersection_size++;
+ } else {
+ container_free(c, result_type);
+ }
+ ++pos1;
+ ++pos2;
+ } else if (s1 < s2) {
+ pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1);
+ } else { // s1 > s2
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
+ }
+ }
+
+ // if we ended early because x2 ran out, then all remaining in x1 should be
+ // freed
+ while (pos1 < length1) {
+ container_free(x1->high_low_container.containers[pos1],
+ x1->high_low_container.typecodes[pos1]);
+ ++pos1;
+ }
+
+ // all containers after this have either been copied or freed
+ ra_downsize(&x1->high_low_container, intersection_size);
+}
+
+roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) {
+ return roaring_bitmap_copy(x2);
+ }
+ if (0 == length2) {
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer =
+ roaring_bitmap_create_with_capacity(length1 + length2);
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_or(c1, type1, c2, type2, &result_type);
+
+ // since we assume that the initial containers are non-empty, the
+ // result here
+ // can only be non-empty
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ // c1 = container_clone(c1, type1);
+ c1 = get_copy_of_container(c1, &type1, is_cow(x1));
+ if (is_cow(x1)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c1,
+ type1);
+ }
+ ra_append(&answer->high_low_container, s1, c1, type1);
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ // c2 = container_clone(c2, type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_append(&answer->high_low_container, s2, c2, type2);
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x2->high_low_container, pos2, length2,
+ is_cow(x2));
+ } else if (pos2 == length2) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
+}
+
+// inplace or (modifies its first argument).
+void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ int length1 = x1->high_low_container.size;
+ const int length2 = x2->high_low_container.size;
+
+ if (0 == length2) return;
+
+ if (0 == length1) {
+ roaring_bitmap_overwrite(x1, x2);
+ return;
+ }
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ if (!container_is_full(c1, type1)) {
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c =
+ (type1 == SHARED_CONTAINER_TYPE)
+ ? container_or(c1, type1, c2, type2, &result_type)
+ : container_ior(c1, type1, c2, type2, &result_type);
+
+ if (c != c1) { // in this instance a new container was created,
+ // and we need to free the old one
+ container_free(c1, type1);
+ }
+ ra_set_container_at_index(&x1->high_low_container, pos1, c,
+ result_type);
+ }
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
+ pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+
+ // container_t *c2_clone = container_clone(c2, type2);
+ ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
+ type2);
+ pos1++;
+ length1++;
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
+ pos2, length2, is_cow(x2));
+ }
+}
+
+roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) {
+ return roaring_bitmap_copy(x2);
+ }
+ if (0 == length2) {
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer =
+ roaring_bitmap_create_with_capacity(length1 + length2);
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_xor(c1, type1, c2, type2, &result_type);
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ } else {
+ container_free(c, result_type);
+ }
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ c1 = get_copy_of_container(c1, &type1, is_cow(x1));
+ if (is_cow(x1)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c1,
+ type1);
+ }
+ ra_append(&answer->high_low_container, s1, c1, type1);
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_append(&answer->high_low_container, s2, c2, type2);
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x2->high_low_container, pos2, length2,
+ is_cow(x2));
+ } else if (pos2 == length2) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
+}
+
+// inplace xor (modifies its first argument).
+
+void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ assert(x1 != x2);
+ uint8_t result_type = 0;
+ int length1 = x1->high_low_container.size;
+ const int length2 = x2->high_low_container.size;
+
+ if (0 == length2) return;
+
+ if (0 == length1) {
+ roaring_bitmap_overwrite(x1, x2);
+ return;
+ }
+
+ // XOR can have new containers inserted from x2, but can also
+ // lose containers when x1 and x2 are nonempty and identical.
+
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+
+ // We do the computation "in place" only when c1 is not a shared container.
+ // Rationale: using a shared container safely with in place computation would
+ // require making a copy and then doing the computation in place which is likely
+ // less efficient than avoiding in place entirely and always generating a new
+ // container.
+
+ container_t *c;
+ if (type1 == SHARED_CONTAINER_TYPE) {
+ c = container_xor(c1, type1, c2, type2, &result_type);
+ shared_container_free(CAST_shared(c1)); // so release
+ }
+ else {
+ c = container_ixor(c1, type1, c2, type2, &result_type);
+ }
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c,
+ result_type);
+ ++pos1;
+ } else {
+ container_free(c, result_type);
+ ra_remove_at_index(&x1->high_low_container, pos1);
+ --length1;
+ }
+
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+
+ ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
+ type2);
+ pos1++;
+ length1++;
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
+ pos2, length2, is_cow(x2));
+ }
+}
+
+roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) {
+ roaring_bitmap_t *empty_bitmap = roaring_bitmap_create();
+ roaring_bitmap_set_copy_on_write(empty_bitmap, is_cow(x1) || is_cow(x2));
+ return empty_bitmap;
+ }
+ if (0 == length2) {
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1);
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = 0;
+ uint16_t s2 = 0;
+ while (true) {
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_andnot(c1, type1, c2, type2,
+ &result_type);
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ } else {
+ container_free(c, result_type);
+ }
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ } else if (s1 < s2) { // s1 < s2
+ const int next_pos1 =
+ ra_advance_until(&x1->high_low_container, s2, pos1);
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, next_pos1,
+ is_cow(x1));
+ // TODO : perhaps some of the copy_on_write should be based on
+ // answer rather than x1 (more stringent?). Many similar cases
+ pos1 = next_pos1;
+ if (pos1 == length1) break;
+ } else { // s1 > s2
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
+ if (pos2 == length2) break;
+ }
+ }
+ if (pos2 == length2) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
+}
+
+// inplace andnot (modifies its first argument).
+
+void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ assert(x1 != x2);
+
+ uint8_t result_type = 0;
+ int length1 = x1->high_low_container.size;
+ const int length2 = x2->high_low_container.size;
+ int intersection_size = 0;
+
+ if (0 == length2) return;
+
+ if (0 == length1) {
+ roaring_bitmap_clear(x1);
+ return;
+ }
+
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+
+ // We do the computation "in place" only when c1 is not a shared container.
+ // Rationale: using a shared container safely with in place computation would
+ // require making a copy and then doing the computation in place which is likely
+ // less efficient than avoiding in place entirely and always generating a new
+ // container.
+
+ container_t *c;
+ if (type1 == SHARED_CONTAINER_TYPE) {
+ c = container_andnot(c1, type1, c2, type2, &result_type);
+ shared_container_free(CAST_shared(c1)); // release
+ }
+ else {
+ c = container_iandnot(c1, type1, c2, type2, &result_type);
+ }
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_replace_key_and_container_at_index(&x1->high_low_container,
+ intersection_size++, s1,
+ c, result_type);
+ } else {
+ container_free(c, result_type);
+ }
+
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ if (pos1 != intersection_size) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+
+ ra_replace_key_and_container_at_index(&x1->high_low_container,
+ intersection_size, s1, c1,
+ type1);
+ }
+ intersection_size++;
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+
+ if (pos1 < length1) {
+ // all containers between intersection_size and
+ // pos1 are junk. However, they have either been moved
+ // (thus still referenced) or involved in an iandnot
+ // that will clean up all containers that could not be reused.
+ // Thus we should not free the junk containers between
+ // intersection_size and pos1.
+ if (pos1 > intersection_size) {
+ // left slide of remaining items
+ ra_copy_range(&x1->high_low_container, pos1, length1,
+ intersection_size);
+ }
+ // else current placement is fine
+ intersection_size += (length1 - pos1);
+ }
+ ra_downsize(&x1->high_low_container, intersection_size);
+}
+
+uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) {
+ const roaring_array_t *ra = &r->high_low_container;
+
+ uint64_t card = 0;
+ for (int i = 0; i < ra->size; ++i)
+ card += container_get_cardinality(ra->containers[i], ra->typecodes[i]);
+ return card;
+}
+
+uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
+ uint64_t range_start,
+ uint64_t range_end) {
+ const roaring_array_t *ra = &r->high_low_container;
+
+ if (range_end > UINT32_MAX) {
+ range_end = UINT32_MAX + UINT64_C(1);
+ }
+ if (range_start >= range_end) {
+ return 0;
+ }
+ range_end--; // make range_end inclusive
+ // now we have: 0 <= range_start <= range_end <= UINT32_MAX
+
+ uint16_t minhb = range_start >> 16;
+ uint16_t maxhb = range_end >> 16;
+
+ uint64_t card = 0;
+
+ int i = ra_get_index(ra, minhb);
+ if (i >= 0) {
+ if (minhb == maxhb) {
+ card += container_rank(ra->containers[i], ra->typecodes[i],
+ range_end & 0xffff);
+ } else {
+ card += container_get_cardinality(ra->containers[i],
+ ra->typecodes[i]);
+ }
+ if ((range_start & 0xffff) != 0) {
+ card -= container_rank(ra->containers[i], ra->typecodes[i],
+ (range_start & 0xffff) - 1);
+ }
+ i++;
+ } else {
+ i = -i - 1;
+ }
+
+ for (; i < ra->size; i++) {
+ uint16_t key = ra->keys[i];
+ if (key < maxhb) {
+ card += container_get_cardinality(ra->containers[i],
+ ra->typecodes[i]);
+ } else if (key == maxhb) {
+ card += container_rank(ra->containers[i], ra->typecodes[i],
+ range_end & 0xffff);
+ break;
+ } else {
+ break;
+ }
+ }
+
+ return card;
+}
+
+
+bool roaring_bitmap_is_empty(const roaring_bitmap_t *r) {
+ return r->high_low_container.size == 0;
+}
+
+void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) {
+ ra_to_uint32_array(&r->high_low_container, ans);
+}
+
+bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r,
+ size_t offset, size_t limit,
+ uint32_t *ans) {
+ return ra_range_uint32_array(&r->high_low_container, offset, limit, ans);
+}
+
+/** convert array and bitmap containers to run containers when it is more
+ * efficient;
+ * also convert from run containers when more space efficient. Returns
+ * true if the result has at least one run container.
+*/
+bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) {
+ bool answer = false;
+ for (int i = 0; i < r->high_low_container.size; i++) {
+ uint8_t type_original, type_after;
+ ra_unshare_container_at_index(
+ &r->high_low_container, i); // TODO: this introduces extra cloning!
+ container_t *c = ra_get_container_at_index(&r->high_low_container, i,
+ &type_original);
+ container_t *c1 = convert_run_optimize(c, type_original, &type_after);
+ if (type_after == RUN_CONTAINER_TYPE) {
+ answer = true;
+ }
+ ra_set_container_at_index(&r->high_low_container, i, c1, type_after);
+ }
+ return answer;
+}
+
+size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) {
+ size_t answer = 0;
+ for (int i = 0; i < r->high_low_container.size; i++) {
+ uint8_t type_original;
+ container_t *c = ra_get_container_at_index(&r->high_low_container, i,
+ &type_original);
+ answer += container_shrink_to_fit(c, type_original);
+ }
+ answer += ra_shrink_to_fit(&r->high_low_container);
+ return answer;
+}
+
+/**
+ * Remove run-length encoding even when it is more space efficient
+ * return whether a change was applied
+ */
+bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) {
+ bool answer = false;
+ for (int i = 0; i < r->high_low_container.size; i++) {
+ uint8_t type_original, type_after;
+ container_t *c = ra_get_container_at_index(&r->high_low_container, i,
+ &type_original);
+ if (get_container_type(c, type_original) == RUN_CONTAINER_TYPE) {
+ answer = true;
+ if (type_original == SHARED_CONTAINER_TYPE) {
+ run_container_t *truec = CAST_run(CAST_shared(c)->container);
+ int32_t card = run_container_cardinality(truec);
+ container_t *c1 = convert_to_bitset_or_array_container(
+ truec, card, &type_after);
+ shared_container_free(CAST_shared(c)); // frees run as needed
+ ra_set_container_at_index(&r->high_low_container, i, c1,
+ type_after);
+
+ } else {
+ int32_t card = run_container_cardinality(CAST_run(c));
+ container_t *c1 = convert_to_bitset_or_array_container(
+ CAST_run(c), card, &type_after);
+ run_container_free(CAST_run(c));
+ ra_set_container_at_index(&r->high_low_container, i, c1,
+ type_after);
+ }
+ }
+ }
+ return answer;
+}
+
+size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf) {
+ size_t portablesize = roaring_bitmap_portable_size_in_bytes(r);
+ uint64_t cardinality = roaring_bitmap_get_cardinality(r);
+ uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t);
+ if (portablesize < sizeasarray) {
+ buf[0] = CROARING_SERIALIZATION_CONTAINER;
+ return roaring_bitmap_portable_serialize(r, buf + 1) + 1;
+ } else {
+ buf[0] = CROARING_SERIALIZATION_ARRAY_UINT32;
+ memcpy(buf + 1, &cardinality, sizeof(uint32_t));
+ roaring_bitmap_to_uint32_array(
+ r, (uint32_t *)(buf + 1 + sizeof(uint32_t)));
+ return 1 + (size_t)sizeasarray;
+ }
+}
+
+size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r) {
+ size_t portablesize = roaring_bitmap_portable_size_in_bytes(r);
+ uint64_t sizeasarray = roaring_bitmap_get_cardinality(r) * sizeof(uint32_t) +
+ sizeof(uint32_t);
+ return portablesize < sizeasarray ? portablesize + 1 : (size_t)sizeasarray + 1;
+}
+
+size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r) {
+ return ra_portable_size_in_bytes(&r->high_low_container);
+}
+
+
+roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf, size_t maxbytes) {
+ roaring_bitmap_t *ans =
+ (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
+ if (ans == NULL) {
+ return NULL;
+ }
+ size_t bytesread;
+ bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf, maxbytes, &bytesread);
+ if (!is_ok) {
+ roaring_free(ans);
+ return NULL;
+ }
+ roaring_bitmap_set_copy_on_write(ans, false);
+ if (!is_ok) {
+ roaring_free(ans);
+ return NULL;
+ }
+ return ans;
+}
+
+roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) {
+ return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX);
+}
+
+
+size_t roaring_bitmap_portable_deserialize_size(const char *buf, size_t maxbytes) {
+ return ra_portable_deserialize_size(buf, maxbytes);
+}
+
+
+size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r,
+ char *buf) {
+ return ra_portable_serialize(&r->high_low_container, buf);
+}
+
+roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) {
+ const char *bufaschar = (const char *)buf;
+ if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) {
+ /* This looks like a compressed set of uint32_t elements */
+ uint32_t card;
+
+ memcpy(&card, bufaschar + 1, sizeof(uint32_t));
+
+ const uint32_t *elems =
+ (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t));
+
+ roaring_bitmap_t *bitmap = roaring_bitmap_create();
+ if (bitmap == NULL) {
+ return NULL;
+ }
+ roaring_bulk_context_t context = {0};
+ for (uint32_t i = 0; i < card; i++) {
+ // elems may not be aligned, read with memcpy
+ uint32_t elem;
+ memcpy(&elem, elems + i, sizeof(elem));
+ roaring_bitmap_add_bulk(bitmap, &context, elem);
+ }
+ return bitmap;
+
+ } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) {
+ return roaring_bitmap_portable_deserialize(bufaschar + 1);
+ } else
+ return (NULL);
+}
+
+roaring_bitmap_t* roaring_bitmap_deserialize_safe(const void *buf, size_t maxbytes) {
+ if (maxbytes < 1) {
+ return NULL;
+ }
+
+ const char *bufaschar = (const char *)buf;
+ if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) {
+ if (maxbytes < 1 + sizeof(uint32_t)) {
+ return NULL;
+ }
+
+ /* This looks like a compressed set of uint32_t elements */
+ uint32_t card;
+ memcpy(&card, bufaschar + 1, sizeof(uint32_t));
+
+ // Check the buffer is big enough to contain card uint32_t elements
+ if (maxbytes < 1 + sizeof(uint32_t) + card * sizeof(uint32_t)) {
+ return NULL;
+ }
+
+ const uint32_t *elems =
+ (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t));
+
+ roaring_bitmap_t *bitmap = roaring_bitmap_create();
+ if (bitmap == NULL) {
+ return NULL;
+ }
+ roaring_bulk_context_t context = {0};
+ for (uint32_t i = 0; i < card; i++) {
+ // elems may not be aligned, read with memcpy
+ uint32_t elem;
+ memcpy(&elem, elems + i, sizeof(elem));
+ roaring_bitmap_add_bulk(bitmap, &context, elem);
+ }
+ return bitmap;
+
+ } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) {
+ return roaring_bitmap_portable_deserialize_safe(bufaschar + 1, maxbytes - 1);
+ } else
+ return (NULL);
+}
+
+bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator,
+ void *ptr) {
+ const roaring_array_t *ra = &r->high_low_container;
+
+ for (int i = 0; i < ra->size; ++i)
+ if (!container_iterate(ra->containers[i], ra->typecodes[i],
+ ((uint32_t)ra->keys[i]) << 16,
+ iterator, ptr)) {
+ return false;
+ }
+ return true;
+}
+
+bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator,
+ uint64_t high_bits, void *ptr) {
+ const roaring_array_t *ra = &r->high_low_container;
+
+ for (int i = 0; i < ra->size; ++i)
+ if (!container_iterate64(
+ ra->containers[i], ra->typecodes[i],
+ ((uint32_t)ra->keys[i]) << 16, iterator,
+ high_bits, ptr)) {
+ return false;
+ }
+ return true;
+}
+
+/****
+* begin roaring_uint32_iterator_t
+*****/
+
+// Partially initializes the roaring iterator when it begins looking at
+// a new container.
+static bool iter_new_container_partial_init(roaring_uint32_iterator_t *newit) {
+ newit->in_container_index = 0;
+ newit->run_index = 0;
+ newit->current_value = 0;
+ if (newit->container_index >= newit->parent->high_low_container.size ||
+ newit->container_index < 0) {
+ newit->current_value = UINT32_MAX;
+ return (newit->has_value = false);
+ }
+ // assume not empty
+ newit->has_value = true;
+ // we precompute container, typecode and highbits so that successive
+ // iterators do not have to grab them from odd memory locations
+ // and have to worry about the (easily predicted) container_unwrap_shared
+ // call.
+ newit->container =
+ newit->parent->high_low_container.containers[newit->container_index];
+ newit->typecode =
+ newit->parent->high_low_container.typecodes[newit->container_index];
+ newit->highbits =
+ ((uint32_t)
+ newit->parent->high_low_container.keys[newit->container_index])
+ << 16;
+ newit->container =
+ container_unwrap_shared(newit->container, &(newit->typecode));
+ return newit->has_value;
+}
+
+static bool loadfirstvalue(roaring_uint32_iterator_t *newit) {
+ if (!iter_new_container_partial_init(newit))
+ return newit->has_value;
+
+ switch (newit->typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc = const_CAST_bitset(newit->container);
+
+ uint32_t wordindex = 0;
+ uint64_t word;
+ while ((word = bc->words[wordindex]) == 0) {
+ wordindex++; // advance
+ }
+ // here "word" is non-zero
+ newit->in_container_index = wordindex * 64 + roaring_trailing_zeroes(word);
+ newit->current_value = newit->highbits | newit->in_container_index;
+ break; }
+
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac = const_CAST_array(newit->container);
+ newit->current_value = newit->highbits | ac->array[0];
+ break; }
+
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(newit->container);
+ newit->current_value = newit->highbits | rc->runs[0].value;
+ break; }
+
+ default:
+ // if this ever happens, bug!
+ assert(false);
+ } // switch (typecode)
+ return true;
+}
+
+static bool loadlastvalue(roaring_uint32_iterator_t* newit) {
+ if (!iter_new_container_partial_init(newit))
+ return newit->has_value;
+
+ switch(newit->typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ uint32_t wordindex = BITSET_CONTAINER_SIZE_IN_WORDS - 1;
+ uint64_t word;
+ const bitset_container_t* bitset_container = (const bitset_container_t*)newit->container;
+ while ((word = bitset_container->words[wordindex]) == 0)
+ --wordindex;
+
+ int num_leading_zeros = roaring_leading_zeroes(word);
+ newit->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
+ newit->current_value = newit->highbits | newit->in_container_index;
+ break;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t* array_container = (const array_container_t*)newit->container;
+ newit->in_container_index = array_container->cardinality - 1;
+ newit->current_value = newit->highbits | array_container->array[newit->in_container_index];
+ break;
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t* run_container = (const run_container_t*)newit->container;
+ newit->run_index = run_container->n_runs - 1;
+ const rle16_t* last_run = &run_container->runs[newit->run_index];
+ newit->current_value = newit->highbits | (last_run->value + last_run->length);
+ break;
+ }
+ default:
+ // if this ever happens, bug!
+ assert(false);
+ }
+ return true;
+}
+
+// prerequesite: the value should be in range of the container
+static bool loadfirstvalue_largeorequal(roaring_uint32_iterator_t *newit, uint32_t val) {
+ // Don't have to check return value because of prerequisite
+ iter_new_container_partial_init(newit);
+ uint16_t lb = val & 0xFFFF;
+
+ switch (newit->typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc = const_CAST_bitset(newit->container);
+ newit->in_container_index =
+ bitset_container_index_equalorlarger(bc, lb);
+ newit->current_value = newit->highbits | newit->in_container_index;
+ break; }
+
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac = const_CAST_array(newit->container);
+ newit->in_container_index =
+ array_container_index_equalorlarger(ac, lb);
+ newit->current_value =
+ newit->highbits | ac->array[newit->in_container_index];
+ break; }
+
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(newit->container);
+ newit->run_index = run_container_index_equalorlarger(rc, lb);
+ if (rc->runs[newit->run_index].value <= lb) {
+ newit->current_value = val;
+ } else {
+ newit->current_value =
+ newit->highbits | rc->runs[newit->run_index].value;
+ }
+ break; }
+
+ default:
+ roaring_unreachable;
+ }
+
+ return true;
+}
+
+void roaring_init_iterator(const roaring_bitmap_t *r,
+ roaring_uint32_iterator_t *newit) {
+ newit->parent = r;
+ newit->container_index = 0;
+ newit->has_value = loadfirstvalue(newit);
+}
+
+void roaring_init_iterator_last(const roaring_bitmap_t *r,
+ roaring_uint32_iterator_t *newit) {
+ newit->parent = r;
+ newit->container_index = newit->parent->high_low_container.size - 1;
+ newit->has_value = loadlastvalue(newit);
+}
+
+roaring_uint32_iterator_t *roaring_create_iterator(const roaring_bitmap_t *r) {
+ roaring_uint32_iterator_t *newit =
+ (roaring_uint32_iterator_t *)roaring_malloc(sizeof(roaring_uint32_iterator_t));
+ if (newit == NULL) return NULL;
+ roaring_init_iterator(r, newit);
+ return newit;
+}
+
+roaring_uint32_iterator_t *roaring_copy_uint32_iterator(
+ const roaring_uint32_iterator_t *it) {
+ roaring_uint32_iterator_t *newit =
+ (roaring_uint32_iterator_t *)roaring_malloc(sizeof(roaring_uint32_iterator_t));
+ memcpy(newit, it, sizeof(roaring_uint32_iterator_t));
+ return newit;
+}
+
+bool roaring_move_uint32_iterator_equalorlarger(roaring_uint32_iterator_t *it, uint32_t val) {
+ uint16_t hb = val >> 16;
+ const int i = ra_get_index(& it->parent->high_low_container, hb);
+ if (i >= 0) {
+ uint32_t lowvalue = container_maximum(it->parent->high_low_container.containers[i], it->parent->high_low_container.typecodes[i]);
+ uint16_t lb = val & 0xFFFF;
+ if(lowvalue < lb ) {
+ it->container_index = i+1; // will have to load first value of next container
+ } else {// the value is necessarily within the range of the container
+ it->container_index = i;
+ it->has_value = loadfirstvalue_largeorequal(it, val);
+ return it->has_value;
+ }
+ } else {
+ // there is no matching, so we are going for the next container
+ it->container_index = -i-1;
+ }
+ it->has_value = loadfirstvalue(it);
+ return it->has_value;
+}
+
+
+bool roaring_advance_uint32_iterator(roaring_uint32_iterator_t *it) {
+ if (it->container_index >= it->parent->high_low_container.size) {
+ return (it->has_value = false);
+ }
+ if (it->container_index < 0) {
+ it->container_index = 0;
+ return (it->has_value = loadfirstvalue(it));
+ }
+
+ switch (it->typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc = const_CAST_bitset(it->container);
+ it->in_container_index++;
+
+ uint32_t wordindex = it->in_container_index / 64;
+ if (wordindex >= BITSET_CONTAINER_SIZE_IN_WORDS) break;
+
+ uint64_t word = bc->words[wordindex] &
+ (UINT64_MAX << (it->in_container_index % 64));
+ // next part could be optimized/simplified
+ while ((word == 0) &&
+ (wordindex + 1 < BITSET_CONTAINER_SIZE_IN_WORDS)) {
+ wordindex++;
+ word = bc->words[wordindex];
+ }
+ if (word != 0) {
+ it->in_container_index = wordindex * 64 + roaring_trailing_zeroes(word);
+ it->current_value = it->highbits | it->in_container_index;
+ return (it->has_value = true);
+ }
+ break; }
+
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac = const_CAST_array(it->container);
+ it->in_container_index++;
+ if (it->in_container_index < ac->cardinality) {
+ it->current_value =
+ it->highbits | ac->array[it->in_container_index];
+ return (it->has_value = true);
+ }
+ break; }
+
+ case RUN_CONTAINER_TYPE: {
+ if(it->current_value == UINT32_MAX) { // avoid overflow to zero
+ return (it->has_value = false);
+ }
+
+ const run_container_t* rc = const_CAST_run(it->container);
+ uint32_t limit = (it->highbits | (rc->runs[it->run_index].value +
+ rc->runs[it->run_index].length));
+ if (++it->current_value <= limit) {
+ return (it->has_value = true);
+ }
+
+ if (++it->run_index < rc->n_runs) { // Assume the run has a value
+ it->current_value =
+ it->highbits | rc->runs[it->run_index].value;
+ return (it->has_value = true);
+ }
+ break;
+ }
+
+ default:
+ roaring_unreachable;
+ }
+
+ // moving to next container
+ it->container_index++;
+ return (it->has_value = loadfirstvalue(it));
+}
+
+bool roaring_previous_uint32_iterator(roaring_uint32_iterator_t *it) {
+ if (it->container_index < 0) {
+ return (it->has_value = false);
+ }
+ if (it->container_index >= it->parent->high_low_container.size) {
+ it->container_index = it->parent->high_low_container.size - 1;
+ return (it->has_value = loadlastvalue(it));
+ }
+
+ switch (it->typecode) {
+ case BITSET_CONTAINER_TYPE: {
+ if (--it->in_container_index < 0)
+ break;
+
+ const bitset_container_t* bitset_container = (const bitset_container_t*)it->container;
+ int32_t wordindex = it->in_container_index / 64;
+ uint64_t word = bitset_container->words[wordindex] & (UINT64_MAX >> (63 - (it->in_container_index % 64)));
+
+ while (word == 0 && --wordindex >= 0) {
+ word = bitset_container->words[wordindex];
+ }
+ if (word == 0)
+ break;
+
+ int num_leading_zeros = roaring_leading_zeroes(word);
+ it->in_container_index = (wordindex * 64) + (63 - num_leading_zeros);
+ it->current_value = it->highbits | it->in_container_index;
+ return (it->has_value = true);
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ if (--it->in_container_index < 0)
+ break;
+
+ const array_container_t* array_container = (const array_container_t*)it->container;
+ it->current_value = it->highbits | array_container->array[it->in_container_index];
+ return (it->has_value = true);
+ }
+ case RUN_CONTAINER_TYPE: {
+ if(it->current_value == 0)
+ return (it->has_value = false);
+
+ const run_container_t* run_container = (const run_container_t*)it->container;
+ if (--it->current_value >= (it->highbits | run_container->runs[it->run_index].value)) {
+ return (it->has_value = true);
+ }
+
+ if (--it->run_index < 0)
+ break;
+
+ it->current_value = it->highbits | (run_container->runs[it->run_index].value +
+ run_container->runs[it->run_index].length);
+ return (it->has_value = true);
+ }
+ default:
+ // if this ever happens, bug!
+ assert(false);
+ } // switch (typecode)
+
+ // moving to previous container
+ it->container_index--;
+ return (it->has_value = loadlastvalue(it));
+}
+
+uint32_t roaring_read_uint32_iterator(roaring_uint32_iterator_t *it, uint32_t* buf, uint32_t count) {
+ uint32_t ret = 0;
+ uint32_t num_values;
+ uint32_t wordindex; // used for bitsets
+ uint64_t word; // used for bitsets
+ const array_container_t* acont; //TODO remove
+ const run_container_t* rcont; //TODO remove
+ const bitset_container_t* bcont; //TODO remove
+
+ while (it->has_value && ret < count) {
+ switch (it->typecode) {
+ case BITSET_CONTAINER_TYPE:
+ bcont = const_CAST_bitset(it->container);
+ wordindex = it->in_container_index / 64;
+ word = bcont->words[wordindex] & (UINT64_MAX << (it->in_container_index % 64));
+ do {
+ while (word != 0 && ret < count) {
+ buf[0] = it->highbits | (wordindex * 64 + roaring_trailing_zeroes(word));
+ word = word & (word - 1);
+ buf++;
+ ret++;
+ }
+ while (word == 0 && wordindex+1 < BITSET_CONTAINER_SIZE_IN_WORDS) {
+ wordindex++;
+ word = bcont->words[wordindex];
+ }
+ } while (word != 0 && ret < count);
+ it->has_value = (word != 0);
+ if (it->has_value) {
+ it->in_container_index = wordindex * 64 + roaring_trailing_zeroes(word);
+ it->current_value = it->highbits | it->in_container_index;
+ }
+ break;
+ case ARRAY_CONTAINER_TYPE:
+ acont = const_CAST_array(it->container);
+ num_values = minimum_uint32(acont->cardinality - it->in_container_index, count - ret);
+ for (uint32_t i = 0; i < num_values; i++) {
+ buf[i] = it->highbits | acont->array[it->in_container_index + i];
+ }
+ buf += num_values;
+ ret += num_values;
+ it->in_container_index += num_values;
+ it->has_value = (it->in_container_index < acont->cardinality);
+ if (it->has_value) {
+ it->current_value = it->highbits | acont->array[it->in_container_index];
+ }
+ break;
+ case RUN_CONTAINER_TYPE:
+ rcont = const_CAST_run(it->container);
+ //"in_run_index" name is misleading, read it as "max_value_in_current_run"
+ do {
+ uint32_t largest_run_value = it->highbits | (rcont->runs[it->run_index].value + rcont->runs[it->run_index].length);
+ num_values = minimum_uint32(largest_run_value - it->current_value + 1, count - ret);
+ for (uint32_t i = 0; i < num_values; i++) {
+ buf[i] = it->current_value + i;
+ }
+ it->current_value += num_values; // this can overflow to zero: UINT32_MAX+1=0
+ buf += num_values;
+ ret += num_values;
+
+ if (it->current_value > largest_run_value || it->current_value == 0) {
+ it->run_index++;
+ if (it->run_index < rcont->n_runs) {
+ it->current_value = it->highbits | rcont->runs[it->run_index].value;
+ } else {
+ it->has_value = false;
+ }
+ }
+ } while ((ret < count) && it->has_value);
+ break;
+ default:
+ assert(false);
+ }
+ if (it->has_value) {
+ assert(ret == count);
+ return ret;
+ }
+ it->container_index++;
+ it->has_value = loadfirstvalue(it);
+ }
+ return ret;
+}
+
+
+
+void roaring_free_uint32_iterator(roaring_uint32_iterator_t *it) { roaring_free(it); }
+
+/****
+* end of roaring_uint32_iterator_t
+*****/
+
+bool roaring_bitmap_equals(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2) {
+ const roaring_array_t *ra1 = &r1->high_low_container;
+ const roaring_array_t *ra2 = &r2->high_low_container;
+
+ if (ra1->size != ra2->size) {
+ return false;
+ }
+ for (int i = 0; i < ra1->size; ++i) {
+ if (ra1->keys[i] != ra2->keys[i]) {
+ return false;
+ }
+ }
+ for (int i = 0; i < ra1->size; ++i) {
+ bool areequal = container_equals(ra1->containers[i],
+ ra1->typecodes[i],
+ ra2->containers[i],
+ ra2->typecodes[i]);
+ if (!areequal) {
+ return false;
+ }
+ }
+ return true;
+}
+
+bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2) {
+ const roaring_array_t *ra1 = &r1->high_low_container;
+ const roaring_array_t *ra2 = &r2->high_low_container;
+
+ const int length1 = ra1->size,
+ length2 = ra2->size;
+
+ int pos1 = 0, pos2 = 0;
+
+ while (pos1 < length1 && pos2 < length2) {
+ const uint16_t s1 = ra_get_key_at_index(ra1, pos1);
+ const uint16_t s2 = ra_get_key_at_index(ra2, pos2);
+
+ if (s1 == s2) {
+ uint8_t type1, type2;
+ container_t *c1 = ra_get_container_at_index(ra1, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(ra2, pos2, &type2);
+ if (!container_is_subset(c1, type1, c2, type2))
+ return false;
+ ++pos1;
+ ++pos2;
+ } else if (s1 < s2) { // s1 < s2
+ return false;
+ } else { // s1 > s2
+ pos2 = ra_advance_until(ra2, s1, pos2);
+ }
+ }
+ if (pos1 == length1)
+ return true;
+ else
+ return false;
+}
+
+static void insert_flipped_container(roaring_array_t *ans_arr,
+ const roaring_array_t *x1_arr, uint16_t hb,
+ uint16_t lb_start, uint16_t lb_end) {
+ const int i = ra_get_index(x1_arr, hb);
+ const int j = ra_get_index(ans_arr, hb);
+ uint8_t ctype_in, ctype_out;
+ container_t *flipped_container = NULL;
+ if (i >= 0) {
+ container_t *container_to_flip =
+ ra_get_container_at_index(x1_arr, i, &ctype_in);
+ flipped_container =
+ container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start,
+ (uint32_t)(lb_end + 1), &ctype_out);
+
+ if (container_get_cardinality(flipped_container, ctype_out))
+ ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
+ ctype_out);
+ else {
+ container_free(flipped_container, ctype_out);
+ }
+ } else {
+ flipped_container = container_range_of_ones(
+ (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
+ ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
+ ctype_out);
+ }
+}
+
+static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb,
+ uint16_t lb_start, uint16_t lb_end) {
+ const int i = ra_get_index(x1_arr, hb);
+ uint8_t ctype_in, ctype_out;
+ container_t *flipped_container = NULL;
+ if (i >= 0) {
+ container_t *container_to_flip =
+ ra_get_container_at_index(x1_arr, i, &ctype_in);
+ flipped_container = container_inot_range(
+ container_to_flip, ctype_in, (uint32_t)lb_start,
+ (uint32_t)(lb_end + 1), &ctype_out);
+ // if a new container was created, the old one was already freed
+ if (container_get_cardinality(flipped_container, ctype_out)) {
+ ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
+ } else {
+ container_free(flipped_container, ctype_out);
+ ra_remove_at_index(x1_arr, i);
+ }
+
+ } else {
+ flipped_container = container_range_of_ones(
+ (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
+ ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
+ ctype_out);
+ }
+}
+
+static void insert_fully_flipped_container(roaring_array_t *ans_arr,
+ const roaring_array_t *x1_arr,
+ uint16_t hb) {
+ const int i = ra_get_index(x1_arr, hb);
+ const int j = ra_get_index(ans_arr, hb);
+ uint8_t ctype_in, ctype_out;
+ container_t *flipped_container = NULL;
+ if (i >= 0) {
+ container_t *container_to_flip =
+ ra_get_container_at_index(x1_arr, i, &ctype_in);
+ flipped_container =
+ container_not(container_to_flip, ctype_in, &ctype_out);
+ if (container_get_cardinality(flipped_container, ctype_out))
+ ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
+ ctype_out);
+ else {
+ container_free(flipped_container, ctype_out);
+ }
+ } else {
+ flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
+ ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
+ ctype_out);
+ }
+}
+
+static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) {
+ const int i = ra_get_index(x1_arr, hb);
+ uint8_t ctype_in, ctype_out;
+ container_t *flipped_container = NULL;
+ if (i >= 0) {
+ container_t *container_to_flip =
+ ra_get_container_at_index(x1_arr, i, &ctype_in);
+ flipped_container =
+ container_inot(container_to_flip, ctype_in, &ctype_out);
+
+ if (container_get_cardinality(flipped_container, ctype_out)) {
+ ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
+ } else {
+ container_free(flipped_container, ctype_out);
+ ra_remove_at_index(x1_arr, i);
+ }
+
+ } else {
+ flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
+ ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
+ ctype_out);
+ }
+}
+
+roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1,
+ uint64_t range_start,
+ uint64_t range_end) {
+ if (range_start >= range_end) {
+ return roaring_bitmap_copy(x1);
+ }
+ if(range_end >= UINT64_C(0x100000000)) {
+ range_end = UINT64_C(0x100000000);
+ }
+
+ roaring_bitmap_t *ans = roaring_bitmap_create();
+ roaring_bitmap_set_copy_on_write(ans, is_cow(x1));
+
+ uint16_t hb_start = (uint16_t)(range_start >> 16);
+ const uint16_t lb_start = (uint16_t)range_start; // & 0xFFFF;
+ uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
+ const uint16_t lb_end = (uint16_t)(range_end - 1); // & 0xFFFF;
+
+ ra_append_copies_until(&ans->high_low_container, &x1->high_low_container,
+ hb_start, is_cow(x1));
+ if (hb_start == hb_end) {
+ insert_flipped_container(&ans->high_low_container,
+ &x1->high_low_container, hb_start, lb_start,
+ lb_end);
+ } else {
+ // start and end containers are distinct
+ if (lb_start > 0) {
+ // handle first (partial) container
+ insert_flipped_container(&ans->high_low_container,
+ &x1->high_low_container, hb_start,
+ lb_start, 0xFFFF);
+ ++hb_start; // for the full containers. Can't wrap.
+ }
+
+ if (lb_end != 0xFFFF) --hb_end; // later we'll handle the partial block
+
+ for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
+ insert_fully_flipped_container(&ans->high_low_container,
+ &x1->high_low_container, hb);
+ }
+
+ // handle a partial final container
+ if (lb_end != 0xFFFF) {
+ insert_flipped_container(&ans->high_low_container,
+ &x1->high_low_container, hb_end + 1, 0,
+ lb_end);
+ ++hb_end;
+ }
+ }
+ ra_append_copies_after(&ans->high_low_container, &x1->high_low_container,
+ hb_end, is_cow(x1));
+ return ans;
+}
+
+void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start,
+ uint64_t range_end) {
+ if (range_start >= range_end) {
+ return; // empty range
+ }
+ if(range_end >= UINT64_C(0x100000000)) {
+ range_end = UINT64_C(0x100000000);
+ }
+
+ uint16_t hb_start = (uint16_t)(range_start >> 16);
+ const uint16_t lb_start = (uint16_t)range_start;
+ uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
+ const uint16_t lb_end = (uint16_t)(range_end - 1);
+
+ if (hb_start == hb_end) {
+ inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
+ lb_end);
+ } else {
+ // start and end containers are distinct
+ if (lb_start > 0) {
+ // handle first (partial) container
+ inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
+ 0xFFFF);
+ ++hb_start; // for the full containers. Can't wrap.
+ }
+
+ if (lb_end != 0xFFFF) --hb_end;
+
+ for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
+ inplace_fully_flip_container(&x1->high_low_container, hb);
+ }
+ // handle a partial final container
+ if (lb_end != 0xFFFF) {
+ inplace_flip_container(&x1->high_low_container, hb_end + 1, 0,
+ lb_end);
+ ++hb_end;
+ }
+ }
+}
+
+static void offset_append_with_merge(roaring_array_t *ra, int k, container_t *c, uint8_t t) {
+ int size = ra_get_size(ra);
+ if (size == 0 || ra_get_key_at_index(ra, size-1) != k) {
+ // No merge.
+ ra_append(ra, k, c, t);
+ return;
+ }
+
+ uint8_t last_t, new_t;
+ container_t *last_c, *new_c;
+
+ // NOTE: we don't need to unwrap here, since we added last_c ourselves
+ // we have the certainty it's not a shared container.
+ // The same applies to c, as it's the result of calling container_offset.
+ last_c = ra_get_container_at_index(ra, size-1, &last_t);
+ new_c = container_ior(last_c, last_t, c, t, &new_t);
+
+ ra_set_container_at_index(ra, size-1, new_c, new_t);
+
+ // Comparison of pointers of different origin is UB (or so claim some compiler
+ // makers), so we compare their bit representation only.
+ if ((uintptr_t)last_c != (uintptr_t)new_c) {
+ container_free(last_c, last_t);
+ }
+ container_free(c, t);
+}
+
+// roaring_bitmap_add_offset adds the value 'offset' to each and every value in
+// a bitmap, generating a new bitmap in the process. If offset + element is
+// outside of the range [0,2^32), that the element will be dropped.
+// We need "offset" to be 64 bits because we want to support values
+// between -0xFFFFFFFF up to +0xFFFFFFFF.
+roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm,
+ int64_t offset) {
+ roaring_bitmap_t *answer;
+ roaring_array_t *ans_ra;
+ int64_t container_offset;
+ uint16_t in_offset;
+
+ const roaring_array_t *bm_ra = &bm->high_low_container;
+ int length = bm_ra->size;
+
+ if (offset == 0) {
+ return roaring_bitmap_copy(bm);
+ }
+
+ container_offset = offset >> 16;
+ in_offset = (uint16_t)(offset - container_offset * (1 << 16));
+
+ answer = roaring_bitmap_create();
+ roaring_bitmap_set_copy_on_write(answer, is_cow(bm));
+
+ ans_ra = &answer->high_low_container;
+
+ if (in_offset == 0) {
+ ans_ra = &answer->high_low_container;
+
+ for (int i = 0, j = 0; i < length; ++i) {
+ int64_t key = ra_get_key_at_index(bm_ra, i);
+ key += container_offset;
+
+ if (key < 0 || key >= (1 << 16)) {
+ continue;
+ }
+
+ ra_append_copy(ans_ra, bm_ra, i, false);
+ ans_ra->keys[j++] = key;
+ }
+
+ return answer;
+ }
+
+ uint8_t t;
+ const container_t *c;
+ container_t *lo, *hi, **lo_ptr, **hi_ptr;
+ int64_t k;
+
+ for (int i = 0; i < length; ++i) {
+ lo = hi = NULL;
+ lo_ptr = hi_ptr = NULL;
+
+ k = ra_get_key_at_index(bm_ra, i)+container_offset;
+ if (k >= 0 && k < (1 << 16)) {
+ lo_ptr = &lo;
+ }
+ if (k+1 >= 0 && k+1 < (1 << 16)) {
+ hi_ptr = &hi;
+ }
+ if (lo_ptr == NULL && hi_ptr == NULL) {
+ continue;
+ }
+
+ c = ra_get_container_at_index(bm_ra, i, &t);
+ c = container_unwrap_shared(c, &t);
+
+ container_add_offset(c, t, lo_ptr, hi_ptr, in_offset);
+ if (lo != NULL) {
+ offset_append_with_merge(ans_ra, k, lo, t);
+ }
+ if (hi != NULL) {
+ ra_append(ans_ra, k+1, hi, t);
+ }
+ }
+
+ return answer;
+}
+
+roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2,
+ const bool bitsetconversion) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) {
+ return roaring_bitmap_copy(x2);
+ }
+ if (0 == length2) {
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer =
+ roaring_bitmap_create_with_capacity(length1 + length2);
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c;
+ if (bitsetconversion &&
+ (get_container_type(c1, type1) != BITSET_CONTAINER_TYPE) &&
+ (get_container_type(c2, type2) != BITSET_CONTAINER_TYPE)
+ ){
+ container_t *newc1 =
+ container_mutable_unwrap_shared(c1, &type1);
+ newc1 = container_to_bitset(newc1, type1);
+ type1 = BITSET_CONTAINER_TYPE;
+ c = container_lazy_ior(newc1, type1, c2, type2,
+ &result_type);
+ if (c != newc1) { // should not happen
+ container_free(newc1, type1);
+ }
+ } else {
+ c = container_lazy_or(c1, type1, c2, type2, &result_type);
+ }
+ // since we assume that the initial containers are non-empty,
+ // the
+ // result here
+ // can only be non-empty
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ c1 = get_copy_of_container(c1, &type1, is_cow(x1));
+ if (is_cow(x1)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c1,
+ type1);
+ }
+ ra_append(&answer->high_low_container, s1, c1, type1);
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_append(&answer->high_low_container, s2, c2, type2);
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x2->high_low_container, pos2, length2,
+ is_cow(x2));
+ } else if (pos2 == length2) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
+}
+
+void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2,
+ const bool bitsetconversion) {
+ uint8_t result_type = 0;
+ int length1 = x1->high_low_container.size;
+ const int length2 = x2->high_low_container.size;
+
+ if (0 == length2) return;
+
+ if (0 == length1) {
+ roaring_bitmap_overwrite(x1, x2);
+ return;
+ }
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ if (!container_is_full(c1, type1)) {
+ if ((bitsetconversion == false) ||
+ (get_container_type(c1, type1) == BITSET_CONTAINER_TYPE)
+ ){
+ c1 = get_writable_copy_if_shared(c1, &type1);
+ } else {
+ // convert to bitset
+ container_t *old_c1 = c1;
+ uint8_t old_type1 = type1;
+ c1 = container_mutable_unwrap_shared(c1, &type1);
+ c1 = container_to_bitset(c1, type1);
+ container_free(old_c1, old_type1);
+ type1 = BITSET_CONTAINER_TYPE;
+ }
+
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_lazy_ior(c1, type1, c2, type2,
+ &result_type);
+
+ if (c != c1) { // in this instance a new container was created,
+ // and we need to free the old one
+ container_free(c1, type1);
+ }
+
+ ra_set_container_at_index(&x1->high_low_container, pos1, c,
+ result_type);
+ }
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ // container_t *c2_clone = container_clone(c2, type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
+ type2);
+ pos1++;
+ length1++;
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
+ pos2, length2, is_cow(x2));
+ }
+}
+
+roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ uint8_t result_type = 0;
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ if (0 == length1) {
+ return roaring_bitmap_copy(x2);
+ }
+ if (0 == length2) {
+ return roaring_bitmap_copy(x1);
+ }
+ roaring_bitmap_t *answer =
+ roaring_bitmap_create_with_capacity(length1 + length2);
+ roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ container_t *c = container_lazy_xor(
+ c1, type1, c2, type2, &result_type);
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_append(&answer->high_low_container, s1, c, result_type);
+ } else {
+ container_free(c, result_type);
+ }
+
+ ++pos1;
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ c1 = get_copy_of_container(c1, &type1, is_cow(x1));
+ if (is_cow(x1)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c1,
+ type1);
+ }
+ ra_append(&answer->high_low_container, s1, c1, type1);
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_append(&answer->high_low_container, s2, c2, type2);
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x2->high_low_container, pos2, length2,
+ is_cow(x2));
+ } else if (pos2 == length2) {
+ ra_append_copy_range(&answer->high_low_container,
+ &x1->high_low_container, pos1, length1,
+ is_cow(x1));
+ }
+ return answer;
+}
+
+void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ assert(x1 != x2);
+ uint8_t result_type = 0;
+ int length1 = x1->high_low_container.size;
+ const int length2 = x2->high_low_container.size;
+
+ if (0 == length2) return;
+
+ if (0 == length1) {
+ roaring_bitmap_overwrite(x1, x2);
+ return;
+ }
+ int pos1 = 0, pos2 = 0;
+ uint8_t type1, type2;
+ uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ while (true) {
+ if (s1 == s2) {
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+
+ // We do the computation "in place" only when c1 is not a shared container.
+ // Rationale: using a shared container safely with in place computation would
+ // require making a copy and then doing the computation in place which is likely
+ // less efficient than avoiding in place entirely and always generating a new
+ // container.
+
+ container_t *c;
+ if (type1 == SHARED_CONTAINER_TYPE) {
+ c = container_lazy_xor(c1, type1, c2, type2, &result_type);
+ shared_container_free(CAST_shared(c1)); // release
+ }
+ else {
+ c = container_lazy_ixor(c1, type1, c2, type2, &result_type);
+ }
+
+ if (container_nonzero_cardinality(c, result_type)) {
+ ra_set_container_at_index(&x1->high_low_container, pos1, c,
+ result_type);
+ ++pos1;
+ } else {
+ container_free(c, result_type);
+ ra_remove_at_index(&x1->high_low_container, pos1);
+ --length1;
+ }
+ ++pos2;
+ if (pos1 == length1) break;
+ if (pos2 == length2) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ } else if (s1 < s2) { // s1 < s2
+ pos1++;
+ if (pos1 == length1) break;
+ s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+
+ } else { // s1 > s2
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ // container_t *c2_clone = container_clone(c2, type2);
+ c2 = get_copy_of_container(c2, &type2, is_cow(x2));
+ if (is_cow(x2)) {
+ ra_set_container_at_index(&x2->high_low_container, pos2, c2,
+ type2);
+ }
+ ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
+ type2);
+ pos1++;
+ length1++;
+ pos2++;
+ if (pos2 == length2) break;
+ s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+ }
+ }
+ if (pos1 == length1) {
+ ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
+ pos2, length2, is_cow(x2));
+ }
+}
+
+void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r) {
+ roaring_array_t *ra = &r->high_low_container;
+
+ for (int i = 0; i < ra->size; ++i) {
+ const uint8_t old_type = ra->typecodes[i];
+ container_t *old_c = ra->containers[i];
+ uint8_t new_type = old_type;
+ container_t *new_c = container_repair_after_lazy(old_c, &new_type);
+ ra->containers[i] = new_c;
+ ra->typecodes[i] = new_type;
+ }
+}
+
+
+
+/**
+* roaring_bitmap_rank returns the number of integers that are smaller or equal
+* to x.
+*/
+uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
+ uint64_t size = 0;
+ uint32_t xhigh = x >> 16;
+ for (int i = 0; i < bm->high_low_container.size; i++) {
+ uint32_t key = bm->high_low_container.keys[i];
+ if (xhigh > key) {
+ size +=
+ container_get_cardinality(bm->high_low_container.containers[i],
+ bm->high_low_container.typecodes[i]);
+ } else if (xhigh == key) {
+ return size + container_rank(bm->high_low_container.containers[i],
+ bm->high_low_container.typecodes[i],
+ x & 0xFFFF);
+ } else {
+ return size;
+ }
+ }
+ return size;
+}
+
+/**
+ * roaring_bitmap_get_index returns the index of x, if not exsist return -1.
+ */
+int64_t roaring_bitmap_get_index(const roaring_bitmap_t *bm, uint32_t x) {
+ int64_t index = 0;
+ const uint16_t xhigh = x >> 16;
+ int32_t high_idx = ra_get_index(&bm->high_low_container, xhigh);
+ if (high_idx < 0) return -1;
+
+ for (int i = 0; i < bm->high_low_container.size; i++) {
+ uint32_t key = bm->high_low_container.keys[i];
+ if (xhigh > key) {
+ index +=
+ container_get_cardinality(bm->high_low_container.containers[i],
+ bm->high_low_container.typecodes[i]);
+ } else if (xhigh == key) {
+ int32_t low_idx = container_get_index(
+ bm->high_low_container.containers[high_idx],
+ bm->high_low_container.typecodes[high_idx], x & 0xFFFF);
+ if (low_idx < 0) return -1;
+ return index + low_idx;
+ } else {
+ return -1;
+ }
+ }
+ return index;
+}
+
+/**
+* roaring_bitmap_smallest returns the smallest value in the set.
+* Returns UINT32_MAX if the set is empty.
+*/
+uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) {
+ if (bm->high_low_container.size > 0) {
+ container_t *c = bm->high_low_container.containers[0];
+ uint8_t type = bm->high_low_container.typecodes[0];
+ uint32_t key = bm->high_low_container.keys[0];
+ uint32_t lowvalue = container_minimum(c, type);
+ return lowvalue | (key << 16);
+ }
+ return UINT32_MAX;
+}
+
+/**
+* roaring_bitmap_smallest returns the greatest value in the set.
+* Returns 0 if the set is empty.
+*/
+uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) {
+ if (bm->high_low_container.size > 0) {
+ container_t *container =
+ bm->high_low_container.containers[bm->high_low_container.size - 1];
+ uint8_t typecode =
+ bm->high_low_container.typecodes[bm->high_low_container.size - 1];
+ uint32_t key =
+ bm->high_low_container.keys[bm->high_low_container.size - 1];
+ uint32_t lowvalue = container_maximum(container, typecode);
+ return lowvalue | (key << 16);
+ }
+ return 0;
+}
+
+bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank,
+ uint32_t *element) {
+ container_t *container;
+ uint8_t typecode;
+ uint16_t key;
+ uint32_t start_rank = 0;
+ int i = 0;
+ bool valid = false;
+ while (!valid && i < bm->high_low_container.size) {
+ container = bm->high_low_container.containers[i];
+ typecode = bm->high_low_container.typecodes[i];
+ valid =
+ container_select(container, typecode, &start_rank, rank, element);
+ i++;
+ }
+
+ if (valid) {
+ key = bm->high_low_container.keys[i - 1];
+ *element |= (((uint32_t)key) << 16); // w/o cast, key promotes signed
+ return true;
+ } else
+ return false;
+}
+
+bool roaring_bitmap_intersect(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ uint64_t answer = 0;
+ int pos1 = 0, pos2 = 0;
+
+ while (pos1 < length1 && pos2 < length2) {
+ const uint16_t s1 = ra_get_key_at_index(& x1->high_low_container, pos1);
+ const uint16_t s2 = ra_get_key_at_index(& x2->high_low_container, pos2);
+
+ if (s1 == s2) {
+ uint8_t type1, type2;
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ if (container_intersect(c1, type1, c2, type2))
+ return true;
+ ++pos1;
+ ++pos2;
+ } else if (s1 < s2) { // s1 < s2
+ pos1 = ra_advance_until(& x1->high_low_container, s2, pos1);
+ } else { // s1 > s2
+ pos2 = ra_advance_until(& x2->high_low_container, s1, pos2);
+ }
+ }
+ return answer != 0;
+}
+
+bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm,
+ uint64_t x, uint64_t y) {
+ if (x >= y) {
+ // Empty range.
+ return false;
+ }
+ roaring_uint32_iterator_t it;
+ roaring_init_iterator(bm, &it);
+ if (!roaring_move_uint32_iterator_equalorlarger(&it, x)) {
+ // No values above x.
+ return false;
+ }
+ if (it.current_value >= y) {
+ // No values below y.
+ return false;
+ }
+ return true;
+}
+
+
+uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const int length1 = x1->high_low_container.size,
+ length2 = x2->high_low_container.size;
+ uint64_t answer = 0;
+ int pos1 = 0, pos2 = 0;
+ while (pos1 < length1 && pos2 < length2) {
+ const uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, pos1);
+ const uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, pos2);
+
+ if (s1 == s2) {
+ uint8_t type1, type2;
+ container_t *c1 = ra_get_container_at_index(
+ &x1->high_low_container, pos1, &type1);
+ container_t *c2 = ra_get_container_at_index(
+ &x2->high_low_container, pos2, &type2);
+ answer += container_and_cardinality(c1, type1, c2, type2);
+ ++pos1;
+ ++pos2;
+ } else if (s1 < s2) { // s1 < s2
+ pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
+ } else { // s1 > s2
+ pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
+ }
+ }
+ return answer;
+}
+
+double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
+ const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
+ const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
+ return (double)inter / (double)(c1 + c2 - inter);
+}
+
+uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
+ const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
+ const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
+ return c1 + c2 - inter;
+}
+
+uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
+ const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
+ return c1 - inter;
+}
+
+uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1,
+ const roaring_bitmap_t *x2) {
+ const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
+ const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
+ const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
+ return c1 + c2 - 2 * inter;
+}
+
+
+bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) {
+ const uint16_t hb = val >> 16;
+ /*
+ * the next function call involves a binary search and lots of branching.
+ */
+ int32_t i = ra_get_index(&r->high_low_container, hb);
+ if (i < 0) return false;
+
+ uint8_t typecode;
+ // next call ought to be cheap
+ container_t *container =
+ ra_get_container_at_index(&r->high_low_container, i, &typecode);
+ // rest might be a tad expensive, possibly involving another round of binary search
+ return container_contains(container, val & 0xFFFF, typecode);
+}
+
+
+/**
+ * Check whether a range of values from range_start (included) to range_end (excluded) is present
+ */
+bool roaring_bitmap_contains_range(const roaring_bitmap_t *r, uint64_t range_start, uint64_t range_end) {
+ if(range_end >= UINT64_C(0x100000000)) {
+ range_end = UINT64_C(0x100000000);
+ }
+ if (range_start >= range_end) return true; // empty range are always contained!
+ if (range_end - range_start == 1) return roaring_bitmap_contains(r, (uint32_t)range_start);
+ uint16_t hb_rs = (uint16_t)(range_start >> 16);
+ uint16_t hb_re = (uint16_t)((range_end - 1) >> 16);
+ const int32_t span = hb_re - hb_rs;
+ const int32_t hlc_sz = ra_get_size(&r->high_low_container);
+ if (hlc_sz < span + 1) {
+ return false;
+ }
+ int32_t is = ra_get_index(&r->high_low_container, hb_rs);
+ int32_t ie = ra_get_index(&r->high_low_container, hb_re);
+ if ((ie < 0) || (is < 0) || ((ie - is) != span) || ie >= hlc_sz) {
+ return false;
+ }
+ const uint32_t lb_rs = range_start & 0xFFFF;
+ const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1;
+ uint8_t type;
+ container_t *c = ra_get_container_at_index(&r->high_low_container, is,
+ &type);
+ if (hb_rs == hb_re) {
+ return container_contains_range(c, lb_rs, lb_re, type);
+ }
+ if (!container_contains_range(c, lb_rs, 1 << 16, type)) {
+ return false;
+ }
+ c = ra_get_container_at_index(&r->high_low_container, ie, &type);
+ if (!container_contains_range(c, 0, lb_re, type)) {
+ return false;
+ }
+ for (int32_t i = is + 1; i < ie; ++i) {
+ c = ra_get_container_at_index(&r->high_low_container, i, &type);
+ if (!container_is_full(c, type) ) {
+ return false;
+ }
+ }
+ return true;
+}
+
+
+bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1,
+ const roaring_bitmap_t *r2) {
+ return (roaring_bitmap_get_cardinality(r2) >
+ roaring_bitmap_get_cardinality(r1) &&
+ roaring_bitmap_is_subset(r1, r2));
+}
+
+
+/*
+ * FROZEN SERIALIZATION FORMAT DESCRIPTION
+ *
+ * -- (beginning must be aligned by 32 bytes) --
+ * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS * num_bitset_containers]
+ * <run_data> rle16_t[total number of rle elements in all run containers]
+ * <array_data> uint16_t[total number of array elements in all array containers]
+ * <keys> uint16_t[num_containers]
+ * <counts> uint16_t[num_containers]
+ * <typecodes> uint8_t[num_containers]
+ * <header> uint32_t
+ *
+ * <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits)
+ * and the number of containers (17 bits).
+ *
+ * <counts> stores number of elements for every container.
+ * Its meaning depends on container type.
+ * For array and bitset containers, this value is the container cardinality minus one.
+ * For run container, it is the number of rle_t elements (n_runs).
+ *
+ * <bitset_data>,<array_data>,<run_data> are flat arrays of elements of
+ * all containers of respective type.
+ *
+ * <*_data> and <keys> are kept close together because they are not accessed
+ * during deserilization. This may reduce IO in case of large mmaped bitmaps.
+ * All members have their native alignments during deserilization except <header>,
+ * which is not guaranteed to be aligned by 4 bytes.
+ */
+
+size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) {
+ const roaring_array_t *ra = &rb->high_low_container;
+ size_t num_bytes = 0;
+ for (int32_t i = 0; i < ra->size; i++) {
+ switch (ra->typecodes[i]) {
+ case BITSET_CONTAINER_TYPE: {
+ num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ break;
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(ra->containers[i]);
+ num_bytes += rc->n_runs * sizeof(rle16_t);
+ break;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac =
+ const_CAST_array(ra->containers[i]);
+ num_bytes += ac->cardinality * sizeof(uint16_t);
+ break;
+ }
+ default:
+ roaring_unreachable;
+ }
+ }
+ num_bytes += (2 + 2 + 1) * ra->size; // keys, counts, typecodes
+ num_bytes += 4; // header
+ return num_bytes;
+}
+
+inline static void *arena_alloc(char **arena, size_t num_bytes) {
+ char *res = *arena;
+ *arena += num_bytes;
+ return res;
+}
+
+void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) {
+ /*
+ * Note: we do not require user to supply a specifically aligned buffer.
+ * Thus we have to use memcpy() everywhere.
+ */
+
+ const roaring_array_t *ra = &rb->high_low_container;
+
+ size_t bitset_zone_size = 0;
+ size_t run_zone_size = 0;
+ size_t array_zone_size = 0;
+ for (int32_t i = 0; i < ra->size; i++) {
+ switch (ra->typecodes[i]) {
+ case BITSET_CONTAINER_TYPE: {
+ bitset_zone_size +=
+ BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ break;
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(ra->containers[i]);
+ run_zone_size += rc->n_runs * sizeof(rle16_t);
+ break;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac =
+ const_CAST_array(ra->containers[i]);
+ array_zone_size += ac->cardinality * sizeof(uint16_t);
+ break;
+ }
+ default:
+ roaring_unreachable;
+ }
+ }
+
+ uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size);
+ rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size);
+ uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size);
+ uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size);
+ uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2*ra->size);
+ uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size);
+ uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4);
+
+ for (int32_t i = 0; i < ra->size; i++) {
+ uint16_t count;
+ switch (ra->typecodes[i]) {
+ case BITSET_CONTAINER_TYPE: {
+ const bitset_container_t *bc =
+ const_CAST_bitset(ra->containers[i]);
+ memcpy(bitset_zone, bc->words,
+ BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
+ bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
+ if (bc->cardinality != BITSET_UNKNOWN_CARDINALITY) {
+ count = bc->cardinality - 1;
+ } else {
+ count = bitset_container_compute_cardinality(bc) - 1;
+ }
+ break;
+ }
+ case RUN_CONTAINER_TYPE: {
+ const run_container_t *rc = const_CAST_run(ra->containers[i]);
+ size_t num_bytes = rc->n_runs * sizeof(rle16_t);
+ memcpy(run_zone, rc->runs, num_bytes);
+ run_zone += rc->n_runs;
+ count = rc->n_runs;
+ break;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ const array_container_t *ac =
+ const_CAST_array(ra->containers[i]);
+ size_t num_bytes = ac->cardinality * sizeof(uint16_t);
+ memcpy(array_zone, ac->array, num_bytes);
+ array_zone += ac->cardinality;
+ count = ac->cardinality - 1;
+ break;
+ }
+ default:
+ roaring_unreachable;
+ }
+ memcpy(&count_zone[i], &count, 2);
+ }
+ memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t));
+ memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t));
+ uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE;
+ memcpy(header_zone, &header, 4);
+}
+
+const roaring_bitmap_t *
+roaring_bitmap_frozen_view(const char *buf, size_t length) {
+ if ((uintptr_t)buf % 32 != 0) {
+ return NULL;
+ }
+
+ // cookie and num_containers
+ if (length < 4) {
+ return NULL;
+ }
+ uint32_t header;
+ memcpy(&header, buf + length - 4, 4); // header may be misaligned
+ if ((header & 0x7FFF) != FROZEN_COOKIE) {
+ return NULL;
+ }
+ int32_t num_containers = (header >> 15);
+
+ // typecodes, counts and keys
+ if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) {
+ return NULL;
+ }
+ uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5);
+ uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3);
+ uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1);
+
+ // {bitset,array,run}_zone
+ int32_t num_bitset_containers = 0;
+ int32_t num_run_containers = 0;
+ int32_t num_array_containers = 0;
+ size_t bitset_zone_size = 0;
+ size_t run_zone_size = 0;
+ size_t array_zone_size = 0;
+ for (int32_t i = 0; i < num_containers; i++) {
+ switch (typecodes[i]) {
+ case BITSET_CONTAINER_TYPE:
+ num_bitset_containers++;
+ bitset_zone_size += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ break;
+ case RUN_CONTAINER_TYPE:
+ num_run_containers++;
+ run_zone_size += counts[i] * sizeof(rle16_t);
+ break;
+ case ARRAY_CONTAINER_TYPE:
+ num_array_containers++;
+ array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t);
+ break;
+ default:
+ return NULL;
+ }
+ }
+ if (length != bitset_zone_size + run_zone_size + array_zone_size +
+ 5 * num_containers + 4) {
+ return NULL;
+ }
+ uint64_t *bitset_zone = (uint64_t*) (buf);
+ rle16_t *run_zone = (rle16_t*) (buf + bitset_zone_size);
+ uint16_t *array_zone = (uint16_t*) (buf + bitset_zone_size + run_zone_size);
+
+ size_t alloc_size = 0;
+ alloc_size += sizeof(roaring_bitmap_t);
+ alloc_size += num_containers * sizeof(container_t*);
+ alloc_size += num_bitset_containers * sizeof(bitset_container_t);
+ alloc_size += num_run_containers * sizeof(run_container_t);
+ alloc_size += num_array_containers * sizeof(array_container_t);
+
+ char *arena = (char *)roaring_malloc(alloc_size);
+ if (arena == NULL) {
+ return NULL;
+ }
+
+ roaring_bitmap_t *rb = (roaring_bitmap_t *)
+ arena_alloc(&arena, sizeof(roaring_bitmap_t));
+ rb->high_low_container.flags = ROARING_FLAG_FROZEN;
+ rb->high_low_container.allocation_size = num_containers;
+ rb->high_low_container.size = num_containers;
+ rb->high_low_container.keys = (uint16_t *)keys;
+ rb->high_low_container.typecodes = (uint8_t *)typecodes;
+ rb->high_low_container.containers =
+ (container_t **)arena_alloc(&arena,
+ sizeof(container_t*) * num_containers);
+ // Ensure offset of high_low_container.containers is known distance used in
+ // C++ wrapper. sizeof(roaring_bitmap_t) is used as it is the size of the
+ // only allocation that precedes high_low_container.containers. If this is
+ // changed (new allocation or changed order), this offset will also need to
+ // be changed in the C++ wrapper.
+ assert(rb ==
+ (roaring_bitmap_t *)((char *)rb->high_low_container.containers -
+ sizeof(roaring_bitmap_t)));
+ for (int32_t i = 0; i < num_containers; i++) {
+ switch (typecodes[i]) {
+ case BITSET_CONTAINER_TYPE: {
+ bitset_container_t *bitset = (bitset_container_t *)
+ arena_alloc(&arena, sizeof(bitset_container_t));
+ bitset->words = bitset_zone;
+ bitset->cardinality = counts[i] + UINT32_C(1);
+ rb->high_low_container.containers[i] = bitset;
+ bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
+ break;
+ }
+ case RUN_CONTAINER_TYPE: {
+ run_container_t *run = (run_container_t *)
+ arena_alloc(&arena, sizeof(run_container_t));
+ run->capacity = counts[i];
+ run->n_runs = counts[i];
+ run->runs = run_zone;
+ rb->high_low_container.containers[i] = run;
+ run_zone += run->n_runs;
+ break;
+ }
+ case ARRAY_CONTAINER_TYPE: {
+ array_container_t *array = (array_container_t *)
+ arena_alloc(&arena, sizeof(array_container_t));
+ array->capacity = counts[i] + UINT32_C(1);
+ array->cardinality = counts[i] + UINT32_C(1);
+ array->array = array_zone;
+ rb->high_low_container.containers[i] = array;
+ array_zone += counts[i] + UINT32_C(1);
+ break;
+ }
+ default:
+ roaring_free(arena);
+ return NULL;
+ }
+ }
+
+ return rb;
+}
+
+ALLOW_UNALIGNED
+roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
+ char *start_of_buf = (char *) buf;
+ uint32_t cookie;
+ int32_t num_containers;
+ uint16_t *descriptive_headers;
+ uint32_t *offset_headers = NULL;
+ const char *run_flag_bitset = NULL;
+ bool hasrun = false;
+
+ // deserialize cookie
+ memcpy(&cookie, buf, sizeof(uint32_t));
+ buf += sizeof(uint32_t);
+ if (cookie == SERIAL_COOKIE_NO_RUNCONTAINER) {
+ memcpy(&num_containers, buf, sizeof(int32_t));
+ buf += sizeof(int32_t);
+ descriptive_headers = (uint16_t *) buf;
+ buf += num_containers * 2 * sizeof(uint16_t);
+ offset_headers = (uint32_t *) buf;
+ buf += num_containers * sizeof(uint32_t);
+ } else if ((cookie & 0xFFFF) == SERIAL_COOKIE) {
+ num_containers = (cookie >> 16) + 1;
+ hasrun = true;
+ int32_t run_flag_bitset_size = (num_containers + 7) / 8;
+ run_flag_bitset = buf;
+ buf += run_flag_bitset_size;
+ descriptive_headers = (uint16_t *) buf;
+ buf += num_containers * 2 * sizeof(uint16_t);
+ if(num_containers >= NO_OFFSET_THRESHOLD) {
+ offset_headers = (uint32_t *) buf;
+ buf += num_containers * sizeof(uint32_t);
+ }
+ } else {
+ return NULL;
+ }
+
+ // calculate total size for allocation
+ int32_t num_bitset_containers = 0;
+ int32_t num_run_containers = 0;
+ int32_t num_array_containers = 0;
+
+ for (int32_t i = 0; i < num_containers; i++) {
+ uint16_t tmp;
+ memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp));
+ uint32_t cardinality = tmp + 1;
+ bool isbitmap = (cardinality > DEFAULT_MAX_SIZE);
+ bool isrun = false;
+ if(hasrun) {
+ if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) {
+ isbitmap = false;
+ isrun = true;
+ }
+ }
+
+ if (isbitmap) {
+ num_bitset_containers++;
+ } else if (isrun) {
+ num_run_containers++;
+ } else {
+ num_array_containers++;
+ }
+ }
+
+ size_t alloc_size = 0;
+ alloc_size += sizeof(roaring_bitmap_t);
+ alloc_size += num_containers * sizeof(container_t*);
+ alloc_size += num_bitset_containers * sizeof(bitset_container_t);
+ alloc_size += num_run_containers * sizeof(run_container_t);
+ alloc_size += num_array_containers * sizeof(array_container_t);
+ alloc_size += num_containers * sizeof(uint16_t); // keys
+ alloc_size += num_containers * sizeof(uint8_t); // typecodes
+
+ // allocate bitmap and construct containers
+ char *arena = (char *)roaring_malloc(alloc_size);
+ if (arena == NULL) {
+ return NULL;
+ }
+
+ roaring_bitmap_t *rb = (roaring_bitmap_t *)
+ arena_alloc(&arena, sizeof(roaring_bitmap_t));
+ rb->high_low_container.flags = ROARING_FLAG_FROZEN;
+ rb->high_low_container.allocation_size = num_containers;
+ rb->high_low_container.size = num_containers;
+ rb->high_low_container.containers =
+ (container_t **)arena_alloc(&arena,
+ sizeof(container_t*) * num_containers);
+
+ uint16_t *keys = (uint16_t *)arena_alloc(&arena, num_containers * sizeof(uint16_t));
+ uint8_t *typecodes = (uint8_t *)arena_alloc(&arena, num_containers * sizeof(uint8_t));
+
+ rb->high_low_container.keys = keys;
+ rb->high_low_container.typecodes = typecodes;
+
+ for (int32_t i = 0; i < num_containers; i++) {
+ uint16_t tmp;
+ memcpy(&tmp, descriptive_headers + 2*i+1, sizeof(tmp));
+ int32_t cardinality = tmp + 1;
+ bool isbitmap = (cardinality > DEFAULT_MAX_SIZE);
+ bool isrun = false;
+ if(hasrun) {
+ if((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) {
+ isbitmap = false;
+ isrun = true;
+ }
+ }
+
+ keys[i] = descriptive_headers[2*i];
+
+ if (isbitmap) {
+ typecodes[i] = BITSET_CONTAINER_TYPE;
+ bitset_container_t *c = (bitset_container_t *)arena_alloc(&arena, sizeof(bitset_container_t));
+ c->cardinality = cardinality;
+ if(offset_headers != NULL) {
+ c->words = (uint64_t *) (start_of_buf + offset_headers[i]);
+ } else {
+ c->words = (uint64_t *) buf;
+ buf += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
+ }
+ rb->high_low_container.containers[i] = c;
+ } else if (isrun) {
+ typecodes[i] = RUN_CONTAINER_TYPE;
+ run_container_t *c = (run_container_t *)arena_alloc(&arena, sizeof(run_container_t));
+ c->capacity = cardinality;
+ uint16_t n_runs;
+ if(offset_headers != NULL) {
+ memcpy(&n_runs, start_of_buf + offset_headers[i], sizeof(uint16_t));
+ c->n_runs = n_runs;
+ c->runs = (rle16_t *) (start_of_buf + offset_headers[i] + sizeof(uint16_t));
+ } else {
+ memcpy(&n_runs, buf, sizeof(uint16_t));
+ c->n_runs = n_runs;
+ buf += sizeof(uint16_t);
+ c->runs = (rle16_t *) buf;
+ buf += c->n_runs * sizeof(rle16_t);
+ }
+ rb->high_low_container.containers[i] = c;
+ } else {
+ typecodes[i] = ARRAY_CONTAINER_TYPE;
+ array_container_t *c = (array_container_t *)arena_alloc(&arena, sizeof(array_container_t));
+ c->cardinality = cardinality;
+ c->capacity = cardinality;
+ if(offset_headers != NULL) {
+ c->array = (uint16_t *) (start_of_buf + offset_headers[i]);
+ } else {
+ c->array = (uint16_t *) buf;
+ buf += cardinality * sizeof(uint16_t);
+ }
+ rb->high_low_container.containers[i] = c;
+ }
+ }
+
+ return rb;
+}
+
+bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t * bitset) {
+ uint32_t max_value = roaring_bitmap_maximum(r);
+ size_t new_array_size = (size_t)(((uint64_t)max_value + 63)/64);
+ bool resize_ok = bitset_resize(bitset, new_array_size, true);
+ if(!resize_ok) { return false; }
+ const roaring_array_t *ra = &r->high_low_container;
+ for (int i = 0; i < ra->size; ++i) {
+ uint64_t* words = bitset->array + (ra->keys[i]<<10);
+ uint8_t type = ra->typecodes[i];
+ const container_t *c = ra->containers[i];
+ if(type == SHARED_CONTAINER_TYPE) {
+ c = container_unwrap_shared(c, &type);
+ }
+ switch (type) {
+ case BITSET_CONTAINER_TYPE:
+ {
+ size_t max_word_index = new_array_size - (ra->keys[i]<<10);
+ if(max_word_index > 1024) { max_word_index = 1024; }
+ const bitset_container_t *src = const_CAST_bitset(c);
+ memcpy(words, src->words, max_word_index * sizeof(uint64_t));
+ }
+ break;
+ case ARRAY_CONTAINER_TYPE:
+ {
+ const array_container_t *src = const_CAST_array(c);
+ bitset_set_list(words, src->array, src->cardinality);
+ }
+ break;
+ case RUN_CONTAINER_TYPE:
+ {
+ const run_container_t *src = const_CAST_run(c);
+ for (int32_t rlepos = 0; rlepos < src->n_runs; ++rlepos) {
+ rle16_t rle = src->runs[rlepos];
+ bitset_set_lenrange(words, rle.value, rle.length);
+ }
+ }
+ break;
+ default:
+ roaring_unreachable;
+ }
+ }
+ return true;
+}
+
+#ifdef __cplusplus
+} } } // extern "C" { namespace roaring {
+#endif