#include <assert.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include <roaring/roaring.h>

// Include after roaring.h
#include <roaring/array_util.h>
#include <roaring/bitset_util.h>
#include <roaring/containers/containers.h>
#include <roaring/roaring_array.h>

#ifdef __cplusplus
using namespace ::roaring::internal;

extern "C" {
namespace roaring {
namespace api {
#endif

#define CROARING_SERIALIZATION_ARRAY_UINT32 1
#define CROARING_SERIALIZATION_CONTAINER 2
extern inline int roaring_trailing_zeroes(unsigned long long input_num);
extern inline int roaring_leading_zeroes(unsigned long long input_num);
extern inline void roaring_bitmap_init_cleared(roaring_bitmap_t *r);
extern inline bool roaring_bitmap_get_copy_on_write(const roaring_bitmap_t *r);
extern inline void roaring_bitmap_set_copy_on_write(roaring_bitmap_t *r,
                                                    bool cow);
extern inline roaring_bitmap_t *roaring_bitmap_create(void);
extern inline void roaring_bitmap_add_range(roaring_bitmap_t *r, uint64_t min,
                                            uint64_t max);
extern inline void roaring_bitmap_remove_range(roaring_bitmap_t *r,
                                               uint64_t min, uint64_t max);

static inline bool is_cow(const roaring_bitmap_t *r) {
    return r->high_low_container.flags & ROARING_FLAG_COW;
}
static inline bool is_frozen(const roaring_bitmap_t *r) {
    return r->high_low_container.flags & ROARING_FLAG_FROZEN;
}

// this is like roaring_bitmap_add, but it populates pointer arguments in such a
// way
// that we can recover the container touched, which, in turn can be used to
// accelerate some functions (when you repeatedly need to add to the same
// container)
static inline container_t *containerptr_roaring_bitmap_add(roaring_bitmap_t *r,
                                                           uint32_t val,
                                                           uint8_t *type,
                                                           int *index) {
    roaring_array_t *ra = &r->high_low_container;

    uint16_t hb = val >> 16;
    const int i = ra_get_index(ra, hb);
    if (i >= 0) {
        ra_unshare_container_at_index(ra, (uint16_t)i);
        container_t *c = ra_get_container_at_index(ra, (uint16_t)i, type);
        uint8_t new_type = *type;
        container_t *c2 = container_add(c, val & 0xFFFF, *type, &new_type);
        *index = i;
        if (c2 != c) {
            container_free(c, *type);
            ra_set_container_at_index(ra, i, c2, new_type);
            *type = new_type;
            return c2;
        } else {
            return c;
        }
    } else {
        array_container_t *new_ac = array_container_create();
        container_t *c =
            container_add(new_ac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, type);
        // we could just assume that it stays an array container
        ra_insert_new_key_value_at(ra, -i - 1, hb, c, *type);
        *index = -i - 1;
        return c;
    }
}

roaring_bitmap_t *roaring_bitmap_create_with_capacity(uint32_t cap) {
    roaring_bitmap_t *ans =
        (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
    if (!ans) {
        return NULL;
    }
    bool is_ok = ra_init_with_capacity(&ans->high_low_container, cap);
    if (!is_ok) {
        roaring_free(ans);
        return NULL;
    }
    return ans;
}

bool roaring_bitmap_init_with_capacity(roaring_bitmap_t *r, uint32_t cap) {
    return ra_init_with_capacity(&r->high_low_container, cap);
}

static inline void add_bulk_impl(roaring_bitmap_t *r,
                                 roaring_bulk_context_t *context,
                                 uint32_t val) {
    uint16_t key = val >> 16;
    if (context->container == NULL || context->key != key) {
        uint8_t typecode;
        int idx;
        context->container =
            containerptr_roaring_bitmap_add(r, val, &typecode, &idx);
        context->typecode = typecode;
        context->idx = idx;
        context->key = key;
    } else {
        // no need to seek the container, it is at hand
        // because we already have the container at hand, we can do the
        // insertion directly, bypassing the roaring_bitmap_add call
        uint8_t new_typecode;
        container_t *container2 = container_add(
            context->container, val & 0xFFFF, context->typecode, &new_typecode);
        if (container2 != context->container) {
            // rare instance when we need to change the container type
            container_free(context->container, context->typecode);
            ra_set_container_at_index(&r->high_low_container, context->idx,
                                      container2, new_typecode);
            context->typecode = new_typecode;
            context->container = container2;
        }
    }
}

void roaring_bitmap_add_many(roaring_bitmap_t *r, size_t n_args,
                             const uint32_t *vals) {
    uint32_t val;
    const uint32_t *start = vals;
    const uint32_t *end = vals + n_args;
    const uint32_t *current_val = start;

    if (n_args == 0) {
        return;
    }

    uint8_t typecode;
    int idx;
    container_t *container;
    val = *current_val;
    container = containerptr_roaring_bitmap_add(r, val, &typecode, &idx);
    roaring_bulk_context_t context = {container, idx, (uint16_t)(val >> 16),
                                      typecode};

    for (; current_val != end; current_val++) {
        memcpy(&val, current_val, sizeof(val));
        add_bulk_impl(r, &context, val);
    }
}

void roaring_bitmap_add_bulk(roaring_bitmap_t *r,
                             roaring_bulk_context_t *context, uint32_t val) {
    add_bulk_impl(r, context, val);
}

bool roaring_bitmap_contains_bulk(const roaring_bitmap_t *r,
                                  roaring_bulk_context_t *context,
                                  uint32_t val) {
    uint16_t key = val >> 16;
    if (context->container == NULL || context->key != key) {
        int32_t start_idx = -1;
        if (context->container != NULL && context->key < key) {
            start_idx = context->idx;
        }
        int idx = ra_advance_until(&r->high_low_container, key, start_idx);
        if (idx == ra_get_size(&r->high_low_container)) {
            return false;
        }
        uint8_t typecode;
        context->container = ra_get_container_at_index(
            &r->high_low_container, (uint16_t)idx, &typecode);
        context->typecode = typecode;
        context->idx = idx;
        context->key =
            ra_get_key_at_index(&r->high_low_container, (uint16_t)idx);
        // ra_advance_until finds the next key >= the target, we found a later
        // container.
        if (context->key != key) {
            return false;
        }
    }
    // context is now set up
    return container_contains(context->container, val & 0xFFFF,
                              context->typecode);
}

roaring_bitmap_t *roaring_bitmap_of_ptr(size_t n_args, const uint32_t *vals) {
    roaring_bitmap_t *answer = roaring_bitmap_create();
    roaring_bitmap_add_many(answer, n_args, vals);
    return answer;
}

roaring_bitmap_t *roaring_bitmap_of(size_t n_args, ...) {
    // todo: could be greatly optimized but we do not expect this call to ever
    // include long lists
    roaring_bitmap_t *answer = roaring_bitmap_create();
    roaring_bulk_context_t context = {0};
    va_list ap;
    va_start(ap, n_args);
    for (size_t i = 0; i < n_args; i++) {
        uint32_t val = va_arg(ap, uint32_t);
        roaring_bitmap_add_bulk(answer, &context, val);
    }
    va_end(ap);
    return answer;
}

static inline uint64_t minimum_uint64(uint64_t a, uint64_t b) {
    return (a < b) ? a : b;
}

roaring_bitmap_t *roaring_bitmap_from_range(uint64_t min, uint64_t max,
                                            uint32_t step) {
    if (max >= UINT64_C(0x100000000)) {
        max = UINT64_C(0x100000000);
    }
    if (step == 0) return NULL;
    if (max <= min) return NULL;
    roaring_bitmap_t *answer = roaring_bitmap_create();
    if (step >= (1 << 16)) {
        for (uint32_t value = (uint32_t)min; value < max; value += step) {
            roaring_bitmap_add(answer, value);
        }
        return answer;
    }
    uint64_t min_tmp = min;
    do {
        uint32_t key = (uint32_t)min_tmp >> 16;
        uint32_t container_min = min_tmp & 0xFFFF;
        uint32_t container_max =
            (uint32_t)minimum_uint64(max - (key << 16), 1 << 16);
        uint8_t type;
        container_t *container = container_from_range(
            &type, container_min, container_max, (uint16_t)step);
        ra_append(&answer->high_low_container, (uint16_t)key, container, type);
        uint32_t gap = container_max - container_min + step - 1;
        min_tmp += gap - (gap % step);
    } while (min_tmp < max);
    // cardinality of bitmap will be ((uint64_t) max - min + step - 1 ) / step
    return answer;
}

void roaring_bitmap_add_range_closed(roaring_bitmap_t *r, uint32_t min,
                                     uint32_t max) {
    if (min > max) {
        return;
    }

    roaring_array_t *ra = &r->high_low_container;

    uint32_t min_key = min >> 16;
    uint32_t max_key = max >> 16;

    int32_t num_required_containers = max_key - min_key + 1;
    int32_t suffix_length =
        count_greater(ra->keys, ra->size, (uint16_t)max_key);
    int32_t prefix_length =
        count_less(ra->keys, ra->size - suffix_length, (uint16_t)min_key);
    int32_t common_length = ra->size - prefix_length - suffix_length;

    if (num_required_containers > common_length) {
        ra_shift_tail(ra, suffix_length,
                      num_required_containers - common_length);
    }

    int32_t src = prefix_length + common_length - 1;
    int32_t dst = ra->size - suffix_length - 1;
    for (uint32_t key = max_key; key != min_key - 1;
         key--) {  // beware of min_key==0
        uint32_t container_min = (min_key == key) ? (min & 0xffff) : 0;
        uint32_t container_max = (max_key == key) ? (max & 0xffff) : 0xffff;
        container_t *new_container;
        uint8_t new_type;

        if (src >= 0 && ra->keys[src] == key) {
            ra_unshare_container_at_index(ra, (uint16_t)src);
            new_container =
                container_add_range(ra->containers[src], ra->typecodes[src],
                                    container_min, container_max, &new_type);
            if (new_container != ra->containers[src]) {
                container_free(ra->containers[src], ra->typecodes[src]);
            }
            src--;
        } else {
            new_container = container_from_range(&new_type, container_min,
                                                 container_max + 1, 1);
        }
        ra_replace_key_and_container_at_index(ra, dst, (uint16_t)key,
                                              new_container, new_type);
        dst--;
    }
}

void roaring_bitmap_remove_range_closed(roaring_bitmap_t *r, uint32_t min,
                                        uint32_t max) {
    if (min > max) {
        return;
    }

    roaring_array_t *ra = &r->high_low_container;

    uint32_t min_key = min >> 16;
    uint32_t max_key = max >> 16;

    int32_t src = count_less(ra->keys, ra->size, (uint16_t)min_key);
    int32_t dst = src;
    while (src < ra->size && ra->keys[src] <= max_key) {
        uint32_t container_min =
            (min_key == ra->keys[src]) ? (min & 0xffff) : 0;
        uint32_t container_max =
            (max_key == ra->keys[src]) ? (max & 0xffff) : 0xffff;
        ra_unshare_container_at_index(ra, (uint16_t)src);
        container_t *new_container;
        uint8_t new_type;
        new_container =
            container_remove_range(ra->containers[src], ra->typecodes[src],
                                   container_min, container_max, &new_type);
        if (new_container != ra->containers[src]) {
            container_free(ra->containers[src], ra->typecodes[src]);
        }
        if (new_container) {
            ra_replace_key_and_container_at_index(ra, dst, ra->keys[src],
                                                  new_container, new_type);
            dst++;
        }
        src++;
    }
    if (src > dst) {
        ra_shift_tail(ra, ra->size - src, dst - src);
    }
}

void roaring_bitmap_printf(const roaring_bitmap_t *r) {
    const roaring_array_t *ra = &r->high_low_container;

    printf("{");
    for (int i = 0; i < ra->size; ++i) {
        container_printf_as_uint32_array(ra->containers[i], ra->typecodes[i],
                                         ((uint32_t)ra->keys[i]) << 16);

        if (i + 1 < ra->size) {
            printf(",");
        }
    }
    printf("}");
}

void roaring_bitmap_printf_describe(const roaring_bitmap_t *r) {
    const roaring_array_t *ra = &r->high_low_container;

    printf("{");
    for (int i = 0; i < ra->size; ++i) {
        printf("%d: %s (%d)", ra->keys[i],
               get_full_container_name(ra->containers[i], ra->typecodes[i]),
               container_get_cardinality(ra->containers[i], ra->typecodes[i]));
        if (ra->typecodes[i] == SHARED_CONTAINER_TYPE) {
            printf("(shared count = %" PRIu32 " )",
                   croaring_refcount_get(
                       &(CAST_shared(ra->containers[i])->counter)));
        }

        if (i + 1 < ra->size) {
            printf(", ");
        }
    }
    printf("}");
}

typedef struct min_max_sum_s {
    uint32_t min;
    uint32_t max;
    uint64_t sum;
} min_max_sum_t;

static bool min_max_sum_fnc(uint32_t value, void *param) {
    min_max_sum_t *mms = (min_max_sum_t *)param;
    if (value > mms->max) mms->max = value;
    if (value < mms->min) mms->min = value;
    mms->sum += value;
    return true;  // we always process all data points
}

/**
 *  (For advanced users.)
 * Collect statistics about the bitmap
 */
void roaring_bitmap_statistics(const roaring_bitmap_t *r,
                               roaring_statistics_t *stat) {
    const roaring_array_t *ra = &r->high_low_container;

    memset(stat, 0, sizeof(*stat));
    stat->n_containers = ra->size;
    stat->cardinality = roaring_bitmap_get_cardinality(r);
    min_max_sum_t mms;
    mms.min = UINT32_C(0xFFFFFFFF);
    mms.max = UINT32_C(0);
    mms.sum = 0;
    roaring_iterate(r, &min_max_sum_fnc, &mms);
    stat->min_value = mms.min;
    stat->max_value = mms.max;
    stat->sum_value = mms.sum;

    for (int i = 0; i < ra->size; ++i) {
        uint8_t truetype =
            get_container_type(ra->containers[i], ra->typecodes[i]);
        uint32_t card =
            container_get_cardinality(ra->containers[i], ra->typecodes[i]);
        uint32_t sbytes =
            container_size_in_bytes(ra->containers[i], ra->typecodes[i]);
        switch (truetype) {
            case BITSET_CONTAINER_TYPE:
                stat->n_bitset_containers++;
                stat->n_values_bitset_containers += card;
                stat->n_bytes_bitset_containers += sbytes;
                break;
            case ARRAY_CONTAINER_TYPE:
                stat->n_array_containers++;
                stat->n_values_array_containers += card;
                stat->n_bytes_array_containers += sbytes;
                break;
            case RUN_CONTAINER_TYPE:
                stat->n_run_containers++;
                stat->n_values_run_containers += card;
                stat->n_bytes_run_containers += sbytes;
                break;
            default:
                assert(false);
                roaring_unreachable;
        }
    }
}

/*
 * Checks that:
 * - Array containers are sorted and contain no duplicates
 * - Range containers are sorted and contain no overlapping ranges
 * - Roaring containers are sorted by key and there are no duplicate keys
 * - The correct container type is use for each container (e.g. bitmaps aren't
 * used for small containers)
 */
bool roaring_bitmap_internal_validate(const roaring_bitmap_t *r,
                                      const char **reason) {
    const char *reason_local;
    if (reason == NULL) {
        // Always allow assigning through *reason
        reason = &reason_local;
    }
    *reason = NULL;
    const roaring_array_t *ra = &r->high_low_container;
    if (ra->size < 0) {
        *reason = "negative size";
        return false;
    }
    if (ra->allocation_size < 0) {
        *reason = "negative allocation size";
        return false;
    }
    if (ra->size > ra->allocation_size) {
        *reason = "more containers than allocated space";
        return false;
    }
    if (ra->flags & ~(ROARING_FLAG_COW | ROARING_FLAG_FROZEN)) {
        *reason = "invalid flags";
        return false;
    }
    if (ra->size == 0) {
        return true;
    }

    if (ra->keys == NULL) {
        *reason = "keys is NULL";
        return false;
    }
    if (ra->typecodes == NULL) {
        *reason = "typecodes is NULL";
        return false;
    }
    if (ra->containers == NULL) {
        *reason = "containers is NULL";
        return false;
    }

    uint32_t prev_key = ra->keys[0];
    for (int32_t i = 1; i < ra->size; ++i) {
        if (ra->keys[i] <= prev_key) {
            *reason = "keys not strictly increasing";
            return false;
        }
        prev_key = ra->keys[i];
    }

    for (int32_t i = 0; i < ra->size; ++i) {
        if (!container_internal_validate(ra->containers[i], ra->typecodes[i],
                                         reason)) {
            // reason should already be set
            if (*reason == NULL) {
                *reason = "container failed to validate but no reason given";
            }
            return false;
        }
    }

    return true;
}

roaring_bitmap_t *roaring_bitmap_copy(const roaring_bitmap_t *r) {
    roaring_bitmap_t *ans =
        (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
    if (!ans) {
        return NULL;
    }
    if (!ra_init_with_capacity(  // allocation of list of containers can fail
            &ans->high_low_container, r->high_low_container.size)) {
        roaring_free(ans);
        return NULL;
    }
    if (!ra_overwrite(  // memory allocation of individual containers may fail
            &r->high_low_container, &ans->high_low_container, is_cow(r))) {
        roaring_bitmap_free(ans);  // overwrite should leave in freeable state
        return NULL;
    }
    roaring_bitmap_set_copy_on_write(ans, is_cow(r));
    return ans;
}

bool roaring_bitmap_overwrite(roaring_bitmap_t *dest,
                              const roaring_bitmap_t *src) {
    roaring_bitmap_set_copy_on_write(dest, is_cow(src));
    return ra_overwrite(&src->high_low_container, &dest->high_low_container,
                        is_cow(src));
}

void roaring_bitmap_free(const roaring_bitmap_t *r) {
    if (r == NULL) {
        return;
    }
    if (!is_frozen(r)) {
        ra_clear((roaring_array_t *)&r->high_low_container);
    }
    roaring_free((roaring_bitmap_t *)r);
}

void roaring_bitmap_clear(roaring_bitmap_t *r) {
    ra_reset(&r->high_low_container);
}

void roaring_bitmap_add(roaring_bitmap_t *r, uint32_t val) {
    roaring_array_t *ra = &r->high_low_container;

    const uint16_t hb = val >> 16;
    const int i = ra_get_index(ra, hb);
    uint8_t typecode;
    if (i >= 0) {
        ra_unshare_container_at_index(ra, (uint16_t)i);
        container_t *container =
            ra_get_container_at_index(ra, (uint16_t)i, &typecode);
        uint8_t newtypecode = typecode;
        container_t *container2 =
            container_add(container, val & 0xFFFF, typecode, &newtypecode);
        if (container2 != container) {
            container_free(container, typecode);
            ra_set_container_at_index(&r->high_low_container, i, container2,
                                      newtypecode);
        }
    } else {
        array_container_t *newac = array_container_create();
        container_t *container =
            container_add(newac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, &typecode);
        // we could just assume that it stays an array container
        ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
                                   container, typecode);
    }
}

bool roaring_bitmap_add_checked(roaring_bitmap_t *r, uint32_t val) {
    const uint16_t hb = val >> 16;
    const int i = ra_get_index(&r->high_low_container, hb);
    uint8_t typecode;
    bool result = false;
    if (i >= 0) {
        ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i);
        container_t *container = ra_get_container_at_index(
            &r->high_low_container, (uint16_t)i, &typecode);

        const int oldCardinality =
            container_get_cardinality(container, typecode);

        uint8_t newtypecode = typecode;
        container_t *container2 =
            container_add(container, val & 0xFFFF, typecode, &newtypecode);
        if (container2 != container) {
            container_free(container, typecode);
            ra_set_container_at_index(&r->high_low_container, i, container2,
                                      newtypecode);
            result = true;
        } else {
            const int newCardinality =
                container_get_cardinality(container, newtypecode);

            result = oldCardinality != newCardinality;
        }
    } else {
        array_container_t *newac = array_container_create();
        container_t *container =
            container_add(newac, val & 0xFFFF, ARRAY_CONTAINER_TYPE, &typecode);
        // we could just assume that it stays an array container
        ra_insert_new_key_value_at(&r->high_low_container, -i - 1, hb,
                                   container, typecode);
        result = true;
    }

    return result;
}

void roaring_bitmap_remove(roaring_bitmap_t *r, uint32_t val) {
    const uint16_t hb = val >> 16;
    const int i = ra_get_index(&r->high_low_container, hb);
    uint8_t typecode;
    if (i >= 0) {
        ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i);
        container_t *container = ra_get_container_at_index(
            &r->high_low_container, (uint16_t)i, &typecode);
        uint8_t newtypecode = typecode;
        container_t *container2 =
            container_remove(container, val & 0xFFFF, typecode, &newtypecode);
        if (container2 != container) {
            container_free(container, typecode);
            ra_set_container_at_index(&r->high_low_container, i, container2,
                                      newtypecode);
        }
        if (container_get_cardinality(container2, newtypecode) != 0) {
            ra_set_container_at_index(&r->high_low_container, i, container2,
                                      newtypecode);
        } else {
            ra_remove_at_index_and_free(&r->high_low_container, i);
        }
    }
}

bool roaring_bitmap_remove_checked(roaring_bitmap_t *r, uint32_t val) {
    const uint16_t hb = val >> 16;
    const int i = ra_get_index(&r->high_low_container, hb);
    uint8_t typecode;
    bool result = false;
    if (i >= 0) {
        ra_unshare_container_at_index(&r->high_low_container, (uint16_t)i);
        container_t *container = ra_get_container_at_index(
            &r->high_low_container, (uint16_t)i, &typecode);

        const int oldCardinality =
            container_get_cardinality(container, typecode);

        uint8_t newtypecode = typecode;
        container_t *container2 =
            container_remove(container, val & 0xFFFF, typecode, &newtypecode);
        if (container2 != container) {
            container_free(container, typecode);
            ra_set_container_at_index(&r->high_low_container, i, container2,
                                      newtypecode);
        }

        const int newCardinality =
            container_get_cardinality(container2, newtypecode);

        if (newCardinality != 0) {
            ra_set_container_at_index(&r->high_low_container, i, container2,
                                      newtypecode);
        } else {
            ra_remove_at_index_and_free(&r->high_low_container, i);
        }

        result = oldCardinality != newCardinality;
    }
    return result;
}

void roaring_bitmap_remove_many(roaring_bitmap_t *r, size_t n_args,
                                const uint32_t *vals) {
    if (n_args == 0 || r->high_low_container.size == 0) {
        return;
    }
    int32_t pos =
        -1;  // position of the container used in the previous iteration
    for (size_t i = 0; i < n_args; i++) {
        uint16_t key = (uint16_t)(vals[i] >> 16);
        if (pos < 0 || key != r->high_low_container.keys[pos]) {
            pos = ra_get_index(&r->high_low_container, key);
        }
        if (pos >= 0) {
            uint8_t new_typecode;
            container_t *new_container;
            new_container = container_remove(
                r->high_low_container.containers[pos], vals[i] & 0xffff,
                r->high_low_container.typecodes[pos], &new_typecode);
            if (new_container != r->high_low_container.containers[pos]) {
                container_free(r->high_low_container.containers[pos],
                               r->high_low_container.typecodes[pos]);
                ra_replace_key_and_container_at_index(&r->high_low_container,
                                                      pos, key, new_container,
                                                      new_typecode);
            }
            if (!container_nonzero_cardinality(new_container, new_typecode)) {
                container_free(new_container, new_typecode);
                ra_remove_at_index(&r->high_low_container, pos);
                pos = -1;
            }
        }
    }
}

// there should be some SIMD optimizations possible here
roaring_bitmap_t *roaring_bitmap_and(const roaring_bitmap_t *x1,
                                     const roaring_bitmap_t *x2) {
    uint8_t result_type = 0;
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    uint32_t neededcap = length1 > length2 ? length2 : length1;
    roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(neededcap);
    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));

    int pos1 = 0, pos2 = 0;

    while (pos1 < length1 && pos2 < length2) {
        const uint16_t s1 =
            ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
        const uint16_t s2 =
            ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);

        if (s1 == s2) {
            uint8_t type1, type2;
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            container_t *c = container_and(c1, type1, c2, type2, &result_type);

            if (container_nonzero_cardinality(c, result_type)) {
                ra_append(&answer->high_low_container, s1, c, result_type);
            } else {
                container_free(c, result_type);  // otherwise: memory leak!
            }
            ++pos1;
            ++pos2;
        } else if (s1 < s2) {  // s1 < s2
            pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
        } else {  // s1 > s2
            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
        }
    }
    return answer;
}

/**
 * Compute the union of 'number' bitmaps.
 */
roaring_bitmap_t *roaring_bitmap_or_many(size_t number,
                                         const roaring_bitmap_t **x) {
    if (number == 0) {
        return roaring_bitmap_create();
    }
    if (number == 1) {
        return roaring_bitmap_copy(x[0]);
    }
    roaring_bitmap_t *answer =
        roaring_bitmap_lazy_or(x[0], x[1], LAZY_OR_BITSET_CONVERSION);
    for (size_t i = 2; i < number; i++) {
        roaring_bitmap_lazy_or_inplace(answer, x[i], LAZY_OR_BITSET_CONVERSION);
    }
    roaring_bitmap_repair_after_lazy(answer);
    return answer;
}

/**
 * Compute the xor of 'number' bitmaps.
 */
roaring_bitmap_t *roaring_bitmap_xor_many(size_t number,
                                          const roaring_bitmap_t **x) {
    if (number == 0) {
        return roaring_bitmap_create();
    }
    if (number == 1) {
        return roaring_bitmap_copy(x[0]);
    }
    roaring_bitmap_t *answer = roaring_bitmap_lazy_xor(x[0], x[1]);
    for (size_t i = 2; i < number; i++) {
        roaring_bitmap_lazy_xor_inplace(answer, x[i]);
    }
    roaring_bitmap_repair_after_lazy(answer);
    return answer;
}

// inplace and (modifies its first argument).
void roaring_bitmap_and_inplace(roaring_bitmap_t *x1,
                                const roaring_bitmap_t *x2) {
    if (x1 == x2) return;
    int pos1 = 0, pos2 = 0, intersection_size = 0;
    const int length1 = ra_get_size(&x1->high_low_container);
    const int length2 = ra_get_size(&x2->high_low_container);

    // any skipped-over or newly emptied containers in x1
    // have to be freed.
    while (pos1 < length1 && pos2 < length2) {
        const uint16_t s1 =
            ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
        const uint16_t s2 =
            ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);

        if (s1 == s2) {
            uint8_t type1, type2, result_type;
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);

            // We do the computation "in place" only when c1 is not a shared
            // container. Rationale: using a shared container safely with in
            // place computation would require making a copy and then doing the
            // computation in place which is likely less efficient than avoiding
            // in place entirely and always generating a new container.
            container_t *c =
                (type1 == SHARED_CONTAINER_TYPE)
                    ? container_and(c1, type1, c2, type2, &result_type)
                    : container_iand(c1, type1, c2, type2, &result_type);

            if (c != c1) {  // in this instance a new container was created, and
                            // we need to free the old one
                container_free(c1, type1);
            }
            if (container_nonzero_cardinality(c, result_type)) {
                ra_replace_key_and_container_at_index(&x1->high_low_container,
                                                      intersection_size, s1, c,
                                                      result_type);
                intersection_size++;
            } else {
                container_free(c, result_type);
            }
            ++pos1;
            ++pos2;
        } else if (s1 < s2) {
            pos1 = ra_advance_until_freeing(&x1->high_low_container, s2, pos1);
        } else {  // s1 > s2
            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
        }
    }

    // if we ended early because x2 ran out, then all remaining in x1 should be
    // freed
    while (pos1 < length1) {
        container_free(x1->high_low_container.containers[pos1],
                       x1->high_low_container.typecodes[pos1]);
        ++pos1;
    }

    // all containers after this have either been copied or freed
    ra_downsize(&x1->high_low_container, intersection_size);
}

roaring_bitmap_t *roaring_bitmap_or(const roaring_bitmap_t *x1,
                                    const roaring_bitmap_t *x2) {
    uint8_t result_type = 0;
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    if (0 == length1) {
        return roaring_bitmap_copy(x2);
    }
    if (0 == length2) {
        return roaring_bitmap_copy(x1);
    }
    roaring_bitmap_t *answer =
        roaring_bitmap_create_with_capacity(length1 + length2);
    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
    int pos1 = 0, pos2 = 0;
    uint8_t type1, type2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
    while (true) {
        if (s1 == s2) {
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            container_t *c = container_or(c1, type1, c2, type2, &result_type);

            // since we assume that the initial containers are non-empty, the
            // result here
            // can only be non-empty
            ra_append(&answer->high_low_container, s1, c, result_type);
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);

        } else if (s1 < s2) {  // s1 < s2
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            // c1 = container_clone(c1, type1);
            c1 = get_copy_of_container(c1, &type1, is_cow(x1));
            if (is_cow(x1)) {
                ra_set_container_at_index(&x1->high_low_container, pos1, c1,
                                          type1);
            }
            ra_append(&answer->high_low_container, s1, c1, type1);
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);

        } else {  // s1 > s2
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            // c2 = container_clone(c2, type2);
            c2 = get_copy_of_container(c2, &type2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          type2);
            }
            ra_append(&answer->high_low_container, s2, c2, type2);
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
        }
    }
    if (pos1 == length1) {
        ra_append_copy_range(&answer->high_low_container,
                             &x2->high_low_container, pos2, length2,
                             is_cow(x2));
    } else if (pos2 == length2) {
        ra_append_copy_range(&answer->high_low_container,
                             &x1->high_low_container, pos1, length1,
                             is_cow(x1));
    }
    return answer;
}

// inplace or (modifies its first argument).
void roaring_bitmap_or_inplace(roaring_bitmap_t *x1,
                               const roaring_bitmap_t *x2) {
    uint8_t result_type = 0;
    int length1 = x1->high_low_container.size;
    const int length2 = x2->high_low_container.size;

    if (0 == length2) return;

    if (0 == length1) {
        roaring_bitmap_overwrite(x1, x2);
        return;
    }
    int pos1 = 0, pos2 = 0;
    uint8_t type1, type2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
    while (true) {
        if (s1 == s2) {
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            if (!container_is_full(c1, type1)) {
                container_t *c2 = ra_get_container_at_index(
                    &x2->high_low_container, (uint16_t)pos2, &type2);
                container_t *c =
                    (type1 == SHARED_CONTAINER_TYPE)
                        ? container_or(c1, type1, c2, type2, &result_type)
                        : container_ior(c1, type1, c2, type2, &result_type);

                if (c != c1) {  // in this instance a new container was created,
                                // and we need to free the old one
                    container_free(c1, type1);
                }
                ra_set_container_at_index(&x1->high_low_container, pos1, c,
                                          result_type);
            }
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);

        } else if (s1 < s2) {  // s1 < s2
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);

        } else {  // s1 > s2
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            c2 = get_copy_of_container(c2, &type2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          type2);
            }

            // container_t *c2_clone = container_clone(c2, type2);
            ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
                                       type2);
            pos1++;
            length1++;
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
        }
    }
    if (pos1 == length1) {
        ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
                             pos2, length2, is_cow(x2));
    }
}

roaring_bitmap_t *roaring_bitmap_xor(const roaring_bitmap_t *x1,
                                     const roaring_bitmap_t *x2) {
    uint8_t result_type = 0;
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    if (0 == length1) {
        return roaring_bitmap_copy(x2);
    }
    if (0 == length2) {
        return roaring_bitmap_copy(x1);
    }
    roaring_bitmap_t *answer =
        roaring_bitmap_create_with_capacity(length1 + length2);
    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
    int pos1 = 0, pos2 = 0;
    uint8_t type1, type2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
    while (true) {
        if (s1 == s2) {
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            container_t *c = container_xor(c1, type1, c2, type2, &result_type);

            if (container_nonzero_cardinality(c, result_type)) {
                ra_append(&answer->high_low_container, s1, c, result_type);
            } else {
                container_free(c, result_type);
            }
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);

        } else if (s1 < s2) {  // s1 < s2
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            c1 = get_copy_of_container(c1, &type1, is_cow(x1));
            if (is_cow(x1)) {
                ra_set_container_at_index(&x1->high_low_container, pos1, c1,
                                          type1);
            }
            ra_append(&answer->high_low_container, s1, c1, type1);
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);

        } else {  // s1 > s2
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            c2 = get_copy_of_container(c2, &type2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          type2);
            }
            ra_append(&answer->high_low_container, s2, c2, type2);
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
        }
    }
    if (pos1 == length1) {
        ra_append_copy_range(&answer->high_low_container,
                             &x2->high_low_container, pos2, length2,
                             is_cow(x2));
    } else if (pos2 == length2) {
        ra_append_copy_range(&answer->high_low_container,
                             &x1->high_low_container, pos1, length1,
                             is_cow(x1));
    }
    return answer;
}

// inplace xor (modifies its first argument).

void roaring_bitmap_xor_inplace(roaring_bitmap_t *x1,
                                const roaring_bitmap_t *x2) {
    assert(x1 != x2);
    uint8_t result_type = 0;
    int length1 = x1->high_low_container.size;
    const int length2 = x2->high_low_container.size;

    if (0 == length2) return;

    if (0 == length1) {
        roaring_bitmap_overwrite(x1, x2);
        return;
    }

    // XOR can have new containers inserted from x2, but can also
    // lose containers when x1 and x2 are nonempty and identical.

    int pos1 = 0, pos2 = 0;
    uint8_t type1, type2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
    while (true) {
        if (s1 == s2) {
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);

            // We do the computation "in place" only when c1 is not a shared
            // container. Rationale: using a shared container safely with in
            // place computation would require making a copy and then doing the
            // computation in place which is likely less efficient than avoiding
            // in place entirely and always generating a new container.

            container_t *c;
            if (type1 == SHARED_CONTAINER_TYPE) {
                c = container_xor(c1, type1, c2, type2, &result_type);
                shared_container_free(CAST_shared(c1));  // so release
            } else {
                c = container_ixor(c1, type1, c2, type2, &result_type);
            }

            if (container_nonzero_cardinality(c, result_type)) {
                ra_set_container_at_index(&x1->high_low_container, pos1, c,
                                          result_type);
                ++pos1;
            } else {
                container_free(c, result_type);
                ra_remove_at_index(&x1->high_low_container, pos1);
                --length1;
            }

            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);

        } else if (s1 < s2) {  // s1 < s2
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);

        } else {  // s1 > s2
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            c2 = get_copy_of_container(c2, &type2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          type2);
            }

            ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
                                       type2);
            pos1++;
            length1++;
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
        }
    }
    if (pos1 == length1) {
        ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
                             pos2, length2, is_cow(x2));
    }
}

roaring_bitmap_t *roaring_bitmap_andnot(const roaring_bitmap_t *x1,
                                        const roaring_bitmap_t *x2) {
    uint8_t result_type = 0;
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    if (0 == length1) {
        roaring_bitmap_t *empty_bitmap = roaring_bitmap_create();
        roaring_bitmap_set_copy_on_write(empty_bitmap,
                                         is_cow(x1) || is_cow(x2));
        return empty_bitmap;
    }
    if (0 == length2) {
        return roaring_bitmap_copy(x1);
    }
    roaring_bitmap_t *answer = roaring_bitmap_create_with_capacity(length1);
    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));

    int pos1 = 0, pos2 = 0;
    uint8_t type1, type2;
    uint16_t s1 = 0;
    uint16_t s2 = 0;
    while (true) {
        s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
        s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);

        if (s1 == s2) {
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            container_t *c =
                container_andnot(c1, type1, c2, type2, &result_type);

            if (container_nonzero_cardinality(c, result_type)) {
                ra_append(&answer->high_low_container, s1, c, result_type);
            } else {
                container_free(c, result_type);
            }
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
        } else if (s1 < s2) {  // s1 < s2
            const int next_pos1 =
                ra_advance_until(&x1->high_low_container, s2, pos1);
            ra_append_copy_range(&answer->high_low_container,
                                 &x1->high_low_container, pos1, next_pos1,
                                 is_cow(x1));
            // TODO : perhaps some of the copy_on_write should be based on
            // answer rather than x1 (more stringent?).  Many similar cases
            pos1 = next_pos1;
            if (pos1 == length1) break;
        } else {  // s1 > s2
            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
            if (pos2 == length2) break;
        }
    }
    if (pos2 == length2) {
        ra_append_copy_range(&answer->high_low_container,
                             &x1->high_low_container, pos1, length1,
                             is_cow(x1));
    }
    return answer;
}

// inplace andnot (modifies its first argument).

void roaring_bitmap_andnot_inplace(roaring_bitmap_t *x1,
                                   const roaring_bitmap_t *x2) {
    assert(x1 != x2);

    uint8_t result_type = 0;
    int length1 = x1->high_low_container.size;
    const int length2 = x2->high_low_container.size;
    int intersection_size = 0;

    if (0 == length2) return;

    if (0 == length1) {
        roaring_bitmap_clear(x1);
        return;
    }

    int pos1 = 0, pos2 = 0;
    uint8_t type1, type2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
    while (true) {
        if (s1 == s2) {
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);

            // We do the computation "in place" only when c1 is not a shared
            // container. Rationale: using a shared container safely with in
            // place computation would require making a copy and then doing the
            // computation in place which is likely less efficient than avoiding
            // in place entirely and always generating a new container.

            container_t *c;
            if (type1 == SHARED_CONTAINER_TYPE) {
                c = container_andnot(c1, type1, c2, type2, &result_type);
                shared_container_free(CAST_shared(c1));  // release
            } else {
                c = container_iandnot(c1, type1, c2, type2, &result_type);
            }

            if (container_nonzero_cardinality(c, result_type)) {
                ra_replace_key_and_container_at_index(&x1->high_low_container,
                                                      intersection_size++, s1,
                                                      c, result_type);
            } else {
                container_free(c, result_type);
            }

            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);

        } else if (s1 < s2) {  // s1 < s2
            if (pos1 != intersection_size) {
                container_t *c1 = ra_get_container_at_index(
                    &x1->high_low_container, (uint16_t)pos1, &type1);

                ra_replace_key_and_container_at_index(
                    &x1->high_low_container, intersection_size, s1, c1, type1);
            }
            intersection_size++;
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);

        } else {  // s1 > s2
            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
        }
    }

    if (pos1 < length1) {
        // all containers between intersection_size and
        // pos1 are junk.  However, they have either been moved
        // (thus still referenced) or involved in an iandnot
        // that will clean up all containers that could not be reused.
        // Thus we should not free the junk containers between
        // intersection_size and pos1.
        if (pos1 > intersection_size) {
            // left slide of remaining items
            ra_copy_range(&x1->high_low_container, pos1, length1,
                          intersection_size);
        }
        // else current placement is fine
        intersection_size += (length1 - pos1);
    }
    ra_downsize(&x1->high_low_container, intersection_size);
}

uint64_t roaring_bitmap_get_cardinality(const roaring_bitmap_t *r) {
    const roaring_array_t *ra = &r->high_low_container;

    uint64_t card = 0;
    for (int i = 0; i < ra->size; ++i)
        card += container_get_cardinality(ra->containers[i], ra->typecodes[i]);
    return card;
}

uint64_t roaring_bitmap_range_cardinality(const roaring_bitmap_t *r,
                                          uint64_t range_start,
                                          uint64_t range_end) {
    const roaring_array_t *ra = &r->high_low_container;

    if (range_end > UINT32_MAX) {
        range_end = UINT32_MAX + UINT64_C(1);
    }
    if (range_start >= range_end) {
        return 0;
    }
    range_end--;  // make range_end inclusive
    // now we have: 0 <= range_start <= range_end <= UINT32_MAX

    uint16_t minhb = (uint16_t)(range_start >> 16);
    uint16_t maxhb = (uint16_t)(range_end >> 16);

    uint64_t card = 0;

    int i = ra_get_index(ra, minhb);
    if (i >= 0) {
        if (minhb == maxhb) {
            card += container_rank(ra->containers[i], ra->typecodes[i],
                                   range_end & 0xffff);
        } else {
            card +=
                container_get_cardinality(ra->containers[i], ra->typecodes[i]);
        }
        if ((range_start & 0xffff) != 0) {
            card -= container_rank(ra->containers[i], ra->typecodes[i],
                                   (range_start & 0xffff) - 1);
        }
        i++;
    } else {
        i = -i - 1;
    }

    for (; i < ra->size; i++) {
        uint16_t key = ra->keys[i];
        if (key < maxhb) {
            card +=
                container_get_cardinality(ra->containers[i], ra->typecodes[i]);
        } else if (key == maxhb) {
            card += container_rank(ra->containers[i], ra->typecodes[i],
                                   range_end & 0xffff);
            break;
        } else {
            break;
        }
    }

    return card;
}

bool roaring_bitmap_is_empty(const roaring_bitmap_t *r) {
    return r->high_low_container.size == 0;
}

void roaring_bitmap_to_uint32_array(const roaring_bitmap_t *r, uint32_t *ans) {
    ra_to_uint32_array(&r->high_low_container, ans);
}

bool roaring_bitmap_range_uint32_array(const roaring_bitmap_t *r, size_t offset,
                                       size_t limit, uint32_t *ans) {
    return ra_range_uint32_array(&r->high_low_container, offset, limit, ans);
}

/** convert array and bitmap containers to run containers when it is more
 * efficient;
 * also convert from run containers when more space efficient.  Returns
 * true if the result has at least one run container.
 */
bool roaring_bitmap_run_optimize(roaring_bitmap_t *r) {
    bool answer = false;
    for (int i = 0; i < r->high_low_container.size; i++) {
        uint8_t type_original, type_after;
        ra_unshare_container_at_index(
            &r->high_low_container,
            (uint16_t)i);  // TODO: this introduces extra cloning!
        container_t *c = ra_get_container_at_index(&r->high_low_container,
                                                   (uint16_t)i, &type_original);
        container_t *c1 = convert_run_optimize(c, type_original, &type_after);
        if (type_after == RUN_CONTAINER_TYPE) {
            answer = true;
        }
        ra_set_container_at_index(&r->high_low_container, i, c1, type_after);
    }
    return answer;
}

size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r) {
    size_t answer = 0;
    for (int i = 0; i < r->high_low_container.size; i++) {
        uint8_t type_original;
        container_t *c = ra_get_container_at_index(&r->high_low_container,
                                                   (uint16_t)i, &type_original);
        answer += container_shrink_to_fit(c, type_original);
    }
    answer += ra_shrink_to_fit(&r->high_low_container);
    return answer;
}

/**
 *  Remove run-length encoding even when it is more space efficient
 *  return whether a change was applied
 */
bool roaring_bitmap_remove_run_compression(roaring_bitmap_t *r) {
    bool answer = false;
    for (int i = 0; i < r->high_low_container.size; i++) {
        uint8_t type_original, type_after;
        container_t *c = ra_get_container_at_index(&r->high_low_container,
                                                   (uint16_t)i, &type_original);
        if (get_container_type(c, type_original) == RUN_CONTAINER_TYPE) {
            answer = true;
            if (type_original == SHARED_CONTAINER_TYPE) {
                run_container_t *truec = CAST_run(CAST_shared(c)->container);
                int32_t card = run_container_cardinality(truec);
                container_t *c1 = convert_to_bitset_or_array_container(
                    truec, card, &type_after);
                shared_container_free(CAST_shared(c));  // frees run as needed
                ra_set_container_at_index(&r->high_low_container, i, c1,
                                          type_after);

            } else {
                int32_t card = run_container_cardinality(CAST_run(c));
                container_t *c1 = convert_to_bitset_or_array_container(
                    CAST_run(c), card, &type_after);
                run_container_free(CAST_run(c));
                ra_set_container_at_index(&r->high_low_container, i, c1,
                                          type_after);
            }
        }
    }
    return answer;
}

size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf) {
    size_t portablesize = roaring_bitmap_portable_size_in_bytes(r);
    uint64_t cardinality = roaring_bitmap_get_cardinality(r);
    uint64_t sizeasarray = cardinality * sizeof(uint32_t) + sizeof(uint32_t);
    if (portablesize < sizeasarray) {
        buf[0] = CROARING_SERIALIZATION_CONTAINER;
        return roaring_bitmap_portable_serialize(r, buf + 1) + 1;
    } else {
        buf[0] = CROARING_SERIALIZATION_ARRAY_UINT32;
        memcpy(buf + 1, &cardinality, sizeof(uint32_t));
        roaring_bitmap_to_uint32_array(
            r, (uint32_t *)(buf + 1 + sizeof(uint32_t)));
        return 1 + (size_t)sizeasarray;
    }
}

size_t roaring_bitmap_size_in_bytes(const roaring_bitmap_t *r) {
    size_t portablesize = roaring_bitmap_portable_size_in_bytes(r);
    uint64_t sizeasarray =
        roaring_bitmap_get_cardinality(r) * sizeof(uint32_t) + sizeof(uint32_t);
    return portablesize < sizeasarray ? portablesize + 1
                                      : (size_t)sizeasarray + 1;
}

size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r) {
    return ra_portable_size_in_bytes(&r->high_low_container);
}

roaring_bitmap_t *roaring_bitmap_portable_deserialize_safe(const char *buf,
                                                           size_t maxbytes) {
    roaring_bitmap_t *ans =
        (roaring_bitmap_t *)roaring_malloc(sizeof(roaring_bitmap_t));
    if (ans == NULL) {
        return NULL;
    }
    size_t bytesread;
    bool is_ok = ra_portable_deserialize(&ans->high_low_container, buf,
                                         maxbytes, &bytesread);
    if (!is_ok) {
        roaring_free(ans);
        return NULL;
    }
    roaring_bitmap_set_copy_on_write(ans, false);
    if (!is_ok) {
        roaring_free(ans);
        return NULL;
    }
    return ans;
}

roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf) {
    return roaring_bitmap_portable_deserialize_safe(buf, SIZE_MAX);
}

size_t roaring_bitmap_portable_deserialize_size(const char *buf,
                                                size_t maxbytes) {
    return ra_portable_deserialize_size(buf, maxbytes);
}

size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf) {
    return ra_portable_serialize(&r->high_low_container, buf);
}

roaring_bitmap_t *roaring_bitmap_deserialize(const void *buf) {
    const char *bufaschar = (const char *)buf;
    if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) {
        /* This looks like a compressed set of uint32_t elements */
        uint32_t card;

        memcpy(&card, bufaschar + 1, sizeof(uint32_t));

        const uint32_t *elems =
            (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t));

        roaring_bitmap_t *bitmap = roaring_bitmap_create();
        if (bitmap == NULL) {
            return NULL;
        }
        roaring_bulk_context_t context = {0};
        for (uint32_t i = 0; i < card; i++) {
            // elems may not be aligned, read with memcpy
            uint32_t elem;
            memcpy(&elem, elems + i, sizeof(elem));
            roaring_bitmap_add_bulk(bitmap, &context, elem);
        }
        return bitmap;

    } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) {
        return roaring_bitmap_portable_deserialize(bufaschar + 1);
    } else
        return (NULL);
}

roaring_bitmap_t *roaring_bitmap_deserialize_safe(const void *buf,
                                                  size_t maxbytes) {
    if (maxbytes < 1) {
        return NULL;
    }

    const char *bufaschar = (const char *)buf;
    if (bufaschar[0] == CROARING_SERIALIZATION_ARRAY_UINT32) {
        if (maxbytes < 1 + sizeof(uint32_t)) {
            return NULL;
        }

        /* This looks like a compressed set of uint32_t elements */
        uint32_t card;
        memcpy(&card, bufaschar + 1, sizeof(uint32_t));

        // Check the buffer is big enough to contain card uint32_t elements
        if (maxbytes < 1 + sizeof(uint32_t) + card * sizeof(uint32_t)) {
            return NULL;
        }

        const uint32_t *elems =
            (const uint32_t *)(bufaschar + 1 + sizeof(uint32_t));

        roaring_bitmap_t *bitmap = roaring_bitmap_create();
        if (bitmap == NULL) {
            return NULL;
        }
        roaring_bulk_context_t context = {0};
        for (uint32_t i = 0; i < card; i++) {
            // elems may not be aligned, read with memcpy
            uint32_t elem;
            memcpy(&elem, elems + i, sizeof(elem));
            roaring_bitmap_add_bulk(bitmap, &context, elem);
        }
        return bitmap;

    } else if (bufaschar[0] == CROARING_SERIALIZATION_CONTAINER) {
        return roaring_bitmap_portable_deserialize_safe(bufaschar + 1,
                                                        maxbytes - 1);
    } else
        return (NULL);
}

bool roaring_iterate(const roaring_bitmap_t *r, roaring_iterator iterator,
                     void *ptr) {
    const roaring_array_t *ra = &r->high_low_container;

    for (int i = 0; i < ra->size; ++i)
        if (!container_iterate(ra->containers[i], ra->typecodes[i],
                               ((uint32_t)ra->keys[i]) << 16, iterator, ptr)) {
            return false;
        }
    return true;
}

bool roaring_iterate64(const roaring_bitmap_t *r, roaring_iterator64 iterator,
                       uint64_t high_bits, void *ptr) {
    const roaring_array_t *ra = &r->high_low_container;

    for (int i = 0; i < ra->size; ++i)
        if (!container_iterate64(ra->containers[i], ra->typecodes[i],
                                 ((uint32_t)ra->keys[i]) << 16, iterator,
                                 high_bits, ptr)) {
            return false;
        }
    return true;
}

/****
 * begin roaring_uint32_iterator_t
 *****/

/**
 * Partially initializes the iterator. Leaves it in either state:
 * 1. Invalid due to `has_value = false`, or
 * 2. At a container, with the high bits set, `has_value = true`.
 */
CROARING_WARN_UNUSED static bool iter_new_container_partial_init(
    roaring_uint32_iterator_t *newit) {
    newit->current_value = 0;
    if (newit->container_index >= newit->parent->high_low_container.size ||
        newit->container_index < 0) {
        newit->current_value = UINT32_MAX;
        return (newit->has_value = false);
    }
    newit->has_value = true;
    // we precompute container, typecode and highbits so that successive
    // iterators do not have to grab them from odd memory locations
    // and have to worry about the (easily predicted) container_unwrap_shared
    // call.
    newit->container =
        newit->parent->high_low_container.containers[newit->container_index];
    newit->typecode =
        newit->parent->high_low_container.typecodes[newit->container_index];
    newit->highbits =
        ((uint32_t)
             newit->parent->high_low_container.keys[newit->container_index])
        << 16;
    newit->container =
        container_unwrap_shared(newit->container, &(newit->typecode));
    return true;
}

/**
 * Positions the iterator at the first value of the current container that the
 * iterator points at, if available.
 */
CROARING_WARN_UNUSED static bool loadfirstvalue(
    roaring_uint32_iterator_t *newit) {
    if (iter_new_container_partial_init(newit)) {
        uint16_t value = 0;
        newit->container_it =
            container_init_iterator(newit->container, newit->typecode, &value);
        newit->current_value = newit->highbits | value;
    }
    return newit->has_value;
}

/**
 * Positions the iterator at the last value of the current container that the
 * iterator points at, if available.
 */
CROARING_WARN_UNUSED static bool loadlastvalue(
    roaring_uint32_iterator_t *newit) {
    if (iter_new_container_partial_init(newit)) {
        uint16_t value = 0;
        newit->container_it = container_init_iterator_last(
            newit->container, newit->typecode, &value);
        newit->current_value = newit->highbits | value;
    }
    return newit->has_value;
}

/**
 * Positions the iterator at the smallest value that is larger than or equal to
 * `val` within the current container that the iterator points at. Assumes such
 * a value exists within the current container.
 */
CROARING_WARN_UNUSED static bool loadfirstvalue_largeorequal(
    roaring_uint32_iterator_t *newit, uint32_t val) {
    bool partial_init = iter_new_container_partial_init(newit);
    assert(partial_init);
    if (!partial_init) {
        return false;
    }
    uint16_t value = 0;
    newit->container_it =
        container_init_iterator(newit->container, newit->typecode, &value);
    bool found = container_iterator_lower_bound(
        newit->container, newit->typecode, &newit->container_it, &value,
        val & 0xFFFF);
    assert(found);
    if (!found) {
        return false;
    }
    newit->current_value = newit->highbits | value;
    return true;
}

void roaring_iterator_init(const roaring_bitmap_t *r,
                           roaring_uint32_iterator_t *newit) {
    newit->parent = r;
    newit->container_index = 0;
    newit->has_value = loadfirstvalue(newit);
}

void roaring_iterator_init_last(const roaring_bitmap_t *r,
                                roaring_uint32_iterator_t *newit) {
    newit->parent = r;
    newit->container_index = newit->parent->high_low_container.size - 1;
    newit->has_value = loadlastvalue(newit);
}

roaring_uint32_iterator_t *roaring_iterator_create(const roaring_bitmap_t *r) {
    roaring_uint32_iterator_t *newit =
        (roaring_uint32_iterator_t *)roaring_malloc(
            sizeof(roaring_uint32_iterator_t));
    if (newit == NULL) return NULL;
    roaring_iterator_init(r, newit);
    return newit;
}

roaring_uint32_iterator_t *roaring_uint32_iterator_copy(
    const roaring_uint32_iterator_t *it) {
    roaring_uint32_iterator_t *newit =
        (roaring_uint32_iterator_t *)roaring_malloc(
            sizeof(roaring_uint32_iterator_t));
    memcpy(newit, it, sizeof(roaring_uint32_iterator_t));
    return newit;
}

bool roaring_uint32_iterator_move_equalorlarger(roaring_uint32_iterator_t *it,
                                                uint32_t val) {
    uint16_t hb = val >> 16;
    const int i = ra_get_index(&it->parent->high_low_container, hb);
    if (i >= 0) {
        uint32_t lowvalue =
            container_maximum(it->parent->high_low_container.containers[i],
                              it->parent->high_low_container.typecodes[i]);
        uint16_t lb = val & 0xFFFF;
        if (lowvalue < lb) {
            // will have to load first value of next container
            it->container_index = i + 1;
        } else {
            // the value is necessarily within the range of the container
            it->container_index = i;
            it->has_value = loadfirstvalue_largeorequal(it, val);
            return it->has_value;
        }
    } else {
        // there is no matching, so we are going for the next container
        it->container_index = -i - 1;
    }
    it->has_value = loadfirstvalue(it);
    return it->has_value;
}

bool roaring_uint32_iterator_advance(roaring_uint32_iterator_t *it) {
    if (it->container_index >= it->parent->high_low_container.size) {
        return (it->has_value = false);
    }
    if (it->container_index < 0) {
        it->container_index = 0;
        return (it->has_value = loadfirstvalue(it));
    }
    uint16_t low16 = (uint16_t)it->current_value;
    if (container_iterator_next(it->container, it->typecode, &it->container_it,
                                &low16)) {
        it->current_value = it->highbits | low16;
        return (it->has_value = true);
    }
    it->container_index++;
    return (it->has_value = loadfirstvalue(it));
}

bool roaring_uint32_iterator_previous(roaring_uint32_iterator_t *it) {
    if (it->container_index < 0) {
        return (it->has_value = false);
    }
    if (it->container_index >= it->parent->high_low_container.size) {
        it->container_index = it->parent->high_low_container.size - 1;
        return (it->has_value = loadlastvalue(it));
    }
    uint16_t low16 = (uint16_t)it->current_value;
    if (container_iterator_prev(it->container, it->typecode, &it->container_it,
                                &low16)) {
        it->current_value = it->highbits | low16;
        return (it->has_value = true);
    }
    it->container_index--;
    return (it->has_value = loadlastvalue(it));
}

uint32_t roaring_uint32_iterator_read(roaring_uint32_iterator_t *it,
                                      uint32_t *buf, uint32_t count) {
    uint32_t ret = 0;
    while (it->has_value && ret < count) {
        uint32_t consumed;
        uint16_t low16 = (uint16_t)it->current_value;
        bool has_value = container_iterator_read_into_uint32(
            it->container, it->typecode, &it->container_it, it->highbits, buf,
            count - ret, &consumed, &low16);
        ret += consumed;
        buf += consumed;
        if (has_value) {
            it->has_value = true;
            it->current_value = it->highbits | low16;
            assert(ret == count);
            return ret;
        }
        it->container_index++;
        it->has_value = loadfirstvalue(it);
    }
    return ret;
}

void roaring_uint32_iterator_free(roaring_uint32_iterator_t *it) {
    roaring_free(it);
}

/****
 * end of roaring_uint32_iterator_t
 *****/

bool roaring_bitmap_equals(const roaring_bitmap_t *r1,
                           const roaring_bitmap_t *r2) {
    const roaring_array_t *ra1 = &r1->high_low_container;
    const roaring_array_t *ra2 = &r2->high_low_container;

    if (ra1->size != ra2->size) {
        return false;
    }
    for (int i = 0; i < ra1->size; ++i) {
        if (ra1->keys[i] != ra2->keys[i]) {
            return false;
        }
    }
    for (int i = 0; i < ra1->size; ++i) {
        bool areequal = container_equals(ra1->containers[i], ra1->typecodes[i],
                                         ra2->containers[i], ra2->typecodes[i]);
        if (!areequal) {
            return false;
        }
    }
    return true;
}

bool roaring_bitmap_is_subset(const roaring_bitmap_t *r1,
                              const roaring_bitmap_t *r2) {
    const roaring_array_t *ra1 = &r1->high_low_container;
    const roaring_array_t *ra2 = &r2->high_low_container;

    const int length1 = ra1->size, length2 = ra2->size;

    int pos1 = 0, pos2 = 0;

    while (pos1 < length1 && pos2 < length2) {
        const uint16_t s1 = ra_get_key_at_index(ra1, (uint16_t)pos1);
        const uint16_t s2 = ra_get_key_at_index(ra2, (uint16_t)pos2);

        if (s1 == s2) {
            uint8_t type1, type2;
            container_t *c1 =
                ra_get_container_at_index(ra1, (uint16_t)pos1, &type1);
            container_t *c2 =
                ra_get_container_at_index(ra2, (uint16_t)pos2, &type2);
            if (!container_is_subset(c1, type1, c2, type2)) return false;
            ++pos1;
            ++pos2;
        } else if (s1 < s2) {  // s1 < s2
            return false;
        } else {  // s1 > s2
            pos2 = ra_advance_until(ra2, s1, pos2);
        }
    }
    if (pos1 == length1)
        return true;
    else
        return false;
}

static void insert_flipped_container(roaring_array_t *ans_arr,
                                     const roaring_array_t *x1_arr, uint16_t hb,
                                     uint16_t lb_start, uint16_t lb_end) {
    const int i = ra_get_index(x1_arr, hb);
    const int j = ra_get_index(ans_arr, hb);
    uint8_t ctype_in, ctype_out;
    container_t *flipped_container = NULL;
    if (i >= 0) {
        container_t *container_to_flip =
            ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in);
        flipped_container =
            container_not_range(container_to_flip, ctype_in, (uint32_t)lb_start,
                                (uint32_t)(lb_end + 1), &ctype_out);

        if (container_get_cardinality(flipped_container, ctype_out))
            ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
                                       ctype_out);
        else {
            container_free(flipped_container, ctype_out);
        }
    } else {
        flipped_container = container_range_of_ones(
            (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
        ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
                                   ctype_out);
    }
}

static void inplace_flip_container(roaring_array_t *x1_arr, uint16_t hb,
                                   uint16_t lb_start, uint16_t lb_end) {
    const int i = ra_get_index(x1_arr, hb);
    uint8_t ctype_in, ctype_out;
    container_t *flipped_container = NULL;
    if (i >= 0) {
        container_t *container_to_flip =
            ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in);
        flipped_container = container_inot_range(
            container_to_flip, ctype_in, (uint32_t)lb_start,
            (uint32_t)(lb_end + 1), &ctype_out);
        // if a new container was created, the old one was already freed
        if (container_get_cardinality(flipped_container, ctype_out)) {
            ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
        } else {
            container_free(flipped_container, ctype_out);
            ra_remove_at_index(x1_arr, i);
        }

    } else {
        flipped_container = container_range_of_ones(
            (uint32_t)lb_start, (uint32_t)(lb_end + 1), &ctype_out);
        ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
                                   ctype_out);
    }
}

static void insert_fully_flipped_container(roaring_array_t *ans_arr,
                                           const roaring_array_t *x1_arr,
                                           uint16_t hb) {
    const int i = ra_get_index(x1_arr, hb);
    const int j = ra_get_index(ans_arr, hb);
    uint8_t ctype_in, ctype_out;
    container_t *flipped_container = NULL;
    if (i >= 0) {
        container_t *container_to_flip =
            ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in);
        flipped_container =
            container_not(container_to_flip, ctype_in, &ctype_out);
        if (container_get_cardinality(flipped_container, ctype_out))
            ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
                                       ctype_out);
        else {
            container_free(flipped_container, ctype_out);
        }
    } else {
        flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
        ra_insert_new_key_value_at(ans_arr, -j - 1, hb, flipped_container,
                                   ctype_out);
    }
}

static void inplace_fully_flip_container(roaring_array_t *x1_arr, uint16_t hb) {
    const int i = ra_get_index(x1_arr, hb);
    uint8_t ctype_in, ctype_out;
    container_t *flipped_container = NULL;
    if (i >= 0) {
        container_t *container_to_flip =
            ra_get_container_at_index(x1_arr, (uint16_t)i, &ctype_in);
        flipped_container =
            container_inot(container_to_flip, ctype_in, &ctype_out);

        if (container_get_cardinality(flipped_container, ctype_out)) {
            ra_set_container_at_index(x1_arr, i, flipped_container, ctype_out);
        } else {
            container_free(flipped_container, ctype_out);
            ra_remove_at_index(x1_arr, i);
        }

    } else {
        flipped_container = container_range_of_ones(0U, 0x10000U, &ctype_out);
        ra_insert_new_key_value_at(x1_arr, -i - 1, hb, flipped_container,
                                   ctype_out);
    }
}

roaring_bitmap_t *roaring_bitmap_flip(const roaring_bitmap_t *x1,
                                      uint64_t range_start,
                                      uint64_t range_end) {
    if (range_start >= range_end) {
        return roaring_bitmap_copy(x1);
    }
    if (range_end >= UINT64_C(0x100000000)) {
        range_end = UINT64_C(0x100000000);
    }

    roaring_bitmap_t *ans = roaring_bitmap_create();
    roaring_bitmap_set_copy_on_write(ans, is_cow(x1));

    uint16_t hb_start = (uint16_t)(range_start >> 16);
    const uint16_t lb_start = (uint16_t)range_start;  // & 0xFFFF;
    uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
    const uint16_t lb_end = (uint16_t)(range_end - 1);  // & 0xFFFF;

    ra_append_copies_until(&ans->high_low_container, &x1->high_low_container,
                           hb_start, is_cow(x1));
    if (hb_start == hb_end) {
        insert_flipped_container(&ans->high_low_container,
                                 &x1->high_low_container, hb_start, lb_start,
                                 lb_end);
    } else {
        // start and end containers are distinct
        if (lb_start > 0) {
            // handle first (partial) container
            insert_flipped_container(&ans->high_low_container,
                                     &x1->high_low_container, hb_start,
                                     lb_start, 0xFFFF);
            ++hb_start;  // for the full containers.  Can't wrap.
        }

        if (lb_end != 0xFFFF) --hb_end;  // later we'll handle the partial block

        for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
            insert_fully_flipped_container(&ans->high_low_container,
                                           &x1->high_low_container,
                                           (uint16_t)hb);
        }

        // handle a partial final container
        if (lb_end != 0xFFFF) {
            insert_flipped_container(&ans->high_low_container,
                                     &x1->high_low_container, hb_end + 1, 0,
                                     lb_end);
            ++hb_end;
        }
    }
    ra_append_copies_after(&ans->high_low_container, &x1->high_low_container,
                           hb_end, is_cow(x1));
    return ans;
}

void roaring_bitmap_flip_inplace(roaring_bitmap_t *x1, uint64_t range_start,
                                 uint64_t range_end) {
    if (range_start >= range_end) {
        return;  // empty range
    }
    if (range_end >= UINT64_C(0x100000000)) {
        range_end = UINT64_C(0x100000000);
    }

    uint16_t hb_start = (uint16_t)(range_start >> 16);
    const uint16_t lb_start = (uint16_t)range_start;
    uint16_t hb_end = (uint16_t)((range_end - 1) >> 16);
    const uint16_t lb_end = (uint16_t)(range_end - 1);

    if (hb_start == hb_end) {
        inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
                               lb_end);
    } else {
        // start and end containers are distinct
        if (lb_start > 0) {
            // handle first (partial) container
            inplace_flip_container(&x1->high_low_container, hb_start, lb_start,
                                   0xFFFF);
            ++hb_start;  // for the full containers.  Can't wrap.
        }

        if (lb_end != 0xFFFF) --hb_end;

        for (uint32_t hb = hb_start; hb <= hb_end; ++hb) {
            inplace_fully_flip_container(&x1->high_low_container, (uint16_t)hb);
        }
        // handle a partial final container
        if (lb_end != 0xFFFF) {
            inplace_flip_container(&x1->high_low_container, hb_end + 1, 0,
                                   lb_end);
            ++hb_end;
        }
    }
}

static void offset_append_with_merge(roaring_array_t *ra, int k, container_t *c,
                                     uint8_t t) {
    int size = ra_get_size(ra);
    if (size == 0 || ra_get_key_at_index(ra, (uint16_t)(size - 1)) != k) {
        // No merge.
        ra_append(ra, (uint16_t)k, c, t);
        return;
    }

    uint8_t last_t, new_t;
    container_t *last_c, *new_c;

    // NOTE: we don't need to unwrap here, since we added last_c ourselves
    // we have the certainty it's not a shared container.
    // The same applies to c, as it's the result of calling container_offset.
    last_c = ra_get_container_at_index(ra, (uint16_t)(size - 1), &last_t);
    new_c = container_ior(last_c, last_t, c, t, &new_t);

    ra_set_container_at_index(ra, size - 1, new_c, new_t);

    // Comparison of pointers of different origin is UB (or so claim some
    // compiler makers), so we compare their bit representation only.
    if ((uintptr_t)last_c != (uintptr_t)new_c) {
        container_free(last_c, last_t);
    }
    container_free(c, t);
}

// roaring_bitmap_add_offset adds the value 'offset' to each and every value in
// a bitmap, generating a new bitmap in the process. If offset + element is
// outside of the range [0,2^32), that the element will be dropped.
// We need "offset" to be 64 bits because we want to support values
// between -0xFFFFFFFF up to +0xFFFFFFFF.
roaring_bitmap_t *roaring_bitmap_add_offset(const roaring_bitmap_t *bm,
                                            int64_t offset) {
    roaring_bitmap_t *answer;
    roaring_array_t *ans_ra;
    int64_t container_offset;
    uint16_t in_offset;

    const roaring_array_t *bm_ra = &bm->high_low_container;
    int length = bm_ra->size;

    if (offset == 0) {
        return roaring_bitmap_copy(bm);
    }

    container_offset = offset >> 16;
    in_offset = (uint16_t)(offset - container_offset * (1 << 16));

    answer = roaring_bitmap_create();
    bool cow = is_cow(bm);
    roaring_bitmap_set_copy_on_write(answer, cow);

    ans_ra = &answer->high_low_container;

    if (in_offset == 0) {
        ans_ra = &answer->high_low_container;

        for (int i = 0, j = 0; i < length; ++i) {
            int64_t key = ra_get_key_at_index(bm_ra, (uint16_t)i);
            key += container_offset;

            if (key < 0 || key >= (1 << 16)) {
                continue;
            }
            ra_append_copy(ans_ra, bm_ra, (uint16_t)i, cow);
            ans_ra->keys[j++] = (uint16_t)key;
        }
        return answer;
    }

    uint8_t t;
    const container_t *c;
    container_t *lo, *hi, **lo_ptr, **hi_ptr;
    int64_t k;

    for (int i = 0; i < length; ++i) {
        lo = hi = NULL;
        lo_ptr = hi_ptr = NULL;

        k = ra_get_key_at_index(bm_ra, (uint16_t)i) + container_offset;
        if (k >= 0 && k < (1 << 16)) {
            lo_ptr = &lo;
        }
        if (k + 1 >= 0 && k + 1 < (1 << 16)) {
            hi_ptr = &hi;
        }
        if (lo_ptr == NULL && hi_ptr == NULL) {
            continue;
        }
        c = ra_get_container_at_index(bm_ra, (uint16_t)i, &t);
        c = container_unwrap_shared(c, &t);

        container_add_offset(c, t, lo_ptr, hi_ptr, in_offset);
        if (lo != NULL) {
            offset_append_with_merge(ans_ra, (int)k, lo, t);
        }
        if (hi != NULL) {
            ra_append(ans_ra, (uint16_t)(k + 1), hi, t);
        }
        // the `lo` and `hi` container type always keep same as container `c`.
        // in the case of `container_add_offset` on bitset container, `lo` and
        // `hi` may has small cardinality, they must be repaired to array
        // container.
    }

    roaring_bitmap_repair_after_lazy(answer);  // do required type conversions.
    return answer;
}

roaring_bitmap_t *roaring_bitmap_lazy_or(const roaring_bitmap_t *x1,
                                         const roaring_bitmap_t *x2,
                                         const bool bitsetconversion) {
    uint8_t result_type = 0;
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    if (0 == length1) {
        return roaring_bitmap_copy(x2);
    }
    if (0 == length2) {
        return roaring_bitmap_copy(x1);
    }
    roaring_bitmap_t *answer =
        roaring_bitmap_create_with_capacity(length1 + length2);
    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
    int pos1 = 0, pos2 = 0;
    uint8_t type1, type2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
    while (true) {
        if (s1 == s2) {
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            container_t *c;
            if (bitsetconversion &&
                (get_container_type(c1, type1) != BITSET_CONTAINER_TYPE) &&
                (get_container_type(c2, type2) != BITSET_CONTAINER_TYPE)) {
                container_t *newc1 =
                    container_mutable_unwrap_shared(c1, &type1);
                newc1 = container_to_bitset(newc1, type1);
                type1 = BITSET_CONTAINER_TYPE;
                c = container_lazy_ior(newc1, type1, c2, type2, &result_type);
                if (c != newc1) {  // should not happen
                    container_free(newc1, type1);
                }
            } else {
                c = container_lazy_or(c1, type1, c2, type2, &result_type);
            }
            // since we assume that the initial containers are non-empty,
            // the
            // result here
            // can only be non-empty
            ra_append(&answer->high_low_container, s1, c, result_type);
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);

        } else if (s1 < s2) {  // s1 < s2
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            c1 = get_copy_of_container(c1, &type1, is_cow(x1));
            if (is_cow(x1)) {
                ra_set_container_at_index(&x1->high_low_container, pos1, c1,
                                          type1);
            }
            ra_append(&answer->high_low_container, s1, c1, type1);
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);

        } else {  // s1 > s2
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            c2 = get_copy_of_container(c2, &type2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          type2);
            }
            ra_append(&answer->high_low_container, s2, c2, type2);
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
        }
    }
    if (pos1 == length1) {
        ra_append_copy_range(&answer->high_low_container,
                             &x2->high_low_container, pos2, length2,
                             is_cow(x2));
    } else if (pos2 == length2) {
        ra_append_copy_range(&answer->high_low_container,
                             &x1->high_low_container, pos1, length1,
                             is_cow(x1));
    }
    return answer;
}

void roaring_bitmap_lazy_or_inplace(roaring_bitmap_t *x1,
                                    const roaring_bitmap_t *x2,
                                    const bool bitsetconversion) {
    uint8_t result_type = 0;
    int length1 = x1->high_low_container.size;
    const int length2 = x2->high_low_container.size;

    if (0 == length2) return;

    if (0 == length1) {
        roaring_bitmap_overwrite(x1, x2);
        return;
    }
    int pos1 = 0, pos2 = 0;
    uint8_t type1, type2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
    while (true) {
        if (s1 == s2) {
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            if (!container_is_full(c1, type1)) {
                if ((bitsetconversion == false) ||
                    (get_container_type(c1, type1) == BITSET_CONTAINER_TYPE)) {
                    c1 = get_writable_copy_if_shared(c1, &type1);
                } else {
                    // convert to bitset
                    container_t *old_c1 = c1;
                    uint8_t old_type1 = type1;
                    c1 = container_mutable_unwrap_shared(c1, &type1);
                    c1 = container_to_bitset(c1, type1);
                    container_free(old_c1, old_type1);
                    type1 = BITSET_CONTAINER_TYPE;
                }

                container_t *c2 = ra_get_container_at_index(
                    &x2->high_low_container, (uint16_t)pos2, &type2);
                container_t *c =
                    container_lazy_ior(c1, type1, c2, type2, &result_type);

                if (c != c1) {  // in this instance a new container was created,
                                // and we need to free the old one
                    container_free(c1, type1);
                }

                ra_set_container_at_index(&x1->high_low_container, pos1, c,
                                          result_type);
            }
            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);

        } else if (s1 < s2) {  // s1 < s2
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);

        } else {  // s1 > s2
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            // container_t *c2_clone = container_clone(c2, type2);
            c2 = get_copy_of_container(c2, &type2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          type2);
            }
            ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
                                       type2);
            pos1++;
            length1++;
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
        }
    }
    if (pos1 == length1) {
        ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
                             pos2, length2, is_cow(x2));
    }
}

roaring_bitmap_t *roaring_bitmap_lazy_xor(const roaring_bitmap_t *x1,
                                          const roaring_bitmap_t *x2) {
    uint8_t result_type = 0;
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    if (0 == length1) {
        return roaring_bitmap_copy(x2);
    }
    if (0 == length2) {
        return roaring_bitmap_copy(x1);
    }
    roaring_bitmap_t *answer =
        roaring_bitmap_create_with_capacity(length1 + length2);
    roaring_bitmap_set_copy_on_write(answer, is_cow(x1) || is_cow(x2));
    int pos1 = 0, pos2 = 0;
    uint8_t type1, type2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
    while (true) {
        if (s1 == s2) {
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            container_t *c =
                container_lazy_xor(c1, type1, c2, type2, &result_type);

            if (container_nonzero_cardinality(c, result_type)) {
                ra_append(&answer->high_low_container, s1, c, result_type);
            } else {
                container_free(c, result_type);
            }

            ++pos1;
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);

        } else if (s1 < s2) {  // s1 < s2
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            c1 = get_copy_of_container(c1, &type1, is_cow(x1));
            if (is_cow(x1)) {
                ra_set_container_at_index(&x1->high_low_container, pos1, c1,
                                          type1);
            }
            ra_append(&answer->high_low_container, s1, c1, type1);
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);

        } else {  // s1 > s2
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            c2 = get_copy_of_container(c2, &type2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          type2);
            }
            ra_append(&answer->high_low_container, s2, c2, type2);
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
        }
    }
    if (pos1 == length1) {
        ra_append_copy_range(&answer->high_low_container,
                             &x2->high_low_container, pos2, length2,
                             is_cow(x2));
    } else if (pos2 == length2) {
        ra_append_copy_range(&answer->high_low_container,
                             &x1->high_low_container, pos1, length1,
                             is_cow(x1));
    }
    return answer;
}

void roaring_bitmap_lazy_xor_inplace(roaring_bitmap_t *x1,
                                     const roaring_bitmap_t *x2) {
    assert(x1 != x2);
    uint8_t result_type = 0;
    int length1 = x1->high_low_container.size;
    const int length2 = x2->high_low_container.size;

    if (0 == length2) return;

    if (0 == length1) {
        roaring_bitmap_overwrite(x1, x2);
        return;
    }
    int pos1 = 0, pos2 = 0;
    uint8_t type1, type2;
    uint16_t s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
    uint16_t s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
    while (true) {
        if (s1 == s2) {
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);

            // We do the computation "in place" only when c1 is not a shared
            // container. Rationale: using a shared container safely with in
            // place computation would require making a copy and then doing the
            // computation in place which is likely less efficient than avoiding
            // in place entirely and always generating a new container.

            container_t *c;
            if (type1 == SHARED_CONTAINER_TYPE) {
                c = container_lazy_xor(c1, type1, c2, type2, &result_type);
                shared_container_free(CAST_shared(c1));  // release
            } else {
                c = container_lazy_ixor(c1, type1, c2, type2, &result_type);
            }

            if (container_nonzero_cardinality(c, result_type)) {
                ra_set_container_at_index(&x1->high_low_container, pos1, c,
                                          result_type);
                ++pos1;
            } else {
                container_free(c, result_type);
                ra_remove_at_index(&x1->high_low_container, pos1);
                --length1;
            }
            ++pos2;
            if (pos1 == length1) break;
            if (pos2 == length2) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);

        } else if (s1 < s2) {  // s1 < s2
            pos1++;
            if (pos1 == length1) break;
            s1 = ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);

        } else {  // s1 > s2
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            // container_t *c2_clone = container_clone(c2, type2);
            c2 = get_copy_of_container(c2, &type2, is_cow(x2));
            if (is_cow(x2)) {
                ra_set_container_at_index(&x2->high_low_container, pos2, c2,
                                          type2);
            }
            ra_insert_new_key_value_at(&x1->high_low_container, pos1, s2, c2,
                                       type2);
            pos1++;
            length1++;
            pos2++;
            if (pos2 == length2) break;
            s2 = ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);
        }
    }
    if (pos1 == length1) {
        ra_append_copy_range(&x1->high_low_container, &x2->high_low_container,
                             pos2, length2, is_cow(x2));
    }
}

void roaring_bitmap_repair_after_lazy(roaring_bitmap_t *r) {
    roaring_array_t *ra = &r->high_low_container;

    for (int i = 0; i < ra->size; ++i) {
        const uint8_t old_type = ra->typecodes[i];
        container_t *old_c = ra->containers[i];
        uint8_t new_type = old_type;
        container_t *new_c = container_repair_after_lazy(old_c, &new_type);
        ra->containers[i] = new_c;
        ra->typecodes[i] = new_type;
    }
}

/**
 * roaring_bitmap_rank returns the number of integers that are smaller or equal
 * to x.
 */
uint64_t roaring_bitmap_rank(const roaring_bitmap_t *bm, uint32_t x) {
    uint64_t size = 0;
    uint32_t xhigh = x >> 16;
    for (int i = 0; i < bm->high_low_container.size; i++) {
        uint32_t key = bm->high_low_container.keys[i];
        if (xhigh > key) {
            size +=
                container_get_cardinality(bm->high_low_container.containers[i],
                                          bm->high_low_container.typecodes[i]);
        } else if (xhigh == key) {
            return size + container_rank(bm->high_low_container.containers[i],
                                         bm->high_low_container.typecodes[i],
                                         x & 0xFFFF);
        } else {
            return size;
        }
    }
    return size;
}
void roaring_bitmap_rank_many(const roaring_bitmap_t *bm, const uint32_t *begin,
                              const uint32_t *end, uint64_t *ans) {
    uint64_t size = 0;

    int i = 0;
    const uint32_t *iter = begin;
    while (i < bm->high_low_container.size && iter != end) {
        uint32_t x = *iter;
        uint32_t xhigh = x >> 16;
        uint32_t key = bm->high_low_container.keys[i];
        if (xhigh > key) {
            size +=
                container_get_cardinality(bm->high_low_container.containers[i],
                                          bm->high_low_container.typecodes[i]);
            i++;
        } else if (xhigh == key) {
            uint32_t consumed = container_rank_many(
                bm->high_low_container.containers[i],
                bm->high_low_container.typecodes[i], size, iter, end, ans);
            iter += consumed;
            ans += consumed;
        } else {
            *(ans++) = size;
            iter++;
        }
    }
}

/**
 * roaring_bitmap_get_index returns the index of x, if not exsist return -1.
 */
int64_t roaring_bitmap_get_index(const roaring_bitmap_t *bm, uint32_t x) {
    int64_t index = 0;
    const uint16_t xhigh = x >> 16;
    int32_t high_idx = ra_get_index(&bm->high_low_container, xhigh);
    if (high_idx < 0) return -1;

    for (int i = 0; i < bm->high_low_container.size; i++) {
        uint32_t key = bm->high_low_container.keys[i];
        if (xhigh > key) {
            index +=
                container_get_cardinality(bm->high_low_container.containers[i],
                                          bm->high_low_container.typecodes[i]);
        } else if (xhigh == key) {
            int32_t low_idx = container_get_index(
                bm->high_low_container.containers[high_idx],
                bm->high_low_container.typecodes[high_idx], x & 0xFFFF);
            if (low_idx < 0) return -1;
            return index + low_idx;
        } else {
            return -1;
        }
    }
    return index;
}

/**
 * roaring_bitmap_smallest returns the smallest value in the set.
 * Returns UINT32_MAX if the set is empty.
 */
uint32_t roaring_bitmap_minimum(const roaring_bitmap_t *bm) {
    if (bm->high_low_container.size > 0) {
        container_t *c = bm->high_low_container.containers[0];
        uint8_t type = bm->high_low_container.typecodes[0];
        uint32_t key = bm->high_low_container.keys[0];
        uint32_t lowvalue = container_minimum(c, type);
        return lowvalue | (key << 16);
    }
    return UINT32_MAX;
}

/**
 * roaring_bitmap_smallest returns the greatest value in the set.
 * Returns 0 if the set is empty.
 */
uint32_t roaring_bitmap_maximum(const roaring_bitmap_t *bm) {
    if (bm->high_low_container.size > 0) {
        container_t *container =
            bm->high_low_container.containers[bm->high_low_container.size - 1];
        uint8_t typecode =
            bm->high_low_container.typecodes[bm->high_low_container.size - 1];
        uint32_t key =
            bm->high_low_container.keys[bm->high_low_container.size - 1];
        uint32_t lowvalue = container_maximum(container, typecode);
        return lowvalue | (key << 16);
    }
    return 0;
}

bool roaring_bitmap_select(const roaring_bitmap_t *bm, uint32_t rank,
                           uint32_t *element) {
    container_t *container;
    uint8_t typecode;
    uint16_t key;
    uint32_t start_rank = 0;
    int i = 0;
    bool valid = false;
    while (!valid && i < bm->high_low_container.size) {
        container = bm->high_low_container.containers[i];
        typecode = bm->high_low_container.typecodes[i];
        valid =
            container_select(container, typecode, &start_rank, rank, element);
        i++;
    }

    if (valid) {
        key = bm->high_low_container.keys[i - 1];
        *element |= (((uint32_t)key) << 16);  // w/o cast, key promotes signed
        return true;
    } else
        return false;
}

bool roaring_bitmap_intersect(const roaring_bitmap_t *x1,
                              const roaring_bitmap_t *x2) {
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    uint64_t answer = 0;
    int pos1 = 0, pos2 = 0;

    while (pos1 < length1 && pos2 < length2) {
        const uint16_t s1 =
            ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
        const uint16_t s2 =
            ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);

        if (s1 == s2) {
            uint8_t type1, type2;
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            if (container_intersect(c1, type1, c2, type2)) return true;
            ++pos1;
            ++pos2;
        } else if (s1 < s2) {  // s1 < s2
            pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
        } else {  // s1 > s2
            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
        }
    }
    return answer != 0;
}

bool roaring_bitmap_intersect_with_range(const roaring_bitmap_t *bm, uint64_t x,
                                         uint64_t y) {
    if (x >= y) {
        // Empty range.
        return false;
    }
    roaring_uint32_iterator_t it;
    roaring_iterator_init(bm, &it);
    if (!roaring_uint32_iterator_move_equalorlarger(&it, (uint32_t)x)) {
        // No values above x.
        return false;
    }
    if (it.current_value >= y) {
        // No values below y.
        return false;
    }
    return true;
}

uint64_t roaring_bitmap_and_cardinality(const roaring_bitmap_t *x1,
                                        const roaring_bitmap_t *x2) {
    const int length1 = x1->high_low_container.size,
              length2 = x2->high_low_container.size;
    uint64_t answer = 0;
    int pos1 = 0, pos2 = 0;
    while (pos1 < length1 && pos2 < length2) {
        const uint16_t s1 =
            ra_get_key_at_index(&x1->high_low_container, (uint16_t)pos1);
        const uint16_t s2 =
            ra_get_key_at_index(&x2->high_low_container, (uint16_t)pos2);

        if (s1 == s2) {
            uint8_t type1, type2;
            container_t *c1 = ra_get_container_at_index(&x1->high_low_container,
                                                        (uint16_t)pos1, &type1);
            container_t *c2 = ra_get_container_at_index(&x2->high_low_container,
                                                        (uint16_t)pos2, &type2);
            answer += container_and_cardinality(c1, type1, c2, type2);
            ++pos1;
            ++pos2;
        } else if (s1 < s2) {  // s1 < s2
            pos1 = ra_advance_until(&x1->high_low_container, s2, pos1);
        } else {  // s1 > s2
            pos2 = ra_advance_until(&x2->high_low_container, s1, pos2);
        }
    }
    return answer;
}

double roaring_bitmap_jaccard_index(const roaring_bitmap_t *x1,
                                    const roaring_bitmap_t *x2) {
    const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
    const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
    const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
    return (double)inter / (double)(c1 + c2 - inter);
}

uint64_t roaring_bitmap_or_cardinality(const roaring_bitmap_t *x1,
                                       const roaring_bitmap_t *x2) {
    const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
    const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
    const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
    return c1 + c2 - inter;
}

uint64_t roaring_bitmap_andnot_cardinality(const roaring_bitmap_t *x1,
                                           const roaring_bitmap_t *x2) {
    const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
    const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
    return c1 - inter;
}

uint64_t roaring_bitmap_xor_cardinality(const roaring_bitmap_t *x1,
                                        const roaring_bitmap_t *x2) {
    const uint64_t c1 = roaring_bitmap_get_cardinality(x1);
    const uint64_t c2 = roaring_bitmap_get_cardinality(x2);
    const uint64_t inter = roaring_bitmap_and_cardinality(x1, x2);
    return c1 + c2 - 2 * inter;
}

bool roaring_bitmap_contains(const roaring_bitmap_t *r, uint32_t val) {
    const uint16_t hb = val >> 16;
    /*
     * the next function call involves a binary search and lots of branching.
     */
    int32_t i = ra_get_index(&r->high_low_container, hb);
    if (i < 0) return false;

    uint8_t typecode;
    // next call ought to be cheap
    container_t *container = ra_get_container_at_index(&r->high_low_container,
                                                       (uint16_t)i, &typecode);
    // rest might be a tad expensive, possibly involving another round of binary
    // search
    return container_contains(container, val & 0xFFFF, typecode);
}

/**
 * Check whether a range of values from range_start (included) to range_end
 * (excluded) is present
 */
bool roaring_bitmap_contains_range(const roaring_bitmap_t *r,
                                   uint64_t range_start, uint64_t range_end) {
    if (range_end >= UINT64_C(0x100000000)) {
        range_end = UINT64_C(0x100000000);
    }
    if (range_start >= range_end)
        return true;  // empty range are always contained!
    if (range_end - range_start == 1)
        return roaring_bitmap_contains(r, (uint32_t)range_start);
    uint16_t hb_rs = (uint16_t)(range_start >> 16);
    uint16_t hb_re = (uint16_t)((range_end - 1) >> 16);
    const int32_t span = hb_re - hb_rs;
    const int32_t hlc_sz = ra_get_size(&r->high_low_container);
    if (hlc_sz < span + 1) {
        return false;
    }
    int32_t is = ra_get_index(&r->high_low_container, hb_rs);
    int32_t ie = ra_get_index(&r->high_low_container, hb_re);
    if ((ie < 0) || (is < 0) || ((ie - is) != span) || ie >= hlc_sz) {
        return false;
    }
    const uint32_t lb_rs = range_start & 0xFFFF;
    const uint32_t lb_re = ((range_end - 1) & 0xFFFF) + 1;
    uint8_t type;
    container_t *c =
        ra_get_container_at_index(&r->high_low_container, (uint16_t)is, &type);
    if (hb_rs == hb_re) {
        return container_contains_range(c, lb_rs, lb_re, type);
    }
    if (!container_contains_range(c, lb_rs, 1 << 16, type)) {
        return false;
    }
    c = ra_get_container_at_index(&r->high_low_container, (uint16_t)ie, &type);
    if (!container_contains_range(c, 0, lb_re, type)) {
        return false;
    }
    for (int32_t i = is + 1; i < ie; ++i) {
        c = ra_get_container_at_index(&r->high_low_container, (uint16_t)i,
                                      &type);
        if (!container_is_full(c, type)) {
            return false;
        }
    }
    return true;
}

bool roaring_bitmap_is_strict_subset(const roaring_bitmap_t *r1,
                                     const roaring_bitmap_t *r2) {
    return (roaring_bitmap_get_cardinality(r2) >
                roaring_bitmap_get_cardinality(r1) &&
            roaring_bitmap_is_subset(r1, r2));
}

/*
 * FROZEN SERIALIZATION FORMAT DESCRIPTION
 *
 * -- (beginning must be aligned by 32 bytes) --
 * <bitset_data> uint64_t[BITSET_CONTAINER_SIZE_IN_WORDS *
 * num_bitset_containers] <run_data>    rle16_t[total number of rle elements in
 * all run containers] <array_data>  uint16_t[total number of array elements in
 * all array containers] <keys>        uint16_t[num_containers] <counts>
 * uint16_t[num_containers] <typecodes>   uint8_t[num_containers] <header>
 * uint32_t
 *
 * <header> is a 4-byte value which is a bit union of FROZEN_COOKIE (15 bits)
 * and the number of containers (17 bits).
 *
 * <counts> stores number of elements for every container.
 * Its meaning depends on container type.
 * For array and bitset containers, this value is the container cardinality
 * minus one. For run container, it is the number of rle_t elements (n_runs).
 *
 * <bitset_data>,<array_data>,<run_data> are flat arrays of elements of
 * all containers of respective type.
 *
 * <*_data> and <keys> are kept close together because they are not accessed
 * during deserilization. This may reduce IO in case of large mmaped bitmaps.
 * All members have their native alignments during deserilization except
 * <header>, which is not guaranteed to be aligned by 4 bytes.
 */

size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *rb) {
    const roaring_array_t *ra = &rb->high_low_container;
    size_t num_bytes = 0;
    for (int32_t i = 0; i < ra->size; i++) {
        switch (ra->typecodes[i]) {
            case BITSET_CONTAINER_TYPE: {
                num_bytes += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
                break;
            }
            case RUN_CONTAINER_TYPE: {
                const run_container_t *rc = const_CAST_run(ra->containers[i]);
                num_bytes += rc->n_runs * sizeof(rle16_t);
                break;
            }
            case ARRAY_CONTAINER_TYPE: {
                const array_container_t *ac =
                    const_CAST_array(ra->containers[i]);
                num_bytes += ac->cardinality * sizeof(uint16_t);
                break;
            }
            default:
                roaring_unreachable;
        }
    }
    num_bytes += (2 + 2 + 1) * ra->size;  // keys, counts, typecodes
    num_bytes += 4;                       // header
    return num_bytes;
}

inline static void *arena_alloc(char **arena, size_t num_bytes) {
    char *res = *arena;
    *arena += num_bytes;
    return res;
}

void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *rb, char *buf) {
    /*
     * Note: we do not require user to supply a specifically aligned buffer.
     * Thus we have to use memcpy() everywhere.
     */

    const roaring_array_t *ra = &rb->high_low_container;

    size_t bitset_zone_size = 0;
    size_t run_zone_size = 0;
    size_t array_zone_size = 0;
    for (int32_t i = 0; i < ra->size; i++) {
        switch (ra->typecodes[i]) {
            case BITSET_CONTAINER_TYPE: {
                bitset_zone_size +=
                    BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
                break;
            }
            case RUN_CONTAINER_TYPE: {
                const run_container_t *rc = const_CAST_run(ra->containers[i]);
                run_zone_size += rc->n_runs * sizeof(rle16_t);
                break;
            }
            case ARRAY_CONTAINER_TYPE: {
                const array_container_t *ac =
                    const_CAST_array(ra->containers[i]);
                array_zone_size += ac->cardinality * sizeof(uint16_t);
                break;
            }
            default:
                roaring_unreachable;
        }
    }

    uint64_t *bitset_zone = (uint64_t *)arena_alloc(&buf, bitset_zone_size);
    rle16_t *run_zone = (rle16_t *)arena_alloc(&buf, run_zone_size);
    uint16_t *array_zone = (uint16_t *)arena_alloc(&buf, array_zone_size);
    uint16_t *key_zone = (uint16_t *)arena_alloc(&buf, 2 * ra->size);
    uint16_t *count_zone = (uint16_t *)arena_alloc(&buf, 2 * ra->size);
    uint8_t *typecode_zone = (uint8_t *)arena_alloc(&buf, ra->size);
    uint32_t *header_zone = (uint32_t *)arena_alloc(&buf, 4);

    for (int32_t i = 0; i < ra->size; i++) {
        uint16_t count;
        switch (ra->typecodes[i]) {
            case BITSET_CONTAINER_TYPE: {
                const bitset_container_t *bc =
                    const_CAST_bitset(ra->containers[i]);
                memcpy(bitset_zone, bc->words,
                       BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t));
                bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
                if (bc->cardinality != BITSET_UNKNOWN_CARDINALITY) {
                    count = (uint16_t)(bc->cardinality - 1);
                } else {
                    count =
                        (uint16_t)(bitset_container_compute_cardinality(bc) -
                                   1);
                }
                break;
            }
            case RUN_CONTAINER_TYPE: {
                const run_container_t *rc = const_CAST_run(ra->containers[i]);
                size_t num_bytes = rc->n_runs * sizeof(rle16_t);
                memcpy(run_zone, rc->runs, num_bytes);
                run_zone += rc->n_runs;
                count = (uint16_t)rc->n_runs;
                break;
            }
            case ARRAY_CONTAINER_TYPE: {
                const array_container_t *ac =
                    const_CAST_array(ra->containers[i]);
                size_t num_bytes = ac->cardinality * sizeof(uint16_t);
                memcpy(array_zone, ac->array, num_bytes);
                array_zone += ac->cardinality;
                count = (uint16_t)(ac->cardinality - 1);
                break;
            }
            default:
                roaring_unreachable;
        }
        memcpy(&count_zone[i], &count, 2);
    }
    memcpy(key_zone, ra->keys, ra->size * sizeof(uint16_t));
    memcpy(typecode_zone, ra->typecodes, ra->size * sizeof(uint8_t));
    uint32_t header = ((uint32_t)ra->size << 15) | FROZEN_COOKIE;
    memcpy(header_zone, &header, 4);
}

const roaring_bitmap_t *roaring_bitmap_frozen_view(const char *buf,
                                                   size_t length) {
    if ((uintptr_t)buf % 32 != 0) {
        return NULL;
    }

    // cookie and num_containers
    if (length < 4) {
        return NULL;
    }
    uint32_t header;
    memcpy(&header, buf + length - 4, 4);  // header may be misaligned
    if ((header & 0x7FFF) != FROZEN_COOKIE) {
        return NULL;
    }
    int32_t num_containers = (header >> 15);

    // typecodes, counts and keys
    if (length < 4 + (size_t)num_containers * (1 + 2 + 2)) {
        return NULL;
    }
    uint16_t *keys = (uint16_t *)(buf + length - 4 - num_containers * 5);
    uint16_t *counts = (uint16_t *)(buf + length - 4 - num_containers * 3);
    uint8_t *typecodes = (uint8_t *)(buf + length - 4 - num_containers * 1);

    // {bitset,array,run}_zone
    int32_t num_bitset_containers = 0;
    int32_t num_run_containers = 0;
    int32_t num_array_containers = 0;
    size_t bitset_zone_size = 0;
    size_t run_zone_size = 0;
    size_t array_zone_size = 0;
    for (int32_t i = 0; i < num_containers; i++) {
        switch (typecodes[i]) {
            case BITSET_CONTAINER_TYPE:
                num_bitset_containers++;
                bitset_zone_size +=
                    BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
                break;
            case RUN_CONTAINER_TYPE:
                num_run_containers++;
                run_zone_size += counts[i] * sizeof(rle16_t);
                break;
            case ARRAY_CONTAINER_TYPE:
                num_array_containers++;
                array_zone_size += (counts[i] + UINT32_C(1)) * sizeof(uint16_t);
                break;
            default:
                return NULL;
        }
    }
    if (length != bitset_zone_size + run_zone_size + array_zone_size +
                      5 * num_containers + 4) {
        return NULL;
    }
    uint64_t *bitset_zone = (uint64_t *)(buf);
    rle16_t *run_zone = (rle16_t *)(buf + bitset_zone_size);
    uint16_t *array_zone = (uint16_t *)(buf + bitset_zone_size + run_zone_size);

    size_t alloc_size = 0;
    alloc_size += sizeof(roaring_bitmap_t);
    alloc_size += num_containers * sizeof(container_t *);
    alloc_size += num_bitset_containers * sizeof(bitset_container_t);
    alloc_size += num_run_containers * sizeof(run_container_t);
    alloc_size += num_array_containers * sizeof(array_container_t);

    char *arena = (char *)roaring_malloc(alloc_size);
    if (arena == NULL) {
        return NULL;
    }

    roaring_bitmap_t *rb =
        (roaring_bitmap_t *)arena_alloc(&arena, sizeof(roaring_bitmap_t));
    rb->high_low_container.flags = ROARING_FLAG_FROZEN;
    rb->high_low_container.allocation_size = num_containers;
    rb->high_low_container.size = num_containers;
    rb->high_low_container.keys = (uint16_t *)keys;
    rb->high_low_container.typecodes = (uint8_t *)typecodes;
    rb->high_low_container.containers = (container_t **)arena_alloc(
        &arena, sizeof(container_t *) * num_containers);
    // Ensure offset of high_low_container.containers is known distance used in
    // C++ wrapper. sizeof(roaring_bitmap_t) is used as it is the size of the
    // only allocation that precedes high_low_container.containers. If this is
    // changed (new allocation or changed order), this offset will also need to
    // be changed in the C++ wrapper.
    assert(rb ==
           (roaring_bitmap_t *)((char *)rb->high_low_container.containers -
                                sizeof(roaring_bitmap_t)));
    for (int32_t i = 0; i < num_containers; i++) {
        switch (typecodes[i]) {
            case BITSET_CONTAINER_TYPE: {
                bitset_container_t *bitset = (bitset_container_t *)arena_alloc(
                    &arena, sizeof(bitset_container_t));
                bitset->words = bitset_zone;
                bitset->cardinality = counts[i] + UINT32_C(1);
                rb->high_low_container.containers[i] = bitset;
                bitset_zone += BITSET_CONTAINER_SIZE_IN_WORDS;
                break;
            }
            case RUN_CONTAINER_TYPE: {
                run_container_t *run = (run_container_t *)arena_alloc(
                    &arena, sizeof(run_container_t));
                run->capacity = counts[i];
                run->n_runs = counts[i];
                run->runs = run_zone;
                rb->high_low_container.containers[i] = run;
                run_zone += run->n_runs;
                break;
            }
            case ARRAY_CONTAINER_TYPE: {
                array_container_t *array = (array_container_t *)arena_alloc(
                    &arena, sizeof(array_container_t));
                array->capacity = counts[i] + UINT32_C(1);
                array->cardinality = counts[i] + UINT32_C(1);
                array->array = array_zone;
                rb->high_low_container.containers[i] = array;
                array_zone += counts[i] + UINT32_C(1);
                break;
            }
            default:
                roaring_free(arena);
                return NULL;
        }
    }

    return rb;
}

ALLOW_UNALIGNED
roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
    char *start_of_buf = (char *)buf;
    uint32_t cookie;
    int32_t num_containers;
    uint16_t *descriptive_headers;
    uint32_t *offset_headers = NULL;
    const char *run_flag_bitset = NULL;
    bool hasrun = false;

    // deserialize cookie
    memcpy(&cookie, buf, sizeof(uint32_t));
    buf += sizeof(uint32_t);
    if (cookie == SERIAL_COOKIE_NO_RUNCONTAINER) {
        memcpy(&num_containers, buf, sizeof(int32_t));
        buf += sizeof(int32_t);
        descriptive_headers = (uint16_t *)buf;
        buf += num_containers * 2 * sizeof(uint16_t);
        offset_headers = (uint32_t *)buf;
        buf += num_containers * sizeof(uint32_t);
    } else if ((cookie & 0xFFFF) == SERIAL_COOKIE) {
        num_containers = (cookie >> 16) + 1;
        hasrun = true;
        int32_t run_flag_bitset_size = (num_containers + 7) / 8;
        run_flag_bitset = buf;
        buf += run_flag_bitset_size;
        descriptive_headers = (uint16_t *)buf;
        buf += num_containers * 2 * sizeof(uint16_t);
        if (num_containers >= NO_OFFSET_THRESHOLD) {
            offset_headers = (uint32_t *)buf;
            buf += num_containers * sizeof(uint32_t);
        }
    } else {
        return NULL;
    }

    // calculate total size for allocation
    int32_t num_bitset_containers = 0;
    int32_t num_run_containers = 0;
    int32_t num_array_containers = 0;

    for (int32_t i = 0; i < num_containers; i++) {
        uint16_t tmp;
        memcpy(&tmp, descriptive_headers + 2 * i + 1, sizeof(tmp));
        uint32_t cardinality = tmp + 1;
        bool isbitmap = (cardinality > DEFAULT_MAX_SIZE);
        bool isrun = false;
        if (hasrun) {
            if ((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) {
                isbitmap = false;
                isrun = true;
            }
        }

        if (isbitmap) {
            num_bitset_containers++;
        } else if (isrun) {
            num_run_containers++;
        } else {
            num_array_containers++;
        }
    }

    size_t alloc_size = 0;
    alloc_size += sizeof(roaring_bitmap_t);
    alloc_size += num_containers * sizeof(container_t *);
    alloc_size += num_bitset_containers * sizeof(bitset_container_t);
    alloc_size += num_run_containers * sizeof(run_container_t);
    alloc_size += num_array_containers * sizeof(array_container_t);
    alloc_size += num_containers * sizeof(uint16_t);  // keys
    alloc_size += num_containers * sizeof(uint8_t);   // typecodes

    // allocate bitmap and construct containers
    char *arena = (char *)roaring_malloc(alloc_size);
    if (arena == NULL) {
        return NULL;
    }

    roaring_bitmap_t *rb =
        (roaring_bitmap_t *)arena_alloc(&arena, sizeof(roaring_bitmap_t));
    rb->high_low_container.flags = ROARING_FLAG_FROZEN;
    rb->high_low_container.allocation_size = num_containers;
    rb->high_low_container.size = num_containers;
    rb->high_low_container.containers = (container_t **)arena_alloc(
        &arena, sizeof(container_t *) * num_containers);

    uint16_t *keys =
        (uint16_t *)arena_alloc(&arena, num_containers * sizeof(uint16_t));
    uint8_t *typecodes =
        (uint8_t *)arena_alloc(&arena, num_containers * sizeof(uint8_t));

    rb->high_low_container.keys = keys;
    rb->high_low_container.typecodes = typecodes;

    for (int32_t i = 0; i < num_containers; i++) {
        uint16_t tmp;
        memcpy(&tmp, descriptive_headers + 2 * i + 1, sizeof(tmp));
        int32_t cardinality = tmp + 1;
        bool isbitmap = (cardinality > DEFAULT_MAX_SIZE);
        bool isrun = false;
        if (hasrun) {
            if ((run_flag_bitset[i / 8] & (1 << (i % 8))) != 0) {
                isbitmap = false;
                isrun = true;
            }
        }

        keys[i] = descriptive_headers[2 * i];

        if (isbitmap) {
            typecodes[i] = BITSET_CONTAINER_TYPE;
            bitset_container_t *c = (bitset_container_t *)arena_alloc(
                &arena, sizeof(bitset_container_t));
            c->cardinality = cardinality;
            if (offset_headers != NULL) {
                c->words = (uint64_t *)(start_of_buf + offset_headers[i]);
            } else {
                c->words = (uint64_t *)buf;
                buf += BITSET_CONTAINER_SIZE_IN_WORDS * sizeof(uint64_t);
            }
            rb->high_low_container.containers[i] = c;
        } else if (isrun) {
            typecodes[i] = RUN_CONTAINER_TYPE;
            run_container_t *c =
                (run_container_t *)arena_alloc(&arena, sizeof(run_container_t));
            c->capacity = cardinality;
            uint16_t n_runs;
            if (offset_headers != NULL) {
                memcpy(&n_runs, start_of_buf + offset_headers[i],
                       sizeof(uint16_t));
                c->n_runs = n_runs;
                c->runs = (rle16_t *)(start_of_buf + offset_headers[i] +
                                      sizeof(uint16_t));
            } else {
                memcpy(&n_runs, buf, sizeof(uint16_t));
                c->n_runs = n_runs;
                buf += sizeof(uint16_t);
                c->runs = (rle16_t *)buf;
                buf += c->n_runs * sizeof(rle16_t);
            }
            rb->high_low_container.containers[i] = c;
        } else {
            typecodes[i] = ARRAY_CONTAINER_TYPE;
            array_container_t *c = (array_container_t *)arena_alloc(
                &arena, sizeof(array_container_t));
            c->cardinality = cardinality;
            c->capacity = cardinality;
            if (offset_headers != NULL) {
                c->array = (uint16_t *)(start_of_buf + offset_headers[i]);
            } else {
                c->array = (uint16_t *)buf;
                buf += cardinality * sizeof(uint16_t);
            }
            rb->high_low_container.containers[i] = c;
        }
    }

    return rb;
}

bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) {
    uint32_t max_value = roaring_bitmap_maximum(r);
    size_t new_array_size = (size_t)(((uint64_t)max_value + 63) / 64);
    bool resize_ok = bitset_resize(bitset, new_array_size, true);
    if (!resize_ok) {
        return false;
    }
    const roaring_array_t *ra = &r->high_low_container;
    for (int i = 0; i < ra->size; ++i) {
        uint64_t *words = bitset->array + (ra->keys[i] << 10);
        uint8_t type = ra->typecodes[i];
        const container_t *c = ra->containers[i];
        if (type == SHARED_CONTAINER_TYPE) {
            c = container_unwrap_shared(c, &type);
        }
        switch (type) {
            case BITSET_CONTAINER_TYPE: {
                size_t max_word_index = new_array_size - (ra->keys[i] << 10);
                if (max_word_index > 1024) {
                    max_word_index = 1024;
                }
                const bitset_container_t *src = const_CAST_bitset(c);
                memcpy(words, src->words, max_word_index * sizeof(uint64_t));
            } break;
            case ARRAY_CONTAINER_TYPE: {
                const array_container_t *src = const_CAST_array(c);
                bitset_set_list(words, src->array, src->cardinality);
            } break;
            case RUN_CONTAINER_TYPE: {
                const run_container_t *src = const_CAST_run(c);
                for (int32_t rlepos = 0; rlepos < src->n_runs; ++rlepos) {
                    rle16_t rle = src->runs[rlepos];
                    bitset_set_lenrange(words, rle.value, rle.length);
                }
            } break;
            default:
                roaring_unreachable;
        }
    }
    return true;
}

#ifdef __cplusplus
}
}
}  // extern "C" { namespace roaring {
#endif