diff options
author | robot-piglet <robot-piglet@yandex-team.com> | 2024-10-05 22:20:55 +0300 |
---|---|---|
committer | robot-piglet <robot-piglet@yandex-team.com> | 2024-10-05 22:32:53 +0300 |
commit | 2eec23b402e467c4997cae5f4fea19abad3195a6 (patch) | |
tree | 7bd2721e854318523c28c0915c235407f78cdfef /contrib/libs/croaring/src | |
parent | c3797467c201a8de261c9da5475b2386197cb4b0 (diff) | |
download | ydb-2eec23b402e467c4997cae5f4fea19abad3195a6.tar.gz |
Intermediate changes
commit_hash:89e9af18f838d98d46029c1a50881e032a4efa72
Diffstat (limited to 'contrib/libs/croaring/src')
-rw-r--r-- | contrib/libs/croaring/src/bitset_util.c | 31 | ||||
-rw-r--r-- | contrib/libs/croaring/src/roaring.c | 2 | ||||
-rw-r--r-- | contrib/libs/croaring/src/roaring64.c | 25 |
3 files changed, 34 insertions, 24 deletions
diff --git a/contrib/libs/croaring/src/bitset_util.c b/contrib/libs/croaring/src/bitset_util.c index 0ae7d92582..6bc12b44b4 100644 --- a/contrib/libs/croaring/src/bitset_util.c +++ b/contrib/libs/croaring/src/bitset_util.c @@ -613,16 +613,13 @@ size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length, for (; (i < length) && (out < safeout); ++i) { uint64_t w = words[i]; while ((w != 0) && (out < safeout)) { - uint64_t t = - w & (~w + 1); // on x64, should compile to BLSI (careful: the - // Intel compiler seems to fail) int r = roaring_trailing_zeroes(w); // on x64, should compile to TZCNT uint32_t val = r + base; memcpy(out, &val, sizeof(uint32_t)); // should be compiled as a MOV on x64 out++; - w ^= t; + w &= (w - 1); } base += 64; } @@ -667,15 +664,12 @@ size_t bitset_extract_setbits_avx512_uint16(const uint64_t *array, for (; (i < length) && (out < safeout); ++i) { uint64_t w = array[i]; while ((w != 0) && (out < safeout)) { - uint64_t t = - w & (~w + 1); // on x64, should compile to BLSI (careful: the - // Intel compiler seems to fail) int r = roaring_trailing_zeroes(w); // on x64, should compile to TZCNT uint32_t val = r + base; memcpy(out, &val, sizeof(uint16_t)); out++; - w ^= t; + w &= (w - 1); } base += 64; } @@ -725,16 +719,13 @@ size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length, for (; (i < length) && (out < safeout); ++i) { uint64_t w = words[i]; while ((w != 0) && (out < safeout)) { - uint64_t t = - w & (~w + 1); // on x64, should compile to BLSI (careful: the - // Intel compiler seems to fail) int r = roaring_trailing_zeroes(w); // on x64, should compile to TZCNT uint32_t val = r + base; memcpy(out, &val, sizeof(uint32_t)); // should be compiled as a MOV on x64 out++; - w ^= t; + w &= (w - 1); } base += 64; } @@ -749,16 +740,13 @@ size_t bitset_extract_setbits(const uint64_t *words, size_t length, for (size_t i = 0; i < length; ++i) { uint64_t w = words[i]; while (w != 0) { - uint64_t t = - w & (~w + 1); // on x64, should compile to BLSI (careful: the - // Intel compiler seems to fail) int r = roaring_trailing_zeroes(w); // on x64, should compile to TZCNT uint32_t val = r + base; memcpy(out + outpos, &val, sizeof(uint32_t)); // should be compiled as a MOV on x64 outpos++; - w ^= t; + w &= (w - 1); } base += 64; } @@ -772,10 +760,9 @@ size_t bitset_extract_intersection_setbits_uint16( for (size_t i = 0; i < length; ++i) { uint64_t w = words1[i] & words2[i]; while (w != 0) { - uint64_t t = w & (~w + 1); int r = roaring_trailing_zeroes(w); out[outpos++] = (uint16_t)(r + base); - w ^= t; + w &= (w - 1); } base += 64; } @@ -836,11 +823,10 @@ size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length, for (; (i < length) && (out < safeout); ++i) { uint64_t w = words[i]; while ((w != 0) && (out < safeout)) { - uint64_t t = w & (~w + 1); int r = roaring_trailing_zeroes(w); *out = (uint16_t)(r + base); out++; - w ^= t; + w &= (w - 1); } base += 64; } @@ -864,10 +850,9 @@ size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length, for (size_t i = 0; i < length; ++i) { uint64_t w = words[i]; while (w != 0) { - uint64_t t = w & (~w + 1); int r = roaring_trailing_zeroes(w); out[outpos++] = (uint16_t)(r + base); - w ^= t; + w &= (w - 1); } base += 64; } @@ -1158,4 +1143,4 @@ void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) { #endif #if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic pop -#endif
\ No newline at end of file +#endif diff --git a/contrib/libs/croaring/src/roaring.c b/contrib/libs/croaring/src/roaring.c index e3847bae92..5a71fd39c3 100644 --- a/contrib/libs/croaring/src/roaring.c +++ b/contrib/libs/croaring/src/roaring.c @@ -3319,7 +3319,7 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) { bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) { uint32_t max_value = roaring_bitmap_maximum(r); - size_t new_array_size = (size_t)(((uint64_t)max_value + 63) / 64); + size_t new_array_size = (size_t)(max_value / 64 + 1); bool resize_ok = bitset_resize(bitset, new_array_size, true); if (!resize_ok) { return false; diff --git a/contrib/libs/croaring/src/roaring64.c b/contrib/libs/croaring/src/roaring64.c index e63d3d965c..914faefe0b 100644 --- a/contrib/libs/croaring/src/roaring64.c +++ b/contrib/libs/croaring/src/roaring64.c @@ -1954,6 +1954,7 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( roaring64_bitmap_t *r = roaring64_bitmap_create(); // Iterate through buckets ordered by increasing keys. + int64_t previous_high32 = -1; for (uint64_t bucket = 0; bucket < buckets; ++bucket) { // Read as uint32 the most significant 32 bits of the bucket. uint32_t high32; @@ -1964,6 +1965,12 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( memcpy(&high32, buf, sizeof(high32)); buf += sizeof(high32); read_bytes += sizeof(high32); + // High 32 bits must be strictly increasing. + if (high32 <= previous_high32) { + roaring64_bitmap_free(r); + return NULL; + } + previous_high32 = high32; // Read the 32-bit Roaring bitmaps representing the least significant // bits of a set of elements. @@ -1983,6 +1990,24 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe( buf += bitmap32_size; read_bytes += bitmap32_size; + // While we don't attempt to validate much, we must ensure that there + // is no duplication in the high 48 bits - inserting into the ART + // assumes (or UB) no duplicate keys. The top 32 bits must be unique + // because we check for strict increasing values of high32, but we + // must also ensure the top 16 bits within each 32-bit bitmap are also + // at least unique (we ensure they're strictly increasing as well, + // which they must be for a _valid_ bitmap, since it's cheaper to check) + int32_t last_bitmap_key = -1; + for (int i = 0; i < bitmap32->high_low_container.size; i++) { + uint16_t key = bitmap32->high_low_container.keys[i]; + if (key <= last_bitmap_key) { + roaring_bitmap_free(bitmap32); + roaring64_bitmap_free(r); + return NULL; + } + last_bitmap_key = key; + } + // Insert all containers of the 32-bit bitmap into the 64-bit bitmap. move_from_roaring32_offset(r, bitmap32, high32); roaring_bitmap_free(bitmap32); |