aboutsummaryrefslogtreecommitdiffstats
path: root/contrib
diff options
context:
space:
mode:
authorrobot-piglet <robot-piglet@yandex-team.com>2024-10-05 22:20:55 +0300
committerrobot-piglet <robot-piglet@yandex-team.com>2024-10-05 22:32:53 +0300
commit2eec23b402e467c4997cae5f4fea19abad3195a6 (patch)
tree7bd2721e854318523c28c0915c235407f78cdfef /contrib
parentc3797467c201a8de261c9da5475b2386197cb4b0 (diff)
downloadydb-2eec23b402e467c4997cae5f4fea19abad3195a6.tar.gz
Intermediate changes
commit_hash:89e9af18f838d98d46029c1a50881e032a4efa72
Diffstat (limited to 'contrib')
-rw-r--r--contrib/libs/croaring/README.md15
-rw-r--r--contrib/libs/croaring/include/roaring/roaring.h23
-rw-r--r--contrib/libs/croaring/include/roaring/roaring64.h19
-rw-r--r--contrib/libs/croaring/include/roaring/roaring_version.h4
-rw-r--r--contrib/libs/croaring/src/bitset_util.c31
-rw-r--r--contrib/libs/croaring/src/roaring.c2
-rw-r--r--contrib/libs/croaring/src/roaring64.c25
-rw-r--r--contrib/libs/croaring/ya.make5
8 files changed, 88 insertions, 36 deletions
diff --git a/contrib/libs/croaring/README.md b/contrib/libs/croaring/README.md
index f582d31154..eb5ee92752 100644
--- a/contrib/libs/croaring/README.md
+++ b/contrib/libs/croaring/README.md
@@ -410,10 +410,17 @@ int main() {
// we can write a bitmap to a pointer and recover it later
uint32_t expectedsize = roaring_bitmap_portable_size_in_bytes(r1);
char *serializedbytes = malloc(expectedsize);
+ // When serializing data to a file, we recommend that you also use
+ // checksums so that, at deserialization, you can be confident
+ // that you are recovering the correct data.
roaring_bitmap_portable_serialize(r1, serializedbytes);
// Note: it is expected that the input follows the specification
// https://github.com/RoaringBitmap/RoaringFormatSpec
// otherwise the result may be unusable.
+ // The 'roaring_bitmap_portable_deserialize_safe' function will not read
+ // beyond expectedsize bytes.
+ // We recommend you further use checksums to make sure that the input is from
+ // serialized data.
roaring_bitmap_t *t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
if(t == NULL) { return EXIT_FAILURE; }
const char *reason = NULL;
@@ -428,7 +435,11 @@ int main() {
roaring_bitmap_portable_deserialize_size(serializedbytes, expectedsize);
assert(sizeofbitmap ==
expectedsize); // sizeofbitmap would be zero if no bitmap were found
- // we can also read the bitmap "safely" by specifying a byte size limit:
+ // We can also read the bitmap "safely" by specifying a byte size limit.
+ // The 'roaring_bitmap_portable_deserialize_safe' function will not read
+ // beyond expectedsize bytes.
+ // We recommend you further use checksums to make sure that the input is from
+ // serialized data.
t = roaring_bitmap_portable_deserialize_safe(serializedbytes, expectedsize);
if(t == NULL) {
printf("Problem during deserialization.\n");
@@ -500,6 +511,8 @@ We also support efficient 64-bit compressed bitmaps in C:
roaring64_bitmap_free(r2);
```
+The API is similar to the conventional 32-bit bitmaps. Please see
+the header file `roaring64.h` (compare with `roaring.h`).
# Conventional bitsets (C)
diff --git a/contrib/libs/croaring/include/roaring/roaring.h b/contrib/libs/croaring/include/roaring/roaring.h
index 295bc3cb33..4256ad0ca5 100644
--- a/contrib/libs/croaring/include/roaring/roaring.h
+++ b/contrib/libs/croaring/include/roaring/roaring.h
@@ -552,6 +552,10 @@ size_t roaring_bitmap_shrink_to_fit(roaring_bitmap_t *r);
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
+ *
+ * When serializing data to a file, we recommend that you also use
+ * checksums so that, at deserialization, you can be confident
+ * that you are recovering the correct data.
*/
size_t roaring_bitmap_serialize(const roaring_bitmap_t *r, char *buf);
@@ -615,7 +619,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
* https://github.com/RoaringBitmap/RoaringFormatSpec
*
* The function itself is safe in the sense that it will not cause buffer
- * overflows. However, for correct operations, it is assumed that the bitmap
+ * overflows: it will not read beyond the scope of the provided buffer
+ * (buf,maxbytes).
+ *
+ * However, for correct operations, it is assumed that the bitmap
* read was once serialized from a valid bitmap (i.e., it follows the format
* specification). If you provided an incorrect input (garbage), then the bitmap
* read may not be in a valid state and following operations may not lead to
@@ -625,8 +632,10 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize(const char *buf);
* but not for random inputs.
*
* You may use roaring_bitmap_internal_validate to check the validity of the
- * bitmap prior to using it. You may also use other strategies to check for
- * corrupted inputs (e.g., checksums).
+ * bitmap prior to using it.
+ *
+ * We recommend that you use checksums to check that serialized data corresponds
+ * to a serialized bitmap.
*
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
@@ -688,6 +697,10 @@ size_t roaring_bitmap_portable_size_in_bytes(const roaring_bitmap_t *r);
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
+ *
+ * When serializing data to a file, we recommend that you also use
+ * checksums so that, at deserialization, you can be confident
+ * that you are recovering the correct data.
*/
size_t roaring_bitmap_portable_serialize(const roaring_bitmap_t *r, char *buf);
@@ -722,6 +735,10 @@ size_t roaring_bitmap_frozen_size_in_bytes(const roaring_bitmap_t *r);
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
+ *
+ * When serializing data to a file, we recommend that you also use
+ * checksums so that, at deserialization, you can be confident
+ * that you are recovering the correct data.
*/
void roaring_bitmap_frozen_serialize(const roaring_bitmap_t *r, char *buf);
diff --git a/contrib/libs/croaring/include/roaring/roaring64.h b/contrib/libs/croaring/include/roaring/roaring64.h
index fd89feb5e0..c1f574d605 100644
--- a/contrib/libs/croaring/include/roaring/roaring64.h
+++ b/contrib/libs/croaring/include/roaring/roaring64.h
@@ -1,13 +1,13 @@
#ifndef ROARING64_H
#define ROARING64_H
-#include <roaring.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <roaring/memory.h>
#include <roaring/portability.h>
+#include <roaring/roaring.h>
#include <roaring/roaring_types.h>
#ifdef __cplusplus
@@ -511,6 +511,10 @@ size_t roaring64_bitmap_portable_size_in_bytes(const roaring64_bitmap_t *r);
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
+ *
+ * When serializing data to a file, we recommend that you also use
+ * checksums so that, at deserialization, you can be confident
+ * that you are recovering the correct data.
*/
size_t roaring64_bitmap_portable_serialize(const roaring64_bitmap_t *r,
char *buf);
@@ -525,14 +529,17 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf,
size_t maxbytes);
/**
- * Read a bitmap from a serialized buffer safely (reading up to maxbytes).
+ * Read a bitmap from a serialized buffer (reading up to maxbytes).
* In case of failure, NULL is returned.
*
* This is meant to be compatible with other languages
* https://github.com/RoaringBitmap/RoaringFormatSpec#extension-for-64-bit-implementations
*
* The function itself is safe in the sense that it will not cause buffer
- * overflows. However, for correct operations, it is assumed that the bitmap
+ * overflows: it will not read beyond the scope of the provided buffer
+ * (buf,maxbytes).
+ *
+ * However, for correct operations, it is assumed that the bitmap
* read was once serialized from a valid bitmap (i.e., it follows the format
* specification). If you provided an incorrect input (garbage), then the bitmap
* read may not be in a valid state and following operations may not lead to
@@ -541,6 +548,12 @@ size_t roaring64_bitmap_portable_deserialize_size(const char *buf,
* order. This is is guaranteed to happen when serializing an existing bitmap,
* but not for random inputs.
*
+ * You may use roaring64_bitmap_internal_validate to check the validity of the
+ * bitmap prior to using it.
+ *
+ * We recommend that you use checksums to check that serialized data corresponds
+ * to a serialized bitmap.
+ *
* This function is endian-sensitive. If you have a big-endian system (e.g., a
* mainframe IBM s390x), the data format is going to be big-endian and not
* compatible with little-endian systems.
diff --git a/contrib/libs/croaring/include/roaring/roaring_version.h b/contrib/libs/croaring/include/roaring/roaring_version.h
index 0d136bbbd3..d40681f4f6 100644
--- a/contrib/libs/croaring/include/roaring/roaring_version.h
+++ b/contrib/libs/croaring/include/roaring/roaring_version.h
@@ -2,11 +2,11 @@
// /include/roaring/roaring_version.h automatically generated by release.py, do not change by hand
#ifndef ROARING_INCLUDE_ROARING_VERSION
#define ROARING_INCLUDE_ROARING_VERSION
-#define ROARING_VERSION "4.1.2"
+#define ROARING_VERSION "4.1.6"
enum {
ROARING_VERSION_MAJOR = 4,
ROARING_VERSION_MINOR = 1,
- ROARING_VERSION_REVISION = 2
+ ROARING_VERSION_REVISION = 6
};
#endif // ROARING_INCLUDE_ROARING_VERSION
// clang-format on \ No newline at end of file
diff --git a/contrib/libs/croaring/src/bitset_util.c b/contrib/libs/croaring/src/bitset_util.c
index 0ae7d92582..6bc12b44b4 100644
--- a/contrib/libs/croaring/src/bitset_util.c
+++ b/contrib/libs/croaring/src/bitset_util.c
@@ -613,16 +613,13 @@ size_t bitset_extract_setbits_avx512(const uint64_t *words, size_t length,
for (; (i < length) && (out < safeout); ++i) {
uint64_t w = words[i];
while ((w != 0) && (out < safeout)) {
- uint64_t t =
- w & (~w + 1); // on x64, should compile to BLSI (careful: the
- // Intel compiler seems to fail)
int r =
roaring_trailing_zeroes(w); // on x64, should compile to TZCNT
uint32_t val = r + base;
memcpy(out, &val,
sizeof(uint32_t)); // should be compiled as a MOV on x64
out++;
- w ^= t;
+ w &= (w - 1);
}
base += 64;
}
@@ -667,15 +664,12 @@ size_t bitset_extract_setbits_avx512_uint16(const uint64_t *array,
for (; (i < length) && (out < safeout); ++i) {
uint64_t w = array[i];
while ((w != 0) && (out < safeout)) {
- uint64_t t =
- w & (~w + 1); // on x64, should compile to BLSI (careful: the
- // Intel compiler seems to fail)
int r =
roaring_trailing_zeroes(w); // on x64, should compile to TZCNT
uint32_t val = r + base;
memcpy(out, &val, sizeof(uint16_t));
out++;
- w ^= t;
+ w &= (w - 1);
}
base += 64;
}
@@ -725,16 +719,13 @@ size_t bitset_extract_setbits_avx2(const uint64_t *words, size_t length,
for (; (i < length) && (out < safeout); ++i) {
uint64_t w = words[i];
while ((w != 0) && (out < safeout)) {
- uint64_t t =
- w & (~w + 1); // on x64, should compile to BLSI (careful: the
- // Intel compiler seems to fail)
int r =
roaring_trailing_zeroes(w); // on x64, should compile to TZCNT
uint32_t val = r + base;
memcpy(out, &val,
sizeof(uint32_t)); // should be compiled as a MOV on x64
out++;
- w ^= t;
+ w &= (w - 1);
}
base += 64;
}
@@ -749,16 +740,13 @@ size_t bitset_extract_setbits(const uint64_t *words, size_t length,
for (size_t i = 0; i < length; ++i) {
uint64_t w = words[i];
while (w != 0) {
- uint64_t t =
- w & (~w + 1); // on x64, should compile to BLSI (careful: the
- // Intel compiler seems to fail)
int r =
roaring_trailing_zeroes(w); // on x64, should compile to TZCNT
uint32_t val = r + base;
memcpy(out + outpos, &val,
sizeof(uint32_t)); // should be compiled as a MOV on x64
outpos++;
- w ^= t;
+ w &= (w - 1);
}
base += 64;
}
@@ -772,10 +760,9 @@ size_t bitset_extract_intersection_setbits_uint16(
for (size_t i = 0; i < length; ++i) {
uint64_t w = words1[i] & words2[i];
while (w != 0) {
- uint64_t t = w & (~w + 1);
int r = roaring_trailing_zeroes(w);
out[outpos++] = (uint16_t)(r + base);
- w ^= t;
+ w &= (w - 1);
}
base += 64;
}
@@ -836,11 +823,10 @@ size_t bitset_extract_setbits_sse_uint16(const uint64_t *words, size_t length,
for (; (i < length) && (out < safeout); ++i) {
uint64_t w = words[i];
while ((w != 0) && (out < safeout)) {
- uint64_t t = w & (~w + 1);
int r = roaring_trailing_zeroes(w);
*out = (uint16_t)(r + base);
out++;
- w ^= t;
+ w &= (w - 1);
}
base += 64;
}
@@ -864,10 +850,9 @@ size_t bitset_extract_setbits_uint16(const uint64_t *words, size_t length,
for (size_t i = 0; i < length; ++i) {
uint64_t w = words[i];
while (w != 0) {
- uint64_t t = w & (~w + 1);
int r = roaring_trailing_zeroes(w);
out[outpos++] = (uint16_t)(r + base);
- w ^= t;
+ w &= (w - 1);
}
base += 64;
}
@@ -1158,4 +1143,4 @@ void bitset_flip_list(uint64_t *words, const uint16_t *list, uint64_t length) {
#endif
#if defined(__GNUC__) && !defined(__clang__)
#pragma GCC diagnostic pop
-#endif \ No newline at end of file
+#endif
diff --git a/contrib/libs/croaring/src/roaring.c b/contrib/libs/croaring/src/roaring.c
index e3847bae92..5a71fd39c3 100644
--- a/contrib/libs/croaring/src/roaring.c
+++ b/contrib/libs/croaring/src/roaring.c
@@ -3319,7 +3319,7 @@ roaring_bitmap_t *roaring_bitmap_portable_deserialize_frozen(const char *buf) {
bool roaring_bitmap_to_bitset(const roaring_bitmap_t *r, bitset_t *bitset) {
uint32_t max_value = roaring_bitmap_maximum(r);
- size_t new_array_size = (size_t)(((uint64_t)max_value + 63) / 64);
+ size_t new_array_size = (size_t)(max_value / 64 + 1);
bool resize_ok = bitset_resize(bitset, new_array_size, true);
if (!resize_ok) {
return false;
diff --git a/contrib/libs/croaring/src/roaring64.c b/contrib/libs/croaring/src/roaring64.c
index e63d3d965c..914faefe0b 100644
--- a/contrib/libs/croaring/src/roaring64.c
+++ b/contrib/libs/croaring/src/roaring64.c
@@ -1954,6 +1954,7 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(
roaring64_bitmap_t *r = roaring64_bitmap_create();
// Iterate through buckets ordered by increasing keys.
+ int64_t previous_high32 = -1;
for (uint64_t bucket = 0; bucket < buckets; ++bucket) {
// Read as uint32 the most significant 32 bits of the bucket.
uint32_t high32;
@@ -1964,6 +1965,12 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(
memcpy(&high32, buf, sizeof(high32));
buf += sizeof(high32);
read_bytes += sizeof(high32);
+ // High 32 bits must be strictly increasing.
+ if (high32 <= previous_high32) {
+ roaring64_bitmap_free(r);
+ return NULL;
+ }
+ previous_high32 = high32;
// Read the 32-bit Roaring bitmaps representing the least significant
// bits of a set of elements.
@@ -1983,6 +1990,24 @@ roaring64_bitmap_t *roaring64_bitmap_portable_deserialize_safe(
buf += bitmap32_size;
read_bytes += bitmap32_size;
+ // While we don't attempt to validate much, we must ensure that there
+ // is no duplication in the high 48 bits - inserting into the ART
+ // assumes (or UB) no duplicate keys. The top 32 bits must be unique
+ // because we check for strict increasing values of high32, but we
+ // must also ensure the top 16 bits within each 32-bit bitmap are also
+ // at least unique (we ensure they're strictly increasing as well,
+ // which they must be for a _valid_ bitmap, since it's cheaper to check)
+ int32_t last_bitmap_key = -1;
+ for (int i = 0; i < bitmap32->high_low_container.size; i++) {
+ uint16_t key = bitmap32->high_low_container.keys[i];
+ if (key <= last_bitmap_key) {
+ roaring_bitmap_free(bitmap32);
+ roaring64_bitmap_free(r);
+ return NULL;
+ }
+ last_bitmap_key = key;
+ }
+
// Insert all containers of the 32-bit bitmap into the 64-bit bitmap.
move_from_roaring32_offset(r, bitmap32, high32);
roaring_bitmap_free(bitmap32);
diff --git a/contrib/libs/croaring/ya.make b/contrib/libs/croaring/ya.make
index 55acab5673..c180e97069 100644
--- a/contrib/libs/croaring/ya.make
+++ b/contrib/libs/croaring/ya.make
@@ -10,13 +10,12 @@ LICENSE(
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-VERSION(4.1.2)
+VERSION(4.1.6)
-ORIGINAL_SOURCE(https://github.com/RoaringBitmap/CRoaring/archive/v4.1.2.tar.gz)
+ORIGINAL_SOURCE(https://github.com/RoaringBitmap/CRoaring/archive/v4.1.6.tar.gz)
ADDINCL(
GLOBAL contrib/libs/croaring/include
- contrib/libs/croaring/include/roaring
)
NO_COMPILER_WARNINGS()