aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/croaring/src/array_util.c
diff options
context:
space:
mode:
authorAlexSm <alex@ydb.tech>2024-01-04 15:09:05 +0100
committerGitHub <noreply@github.com>2024-01-04 15:09:05 +0100
commitdab291146f6cd7d35684e3a1150e5bb1c412982c (patch)
tree36ef35f6cacb6432845a4a33f940c95871036b32 /contrib/libs/croaring/src/array_util.c
parent63660ad5e7512029fd0218e7a636580695a24e1f (diff)
downloadydb-dab291146f6cd7d35684e3a1150e5bb1c412982c.tar.gz
Library import 5, delete go dependencies (#832)
* Library import 5, delete go dependencies * Fix yt client
Diffstat (limited to 'contrib/libs/croaring/src/array_util.c')
-rw-r--r--contrib/libs/croaring/src/array_util.c2170
1 files changed, 0 insertions, 2170 deletions
diff --git a/contrib/libs/croaring/src/array_util.c b/contrib/libs/croaring/src/array_util.c
deleted file mode 100644
index eb7dcbc497..0000000000
--- a/contrib/libs/croaring/src/array_util.c
+++ /dev/null
@@ -1,2170 +0,0 @@
-#include <assert.h>
-#include <stdbool.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <roaring/array_util.h>
-#include <roaring/portability.h>
-#include <roaring/utilasm.h>
-
-#if CROARING_IS_X64
-#ifndef CROARING_COMPILER_SUPPORTS_AVX512
-#error "CROARING_COMPILER_SUPPORTS_AVX512 needs to be defined."
-#endif // CROARING_COMPILER_SUPPORTS_AVX512
-#endif
-
-#ifdef __cplusplus
-using namespace ::roaring::internal;
-extern "C" { namespace roaring { namespace internal {
-#endif
-
-extern inline int32_t binarySearch(const uint16_t *array, int32_t lenarray,
- uint16_t ikey);
-
-#if CROARING_IS_X64
-// used by intersect_vector16
-ALIGNED(0x1000)
-static const uint8_t shuffle_mask16[] = {
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 4, 5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
- 6, 7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 4, 5, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
- 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
- 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 4, 5, 6, 7, 8, 9, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
- 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 4, 5, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
- 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
- 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 4, 5, 6, 7, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9,
- 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
- 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 4, 5, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9,
- 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9,
- 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 4, 5, 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9,
- 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
- 6, 7, 8, 9, 10, 11, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 0xFF, 0xFF, 0xFF, 0xFF, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 12, 13, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 12, 13, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 4, 5, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 12, 13, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
- 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
- 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 4, 5, 6, 7, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
- 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 12, 13, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 4, 5, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 12, 13,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
- 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 4, 5, 8, 9, 12, 13, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
- 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9,
- 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
- 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9,
- 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 4, 5, 6, 7, 8, 9, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
- 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11, 12, 13, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11,
- 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
- 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 4, 5, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 10, 11,
- 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11,
- 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 10, 11, 12, 13,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 4, 5, 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 10, 11,
- 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
- 6, 7, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13,
- 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9,
- 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 10, 11,
- 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9,
- 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 10, 11,
- 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 4, 5, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
- 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 10, 11,
- 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
- 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 6, 7, 8, 9, 10, 11, 12, 13,
- 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
- 6, 7, 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 10, 11, 12, 13, 0xFF, 0xFF, 14, 15, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 14, 15, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 4, 5, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 4, 5, 6, 7, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 14, 15, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
- 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 4, 5, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 4, 5, 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
- 6, 7, 8, 9, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 10, 11,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 10, 11, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 10, 11,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 10, 11, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 4, 5, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 10, 11, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
- 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 10, 11, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
- 6, 7, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 4, 5, 6, 7, 10, 11, 14, 15, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
- 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 10, 11,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9, 10, 11, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 4, 5, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 8, 9, 10, 11,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
- 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 4, 5, 8, 9, 10, 11, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 8, 9, 10, 11, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
- 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 8, 9,
- 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
- 8, 9, 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 4, 5, 6, 7, 8, 9, 10, 11, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 8, 9,
- 10, 11, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 4, 5, 6, 7, 8, 9, 10, 11, 14, 15, 0xFF, 0xFF,
- 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 12, 13, 14, 15, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 12, 13,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
- 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 4, 5, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 12, 13,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 12, 13,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 12, 13, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 4, 5, 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 12, 13,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
- 6, 7, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9,
- 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 8, 9, 12, 13,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9,
- 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 8, 9, 12, 13,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 4, 5, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7, 8, 9, 12, 13,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7,
- 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 6, 7, 8, 9, 12, 13, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7, 8, 9, 12, 13,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
- 6, 7, 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 4, 5, 6, 7, 8, 9, 12, 13, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 6, 7,
- 8, 9, 12, 13, 14, 15, 0xFF, 0xFF, 10, 11, 12, 13,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 10, 11, 12, 13, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 4, 5, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 10, 11, 12, 13,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5,
- 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 4, 5, 10, 11, 12, 13, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 6, 7, 10, 11, 12, 13, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 6, 7,
- 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 6, 7, 10, 11,
- 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 6, 7,
- 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 4, 5, 6, 7, 10, 11,
- 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
- 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 8, 9, 10, 11, 12, 13,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 8, 9,
- 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 2, 3, 8, 9, 10, 11, 12, 13, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 4, 5, 8, 9, 10, 11, 12, 13,
- 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5,
- 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF,
- 2, 3, 4, 5, 8, 9, 10, 11, 12, 13, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3, 4, 5, 8, 9,
- 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 6, 7, 8, 9,
- 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0, 1, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 2, 3, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15, 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 2, 3,
- 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
- 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
- 0xFF, 0xFF, 0xFF, 0xFF, 0, 1, 4, 5, 6, 7, 8, 9,
- 10, 11, 12, 13, 14, 15, 0xFF, 0xFF, 2, 3, 4, 5,
- 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0xFF, 0xFF,
- 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
- 12, 13, 14, 15};
-
-/**
- * From Schlegel et al., Fast Sorted-Set Intersection using SIMD Instructions
- * Optimized by D. Lemire on May 3rd 2013
- */
-CROARING_TARGET_AVX2
-int32_t intersect_vector16(const uint16_t *__restrict__ A, size_t s_a,
- const uint16_t *__restrict__ B, size_t s_b,
- uint16_t *C) {
- size_t count = 0;
- size_t i_a = 0, i_b = 0;
- const int vectorlength = sizeof(__m128i) / sizeof(uint16_t);
- const size_t st_a = (s_a / vectorlength) * vectorlength;
- const size_t st_b = (s_b / vectorlength) * vectorlength;
- __m128i v_a, v_b;
- if ((i_a < st_a) && (i_b < st_b)) {
- v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
- v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
- while ((A[i_a] == 0) || (B[i_b] == 0)) {
- const __m128i res_v = _mm_cmpestrm(
- v_b, vectorlength, v_a, vectorlength,
- _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
- const int r = _mm_extract_epi32(res_v, 0);
- __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
- __m128i p = _mm_shuffle_epi8(v_a, sm16);
- _mm_storeu_si128((__m128i *)&C[count], p); // can overflow
- count += _mm_popcnt_u32(r);
- const uint16_t a_max = A[i_a + vectorlength - 1];
- const uint16_t b_max = B[i_b + vectorlength - 1];
- if (a_max <= b_max) {
- i_a += vectorlength;
- if (i_a == st_a) break;
- v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
- }
- if (b_max <= a_max) {
- i_b += vectorlength;
- if (i_b == st_b) break;
- v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
- }
- }
- if ((i_a < st_a) && (i_b < st_b))
- while (true) {
- const __m128i res_v = _mm_cmpistrm(
- v_b, v_a,
- _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
- const int r = _mm_extract_epi32(res_v, 0);
- __m128i sm16 =
- _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
- __m128i p = _mm_shuffle_epi8(v_a, sm16);
- _mm_storeu_si128((__m128i *)&C[count], p); // can overflow
- count += _mm_popcnt_u32(r);
- const uint16_t a_max = A[i_a + vectorlength - 1];
- const uint16_t b_max = B[i_b + vectorlength - 1];
- if (a_max <= b_max) {
- i_a += vectorlength;
- if (i_a == st_a) break;
- v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
- }
- if (b_max <= a_max) {
- i_b += vectorlength;
- if (i_b == st_b) break;
- v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
- }
- }
- }
- // intersect the tail using scalar intersection
- while (i_a < s_a && i_b < s_b) {
- uint16_t a = A[i_a];
- uint16_t b = B[i_b];
- if (a < b) {
- i_a++;
- } else if (b < a) {
- i_b++;
- } else {
- C[count] = a; //==b;
- count++;
- i_a++;
- i_b++;
- }
- }
- return (int32_t)count;
-}
-
-ALLOW_UNALIGNED
-int array_container_to_uint32_array_vector16(void *vout, const uint16_t* array, size_t cardinality,
- uint32_t base) {
- int outpos = 0;
- uint32_t *out = (uint32_t *)vout;
- size_t i = 0;
- for ( ;i + sizeof(__m128i)/sizeof(uint16_t) <= cardinality; i += sizeof(__m128i)/sizeof(uint16_t)) {
- __m128i vinput = _mm_loadu_si128((const __m128i*) (array + i));
- __m256i voutput = _mm256_add_epi32(_mm256_cvtepu16_epi32(vinput), _mm256_set1_epi32(base));
- _mm256_storeu_si256((__m256i*)(out + outpos), voutput);
- outpos += sizeof(__m256i)/sizeof(uint32_t);
- }
- for ( ; i < cardinality; ++i) {
- const uint32_t val = base + array[i];
- memcpy(out + outpos, &val,
- sizeof(uint32_t)); // should be compiled as a MOV on x64
- outpos++;
- }
- return outpos;
-}
-
-int32_t intersect_vector16_inplace(uint16_t *__restrict__ A, size_t s_a,
- const uint16_t *__restrict__ B, size_t s_b) {
- size_t count = 0;
- size_t i_a = 0, i_b = 0;
- const int vectorlength = sizeof(__m128i) / sizeof(uint16_t);
- const size_t st_a = (s_a / vectorlength) * vectorlength;
- const size_t st_b = (s_b / vectorlength) * vectorlength;
- __m128i v_a, v_b;
- if ((i_a < st_a) && (i_b < st_b)) {
- v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
- v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
- __m128i tmp[2] = {_mm_setzero_si128()};
- size_t tmp_count = 0;
- while ((A[i_a] == 0) || (B[i_b] == 0)) {
- const __m128i res_v = _mm_cmpestrm(
- v_b, vectorlength, v_a, vectorlength,
- _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
- const int r = _mm_extract_epi32(res_v, 0);
- __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
- __m128i p = _mm_shuffle_epi8(v_a, sm16);
- _mm_storeu_si128((__m128i*)&((uint16_t*)tmp)[tmp_count], p);
- tmp_count += _mm_popcnt_u32(r);
- const uint16_t a_max = A[i_a + vectorlength - 1];
- const uint16_t b_max = B[i_b + vectorlength - 1];
- if (a_max <= b_max) {
- _mm_storeu_si128((__m128i *)&A[count], tmp[0]);
- _mm_storeu_si128(tmp, _mm_setzero_si128());
- count += tmp_count;
- tmp_count = 0;
- i_a += vectorlength;
- if (i_a == st_a) break;
- v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
- }
- if (b_max <= a_max) {
- i_b += vectorlength;
- if (i_b == st_b) break;
- v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
- }
- }
- if ((i_a < st_a) && (i_b < st_b)) {
- while (true) {
- const __m128i res_v = _mm_cmpistrm(
- v_b, v_a,
- _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
- const int r = _mm_extract_epi32(res_v, 0);
- __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 + r);
- __m128i p = _mm_shuffle_epi8(v_a, sm16);
- _mm_storeu_si128((__m128i*)&((uint16_t*)tmp)[tmp_count], p);
- tmp_count += _mm_popcnt_u32(r);
- const uint16_t a_max = A[i_a + vectorlength - 1];
- const uint16_t b_max = B[i_b + vectorlength - 1];
- if (a_max <= b_max) {
- _mm_storeu_si128((__m128i *)&A[count], tmp[0]);
- _mm_storeu_si128(tmp, _mm_setzero_si128());
- count += tmp_count;
- tmp_count = 0;
- i_a += vectorlength;
- if (i_a == st_a) break;
- v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
- }
- if (b_max <= a_max) {
- i_b += vectorlength;
- if (i_b == st_b) break;
- v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
- }
- }
- }
- // tmp_count <= 8, so this does not affect efficiency so much
- for (size_t i = 0; i < tmp_count; i++) {
- A[count] = ((uint16_t*)tmp)[i];
- count++;
- }
- i_a += tmp_count; // We can at least jump pass $tmp_count elements in A
- }
- // intersect the tail using scalar intersection
- while (i_a < s_a && i_b < s_b) {
- uint16_t a = A[i_a];
- uint16_t b = B[i_b];
- if (a < b) {
- i_a++;
- } else if (b < a) {
- i_b++;
- } else {
- A[count] = a; //==b;
- count++;
- i_a++;
- i_b++;
- }
- }
- return (int32_t)count;
-}
-CROARING_UNTARGET_AVX2
-
-CROARING_TARGET_AVX2
-int32_t intersect_vector16_cardinality(const uint16_t *__restrict__ A,
- size_t s_a,
- const uint16_t *__restrict__ B,
- size_t s_b) {
- size_t count = 0;
- size_t i_a = 0, i_b = 0;
- const int vectorlength = sizeof(__m128i) / sizeof(uint16_t);
- const size_t st_a = (s_a / vectorlength) * vectorlength;
- const size_t st_b = (s_b / vectorlength) * vectorlength;
- __m128i v_a, v_b;
- if ((i_a < st_a) && (i_b < st_b)) {
- v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
- v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
- while ((A[i_a] == 0) || (B[i_b] == 0)) {
- const __m128i res_v = _mm_cmpestrm(
- v_b, vectorlength, v_a, vectorlength,
- _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
- const int r = _mm_extract_epi32(res_v, 0);
- count += _mm_popcnt_u32(r);
- const uint16_t a_max = A[i_a + vectorlength - 1];
- const uint16_t b_max = B[i_b + vectorlength - 1];
- if (a_max <= b_max) {
- i_a += vectorlength;
- if (i_a == st_a) break;
- v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
- }
- if (b_max <= a_max) {
- i_b += vectorlength;
- if (i_b == st_b) break;
- v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
- }
- }
- if ((i_a < st_a) && (i_b < st_b))
- while (true) {
- const __m128i res_v = _mm_cmpistrm(
- v_b, v_a,
- _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK);
- const int r = _mm_extract_epi32(res_v, 0);
- count += _mm_popcnt_u32(r);
- const uint16_t a_max = A[i_a + vectorlength - 1];
- const uint16_t b_max = B[i_b + vectorlength - 1];
- if (a_max <= b_max) {
- i_a += vectorlength;
- if (i_a == st_a) break;
- v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
- }
- if (b_max <= a_max) {
- i_b += vectorlength;
- if (i_b == st_b) break;
- v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
- }
- }
- }
- // intersect the tail using scalar intersection
- while (i_a < s_a && i_b < s_b) {
- uint16_t a = A[i_a];
- uint16_t b = B[i_b];
- if (a < b) {
- i_a++;
- } else if (b < a) {
- i_b++;
- } else {
- count++;
- i_a++;
- i_b++;
- }
- }
- return (int32_t)count;
-}
-CROARING_UNTARGET_AVX2
-
-CROARING_TARGET_AVX2
-/////////
-// Warning:
-// This function may not be safe if A == C or B == C.
-/////////
/**
 * Computes the set difference C = A \ B over sorted arrays of 16-bit values
 * and returns the number of elements written to C. C may alias A (in-place
 * operation is explicitly supported via the A != C / C == A checks below).
 *
 * SSE4.2 path: _mm_cmpistrm does packed string comparison and treats a zero
 * word as a string terminator, so a leading value of 0 must be peeled off by
 * scalar code before the vector loop can run safely.
 *
 * NOTE(review): the vector stores below always write a full 8-word block even
 * when fewer words are kept ("can overflow" comments); this assumes the
 * caller sizes C with enough slack -- confirm against call sites.
 */
int32_t difference_vector16(const uint16_t *__restrict__ A, size_t s_a,
                            const uint16_t *__restrict__ B, size_t s_b,
                            uint16_t *C) {
    // we handle the degenerate case
    if (s_a == 0) return 0;
    if (s_b == 0) {
        // nothing to subtract: the difference is A itself
        if (A != C) memcpy(C, A, sizeof(uint16_t) * s_a);
        return (int32_t)s_a;
    }
    // handle the leading zeroes, it is messy but it allows us to use the fast
    // _mm_cmpistrm intrinsic safely
    int32_t count = 0;
    if ((A[0] == 0) || (B[0] == 0)) {
        if ((A[0] == 0) && (B[0] == 0)) {
            // 0 is in both arrays: excluded from the difference
            A++;
            s_a--;
            B++;
            s_b--;
        } else if (A[0] == 0) {
            // 0 is in A only: it belongs to the difference
            C[count++] = 0;
            A++;
            s_a--;
        } else {
            // 0 is in B only: irrelevant to A \ B
            B++;
            s_b--;
        }
    }
    // at this point, we have two non-empty arrays, made of non-zero
    // increasing values.
    size_t i_a = 0, i_b = 0;
    const size_t vectorlength = sizeof(__m128i) / sizeof(uint16_t);
    // st_a / st_b: lengths rounded down to a multiple of 8 (full vectors)
    const size_t st_a = (s_a / vectorlength) * vectorlength;
    const size_t st_b = (s_b / vectorlength) * vectorlength;
    if ((i_a < st_a) && (i_b < st_b)) {  // this is the vectorized code path
        __m128i v_a, v_b;                //, v_bmax;
        // we load a vector from A and a vector from B
        v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
        v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
        // we have a runningmask which indicates which values from A have been
        // spotted in B, these don't get written out.
        // It accumulates across several B vectors because one A vector may
        // overlap more than one B vector.
        __m128i runningmask_a_found_in_b = _mm_setzero_si128();
        /****
        * start of the main vectorized loop
        *****/
        while (true) {
            // afoundinb will contain a mask indicate for each entry in A
            // whether it is seen
            // in B
            const __m128i a_found_in_b =
                _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY |
                                           _SIDD_BIT_MASK);
            runningmask_a_found_in_b =
                _mm_or_si128(runningmask_a_found_in_b, a_found_in_b);
            // we always compare the last values of A and B
            const uint16_t a_max = A[i_a + vectorlength - 1];
            const uint16_t b_max = B[i_b + vectorlength - 1];
            if (a_max <= b_max) {
                // Ok. In this code path, we are ready to write our v_a
                // because there is no need to read more from B, they will
                // all be large values.
                // Invert the 8-bit "found" mask: set bits now flag elements
                // of v_a that belong to the difference.
                const int bitmask_belongs_to_difference =
                    _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;
                /*** next few lines are probably expensive *****/
                // table-driven left-packing of the surviving words of v_a
                __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 +
                                               bitmask_belongs_to_difference);
                __m128i p = _mm_shuffle_epi8(v_a, sm16);
                _mm_storeu_si128((__m128i *)&C[count], p);  // can overflow
                count += _mm_popcnt_u32(bitmask_belongs_to_difference);
                // we advance a
                i_a += vectorlength;
                if (i_a == st_a)  // no more
                    break;
                runningmask_a_found_in_b = _mm_setzero_si128();
                v_a = _mm_lddqu_si128((__m128i *)&A[i_a]);
            }
            if (b_max <= a_max) {
                // in this code path, the current v_b has become useless
                i_b += vectorlength;
                if (i_b == st_b) break;
                v_b = _mm_lddqu_si128((__m128i *)&B[i_b]);
            }
        }
        // at this point, either we have i_a == st_a, which is the end of the
        // vectorized processing,
        // or we have i_b == st_b, and we are not done processing the vector...
        // so we need to finish it off.
        if (i_a < st_a) {        // we have unfinished business...
            uint16_t buffer[8];  // buffer to do a masked load
            // zero-pad the last partial B block; padding zeros can never
            // match since both arrays contain only non-zero values here
            memset(buffer, 0, 8 * sizeof(uint16_t));
            memcpy(buffer, B + i_b, (s_b - i_b) * sizeof(uint16_t));
            v_b = _mm_lddqu_si128((__m128i *)buffer);
            const __m128i a_found_in_b =
                _mm_cmpistrm(v_b, v_a, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_ANY |
                                           _SIDD_BIT_MASK);
            runningmask_a_found_in_b =
                _mm_or_si128(runningmask_a_found_in_b, a_found_in_b);
            const int bitmask_belongs_to_difference =
                _mm_extract_epi32(runningmask_a_found_in_b, 0) ^ 0xFF;
            __m128i sm16 = _mm_loadu_si128((const __m128i *)shuffle_mask16 +
                                           bitmask_belongs_to_difference);
            __m128i p = _mm_shuffle_epi8(v_a, sm16);
            _mm_storeu_si128((__m128i *)&C[count], p);  // can overflow
            count += _mm_popcnt_u32(bitmask_belongs_to_difference);
            i_a += vectorlength;
        }
        // at this point we should have i_a == st_a and i_b == st_b
    }
    // do the tail using scalar code
    while (i_a < s_a && i_b < s_b) {
        uint16_t a = A[i_a];
        uint16_t b = B[i_b];
        if (b < a) {
            i_b++;
        } else if (a < b) {
            C[count] = a;
            count++;
            i_a++;
        } else {  //==
            i_a++;
            i_b++;
        }
    }
    if (i_a < s_a) {
        // B exhausted: everything left in A survives
        if(C == A) {
            // in-place: only move if the output has actually fallen behind
            assert((size_t)count <= i_a);
            if((size_t)count < i_a) {
                memmove(C + count, A + i_a, sizeof(uint16_t) * (s_a - i_a));
            }
        } else {
            for(size_t i = 0; i < (s_a - i_a); i++) {
                C[count + i] = A[i + i_a];
            }
        }
        count += (int32_t)(s_a - i_a);
    }
    return count;
}
-CROARING_UNTARGET_AVX2
-#endif // CROARING_IS_X64
-
-
-
-/**
-* Branchless binary search going after 4 values at once.
-* Assumes that array is sorted.
-* You have that array[*index1] >= target1, array[*index12] >= target2, ...
-* except when *index1 = n, in which case you know that all values in array are
-* smaller than target1, and so forth.
-* It has logarithmic complexity.
-*/
/**
 * Four lower-bound binary searches over the same sorted array, run in lock
 * step so their memory accesses interleave.
 * On return, array[*indexK] >= targetK; when *indexK == n, every element of
 * the array is smaller than targetK. Output indices are left untouched when
 * n == 0. Logarithmic complexity.
 */
static void binarySearch4(const uint16_t *array, int32_t n, uint16_t target1,
                          uint16_t target2, uint16_t target3, uint16_t target4,
                          int32_t *index1, int32_t *index2, int32_t *index3,
                          int32_t *index4) {
    if (n == 0) {
        return;
    }
    // each cursor marks the start of the range still possibly >= its target
    const uint16_t *lo1 = array;
    const uint16_t *lo2 = array;
    const uint16_t *lo3 = array;
    const uint16_t *lo4 = array;
    int32_t span = n;
    while (span > 1) {
        const int32_t step = span >> 1;
        if (lo1[step] < target1) {
            lo1 += step;
        }
        if (lo2[step] < target2) {
            lo2 += step;
        }
        if (lo3[step] < target3) {
            lo3 += step;
        }
        if (lo4[step] < target4) {
            lo4 += step;
        }
        span -= step;
    }
    // each cursor now points at the last candidate; bump past it if it is
    // still smaller than the target
    *index1 = (int32_t)(lo1 - array) + ((*lo1 < target1) ? 1 : 0);
    *index2 = (int32_t)(lo2 - array) + ((*lo2 < target2) ? 1 : 0);
    *index3 = (int32_t)(lo3 - array) + ((*lo3 < target3) ? 1 : 0);
    *index4 = (int32_t)(lo4 - array) + ((*lo4 < target4) ? 1 : 0);
}
-
-/**
-* Branchless binary search going after 2 values at once.
-* Assumes that array is sorted.
-* You have that array[*index1] >= target1, array[*index12] >= target2.
-* except when *index1 = n, in which case you know that all values in array are
-* smaller than target1, and so forth.
-* It has logarithmic complexity.
-*/
/**
 * Two lower-bound binary searches over the same sorted array, run in lock
 * step. On return, array[*indexK] >= targetK; when *indexK == n, every
 * element is smaller than targetK. Output indices are left untouched when
 * n == 0. Logarithmic complexity.
 */
static void binarySearch2(const uint16_t *array, int32_t n, uint16_t target1,
                          uint16_t target2, int32_t *index1, int32_t *index2) {
    if (n == 0) {
        return;
    }
    const uint16_t *lo1 = array;
    const uint16_t *lo2 = array;
    int32_t span = n;
    while (span > 1) {
        const int32_t step = span >> 1;
        if (lo1[step] < target1) {
            lo1 += step;
        }
        if (lo2[step] < target2) {
            lo2 += step;
        }
        span -= step;
    }
    *index1 = (int32_t)(lo1 - array) + ((*lo1 < target1) ? 1 : 0);
    *index2 = (int32_t)(lo2 - array) + ((*lo2 < target2) ? 1 : 0);
}
-
-/* Computes the intersection between one small and one large set of uint16_t.
- * Stores the result into buffer and return the number of elements.
- * Processes the small set in blocks of 4 values calling binarySearch4
- * and binarySearch2. This approach can be slightly superior to a conventional
- * galloping search in some instances.
- */
/**
 * Intersects a small sorted array with a large sorted array, writing matches
 * to `buffer` and returning their count. Looks up the small array's values
 * in the large one, four (then two, then one) at a time via binarySearch4 /
 * binarySearch2 / binarySearch.
 *
 * NOTE(review): "small" is also a macro defined by Windows RPC headers
 * (rpcndr.h); fine here unless this TU ever includes windows.h -- confirm.
 */
int32_t intersect_skewed_uint16(const uint16_t *small, size_t size_s,
                                const uint16_t *large, size_t size_l,
                                uint16_t *buffer) {
    size_t pos = 0, idx_l = 0, idx_s = 0;

    if (0 == size_s) {
        return 0;
    }
    int32_t index1 = 0, index2 = 0, index3 = 0, index4 = 0;
    while ((idx_s + 4 <= size_s) && (idx_l < size_l)) {
        uint16_t target1 = small[idx_s];
        uint16_t target2 = small[idx_s + 1];
        uint16_t target3 = small[idx_s + 2];
        uint16_t target4 = small[idx_s + 3];
        // indexK becomes the lower-bound position of targetK within
        // large + idx_l (or size_l - idx_l when all remaining are smaller)
        binarySearch4(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, target3,
                      target4, &index1, &index2, &index3, &index4);
        // indexK is non-negative, so the signed->unsigned conversion in the
        // idx_l + indexK comparisons below is safe
        if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
            buffer[pos++] = target1;
        }
        if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) {
            buffer[pos++] = target2;
        }
        if ((index3 + idx_l < size_l) && (large[idx_l + index3] == target3)) {
            buffer[pos++] = target3;
        }
        if ((index4 + idx_l < size_l) && (large[idx_l + index4] == target4)) {
            buffer[pos++] = target4;
        }
        idx_s += 4;
        // everything in large before index4 is < target4, hence smaller than
        // all remaining small values: skip it for the next round
        idx_l += index4;
    }
    if ((idx_s + 2 <= size_s) && (idx_l < size_l)) {
        // same scheme with the last pair of small values
        uint16_t target1 = small[idx_s];
        uint16_t target2 = small[idx_s + 1];
        binarySearch2(large + idx_l, (int32_t)(size_l - idx_l), target1, target2, &index1,
                      &index2);
        if ((index1 + idx_l < size_l) && (large[idx_l + index1] == target1)) {
            buffer[pos++] = target1;
        }
        if ((index2 + idx_l < size_l) && (large[idx_l + index2] == target2)) {
            buffer[pos++] = target2;
        }
        idx_s += 2;
        idx_l += index2;
    }
    if ((idx_s < size_s) && (idx_l < size_l)) {
        // at most one small value left: a plain binary search suffices
        uint16_t val_s = small[idx_s];
        int32_t index = binarySearch(large + idx_l, (int32_t)(size_l - idx_l), val_s);
        if (index >= 0)
            buffer[pos++] = val_s;
    }
    return (int32_t)pos;
}
-
-
-
-// TODO: this could be accelerated, possibly, by using binarySearch4 as above.
/**
 * Returns the cardinality of the intersection of a small sorted array with a
 * large sorted array, galloping through the large array with advanceUntil.
 *
 * Fix: also guard the empty "large" case; the unconditional large[idx_l]
 * read below would otherwise be out of bounds when size_l == 0.
 */
int32_t intersect_skewed_uint16_cardinality(const uint16_t *small,
                                            size_t size_s,
                                            const uint16_t *large,
                                            size_t size_l) {
    size_t pos = 0, idx_l = 0, idx_s = 0;

    if (0 == size_s) {
        return 0;
    }
    if (0 == size_l) {
        return 0;
    }

    uint16_t val_l = large[idx_l], val_s = small[idx_s];

    while (true) {
        if (val_l < val_s) {
            // skip ahead in the large array to the first value >= val_s
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        } else if (val_s < val_l) {
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
        } else {
            // match: count it, then advance both sides
            pos++;
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        }
    }

    return (int32_t)pos;
}
-
/**
 * Returns true if a small sorted array and a large sorted array share at
 * least one value; gallops through the large array with advanceUntil.
 *
 * Fix: also guard the empty "large" case; the unconditional large[idx_l]
 * read below would otherwise be out of bounds when size_l == 0.
 */
bool intersect_skewed_uint16_nonempty(const uint16_t *small, size_t size_s,
                                      const uint16_t *large, size_t size_l) {
    size_t idx_l = 0, idx_s = 0;

    if (0 == size_s) {
        return false;
    }
    if (0 == size_l) {
        return false;
    }

    uint16_t val_l = large[idx_l], val_s = small[idx_s];

    while (true) {
        if (val_l < val_s) {
            // skip ahead in the large array to the first value >= val_s
            idx_l = advanceUntil(large, (int32_t)idx_l, (int32_t)size_l, val_s);
            if (idx_l == size_l) break;
            val_l = large[idx_l];
        } else if (val_s < val_l) {
            idx_s++;
            if (idx_s == size_s) break;
            val_s = small[idx_s];
        } else {
            // found a common value: done
            return true;
        }
    }

    return false;
}
-
-/**
- * Generic intersection function.
- */
/**
 * Generic intersection of two sorted uint16_t arrays; writes the common
 * values to `out` (in increasing order) and returns their count.
 */
int32_t intersect_uint16(const uint16_t *A, const size_t lenA,
                         const uint16_t *B, const size_t lenB, uint16_t *out) {
    if (lenA == 0 || lenB == 0) {
        return 0;
    }
    size_t ia = 0;
    size_t ib = 0;
    int32_t count = 0;
    // classic two-pointer merge scan
    while (ia < lenA && ib < lenB) {
        const uint16_t va = A[ia];
        const uint16_t vb = B[ib];
        if (va < vb) {
            ia++;
        } else if (vb < va) {
            ib++;
        } else {
            out[count++] = va;
            ia++;
            ib++;
        }
    }
    return count;
}
-
/**
 * Returns the cardinality of the intersection of two sorted uint16_t arrays
 * without materializing the result.
 */
int32_t intersect_uint16_cardinality(const uint16_t *A, const size_t lenA,
                                     const uint16_t *B, const size_t lenB) {
    if (lenA == 0 || lenB == 0) {
        return 0;
    }
    size_t ia = 0;
    size_t ib = 0;
    int32_t matches = 0;
    // classic two-pointer merge scan, counting instead of writing
    while (ia < lenA && ib < lenB) {
        const uint16_t va = A[ia];
        const uint16_t vb = B[ib];
        if (va < vb) {
            ia++;
        } else if (vb < va) {
            ib++;
        } else {
            matches++;
            ia++;
            ib++;
        }
    }
    return matches;
}
-
-
/**
 * Returns true if two sorted uint16_t arrays share at least one value.
 * Uses the same goto-based galloping scan as intersect_uint16, but stops at
 * the first match.
 *
 * Fix: the function returns bool, so use the `false` literal instead of 0
 * for the degenerate case (idiom/consistency with the other bool returns).
 */
bool intersect_uint16_nonempty(const uint16_t *A, const size_t lenA,
                               const uint16_t *B, const size_t lenB) {
    if (lenA == 0 || lenB == 0) return false;
    const uint16_t *endA = A + lenA;
    const uint16_t *endB = B + lenB;

    while (1) {
        while (*A < *B) {
        SKIP_FIRST_COMPARE:
            if (++A == endA) return false;
        }
        while (*A > *B) {
            if (++B == endB) return false;
        }
        if (*A == *B) {
            return true;
        } else {
            // *A moved past *B inside the inner loop: resume advancing A
            goto SKIP_FIRST_COMPARE;
        }
    }
    return false;  // NOTREACHED
}
-
-
-
-/**
- * Generic intersection function.
- */
/**
 * Generic intersection of two sorted uint32_t arrays; writes the common
 * values to `out` (in increasing order) and returns their count.
 */
size_t intersection_uint32(const uint32_t *A, const size_t lenA,
                           const uint32_t *B, const size_t lenB,
                           uint32_t *out) {
    if (lenA == 0 || lenB == 0) {
        return 0;
    }
    size_t ia = 0;
    size_t ib = 0;
    size_t n_out = 0;
    // classic two-pointer merge scan
    while (ia < lenA && ib < lenB) {
        const uint32_t va = A[ia];
        const uint32_t vb = B[ib];
        if (va < vb) {
            ia++;
        } else if (vb < va) {
            ib++;
        } else {
            out[n_out++] = va;
            ia++;
            ib++;
        }
    }
    return n_out;
}
-
/**
 * Returns the cardinality of the intersection of two sorted uint32_t arrays
 * without materializing the result.
 */
size_t intersection_uint32_card(const uint32_t *A, const size_t lenA,
                                const uint32_t *B, const size_t lenB) {
    if (lenA == 0 || lenB == 0) {
        return 0;
    }
    size_t ia = 0;
    size_t ib = 0;
    size_t card = 0;
    // classic two-pointer merge scan, counting instead of writing
    while (ia < lenA && ib < lenB) {
        const uint32_t va = A[ia];
        const uint32_t vb = B[ib];
        if (va < vb) {
            ia++;
        } else if (vb < va) {
            ib++;
        } else {
            card++;
            ia++;
            ib++;
        }
    }
    return card;
}
-
-// can one vectorize the computation of the union? (Update: Yes! See
-// union_vector16).
-
/**
 * Union of two sorted uint16_t sets into `buffer`; duplicates appear once.
 * Returns the number of values written. memmove is used for the bulk copies
 * so that buffer may overlap an input.
 */
size_t union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
                    size_t size_2, uint16_t *buffer) {
    // degenerate cases: just copy the non-empty side
    if (0 == size_2) {
        memmove(buffer, set_1, size_1 * sizeof(uint16_t));
        return size_1;
    }
    if (0 == size_1) {
        memmove(buffer, set_2, size_2 * sizeof(uint16_t));
        return size_2;
    }

    size_t n_out = 0;
    size_t i1 = 0;
    size_t i2 = 0;
    // merge while both inputs have values left
    while (i1 < size_1 && i2 < size_2) {
        const uint16_t v1 = set_1[i1];
        const uint16_t v2 = set_2[i2];
        if (v1 < v2) {
            buffer[n_out++] = v1;
            i1++;
        } else if (v2 < v1) {
            buffer[n_out++] = v2;
            i2++;
        } else {
            // equal: emit once, consume both
            buffer[n_out++] = v1;
            i1++;
            i2++;
        }
    }

    // bulk-copy whichever input has a leftover tail
    if (i1 < size_1) {
        const size_t tail = size_1 - i1;
        memmove(buffer + n_out, set_1 + i1, tail * sizeof(uint16_t));
        n_out += tail;
    } else if (i2 < size_2) {
        const size_t tail = size_2 - i2;
        memmove(buffer + n_out, set_2 + i2, tail * sizeof(uint16_t));
        n_out += tail;
    }

    return n_out;
}
-
/**
 * Scalar set difference a_out = a1 \ a2 over sorted uint16_t arrays; returns
 * the number of values written. a_out may alias a1: the write index never
 * overtakes the read index, and the tail copy uses memmove.
 */
int difference_uint16(const uint16_t *a1, int length1, const uint16_t *a2,
                      int length2, uint16_t *a_out) {
    if (length1 == 0) {
        return 0;
    }
    if (length2 == 0) {
        // nothing to subtract: the difference is a1 itself
        if (a1 != a_out) memcpy(a_out, a1, sizeof(uint16_t) * length1);
        return length1;
    }
    int n_out = 0;
    int i1 = 0;
    int i2 = 0;
    // merge scan: keep values of a1 that a2 does not contain
    while (i1 < length1 && i2 < length2) {
        const uint16_t v1 = a1[i1];
        const uint16_t v2 = a2[i2];
        if (v1 < v2) {
            a_out[n_out++] = v1;
            i1++;
        } else if (v2 < v1) {
            i2++;
        } else {
            // common value: drop it
            i1++;
            i2++;
        }
    }
    // a2 exhausted: the rest of a1 survives wholesale
    if (i1 < length1) {
        memmove(a_out + n_out, a1 + i1, sizeof(uint16_t) * (length1 - i1));
        n_out += length1 - i1;
    }
    return n_out;
}
-
/**
 * Symmetric difference (XOR) of two sorted uint16_t arrays into `out`;
 * returns the number of values written. Values present in both inputs are
 * dropped; the leftover tail of either input is bulk-copied.
 */
int32_t xor_uint16(const uint16_t *array_1, int32_t card_1,
                   const uint16_t *array_2, int32_t card_2, uint16_t *out) {
    int32_t i1 = 0;
    int32_t i2 = 0;
    int32_t n_out = 0;
    while (i1 < card_1 && i2 < card_2) {
        const uint16_t v1 = array_1[i1];
        const uint16_t v2 = array_2[i2];
        if (v1 < v2) {
            out[n_out++] = v1;
            i1++;
        } else if (v2 < v1) {
            out[n_out++] = v2;
            i2++;
        } else {
            // shared value: belongs to neither side of the XOR
            i1++;
            i2++;
        }
    }
    // at most one of the inputs still has values; they all survive
    if (i1 < card_1) {
        const size_t tail = (size_t)(card_1 - i1);
        memcpy(out + n_out, array_1 + i1, tail * sizeof(uint16_t));
        n_out += (int32_t)tail;
    } else if (i2 < card_2) {
        const size_t tail = (size_t)(card_2 - i2);
        memcpy(out + n_out, array_2 + i2, tail * sizeof(uint16_t));
        n_out += (int32_t)tail;
    }
    return n_out;
}
-
-#if CROARING_IS_X64
-
-/***
- * start of the SIMD 16-bit union code
- *
- */
-CROARING_TARGET_AVX2
-
-// Assuming that vInput1 and vInput2 are sorted, produces a sorted output going
-// from vecMin all the way to vecMax
-// developed originally for merge sort using SIMD instructions.
-// Standard merge. See, e.g., Inoue and Taura, SIMD- and Cache-Friendly
-// Algorithm for Sorting an Array of Structures
/**
 * In-register merge of two sorted vectors of 8 uint16_t each: outputs the
 * 8 smallest values (sorted) in *vecMin and the 8 largest (rotated into
 * sorted order by the final alignr) in *vecMax.
 *
 * Each min/max pair below is one step of an 8-stage merging network; the
 * 2-byte alignr rotates the running minimum so the next candidate lines up
 * against the maxima. The statement order is load-bearing -- do not reorder.
 */
static inline void sse_merge(const __m128i *vInput1,
                             const __m128i *vInput2,  // input 1 & 2
                             __m128i *vecMin, __m128i *vecMax) {  // output
    __m128i vecTmp;
    // stage 1: elementwise min/max of the two inputs
    vecTmp = _mm_min_epu16(*vInput1, *vInput2);
    *vecMax = _mm_max_epu16(*vInput1, *vInput2);
    // stages 2..8: rotate the minima by one lane, then min/max again
    vecTmp = _mm_alignr_epi8(vecTmp, vecTmp, 2);
    *vecMin = _mm_min_epu16(vecTmp, *vecMax);
    *vecMax = _mm_max_epu16(vecTmp, *vecMax);
    vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
    *vecMin = _mm_min_epu16(vecTmp, *vecMax);
    *vecMax = _mm_max_epu16(vecTmp, *vecMax);
    vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
    *vecMin = _mm_min_epu16(vecTmp, *vecMax);
    *vecMax = _mm_max_epu16(vecTmp, *vecMax);
    vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
    *vecMin = _mm_min_epu16(vecTmp, *vecMax);
    *vecMax = _mm_max_epu16(vecTmp, *vecMax);
    vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
    *vecMin = _mm_min_epu16(vecTmp, *vecMax);
    *vecMax = _mm_max_epu16(vecTmp, *vecMax);
    vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
    *vecMin = _mm_min_epu16(vecTmp, *vecMax);
    *vecMax = _mm_max_epu16(vecTmp, *vecMax);
    vecTmp = _mm_alignr_epi8(*vecMin, *vecMin, 2);
    *vecMin = _mm_min_epu16(vecTmp, *vecMax);
    *vecMax = _mm_max_epu16(vecTmp, *vecMax);
    // final rotation puts *vecMin into sorted order
    *vecMin = _mm_alignr_epi8(*vecMin, *vecMin, 2);
}
-CROARING_UNTARGET_AVX2
-// used by store_unique, generated by simdunion.py
-static uint8_t uniqshuf[] = {
- 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
- 0xc, 0xd, 0xe, 0xf, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
- 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
- 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
- 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9,
- 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
- 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
- 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb,
- 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9,
- 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9,
- 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
- 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
- 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb,
- 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
- 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb,
- 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
- 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
- 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
- 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
- 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9,
- 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
- 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
- 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
- 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xc, 0xd, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
- 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
- 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9,
- 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
- 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9,
- 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
- 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
- 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x6, 0x7, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
- 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
- 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
- 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x8, 0x9, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
- 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
- 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x4, 0x5, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xe, 0xf, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xe, 0xf,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xe, 0xf, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
- 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
- 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
- 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
- 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
- 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9,
- 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
- 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xa, 0xb, 0xc, 0xd,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xa, 0xb,
- 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb,
- 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
- 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xa, 0xb,
- 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
- 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xa, 0xb, 0xc, 0xd,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xa, 0xb,
- 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xa, 0xb,
- 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xa, 0xb, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
- 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
- 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xc, 0xd,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9,
- 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xc, 0xd,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
- 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x4, 0x5, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xc, 0xd,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9,
- 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xc, 0xd,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
- 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
- 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x6, 0x7, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xc, 0xd,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
- 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xc, 0xd, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xc, 0xd, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
- 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
- 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9,
- 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
- 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xa, 0xb,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9,
- 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9,
- 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x8, 0x9, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7,
- 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
- 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7, 0xa, 0xb, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xa, 0xb,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
- 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x4, 0x5, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0xa, 0xb, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xa, 0xb,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xa, 0xb, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x6, 0x7,
- 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x6, 0x7,
- 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x6, 0x7, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x8, 0x9,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5,
- 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x8, 0x9, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x8, 0x9, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5,
- 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x4, 0x5, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3, 0x6, 0x7, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0x6, 0x7,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x6, 0x7, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x2, 0x3,
- 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x2, 0x3, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0x4, 0x5, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x4, 0x5, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0x0, 0x1, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0x2, 0x3, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x0, 0x1, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
- 0xFF, 0xFF, 0xFF, 0xFF};
-CROARING_TARGET_AVX2
-// write vector new, while omitting repeated values assuming that previously
-// written vector was "old"
-static inline int store_unique(__m128i old, __m128i newval, uint16_t *output) {
- __m128i vecTmp = _mm_alignr_epi8(newval, old, 16 - 2);
- // lots of high latency instructions follow (optimize?)
- int M = _mm_movemask_epi8(
- _mm_packs_epi16(_mm_cmpeq_epi16(vecTmp, newval), _mm_setzero_si128()));
- int numberofnewvalues = 8 - _mm_popcnt_u32(M);
- __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M);
- __m128i val = _mm_shuffle_epi8(newval, key);
- _mm_storeu_si128((__m128i *)output, val);
- return numberofnewvalues;
-}
-CROARING_UNTARGET_AVX2
-
-// working in-place, this function overwrites the repeated values
-// could be avoided?
-static inline uint32_t unique(uint16_t *out, uint32_t len) {
- uint32_t pos = 1;
- for (uint32_t i = 1; i < len; ++i) {
- if (out[i] != out[i - 1]) {
- out[pos++] = out[i];
- }
- }
- return pos;
-}
-
-// use with qsort, could be avoided
-static int uint16_compare(const void *a, const void *b) {
- return (*(uint16_t *)a - *(uint16_t *)b);
-}
-
-CROARING_TARGET_AVX2
-// a one-pass SSE union algorithm
-// This function may not be safe if array1 == output or array2 == output.
-uint32_t union_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
- const uint16_t *__restrict__ array2, uint32_t length2,
- uint16_t *__restrict__ output) {
- if ((length1 < 8) || (length2 < 8)) {
- return (uint32_t)union_uint16(array1, length1, array2, length2, output);
- }
- __m128i vA, vB, V, vecMin, vecMax;
- __m128i laststore;
- uint16_t *initoutput = output;
- uint32_t len1 = length1 / 8;
- uint32_t len2 = length2 / 8;
- uint32_t pos1 = 0;
- uint32_t pos2 = 0;
- // we start the machine
- vA = _mm_lddqu_si128((const __m128i *)array1 + pos1);
- pos1++;
- vB = _mm_lddqu_si128((const __m128i *)array2 + pos2);
- pos2++;
- sse_merge(&vA, &vB, &vecMin, &vecMax);
- laststore = _mm_set1_epi16(-1);
- output += store_unique(laststore, vecMin, output);
- laststore = vecMin;
- if ((pos1 < len1) && (pos2 < len2)) {
- uint16_t curA, curB;
- curA = array1[8 * pos1];
- curB = array2[8 * pos2];
- while (true) {
- if (curA <= curB) {
- V = _mm_lddqu_si128((const __m128i *)array1 + pos1);
- pos1++;
- if (pos1 < len1) {
- curA = array1[8 * pos1];
- } else {
- break;
- }
- } else {
- V = _mm_lddqu_si128((const __m128i *)array2 + pos2);
- pos2++;
- if (pos2 < len2) {
- curB = array2[8 * pos2];
- } else {
- break;
- }
- }
- sse_merge(&V, &vecMax, &vecMin, &vecMax);
- output += store_unique(laststore, vecMin, output);
- laststore = vecMin;
- }
- sse_merge(&V, &vecMax, &vecMin, &vecMax);
- output += store_unique(laststore, vecMin, output);
- laststore = vecMin;
- }
- // we finish the rest off using a scalar algorithm
- // could be improved?
- //
- // copy the small end on a tmp buffer
- uint32_t len = (uint32_t)(output - initoutput);
- uint16_t buffer[16];
- uint32_t leftoversize = store_unique(laststore, vecMax, buffer);
- if (pos1 == len1) {
- memcpy(buffer + leftoversize, array1 + 8 * pos1,
- (length1 - 8 * len1) * sizeof(uint16_t));
- leftoversize += length1 - 8 * len1;
- qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
-
- leftoversize = unique(buffer, leftoversize);
- len += (uint32_t)union_uint16(buffer, leftoversize, array2 + 8 * pos2,
- length2 - 8 * pos2, output);
- } else {
- memcpy(buffer + leftoversize, array2 + 8 * pos2,
- (length2 - 8 * len2) * sizeof(uint16_t));
- leftoversize += length2 - 8 * len2;
- qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
- leftoversize = unique(buffer, leftoversize);
- len += (uint32_t)union_uint16(buffer, leftoversize, array1 + 8 * pos1,
- length1 - 8 * pos1, output);
- }
- return len;
-}
-CROARING_UNTARGET_AVX2
-
-/**
- * End of the SIMD 16-bit union code
- *
- */
-
-/**
- * Start of SIMD 16-bit XOR code
- */
-
-CROARING_TARGET_AVX2
-// write vector new, while omitting repeated values assuming that previously
-// written vector was "old"
-static inline int store_unique_xor(__m128i old, __m128i newval,
- uint16_t *output) {
- __m128i vecTmp1 = _mm_alignr_epi8(newval, old, 16 - 4);
- __m128i vecTmp2 = _mm_alignr_epi8(newval, old, 16 - 2);
- __m128i equalleft = _mm_cmpeq_epi16(vecTmp2, vecTmp1);
- __m128i equalright = _mm_cmpeq_epi16(vecTmp2, newval);
- __m128i equalleftoright = _mm_or_si128(equalleft, equalright);
- int M = _mm_movemask_epi8(
- _mm_packs_epi16(equalleftoright, _mm_setzero_si128()));
- int numberofnewvalues = 8 - _mm_popcnt_u32(M);
- __m128i key = _mm_lddqu_si128((const __m128i *)uniqshuf + M);
- __m128i val = _mm_shuffle_epi8(vecTmp2, key);
- _mm_storeu_si128((__m128i *)output, val);
- return numberofnewvalues;
-}
-CROARING_UNTARGET_AVX2
-
-// working in-place, this function overwrites the repeated values
-// could be avoided? Warning: assumes len > 0
-static inline uint32_t unique_xor(uint16_t *out, uint32_t len) {
- uint32_t pos = 1;
- for (uint32_t i = 1; i < len; ++i) {
- if (out[i] != out[i - 1]) {
- out[pos++] = out[i];
- } else
- pos--; // if it is identical to previous, delete it
- }
- return pos;
-}
-CROARING_TARGET_AVX2
-// a one-pass SSE xor algorithm
-uint32_t xor_vector16(const uint16_t *__restrict__ array1, uint32_t length1,
- const uint16_t *__restrict__ array2, uint32_t length2,
- uint16_t *__restrict__ output) {
- if ((length1 < 8) || (length2 < 8)) {
- return xor_uint16(array1, length1, array2, length2, output);
- }
- __m128i vA, vB, V, vecMin, vecMax;
- __m128i laststore;
- uint16_t *initoutput = output;
- uint32_t len1 = length1 / 8;
- uint32_t len2 = length2 / 8;
- uint32_t pos1 = 0;
- uint32_t pos2 = 0;
- // we start the machine
- vA = _mm_lddqu_si128((const __m128i *)array1 + pos1);
- pos1++;
- vB = _mm_lddqu_si128((const __m128i *)array2 + pos2);
- pos2++;
- sse_merge(&vA, &vB, &vecMin, &vecMax);
- laststore = _mm_set1_epi16(-1);
- uint16_t buffer[17];
- output += store_unique_xor(laststore, vecMin, output);
-
- laststore = vecMin;
- if ((pos1 < len1) && (pos2 < len2)) {
- uint16_t curA, curB;
- curA = array1[8 * pos1];
- curB = array2[8 * pos2];
- while (true) {
- if (curA <= curB) {
- V = _mm_lddqu_si128((const __m128i *)array1 + pos1);
- pos1++;
- if (pos1 < len1) {
- curA = array1[8 * pos1];
- } else {
- break;
- }
- } else {
- V = _mm_lddqu_si128((const __m128i *)array2 + pos2);
- pos2++;
- if (pos2 < len2) {
- curB = array2[8 * pos2];
- } else {
- break;
- }
- }
- sse_merge(&V, &vecMax, &vecMin, &vecMax);
- // conditionally stores the last value of laststore as well as all
- // but the
- // last value of vecMin
- output += store_unique_xor(laststore, vecMin, output);
- laststore = vecMin;
- }
- sse_merge(&V, &vecMax, &vecMin, &vecMax);
- // conditionally stores the last value of laststore as well as all but
- // the
- // last value of vecMin
- output += store_unique_xor(laststore, vecMin, output);
- laststore = vecMin;
- }
- uint32_t len = (uint32_t)(output - initoutput);
-
- // we finish the rest off using a scalar algorithm
- // could be improved?
- // conditionally stores the last value of laststore as well as all but the
- // last value of vecMax,
- // we store to "buffer"
- int leftoversize = store_unique_xor(laststore, vecMax, buffer);
- uint16_t vec7 = _mm_extract_epi16(vecMax, 7);
- uint16_t vec6 = _mm_extract_epi16(vecMax, 6);
- if (vec7 != vec6) buffer[leftoversize++] = vec7;
- if (pos1 == len1) {
- memcpy(buffer + leftoversize, array1 + 8 * pos1,
- (length1 - 8 * len1) * sizeof(uint16_t));
- leftoversize += length1 - 8 * len1;
- if (leftoversize == 0) { // trivial case
- memcpy(output, array2 + 8 * pos2,
- (length2 - 8 * pos2) * sizeof(uint16_t));
- len += (length2 - 8 * pos2);
- } else {
- qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
- leftoversize = unique_xor(buffer, leftoversize);
- len += xor_uint16(buffer, leftoversize, array2 + 8 * pos2,
- length2 - 8 * pos2, output);
- }
- } else {
- memcpy(buffer + leftoversize, array2 + 8 * pos2,
- (length2 - 8 * len2) * sizeof(uint16_t));
- leftoversize += length2 - 8 * len2;
- if (leftoversize == 0) { // trivial case
- memcpy(output, array1 + 8 * pos1,
- (length1 - 8 * pos1) * sizeof(uint16_t));
- len += (length1 - 8 * pos1);
- } else {
- qsort(buffer, leftoversize, sizeof(uint16_t), uint16_compare);
- leftoversize = unique_xor(buffer, leftoversize);
- len += xor_uint16(buffer, leftoversize, array1 + 8 * pos1,
- length1 - 8 * pos1, output);
- }
- }
- return len;
-}
-CROARING_UNTARGET_AVX2
-/**
- * End of SIMD 16-bit XOR code
- */
-
-#endif // CROARING_IS_X64
-
-size_t union_uint32(const uint32_t *set_1, size_t size_1, const uint32_t *set_2,
- size_t size_2, uint32_t *buffer) {
- size_t pos = 0, idx_1 = 0, idx_2 = 0;
-
- if (0 == size_2) {
- memmove(buffer, set_1, size_1 * sizeof(uint32_t));
- return size_1;
- }
- if (0 == size_1) {
- memmove(buffer, set_2, size_2 * sizeof(uint32_t));
- return size_2;
- }
-
- uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2];
-
- while (true) {
- if (val_1 < val_2) {
- buffer[pos++] = val_1;
- ++idx_1;
- if (idx_1 >= size_1) break;
- val_1 = set_1[idx_1];
- } else if (val_2 < val_1) {
- buffer[pos++] = val_2;
- ++idx_2;
- if (idx_2 >= size_2) break;
- val_2 = set_2[idx_2];
- } else {
- buffer[pos++] = val_1;
- ++idx_1;
- ++idx_2;
- if (idx_1 >= size_1 || idx_2 >= size_2) break;
- val_1 = set_1[idx_1];
- val_2 = set_2[idx_2];
- }
- }
-
- if (idx_1 < size_1) {
- const size_t n_elems = size_1 - idx_1;
- memmove(buffer + pos, set_1 + idx_1, n_elems * sizeof(uint32_t));
- pos += n_elems;
- } else if (idx_2 < size_2) {
- const size_t n_elems = size_2 - idx_2;
- memmove(buffer + pos, set_2 + idx_2, n_elems * sizeof(uint32_t));
- pos += n_elems;
- }
-
- return pos;
-}
-
-size_t union_uint32_card(const uint32_t *set_1, size_t size_1,
- const uint32_t *set_2, size_t size_2) {
- size_t pos = 0, idx_1 = 0, idx_2 = 0;
-
- if (0 == size_2) {
- return size_1;
- }
- if (0 == size_1) {
- return size_2;
- }
-
- uint32_t val_1 = set_1[idx_1], val_2 = set_2[idx_2];
-
- while (true) {
- if (val_1 < val_2) {
- ++idx_1;
- ++pos;
- if (idx_1 >= size_1) break;
- val_1 = set_1[idx_1];
- } else if (val_2 < val_1) {
- ++idx_2;
- ++pos;
- if (idx_2 >= size_2) break;
- val_2 = set_2[idx_2];
- } else {
- ++idx_1;
- ++idx_2;
- ++pos;
- if (idx_1 >= size_1 || idx_2 >= size_2) break;
- val_1 = set_1[idx_1];
- val_2 = set_2[idx_2];
- }
- }
-
- if (idx_1 < size_1) {
- const size_t n_elems = size_1 - idx_1;
- pos += n_elems;
- } else if (idx_2 < size_2) {
- const size_t n_elems = size_2 - idx_2;
- pos += n_elems;
- }
- return pos;
-}
-
-
-
-size_t fast_union_uint16(const uint16_t *set_1, size_t size_1, const uint16_t *set_2,
- size_t size_2, uint16_t *buffer) {
-#if CROARING_IS_X64
- if( croaring_hardware_support() & ROARING_SUPPORTS_AVX2 ) {
- // compute union with smallest array first
- if (size_1 < size_2) {
- return union_vector16(set_1, (uint32_t)size_1,
- set_2, (uint32_t)size_2, buffer);
- } else {
- return union_vector16(set_2, (uint32_t)size_2,
- set_1, (uint32_t)size_1, buffer);
- }
- } else {
- // compute union with smallest array first
- if (size_1 < size_2) {
- return union_uint16(
- set_1, size_1, set_2, size_2, buffer);
- } else {
- return union_uint16(
- set_2, size_2, set_1, size_1, buffer);
- }
- }
-#else
- // compute union with smallest array first
- if (size_1 < size_2) {
- return union_uint16(
- set_1, size_1, set_2, size_2, buffer);
- } else {
- return union_uint16(
- set_2, size_2, set_1, size_1, buffer);
- }
-#endif
-}
-#if CROARING_IS_X64
-#if CROARING_COMPILER_SUPPORTS_AVX512
-CROARING_TARGET_AVX512
-static inline bool _avx512_memequals(const void *s1, const void *s2, size_t n) {
- const uint8_t *ptr1 = (const uint8_t *)s1;
- const uint8_t *ptr2 = (const uint8_t *)s2;
- const uint8_t *end1 = ptr1 + n;
- const uint8_t *end8 = ptr1 + ((n >> 3) << 3);
- const uint8_t *end32 = ptr1 + ((n >> 5) << 5);
- const uint8_t *end64 = ptr1 + ((n >> 6) << 6);
-
- while (ptr1 < end64){
- __m512i r1 = _mm512_loadu_si512((const __m512i*)ptr1);
- __m512i r2 = _mm512_loadu_si512((const __m512i*)ptr2);
-
- uint64_t mask = _mm512_cmpeq_epi8_mask(r1, r2);
-
- if (mask != UINT64_MAX) {
- return false;
- }
-
- ptr1 += 64;
- ptr2 += 64;
-
- }
-
- while (ptr1 < end32) {
- __m256i r1 = _mm256_loadu_si256((const __m256i*)ptr1);
- __m256i r2 = _mm256_loadu_si256((const __m256i*)ptr2);
- int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2));
- if ((uint32_t)mask != UINT32_MAX) {
- return false;
- }
- ptr1 += 32;
- ptr2 += 32;
- }
-
- while (ptr1 < end8) {
- uint64_t v1, v2;
- memcpy(&v1,ptr1,sizeof(uint64_t));
- memcpy(&v2,ptr2,sizeof(uint64_t));
- if (v1 != v2) {
- return false;
- }
- ptr1 += 8;
- ptr2 += 8;
- }
-
- while (ptr1 < end1) {
- if (*ptr1 != *ptr2) {
- return false;
- }
- ptr1++;
- ptr2++;
- }
-
- return true;
-}
-CROARING_UNTARGET_AVX512
-#endif // CROARING_COMPILER_SUPPORTS_AVX512
-
-CROARING_TARGET_AVX2
-static inline bool _avx2_memequals(const void *s1, const void *s2, size_t n) {
- const uint8_t *ptr1 = (const uint8_t *)s1;
- const uint8_t *ptr2 = (const uint8_t *)s2;
- const uint8_t *end1 = ptr1 + n;
- const uint8_t *end8 = ptr1 + n/8*8;
- const uint8_t *end32 = ptr1 + n/32*32;
-
- while (ptr1 < end32) {
- __m256i r1 = _mm256_loadu_si256((const __m256i*)ptr1);
- __m256i r2 = _mm256_loadu_si256((const __m256i*)ptr2);
- int mask = _mm256_movemask_epi8(_mm256_cmpeq_epi8(r1, r2));
- if ((uint32_t)mask != UINT32_MAX) {
- return false;
- }
- ptr1 += 32;
- ptr2 += 32;
- }
-
- while (ptr1 < end8) {
- uint64_t v1, v2;
- memcpy(&v1,ptr1,sizeof(uint64_t));
- memcpy(&v2,ptr2,sizeof(uint64_t));
- if (v1 != v2) {
- return false;
- }
- ptr1 += 8;
- ptr2 += 8;
- }
-
- while (ptr1 < end1) {
- if (*ptr1 != *ptr2) {
- return false;
- }
- ptr1++;
- ptr2++;
- }
-
- return true;
-}
-CROARING_UNTARGET_AVX2
-#endif
-
-bool memequals(const void *s1, const void *s2, size_t n) {
- if (n == 0) {
- return true;
- }
-#if CROARING_IS_X64
- int support = croaring_hardware_support();
-#if CROARING_COMPILER_SUPPORTS_AVX512
- if( support & ROARING_SUPPORTS_AVX512 ) {
- return _avx512_memequals(s1, s2, n);
- } else
-#endif // CROARING_COMPILER_SUPPORTS_AVX512
- if( support & ROARING_SUPPORTS_AVX2 ) {
- return _avx2_memequals(s1, s2, n);
- } else {
- return memcmp(s1, s2, n) == 0;
- }
-#else
- return memcmp(s1, s2, n) == 0;
-#endif
-}
-
-
-#if CROARING_IS_X64
-#if CROARING_COMPILER_SUPPORTS_AVX512
-CROARING_TARGET_AVX512
-ALLOW_UNALIGNED
-int avx512_array_container_to_uint32_array(void *vout, const uint16_t* array, size_t cardinality,
- uint32_t base) {
- int outpos = 0;
- uint32_t *out = (uint32_t *)vout;
- size_t i = 0;
- for ( ;i + sizeof(__m256i)/sizeof(uint16_t) <= cardinality; i += sizeof(__m256i)/sizeof(uint16_t)) {
- __m256i vinput = _mm256_loadu_si256((const __m256i*) (array + i));
- __m512i voutput = _mm512_add_epi32(_mm512_cvtepu16_epi32(vinput), _mm512_set1_epi32(base));
- _mm512_storeu_si512((__m512i*)(out + outpos), voutput);
- outpos += sizeof(__m512i)/sizeof(uint32_t);
- }
- for ( ; i < cardinality; ++i) {
- const uint32_t val = base + array[i];
- memcpy(out + outpos, &val,
- sizeof(uint32_t)); // should be compiled as a MOV on x64
- outpos++;
- }
- return outpos;
-}
-CROARING_UNTARGET_AVX512
-#endif // #if CROARING_COMPILER_SUPPORTS_AVX512
-#endif // #if CROARING_IS_X64
-
-
-#ifdef __cplusplus
-} } } // extern "C" { namespace roaring { namespace internal {
-#endif