aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/croaring/include/roaring/portability.h
blob: fba2a4b2542ccea0b1b3f7e1a69f59d71c05ad88 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
/*
 * portability.h
 *
 */

 /**
  * All macros should be prefixed with either CROARING or ROARING.
  * The library uses both ROARING_...
  * as well as CROAIRING_ as prefixes. The ROARING_ prefix is for
  * macros that are provided by the build system or that are closely
  * related to the format. The header macros may also use ROARING_.
  * The CROARING_ prefix is for internal macros that a user is unlikely
  * to ever interact with.
  */

#ifndef INCLUDE_PORTABILITY_H_
#define INCLUDE_PORTABILITY_H_

#ifndef _GNU_SOURCE
#define _GNU_SOURCE 1
#endif // _GNU_SOURCE
#ifndef __STDC_FORMAT_MACROS
#define __STDC_FORMAT_MACROS 1
#endif // __STDC_FORMAT_MACROS

#ifdef _MSC_VER
#define CROARING_VISUAL_STUDIO 1
/**
 * We want to differentiate carefully between
 * clang under visual studio and regular visual
 * studio.
 */
#ifdef __clang__
// clang under visual studio
#define CROARING_CLANG_VISUAL_STUDIO 1
#else
// just regular visual studio (best guess)
#define CROARING_REGULAR_VISUAL_STUDIO 1
#endif // __clang__
#endif // _MSC_VER
#ifndef CROARING_VISUAL_STUDIO
#define CROARING_VISUAL_STUDIO 0
#endif
#ifndef CROARING_CLANG_VISUAL_STUDIO
#define CROARING_CLANG_VISUAL_STUDIO 0
#endif
#ifndef CROARING_REGULAR_VISUAL_STUDIO
#define CROARING_REGULAR_VISUAL_STUDIO 0
#endif

#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE < 200809L)
#undef _POSIX_C_SOURCE
#endif

#ifndef _POSIX_C_SOURCE
#define _POSIX_C_SOURCE 200809L
#endif // !(defined(_POSIX_C_SOURCE)) || (_POSIX_C_SOURCE < 200809L)
#if !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)
#define _XOPEN_SOURCE 700
#endif // !(defined(_XOPEN_SOURCE)) || (_XOPEN_SOURCE < 700)

#ifdef __illumos__
#define __EXTENSIONS__
#endif

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>  // will provide posix_memalign with _POSIX_C_SOURCE as defined above
#ifdef __GLIBC__
#include <malloc.h>  // this should never be needed but there are some reports that it is needed.
#endif

#ifdef __cplusplus
extern "C" {  // portability definitions are in global scope, not a namespace
#endif

#if defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ != 8
#error This code assumes  64-bit long longs (by use of the GCC intrinsics). Your system is not currently supported.
#endif

#if CROARING_REGULAR_VISUAL_STUDIO
#ifndef __restrict__
#define __restrict__ __restrict
#endif // __restrict__
#endif // CROARING_REGULAR_VISUAL_STUDIO



#if defined(__x86_64__) || defined(_M_X64)
// we have an x64 processor
#define CROARING_IS_X64 1

#if defined(_MSC_VER) && (_MSC_VER < 1910)
// Old visual studio systems won't support AVX2 well.
#undef CROARING_IS_X64
#endif

#if defined(__clang_major__) && (__clang_major__<= 8) && !defined(__AVX2__)
// Older versions of clang have a bug affecting us
// https://stackoverflow.com/questions/57228537/how-does-one-use-pragma-clang-attribute-push-with-c-namespaces
#undef CROARING_IS_X64
#endif

#ifdef ROARING_DISABLE_X64
#undef CROARING_IS_X64
#endif
// we include the intrinsic header
#if !CROARING_REGULAR_VISUAL_STUDIO
/* Non-Microsoft C/C++-compatible compiler */
#include <x86intrin.h>  // on some recent GCC, this will declare posix_memalign



#if CROARING_CLANG_VISUAL_STUDIO

/**
 * You are not supposed, normally, to include these
 * headers directly. Instead you should either include intrin.h
 * or x86intrin.h. However, when compiling with clang
 * under Windows (i.e., when _MSC_VER is set), these headers
 * only get included *if* the corresponding features are detected
 * from macros:
 * e.g., if __AVX2__ is set... in turn,  we normally set these
 * macros by compiling against the corresponding architecture
 * (e.g., arch:AVX2, -mavx2, etc.) which compiles the whole
 * software with these advanced instructions. These headers would
 * normally guard against such usage, but we carefully included
 * <x86intrin.h>  (or <intrin.h>) before, so the headers
 * are fooled.
 */
#include <bmiintrin.h>   // for _blsr_u64
#include <lzcntintrin.h> // for  __lzcnt64
#include <immintrin.h>   // for most things (AVX2, AVX512, _popcnt64)
#include <smmintrin.h>
#include <tmmintrin.h>
#include <avxintrin.h>
#include <avx2intrin.h>
#include <wmmintrin.h>
#if _MSC_VER >= 1920
// Important: we need the AVX-512 headers:
#include <avx512fintrin.h>
#include <avx512dqintrin.h>
#include <avx512cdintrin.h>
#include <avx512bwintrin.h>
#include <avx512vlintrin.h>
#include <avx512vbmiintrin.h>
#include <avx512vbmi2intrin.h>
#include <avx512vpopcntdqintrin.h>
#endif // _MSC_VER >= 1920
// unfortunately, we may not get _blsr_u64, but, thankfully, clang
// has it as a macro.
#ifndef _blsr_u64
// we roll our own
#define _blsr_u64(n) ((n - 1) & n)
#endif //  _blsr_u64
#endif // SIMDJSON_CLANG_VISUAL_STUDIO


#endif // CROARING_REGULAR_VISUAL_STUDIO
#endif // defined(__x86_64__) || defined(_M_X64)

#if !defined(CROARING_USENEON) && !defined(DISABLENEON) && defined(__ARM_NEON)
#  define CROARING_USENEON
#endif
#if defined(CROARING_USENEON)
#  include <arm_neon.h>
#endif

#if !CROARING_REGULAR_VISUAL_STUDIO
/* Non-Microsoft C/C++-compatible compiler, assumes that it supports inline
 * assembly */
#define CROARING_INLINE_ASM 1
#endif  // _MSC_VER

#if CROARING_REGULAR_VISUAL_STUDIO
/* Microsoft C/C++-compatible compiler */
#include <intrin.h>

#ifndef __clang__  // if one compiles with MSVC *with* clang, then these
                   // intrinsics are defined!!!
#define CROARING_INTRINSICS 1
// sadly there is no way to check whether we are missing these intrinsics
// specifically.

/* wrappers for Visual Studio built-ins that look like gcc built-ins __builtin_ctzll */
/* result might be undefined when input_num is zero */
inline int roaring_trailing_zeroes(unsigned long long input_num) {
    unsigned long index;
#ifdef _WIN64  // highly recommended!!!
    _BitScanForward64(&index, input_num);
#else  // if we must support 32-bit Windows
    if ((uint32_t)input_num != 0) {
        _BitScanForward(&index, (uint32_t)input_num);
    } else {
        _BitScanForward(&index, (uint32_t)(input_num >> 32));
        index += 32;
    }
#endif // _WIN64
    return index;
}

/* wrappers for Visual Studio built-ins that look like gcc built-ins __builtin_clzll */
/* result might be undefined when input_num is zero */
inline int roaring_leading_zeroes(unsigned long long input_num) {
    unsigned long index;
#ifdef _WIN64  // highly recommended!!!
    _BitScanReverse64(&index, input_num);
#else  // if we must support 32-bit Windows
    if (input_num > 0xFFFFFFFF) {
        _BitScanReverse(&index, (uint32_t)(input_num >> 32));
        index += 32;
    } else {
        _BitScanReverse(&index, (uint32_t)(input_num));
    }
#endif // _WIN64
    return 63 - index;
}

/* Use #define so this is effective even under /Ob0 (no inline) */
#define roaring_unreachable __assume(0)
#endif // __clang__

#endif // CROARING_REGULAR_VISUAL_STUDIO

#ifndef CROARING_INTRINSICS
#define CROARING_INTRINSICS 1
#define roaring_unreachable __builtin_unreachable()
static inline int roaring_trailing_zeroes(unsigned long long input_num) { return __builtin_ctzll(input_num); }
static inline int roaring_leading_zeroes(unsigned long long input_num) { return __builtin_clzll(input_num); }
#endif

#if CROARING_REGULAR_VISUAL_STUDIO
#define ALIGNED(x) __declspec(align(x))
#elif defined(__GNUC__) || defined(__clang__)
#define ALIGNED(x) __attribute__((aligned(x)))
#else
#warning "Warning. Unrecognized compiler."
#define ALIGNED(x)
#endif

#if defined(__GNUC__) || defined(__clang__)
#define WARN_UNUSED __attribute__((warn_unused_result))
#else
#define WARN_UNUSED
#endif

#define IS_BIG_ENDIAN (*(uint16_t *)"\0\xff" < 0x100)

#ifdef CROARING_USENEON
// we can always compute the popcount fast.
#elif (defined(_M_ARM) || defined(_M_ARM64)) && ((defined(_WIN64) || defined(_WIN32)) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO)
// we will need this function:
static inline int roaring_hamming_backup(uint64_t x) {
  uint64_t c1 = UINT64_C(0x5555555555555555);
  uint64_t c2 = UINT64_C(0x3333333333333333);
  uint64_t c4 = UINT64_C(0x0F0F0F0F0F0F0F0F);
  x -= (x >> 1) & c1;
  x = (( x >> 2) & c2) + (x & c2); x=(x +(x>>4))&c4;
  x *= UINT64_C(0x0101010101010101);
  return x >> 56;
}
#endif


static inline int roaring_hamming(uint64_t x) {
#if defined(_WIN64) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO
#ifdef CROARING_USENEON
   return vaddv_u8(vcnt_u8(vcreate_u8(input_num)));
#elif defined(_M_ARM64)
  return roaring_hamming_backup(x);
  // (int) _CountOneBits64(x); is unavailable
#else  // _M_ARM64
  return (int) __popcnt64(x);
#endif // _M_ARM64
#elif defined(_WIN32) && defined(CROARING_REGULAR_VISUAL_STUDIO) && CROARING_REGULAR_VISUAL_STUDIO
#ifdef _M_ARM
  return roaring_hamming_backup(x);
  // _CountOneBits is unavailable
#else // _M_ARM
    return (int) __popcnt(( unsigned int)x) + (int)  __popcnt(( unsigned int)(x>>32));
#endif // _M_ARM
#else
    return __builtin_popcountll(x);
#endif
}

#ifndef UINT64_C
#define UINT64_C(c) (c##ULL)
#endif // UINT64_C

#ifndef UINT32_C
#define UINT32_C(c) (c##UL)
#endif // UINT32_C

#ifdef __cplusplus
}  // extern "C" {
#endif // __cplusplus


// this is almost standard?
#undef STRINGIFY_IMPLEMENTATION_
#undef STRINGIFY
#define STRINGIFY_IMPLEMENTATION_(a) #a
#define STRINGIFY(a) STRINGIFY_IMPLEMENTATION_(a)

// Our fast kernels require 64-bit systems.
//
// On 32-bit x86, we lack 64-bit popcnt, lzcnt, blsr instructions.
// Furthermore, the number of SIMD registers is reduced.
//
// On 32-bit ARM, we would have smaller registers.
//
// The library should still have the fallback kernel. It is
// slower, but it should run everywhere.

//
// Enable valid runtime implementations, and select CROARING_BUILTIN_IMPLEMENTATION
//

// We are going to use runtime dispatch.
#if CROARING_IS_X64
#ifdef __clang__
// clang does not have GCC push pop
// warning: clang attribute push can't be used within a namespace in clang up
// til 8.0 so CROARING_TARGET_REGION and CROARING_UNTARGET_REGION must be *outside* of a
// namespace.
#define CROARING_TARGET_REGION(T)                                                       \
  _Pragma(STRINGIFY(                                                           \
      clang attribute push(__attribute__((target(T))), apply_to = function)))
#define CROARING_UNTARGET_REGION _Pragma("clang attribute pop")
#elif defined(__GNUC__)
// GCC is easier
#define CROARING_TARGET_REGION(T)                                                       \
  _Pragma("GCC push_options") _Pragma(STRINGIFY(GCC target(T)))
#define CROARING_UNTARGET_REGION _Pragma("GCC pop_options")
#endif // clang then gcc

#endif // CROARING_IS_X64

// Default target region macros don't do anything.
#ifndef CROARING_TARGET_REGION
#define CROARING_TARGET_REGION(T)
#define CROARING_UNTARGET_REGION
#endif


#define CROARING_TARGET_AVX2 CROARING_TARGET_REGION("avx2,bmi,pclmul,lzcnt,popcnt")
#define CROARING_TARGET_AVX512 CROARING_TARGET_REGION("avx2,bmi,bmi2,pclmul,lzcnt,popcnt,avx512f,avx512dq,avx512bw,avx512vbmi2,avx512bitalg,avx512vpopcntdq")
#define CROARING_UNTARGET_AVX2 CROARING_UNTARGET_REGION
#define CROARING_UNTARGET_AVX512 CROARING_UNTARGET_REGION

#ifdef __AVX2__
// No need for runtime dispatching.
// It is unnecessary and harmful to old clang to tag regions.
#undef CROARING_TARGET_AVX2
#define CROARING_TARGET_AVX2
#undef CROARING_UNTARGET_AVX2
#define CROARING_UNTARGET_AVX2
#endif

#if defined(__AVX512F__) && defined(__AVX512DQ__) && defined(__AVX512BW__) && defined(__AVX512VBMI2__) && defined(__AVX512BITALG__) && defined(__AVX512VPOPCNTDQ__)
// No need for runtime dispatching.
// It is unnecessary and harmful to old clang to tag regions.
#undef CROARING_TARGET_AVX512
#define CROARING_TARGET_AVX512
#undef CROARING_UNTARGET_AVX512
#define CROARING_UNTARGET_AVX512
#endif

// Allow unaligned memory access
#if defined(__GNUC__) || defined(__clang__)
#define ALLOW_UNALIGNED __attribute__((no_sanitize("alignment")))
#else
#define ALLOW_UNALIGNED
#endif

#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
 #define CROARING_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#elif defined(_WIN32)
 #define CROARING_IS_BIG_ENDIAN 0
 #else
 #if defined(__APPLE__) || defined(__FreeBSD__) // defined __BYTE_ORDER__ && defined __ORDER_BIG_ENDIAN__
 #include <machine/endian.h>
 #elif defined(sun) || defined(__sun) // defined(__APPLE__) || defined(__FreeBSD__)
 #include <sys/byteorder.h>
 #else  // defined(__APPLE__) || defined(__FreeBSD__)

 #ifdef __has_include
 #if __has_include(<endian.h>)
 #include <endian.h>
 #endif //__has_include(<endian.h>)
 #endif //__has_include

 #endif // defined(__APPLE__) || defined(__FreeBSD__)


 #ifndef !defined(__BYTE_ORDER__) || !defined(__ORDER_LITTLE_ENDIAN__)
 #define CROARING_IS_BIG_ENDIAN 0
 #endif

 #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #define CROARING_IS_BIG_ENDIAN 0
 #else // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #define CROARING_IS_BIG_ENDIAN 1
 #endif // __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#endif

// Defines for the possible CROARING atomic implementations
#define CROARING_ATOMIC_IMPL_NONE          1
#define CROARING_ATOMIC_IMPL_CPP           2
#define CROARING_ATOMIC_IMPL_C             3
#define CROARING_ATOMIC_IMPL_C_WINDOWS     4

// If the use has forced a specific implementation, use that, otherwise,
// figure out the best implementation we can use.
#if !defined(CROARING_ATOMIC_IMPL)
  #if defined(__cplusplus) && __cplusplus >= 201103L
    #ifdef __has_include
      #if __has_include(<atomic>)
        #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP
      #endif //__has_include(<atomic>)
    #else
      // We lack __has_include to check:
      #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_CPP
    #endif //__has_include
  #elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__)
    #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C
  #elif CROARING_REGULAR_VISUAL_STUDIO
    // https://www.technetworkhub.com/c11-atomics-in-visual-studio-2022-version-17/
    #define CROARING_ATOMIC_IMPL CROARING_ATOMIC_IMPL_C_WINDOWS
  #endif
#endif // !defined(CROARING_ATOMIC_IMPL)

#if CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C
#include <stdatomic.h>
typedef _Atomic(uint32_t) croaring_refcount_t;

static inline void croaring_refcount_inc(croaring_refcount_t *val) {
    // Increasing the reference counter can always be done with
    // memory_order_relaxed: New references to an object can only be formed from
    // an existing reference, and passing an existing reference from one thread to
    // another must already provide any required synchronization.
    atomic_fetch_add_explicit(val, 1, memory_order_relaxed);
}

static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
    // It is important to enforce any possible access to the object in one thread
    // (through an existing reference) to happen before deleting the object in a
    // different thread. This is achieved by a "release" operation after dropping
    // a reference (any access to the object through this reference must obviously
    // happened before), and an "acquire" operation before deleting the object.
    bool is_zero = atomic_fetch_sub_explicit(val, 1, memory_order_release) == 1;
    if (is_zero) {
        atomic_thread_fence(memory_order_acquire);
    }
    return is_zero;
}

static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
    return atomic_load_explicit(val, memory_order_relaxed);
}
#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_CPP
#include <atomic>
typedef std::atomic<uint32_t> croaring_refcount_t;

static inline void croaring_refcount_inc(croaring_refcount_t *val) {
    val->fetch_add(1, std::memory_order_relaxed);
}

static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
    // See above comments on the c11 atomic implementation for memory ordering
    bool is_zero = val->fetch_sub(1, std::memory_order_release) == 1;
    if (is_zero) {
        std::atomic_thread_fence(std::memory_order_acquire);
    }
    return is_zero;
}

static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
    return val->load(std::memory_order_relaxed);
}
#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_C_WINDOWS
#include <intrin.h>
#pragma intrinsic(_InterlockedIncrement)
#pragma intrinsic(_InterlockedDecrement)

// _InterlockedIncrement and _InterlockedDecrement take a (signed) long, and
// overflow is defined to wrap, so we can pretend it is a uint32_t for our case
typedef volatile long croaring_refcount_t;

static inline void croaring_refcount_inc(croaring_refcount_t *val) {
    _InterlockedIncrement(val);
}

static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
    return _InterlockedDecrement(val) == 0;
}

static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
    // Per https://learn.microsoft.com/en-us/windows/win32/sync/interlocked-variable-access
    // > Simple reads and writes to properly-aligned 32-bit variables are atomic
    // > operations. In other words, you will not end up with only one portion
    // > of the variable updated; all bits are updated in an atomic fashion.
    return *val;
}
#elif CROARING_ATOMIC_IMPL == CROARING_ATOMIC_IMPL_NONE
#include <assert.h>
typedef uint32_t croaring_refcount_t;

static inline void croaring_refcount_inc(croaring_refcount_t *val) {
    *val += 1;
}

static inline bool croaring_refcount_dec(croaring_refcount_t *val) {
    assert(*val > 0);
    *val -= 1;
    return val == 0;
}

static inline uint32_t croaring_refcount_get(const croaring_refcount_t *val) {
    return *val;
}
#else
#error "Unknown atomic implementation"
#endif


// We need portability.h to be included first,
// but we also always want isadetection.h to be
// included (right after).
// See https://github.com/RoaringBitmap/CRoaring/issues/394
// There is no scenario where we want portability.h to
// be included, but not isadetection.h: the latter is a
// strict requirement.
#include <roaring/isadetection.h> // include it last!
#endif /* INCLUDE_PORTABILITY_H_ */