author     ilnaz <ilnaz@ydb.tech>  2022-12-13 16:01:38 +0300
committer  ilnaz <ilnaz@ydb.tech>  2022-12-13 16:01:38 +0300
commit     f2bea70bea01921ec43846224d100f2c70dd5719 (patch)
tree       eead917572063b63adc1c9a76284c8fbd10f25a3 /contrib/libs/liburing/src
parent     1ab9ee3dfe0ab4023a3a57bf55de31dff3eac908 (diff)
download   ydb-f2bea70bea01921ec43846224d100f2c70dd5719.tar.gz
Add cross-link
Diffstat (limited to 'contrib/libs/liburing/src')
-rw-r--r--  contrib/libs/liburing/src/arch/aarch64/lib.h           |   48
-rw-r--r--  contrib/libs/liburing/src/arch/aarch64/syscall.h       |   91
-rw-r--r--  contrib/libs/liburing/src/arch/syscall-defs.h          |   94
-rw-r--r--  contrib/libs/liburing/src/arch/x86/lib.h               |   11
-rw-r--r--  contrib/libs/liburing/src/arch/x86/syscall.h           |  296
-rw-r--r--  contrib/libs/liburing/src/include/liburing.h           | 1354
-rw-r--r--  contrib/libs/liburing/src/include/liburing/barrier.h   |   81
-rw-r--r--  contrib/libs/liburing/src/include/liburing/compat.h    |    9
-rw-r--r--  contrib/libs/liburing/src/include/liburing/io_uring.h  |  682
-rw-r--r--  contrib/libs/liburing/src/int_flags.h                  |    9
-rw-r--r--  contrib/libs/liburing/src/lib.h                        |   61
-rw-r--r--  contrib/libs/liburing/src/queue.c                      |  436
-rw-r--r--  contrib/libs/liburing/src/register.c                   |  370
-rw-r--r--  contrib/libs/liburing/src/setup.c                      |  370
-rw-r--r--  contrib/libs/liburing/src/syscall.c                    |   30
-rw-r--r--  contrib/libs/liburing/src/syscall.h                    |   51
16 files changed, 3993 insertions, 0 deletions
diff --git a/contrib/libs/liburing/src/arch/aarch64/lib.h b/contrib/libs/liburing/src/arch/aarch64/lib.h
new file mode 100644
index 00000000000..5a75c1a6723
--- /dev/null
+++ b/contrib/libs/liburing/src/arch/aarch64/lib.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef LIBURING_ARCH_AARCH64_LIB_H
+#define LIBURING_ARCH_AARCH64_LIB_H
+
+#include <elf.h>
+#include <sys/auxv.h>
+#include "../../syscall.h"
+
+static inline long __get_page_size(void)
+{
+ Elf64_Off buf[2];
+ long ret = 4096;
+ int fd;
+
+ fd = __sys_open("/proc/self/auxv", O_RDONLY, 0);
+ if (fd < 0)
+ return ret;
+
+ while (1) {
+ ssize_t x;
+
+ x = __sys_read(fd, buf, sizeof(buf));
+ if (x < (long) sizeof(buf))
+ break;
+
+ if (buf[0] == AT_PAGESZ) {
+ ret = buf[1];
+ break;
+ }
+ }
+
+ __sys_close(fd);
+ return ret;
+}
+
+static inline long get_page_size(void)
+{
+ static long cache_val;
+
+ if (cache_val)
+ return cache_val;
+
+ cache_val = __get_page_size();
+ return cache_val;
+}
+
+#endif /* #ifndef LIBURING_ARCH_AARCH64_LIB_H */
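The loop above exists because liburing's nolibc configuration cannot call libc; it parses /proc/self/auxv as raw (type, value) pairs of Elf64_Off words until it finds AT_PAGESZ. For comparison only (an editorial sketch, not part of the commit), the same lookup with libc available is a single getauxval() call:

#include <stdio.h>
#include <sys/auxv.h>   /* getauxval(), AT_PAGESZ */

int main(void)
{
    /* getauxval() walks the same auxv entries the kernel passed at exec time */
    unsigned long page_size = getauxval(AT_PAGESZ);

    if (page_size == 0)
        page_size = 4096;   /* same fallback the header above uses */

    printf("page size: %lu\n", page_size);
    return 0;
}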
diff --git a/contrib/libs/liburing/src/arch/aarch64/syscall.h b/contrib/libs/liburing/src/arch/aarch64/syscall.h
new file mode 100644
index 00000000000..3015ac37235
--- /dev/null
+++ b/contrib/libs/liburing/src/arch/aarch64/syscall.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef LIBURING_ARCH_AARCH64_SYSCALL_H
+#define LIBURING_ARCH_AARCH64_SYSCALL_H
+
+#if defined(__aarch64__)
+
+#define __do_syscallN(...) ({ \
+ __asm__ volatile ( \
+ "svc 0" \
+ : "=r"(x0) \
+ : __VA_ARGS__ \
+ : "memory", "cc"); \
+ (long) x0; \
+})
+
+#define __do_syscall0(__n) ({ \
+ register long x8 __asm__("x8") = __n; \
+ register long x0 __asm__("x0"); \
+ \
+ __do_syscallN("r" (x8)); \
+})
+
+#define __do_syscall1(__n, __a) ({ \
+ register long x8 __asm__("x8") = __n; \
+ register __typeof__(__a) x0 __asm__("x0") = __a; \
+ \
+ __do_syscallN("r" (x8), "0" (x0)); \
+})
+
+#define __do_syscall2(__n, __a, __b) ({ \
+ register long x8 __asm__("x8") = __n; \
+ register __typeof__(__a) x0 __asm__("x0") = __a; \
+ register __typeof__(__b) x1 __asm__("x1") = __b; \
+ \
+ __do_syscallN("r" (x8), "0" (x0), "r" (x1)); \
+})
+
+#define __do_syscall3(__n, __a, __b, __c) ({ \
+ register long x8 __asm__("x8") = __n; \
+ register __typeof__(__a) x0 __asm__("x0") = __a; \
+ register __typeof__(__b) x1 __asm__("x1") = __b; \
+ register __typeof__(__c) x2 __asm__("x2") = __c; \
+ \
+ __do_syscallN("r" (x8), "0" (x0), "r" (x1), "r" (x2)); \
+})
+
+#define __do_syscall4(__n, __a, __b, __c, __d) ({ \
+ register long x8 __asm__("x8") = __n; \
+ register __typeof__(__a) x0 __asm__("x0") = __a; \
+ register __typeof__(__b) x1 __asm__("x1") = __b; \
+ register __typeof__(__c) x2 __asm__("x2") = __c; \
+ register __typeof__(__d) x3 __asm__("x3") = __d; \
+ \
+ __do_syscallN("r" (x8), "0" (x0), "r" (x1), "r" (x2), "r" (x3));\
+})
+
+#define __do_syscall5(__n, __a, __b, __c, __d, __e) ({ \
+ register long x8 __asm__("x8") = __n; \
+ register __typeof__(__a) x0 __asm__("x0") = __a; \
+ register __typeof__(__b) x1 __asm__("x1") = __b; \
+ register __typeof__(__c) x2 __asm__("x2") = __c; \
+ register __typeof__(__d) x3 __asm__("x3") = __d; \
+ register __typeof__(__e) x4 __asm__("x4") = __e; \
+ \
+ __do_syscallN("r" (x8), "0" (x0), "r" (x1), "r" (x2), "r" (x3), \
+ "r"(x4)); \
+})
+
+#define __do_syscall6(__n, __a, __b, __c, __d, __e, __f) ({ \
+ register long x8 __asm__("x8") = __n; \
+ register __typeof__(__a) x0 __asm__("x0") = __a; \
+ register __typeof__(__b) x1 __asm__("x1") = __b; \
+ register __typeof__(__c) x2 __asm__("x2") = __c; \
+ register __typeof__(__d) x3 __asm__("x3") = __d; \
+ register __typeof__(__e) x4 __asm__("x4") = __e; \
+ register __typeof__(__f) x5 __asm__("x5") = __f; \
+ \
+ __do_syscallN("r" (x8), "0" (x0), "r" (x1), "r" (x2), "r" (x3), \
+ "r" (x4), "r"(x5)); \
+})
+
+#include "../syscall-defs.h"
+
+#else /* #if defined(__aarch64__) */
+
+#error #include "../generic/syscall.h"
+
+#endif /* #if defined(__aarch64__) */
+
+#endif /* #ifndef LIBURING_ARCH_AARCH64_SYSCALL_H */
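To illustrate how these macros are meant to be consumed (an editorial sketch, not part of the commit), a raw wrapper built on __do_syscall0/__do_syscall2 looks like the following; it assumes an aarch64 build, that <asm/unistd.h> provides the __NR_* numbers, and the include path is only indicative:

#include <asm/unistd.h>             /* __NR_getpid, __NR_kill */
#include "arch/aarch64/syscall.h"   /* illustrative path within this tree */

/* Raw getpid(2): the result comes back in x0, no libc involved. */
static inline long raw_getpid(void)
{
    return __do_syscall0(__NR_getpid);
}

/* Raw kill(2): like all raw syscalls, errors are returned as -errno. */
static inline long raw_kill(long pid, long sig)
{
    return __do_syscall2(__NR_kill, pid, sig);
}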
diff --git a/contrib/libs/liburing/src/arch/syscall-defs.h b/contrib/libs/liburing/src/arch/syscall-defs.h
new file mode 100644
index 00000000000..d351f8b736a
--- /dev/null
+++ b/contrib/libs/liburing/src/arch/syscall-defs.h
@@ -0,0 +1,94 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef LIBURING_ARCH_SYSCALL_DEFS_H
+#define LIBURING_ARCH_SYSCALL_DEFS_H
+
+#include <fcntl.h>
+
+static inline int __sys_open(const char *pathname, int flags, mode_t mode)
+{
+ /*
+ * Some architectures don't have __NR_open, but __NR_openat.
+ */
+#ifdef __NR_open
+ return (int) __do_syscall3(__NR_open, pathname, flags, mode);
+#else
+ return (int) __do_syscall4(__NR_openat, AT_FDCWD, pathname, flags, mode);
+#endif
+}
+
+static inline ssize_t __sys_read(int fd, void *buffer, size_t size)
+{
+ return (ssize_t) __do_syscall3(__NR_read, fd, buffer, size);
+}
+
+static inline void *__sys_mmap(void *addr, size_t length, int prot, int flags,
+ int fd, off_t offset)
+{
+ int nr;
+
+#if defined(__NR_mmap2)
+ nr = __NR_mmap2;
+ offset >>= 12;
+#else
+ nr = __NR_mmap;
+#endif
+ return (void *) __do_syscall6(nr, addr, length, prot, flags, fd, offset);
+}
+
+static inline int __sys_munmap(void *addr, size_t length)
+{
+ return (int) __do_syscall2(__NR_munmap, addr, length);
+}
+
+static inline int __sys_madvise(void *addr, size_t length, int advice)
+{
+ return (int) __do_syscall3(__NR_madvise, addr, length, advice);
+}
+
+static inline int __sys_getrlimit(int resource, struct rlimit *rlim)
+{
+ return (int) __do_syscall2(__NR_getrlimit, resource, rlim);
+}
+
+static inline int __sys_setrlimit(int resource, const struct rlimit *rlim)
+{
+ return (int) __do_syscall2(__NR_setrlimit, resource, rlim);
+}
+
+static inline int __sys_close(int fd)
+{
+ return (int) __do_syscall1(__NR_close, fd);
+}
+
+static inline int __sys_io_uring_register(unsigned int fd, unsigned int opcode,
+ const void *arg, unsigned int nr_args)
+{
+ return (int) __do_syscall4(__NR_io_uring_register, fd, opcode, arg,
+ nr_args);
+}
+
+static inline int __sys_io_uring_setup(unsigned int entries,
+ struct io_uring_params *p)
+{
+ return (int) __do_syscall2(__NR_io_uring_setup, entries, p);
+}
+
+static inline int __sys_io_uring_enter2(unsigned int fd, unsigned int to_submit,
+ unsigned int min_complete,
+ unsigned int flags, sigset_t *sig,
+ size_t sz)
+{
+ return (int) __do_syscall6(__NR_io_uring_enter, fd, to_submit,
+ min_complete, flags, sig, sz);
+}
+
+static inline int __sys_io_uring_enter(unsigned int fd, unsigned int to_submit,
+ unsigned int min_complete,
+ unsigned int flags, sigset_t *sig)
+{
+ return __sys_io_uring_enter2(fd, to_submit, min_complete, flags, sig,
+ _NSIG / 8);
+}
+
+#endif
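Note that, unlike their libc counterparts, these wrappers return -errno directly rather than -1 with errno set. A minimal sketch of using them to check whether the running kernel supports io_uring at all (editorial; the includes point at liburing's public header and the internal src/syscall.h added by this same commit, and the paths are only indicative):

#include <stdio.h>
#include <string.h>
#include "liburing.h"   /* struct io_uring_params */
#include "syscall.h"    /* internal liburing header added by this commit */

static int kernel_has_io_uring(void)
{
    struct io_uring_params p;
    int fd;

    memset(&p, 0, sizeof(p));
    fd = __sys_io_uring_setup(1, &p);
    if (fd < 0) {
        /* -ENOSYS: kernel built without io_uring support */
        fprintf(stderr, "io_uring_setup: %s\n", strerror(-fd));
        return 0;
    }
    __sys_close(fd);
    return 1;
}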
diff --git a/contrib/libs/liburing/src/arch/x86/lib.h b/contrib/libs/liburing/src/arch/x86/lib.h
new file mode 100644
index 00000000000..6ece2d44f22
--- /dev/null
+++ b/contrib/libs/liburing/src/arch/x86/lib.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef LIBURING_ARCH_X86_LIB_H
+#define LIBURING_ARCH_X86_LIB_H
+
+static inline long get_page_size(void)
+{
+ return 4096;
+}
+
+#endif /* #ifndef LIBURING_ARCH_X86_LIB_H */
diff --git a/contrib/libs/liburing/src/arch/x86/syscall.h b/contrib/libs/liburing/src/arch/x86/syscall.h
new file mode 100644
index 00000000000..fdd9aecf1a0
--- /dev/null
+++ b/contrib/libs/liburing/src/arch/x86/syscall.h
@@ -0,0 +1,296 @@
+/* SPDX-License-Identifier: MIT */
+
+#ifndef LIBURING_ARCH_X86_SYSCALL_H
+#define LIBURING_ARCH_X86_SYSCALL_H
+
+#if defined(__x86_64__)
+/**
+ * Note for syscall registers usage (x86-64):
+ * - %rax is the syscall number.
+ * - %rax is also the return value.
+ * - %rdi is the 1st argument.
+ * - %rsi is the 2nd argument.
+ * - %rdx is the 3rd argument.
+ * - %r10 is the 4th argument (**yes it's %r10, not %rcx!**).
+ * - %r8 is the 5th argument.
+ * - %r9 is the 6th argument.
+ *
+ * `syscall` instruction will clobber %r11 and %rcx.
+ *
+ * After the syscall returns to userspace:
+ * - %r11 will contain %rflags.
+ * - %rcx will contain the return address.
+ *
+ * IOW, after the syscall returns to userspace:
+ * %r11 == %rflags and %rcx == %rip.
+ */
+
+#define __do_syscall0(NUM) ({ \
+ intptr_t rax; \
+ \
+ __asm__ volatile( \
+ "syscall" \
+ : "=a"(rax) /* %rax */ \
+ : "a"(NUM) /* %rax */ \
+ : "rcx", "r11", "memory" \
+ ); \
+ rax; \
+})
+
+#define __do_syscall1(NUM, ARG1) ({ \
+ intptr_t rax; \
+ \
+ __asm__ volatile( \
+ "syscall" \
+ : "=a"(rax) /* %rax */ \
+ : "a"((NUM)), /* %rax */ \
+ "D"((ARG1)) /* %rdi */ \
+ : "rcx", "r11", "memory" \
+ ); \
+ rax; \
+})
+
+#define __do_syscall2(NUM, ARG1, ARG2) ({ \
+ intptr_t rax; \
+ \
+ __asm__ volatile( \
+ "syscall" \
+ : "=a"(rax) /* %rax */ \
+ : "a"((NUM)), /* %rax */ \
+ "D"((ARG1)), /* %rdi */ \
+ "S"((ARG2)) /* %rsi */ \
+ : "rcx", "r11", "memory" \
+ ); \
+ rax; \
+})
+
+#define __do_syscall3(NUM, ARG1, ARG2, ARG3) ({ \
+ intptr_t rax; \
+ \
+ __asm__ volatile( \
+ "syscall" \
+ : "=a"(rax) /* %rax */ \
+ : "a"((NUM)), /* %rax */ \
+ "D"((ARG1)), /* %rdi */ \
+ "S"((ARG2)), /* %rsi */ \
+ "d"((ARG3)) /* %rdx */ \
+ : "rcx", "r11", "memory" \
+ ); \
+ rax; \
+})
+
+#define __do_syscall4(NUM, ARG1, ARG2, ARG3, ARG4) ({ \
+ intptr_t rax; \
+ register __typeof__(ARG4) __r10 __asm__("r10") = (ARG4); \
+ \
+ __asm__ volatile( \
+ "syscall" \
+ : "=a"(rax) /* %rax */ \
+ : "a"((NUM)), /* %rax */ \
+ "D"((ARG1)), /* %rdi */ \
+ "S"((ARG2)), /* %rsi */ \
+ "d"((ARG3)), /* %rdx */ \
+ "r"(__r10) /* %r10 */ \
+ : "rcx", "r11", "memory" \
+ ); \
+ rax; \
+})
+
+#define __do_syscall5(NUM, ARG1, ARG2, ARG3, ARG4, ARG5) ({ \
+ intptr_t rax; \
+ register __typeof__(ARG4) __r10 __asm__("r10") = (ARG4); \
+ register __typeof__(ARG5) __r8 __asm__("r8") = (ARG5); \
+ \
+ __asm__ volatile( \
+ "syscall" \
+ : "=a"(rax) /* %rax */ \
+ : "a"((NUM)), /* %rax */ \
+ "D"((ARG1)), /* %rdi */ \
+ "S"((ARG2)), /* %rsi */ \
+ "d"((ARG3)), /* %rdx */ \
+ "r"(__r10), /* %r10 */ \
+ "r"(__r8) /* %r8 */ \
+ : "rcx", "r11", "memory" \
+ ); \
+ rax; \
+})
+
+#define __do_syscall6(NUM, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6) ({ \
+ intptr_t rax; \
+ register __typeof__(ARG4) __r10 __asm__("r10") = (ARG4); \
+ register __typeof__(ARG5) __r8 __asm__("r8") = (ARG5); \
+ register __typeof__(ARG6) __r9 __asm__("r9") = (ARG6); \
+ \
+ __asm__ volatile( \
+ "syscall" \
+ : "=a"(rax) /* %rax */ \
+ : "a"((NUM)), /* %rax */ \
+ "D"((ARG1)), /* %rdi */ \
+ "S"((ARG2)), /* %rsi */ \
+ "d"((ARG3)), /* %rdx */ \
+ "r"(__r10), /* %r10 */ \
+ "r"(__r8), /* %r8 */ \
+ "r"(__r9) /* %r9 */ \
+ : "rcx", "r11", "memory" \
+ ); \
+ rax; \
+})
+
+#include "../syscall-defs.h"
+
+#else /* #if defined(__x86_64__) */
+
+#ifdef CONFIG_NOLIBC
+/**
+ * Note for syscall registers usage (x86, 32-bit):
+ * - %eax is the syscall number.
+ * - %eax is also the return value.
+ * - %ebx is the 1st argument.
+ * - %ecx is the 2nd argument.
+ * - %edx is the 3rd argument.
+ * - %esi is the 4th argument.
+ * - %edi is the 5th argument.
+ * - %ebp is the 6th argument.
+ */
+
+#define __do_syscall0(NUM) ({ \
+ intptr_t eax; \
+ \
+ __asm__ volatile( \
+ "int $0x80" \
+ : "=a"(eax) /* %eax */ \
+ : "a"(NUM) /* %eax */ \
+ : "memory" \
+ ); \
+ eax; \
+})
+
+#define __do_syscall1(NUM, ARG1) ({ \
+ intptr_t eax; \
+ \
+ __asm__ volatile( \
+ "int $0x80" \
+ : "=a"(eax) /* %eax */ \
+ : "a"(NUM), /* %eax */ \
+ "b"((ARG1)) /* %ebx */ \
+ : "memory" \
+ ); \
+ eax; \
+})
+
+#define __do_syscall2(NUM, ARG1, ARG2) ({ \
+ intptr_t eax; \
+ \
+ __asm__ volatile( \
+ "int $0x80" \
+ : "=a" (eax) /* %eax */ \
+ : "a"(NUM), /* %eax */ \
+ "b"((ARG1)), /* %ebx */ \
+ "c"((ARG2)) /* %ecx */ \
+ : "memory" \
+ ); \
+ eax; \
+})
+
+#define __do_syscall3(NUM, ARG1, ARG2, ARG3) ({ \
+ intptr_t eax; \
+ \
+ __asm__ volatile( \
+ "int $0x80" \
+ : "=a" (eax) /* %eax */ \
+ : "a"(NUM), /* %eax */ \
+ "b"((ARG1)), /* %ebx */ \
+ "c"((ARG2)), /* %ecx */ \
+ "d"((ARG3)) /* %edx */ \
+ : "memory" \
+ ); \
+ eax; \
+})
+
+#define __do_syscall4(NUM, ARG1, ARG2, ARG3, ARG4) ({ \
+ intptr_t eax; \
+ \
+ __asm__ volatile( \
+ "int $0x80" \
+ : "=a" (eax) /* %eax */ \
+ : "a"(NUM), /* %eax */ \
+ "b"((ARG1)), /* %ebx */ \
+ "c"((ARG2)), /* %ecx */ \
+ "d"((ARG3)), /* %edx */ \
+ "S"((ARG4)) /* %esi */ \
+ : "memory" \
+ ); \
+ eax; \
+})
+
+#define __do_syscall5(NUM, ARG1, ARG2, ARG3, ARG4, ARG5) ({ \
+ intptr_t eax; \
+ \
+ __asm__ volatile( \
+ "int $0x80" \
+ : "=a" (eax) /* %eax */ \
+ : "a"(NUM), /* %eax */ \
+ "b"((ARG1)), /* %ebx */ \
+ "c"((ARG2)), /* %ecx */ \
+ "d"((ARG3)), /* %edx */ \
+ "S"((ARG4)), /* %esi */ \
+ "D"((ARG5)) /* %edi */ \
+ : "memory" \
+ ); \
+ eax; \
+})
+
+
+/*
+ * On i386, the 6th argument of syscall goes in %ebp. However, both Clang
+ * and GCC cannot use %ebp in the clobber list and in the "r" constraint
+ * without using -fomit-frame-pointer. To make it always available for
+ * any kind of compilation, the below workaround is implemented:
+ *
+ * 1) Push the 6th argument.
+ * 2) Push %ebp.
+ * 3) Load the 6th argument from 4(%esp) into %ebp.
+ * 4) Do the syscall (int $0x80).
+ * 5) Pop %ebp (restore the old value of %ebp).
+ * 6) Add 4 to %esp (restore the stack pointer).
+ *
+ * WARNING:
+ * Don't use register variables for __do_syscall6(); there is a known
+ * GCC bug that results in an endless loop.
+ *
+ * BugLink: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105032
+ *
+ */
+#define __do_syscall6(NUM, ARG1, ARG2, ARG3, ARG4, ARG5, ARG6) ({ \
+ intptr_t eax = (intptr_t)(NUM); \
+ intptr_t arg6 = (intptr_t)(ARG6); /* Always in memory */ \
+ __asm__ volatile ( \
+ "pushl %[_arg6]\n\t" \
+ "pushl %%ebp\n\t" \
+ "movl 4(%%esp),%%ebp\n\t" \
+ "int $0x80\n\t" \
+ "popl %%ebp\n\t" \
+ "addl $4,%%esp" \
+ : "+a"(eax) /* %eax */ \
+ : "b"(ARG1), /* %ebx */ \
+ "c"(ARG2), /* %ecx */ \
+ "d"(ARG3), /* %edx */ \
+ "S"(ARG4), /* %esi */ \
+ "D"(ARG5), /* %edi */ \
+ [_arg6]"m"(arg6) /* memory */ \
+ : "memory", "cc" \
+ ); \
+ eax; \
+})
+
+#include "../syscall-defs.h"
+
+#else /* #ifdef CONFIG_NOLIBC */
+
+#error #include "../generic/syscall.h"
+
+#endif /* #ifdef CONFIG_NOLIBC */
+
+#endif /* #if defined(__x86_64__) */
+
+#endif /* #ifndef LIBURING_ARCH_X86_SYSCALL_H */
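As a concrete illustration of the x86-64 register convention documented at the top of this file (again an editorial sketch, not part of the commit), a raw write(2) looks like this; <asm/unistd.h> supplies __NR_write and the include path is only indicative:

#include <asm/unistd.h>         /* __NR_write */
#include "arch/x86/syscall.h"   /* illustrative path within this tree */

/* Write a fixed message to stdout without going through libc. */
static long raw_hello(void)
{
    static const char msg[] = "hello from a raw syscall\n";

    /* ARG1 -> %rdi, ARG2 -> %rsi, ARG3 -> %rdx; the result returns in %rax */
    return __do_syscall3(__NR_write, 1, msg, sizeof(msg) - 1);
}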
diff --git a/contrib/libs/liburing/src/include/liburing.h b/contrib/libs/liburing/src/include/liburing.h
new file mode 100644
index 00000000000..12a703fdd2e
--- /dev/null
+++ b/contrib/libs/liburing/src/include/liburing.h
@@ -0,0 +1,1354 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIB_URING_H
+#define LIB_URING_H
+
+#ifndef _XOPEN_SOURCE
+#define _XOPEN_SOURCE 500 /* Required for glibc to expose sigset_t */
+#endif
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE /* Required for musl to expose cpu_set_t */
+#endif
+
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <inttypes.h>
+#include <time.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <linux/swab.h>
+#include "liburing/compat.h"
+#include "liburing/io_uring.h"
+#include "liburing/barrier.h"
+
+#ifndef uring_unlikely
+#define uring_unlikely(cond) __builtin_expect(!!(cond), 0)
+#endif
+
+#ifndef uring_likely
+#define uring_likely(cond) __builtin_expect(!!(cond), 1)
+#endif
+
+#ifdef __alpha__
+/*
+ * alpha and mips are the exceptions, all other architectures have
+ * common numbers for new system calls.
+ */
+#ifndef __NR_io_uring_setup
+#define __NR_io_uring_setup 535
+#endif
+#ifndef __NR_io_uring_enter
+#define __NR_io_uring_enter 536
+#endif
+#ifndef __NR_io_uring_register
+#define __NR_io_uring_register 537
+#endif
+#elif defined __mips__
+#ifndef __NR_io_uring_setup
+#define __NR_io_uring_setup (__NR_Linux + 425)
+#endif
+#ifndef __NR_io_uring_enter
+#define __NR_io_uring_enter (__NR_Linux + 426)
+#endif
+#ifndef __NR_io_uring_register
+#define __NR_io_uring_register (__NR_Linux + 427)
+#endif
+#else /* !__alpha__ and !__mips__ */
+#ifndef __NR_io_uring_setup
+#define __NR_io_uring_setup 425
+#endif
+#ifndef __NR_io_uring_enter
+#define __NR_io_uring_enter 426
+#endif
+#ifndef __NR_io_uring_register
+#define __NR_io_uring_register 427
+#endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * Library interface to io_uring
+ */
+struct io_uring_sq {
+ unsigned *khead;
+ unsigned *ktail;
+ // Deprecated: use `ring_mask` instead of `*kring_mask`
+ unsigned *kring_mask;
+ // Deprecated: use `ring_entries` instead of `*kring_entries`
+ unsigned *kring_entries;
+ unsigned *kflags;
+ unsigned *kdropped;
+ unsigned *array;
+ struct io_uring_sqe *sqes;
+
+ unsigned sqe_head;
+ unsigned sqe_tail;
+
+ size_t ring_sz;
+ void *ring_ptr;
+
+ unsigned ring_mask;
+ unsigned ring_entries;
+
+ unsigned pad[2];
+};
+
+struct io_uring_cq {
+ unsigned *khead;
+ unsigned *ktail;
+ // Deprecated: use `ring_mask` instead of `*kring_mask`
+ unsigned *kring_mask;
+ // Deprecated: use `ring_entries` instead of `*kring_entries`
+ unsigned *kring_entries;
+ unsigned *kflags;
+ unsigned *koverflow;
+ struct io_uring_cqe *cqes;
+
+ size_t ring_sz;
+ void *ring_ptr;
+
+ unsigned ring_mask;
+ unsigned ring_entries;
+
+ unsigned pad[2];
+};
+
+struct io_uring {
+ struct io_uring_sq sq;
+ struct io_uring_cq cq;
+ unsigned flags;
+ int ring_fd;
+
+ unsigned features;
+ int enter_ring_fd;
+ __u8 int_flags;
+ __u8 pad[3];
+ unsigned pad2;
+};
+
+/*
+ * Library interface
+ */
+
+/*
+ * return an allocated io_uring_probe structure, or NULL if probe fails (for
+ * example, if it is not available). The caller is responsible for freeing it
+ */
+struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring);
+/* same as io_uring_get_probe_ring, but takes care of ring init and teardown */
+struct io_uring_probe *io_uring_get_probe(void);
+
+/*
+ * frees a probe allocated through io_uring_get_probe() or
+ * io_uring_get_probe_ring()
+ */
+void io_uring_free_probe(struct io_uring_probe *probe);
+
+static inline int io_uring_opcode_supported(const struct io_uring_probe *p,
+ int op)
+{
+ if (op > p->last_op)
+ return 0;
+ return (p->ops[op].flags & IO_URING_OP_SUPPORTED) != 0;
+}
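The probe interface above is the supported way to ask the kernel which opcodes it implements. A short usage sketch (editorial, error handling trimmed):

#include <stdio.h>
#include "liburing.h"

static void report_send_zc_support(void)
{
    struct io_uring_probe *probe = io_uring_get_probe();

    if (!probe) {
        fprintf(stderr, "io_uring probe not available\n");
        return;
    }
    printf("IORING_OP_SEND_ZC supported: %d\n",
           io_uring_opcode_supported(probe, IORING_OP_SEND_ZC));
    io_uring_free_probe(probe);
}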
+
+int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
+ struct io_uring_params *p);
+int io_uring_queue_init(unsigned entries, struct io_uring *ring,
+ unsigned flags);
+int io_uring_queue_mmap(int fd, struct io_uring_params *p,
+ struct io_uring *ring);
+int io_uring_ring_dontfork(struct io_uring *ring);
+void io_uring_queue_exit(struct io_uring *ring);
+unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
+ struct io_uring_cqe **cqes, unsigned count);
+int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
+ unsigned wait_nr, struct __kernel_timespec *ts,
+ sigset_t *sigmask);
+int io_uring_wait_cqe_timeout(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr,
+ struct __kernel_timespec *ts);
+int io_uring_submit(struct io_uring *ring);
+int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr);
+int io_uring_submit_and_wait_timeout(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr,
+ unsigned wait_nr,
+ struct __kernel_timespec *ts,
+ sigset_t *sigmask);
+
+int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs,
+ unsigned nr_iovecs);
+int io_uring_register_buffers_tags(struct io_uring *ring,
+ const struct iovec *iovecs,
+ const __u64 *tags, unsigned nr);
+int io_uring_register_buffers_sparse(struct io_uring *ring, unsigned nr);
+int io_uring_register_buffers_update_tag(struct io_uring *ring,
+ unsigned off,
+ const struct iovec *iovecs,
+ const __u64 *tags, unsigned nr);
+int io_uring_unregister_buffers(struct io_uring *ring);
+
+int io_uring_register_files(struct io_uring *ring, const int *files,
+ unsigned nr_files);
+int io_uring_register_files_tags(struct io_uring *ring, const int *files,
+ const __u64 *tags, unsigned nr);
+int io_uring_register_files_sparse(struct io_uring *ring, unsigned nr);
+int io_uring_register_files_update_tag(struct io_uring *ring, unsigned off,
+ const int *files, const __u64 *tags,
+ unsigned nr_files);
+
+int io_uring_unregister_files(struct io_uring *ring);
+int io_uring_register_files_update(struct io_uring *ring, unsigned off,
+ const int *files, unsigned nr_files);
+int io_uring_register_eventfd(struct io_uring *ring, int fd);
+int io_uring_register_eventfd_async(struct io_uring *ring, int fd);
+int io_uring_unregister_eventfd(struct io_uring *ring);
+int io_uring_register_probe(struct io_uring *ring, struct io_uring_probe *p,
+ unsigned nr);
+int io_uring_register_personality(struct io_uring *ring);
+int io_uring_unregister_personality(struct io_uring *ring, int id);
+int io_uring_register_restrictions(struct io_uring *ring,
+ struct io_uring_restriction *res,
+ unsigned int nr_res);
+int io_uring_enable_rings(struct io_uring *ring);
+int __io_uring_sqring_wait(struct io_uring *ring);
+int io_uring_register_iowq_aff(struct io_uring *ring, size_t cpusz,
+ const cpu_set_t *mask);
+int io_uring_unregister_iowq_aff(struct io_uring *ring);
+int io_uring_register_iowq_max_workers(struct io_uring *ring,
+ unsigned int *values);
+int io_uring_register_ring_fd(struct io_uring *ring);
+int io_uring_unregister_ring_fd(struct io_uring *ring);
+int io_uring_register_buf_ring(struct io_uring *ring,
+ struct io_uring_buf_reg *reg, unsigned int flags);
+int io_uring_unregister_buf_ring(struct io_uring *ring, int bgid);
+int io_uring_register_sync_cancel(struct io_uring *ring,
+ struct io_uring_sync_cancel_reg *reg);
+
+int io_uring_register_file_alloc_range(struct io_uring *ring,
+ unsigned off, unsigned len);
+
+int io_uring_get_events(struct io_uring *ring);
+int io_uring_submit_and_get_events(struct io_uring *ring);
+
+/*
+ * io_uring syscalls.
+ */
+int io_uring_enter(unsigned int fd, unsigned int to_submit,
+ unsigned int min_complete, unsigned int flags, sigset_t *sig);
+int io_uring_enter2(unsigned int fd, unsigned int to_submit,
+ unsigned int min_complete, unsigned int flags,
+ sigset_t *sig, size_t sz);
+int io_uring_setup(unsigned int entries, struct io_uring_params *p);
+int io_uring_register(unsigned int fd, unsigned int opcode, const void *arg,
+ unsigned int nr_args);
+
+/*
+ * Helper for the peek/wait single cqe functions. Exported because of that,
+ * but probably shouldn't be used directly in an application.
+ */
+int __io_uring_get_cqe(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr, unsigned submit,
+ unsigned wait_nr, sigset_t *sigmask);
+
+#define LIBURING_UDATA_TIMEOUT ((__u64) -1)
+
+/*
+ * Calculates the step size for CQE iteration.
+ * For standard CQEs it's 1, for big CQEs it's 2.
+ */
+#define io_uring_cqe_shift(ring) \
+ (!!((ring)->flags & IORING_SETUP_CQE32))
+
+#define io_uring_cqe_index(ring,ptr,mask) \
+ (((ptr) & (mask)) << io_uring_cqe_shift(ring))
+
+#define io_uring_for_each_cqe(ring, head, cqe) \
+ /* \
+ * io_uring_smp_load_acquire() enforces the order of tail \
+ * and CQE reads. \
+ */ \
+ for (head = *(ring)->cq.khead; \
+ (cqe = (head != io_uring_smp_load_acquire((ring)->cq.ktail) ? \
+ &(ring)->cq.cqes[io_uring_cqe_index(ring, head, (ring)->cq.ring_mask)] : NULL)); \
+ head++) \
+
+/*
+ * Must be called after io_uring_for_each_cqe()
+ */
+static inline void io_uring_cq_advance(struct io_uring *ring,
+ unsigned nr)
+{
+ if (nr) {
+ struct io_uring_cq *cq = &ring->cq;
+
+ /*
+ * Ensure that the kernel only sees the new value of the head
+ * index after the CQEs have been read.
+ */
+ io_uring_smp_store_release(cq->khead, *cq->khead + nr);
+ }
+}
+
+/*
+ * Must be called after io_uring_{peek,wait}_cqe() after the cqe has
+ * been processed by the application.
+ */
+static inline void io_uring_cqe_seen(struct io_uring *ring,
+ struct io_uring_cqe *cqe)
+{
+ if (cqe)
+ io_uring_cq_advance(ring, 1);
+}
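A typical consumer pairs io_uring_for_each_cqe() with one batched io_uring_cq_advance() instead of calling io_uring_cqe_seen() per entry, so the head is published with a single release store. A sketch (editorial; handle_cqe() is a hypothetical stand-in for application logic):

#include <stdio.h>
#include "liburing.h"

/* Hypothetical per-completion handler, stands in for application logic. */
static void handle_cqe(struct io_uring_cqe *cqe)
{
    printf("user_data=%llu res=%d\n",
           (unsigned long long) cqe->user_data, cqe->res);
}

/* Drain every completion currently sitting in the CQ ring. */
static void drain_completions(struct io_uring *ring)
{
    struct io_uring_cqe *cqe;
    unsigned head, seen = 0;

    io_uring_for_each_cqe(ring, head, cqe) {
        handle_cqe(cqe);
        seen++;
    }
    io_uring_cq_advance(ring, seen);   /* one release store for the whole batch */
}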
+
+/*
+ * Command prep helpers
+ */
+
+/*
+ * Associate pointer @data with the sqe, for later retrieval from the cqe
+ * at command completion time with io_uring_cqe_get_data().
+ */
+static inline void io_uring_sqe_set_data(struct io_uring_sqe *sqe, void *data)
+{
+ sqe->user_data = (unsigned long) data;
+}
+
+static inline void *io_uring_cqe_get_data(const struct io_uring_cqe *cqe)
+{
+ return (void *) (uintptr_t) cqe->user_data;
+}
+
+/*
+ * Assign a 64-bit value to this sqe, which can get retrieved at completion
+ * time with io_uring_cqe_get_data64. Just like the non-64 variants, except
+ * these store a 64-bit type rather than a data pointer.
+ */
+static inline void io_uring_sqe_set_data64(struct io_uring_sqe *sqe,
+ __u64 data)
+{
+ sqe->user_data = data;
+}
+
+static inline __u64 io_uring_cqe_get_data64(const struct io_uring_cqe *cqe)
+{
+ return cqe->user_data;
+}
+
+/*
+ * Tell the app we have the 64-bit variants of the get/set userdata
+ */
+#define LIBURING_HAVE_DATA64
+
+static inline void io_uring_sqe_set_flags(struct io_uring_sqe *sqe,
+ unsigned flags)
+{
+ sqe->flags = (__u8) flags;
+}
+
+static inline void __io_uring_set_target_fixed_file(struct io_uring_sqe *sqe,
+ unsigned int file_index)
+{
+ /* 0 means no fixed files, indexes should be encoded as "index + 1" */
+ sqe->file_index = file_index + 1;
+}
+
+static inline void io_uring_prep_rw(int op, struct io_uring_sqe *sqe, int fd,
+ const void *addr, unsigned len,
+ __u64 offset)
+{
+ sqe->opcode = (__u8) op;
+ sqe->flags = 0;
+ sqe->ioprio = 0;
+ sqe->fd = fd;
+ sqe->off = offset;
+ sqe->addr = (unsigned long) addr;
+ sqe->len = len;
+ sqe->rw_flags = 0;
+ sqe->buf_index = 0;
+ sqe->personality = 0;
+ sqe->file_index = 0;
+ sqe->addr3 = 0;
+ sqe->__pad2[0] = 0;
+}
+
+/**
+ * @pre Either fd_in or fd_out must be a pipe.
+ * @param off_in If fd_in refers to a pipe, off_in must be (int64_t) -1;
+ * If fd_in does not refer to a pipe and off_in is (int64_t) -1,
+ * then bytes are read from fd_in starting from the file offset
+ * and it is adjusted appropriately;
+ * If fd_in does not refer to a pipe and off_in is not
+ * (int64_t) -1, then the starting offset of fd_in will be
+ * off_in.
+ * @param off_out The description of off_in also applies to off_out.
+ * @param splice_flags see man splice(2) for description of flags.
+ *
+ * This splice operation can be used to implement sendfile by splicing to an
+ * intermediate pipe first, then splice to the final destination.
+ * In fact, the implementation of sendfile in kernel uses splice internally.
+ *
+ * NOTE that even if fd_in or fd_out refers to a pipe, the splice operation
+ * can still fail with EINVAL if one of the fds doesn't explicitly support the
+ * splice operation, e.g. reading from a terminal is unsupported from kernel 5.7
+ * to 5.11.
+ * Check issue #291 for more information.
+ */
+static inline void io_uring_prep_splice(struct io_uring_sqe *sqe,
+ int fd_in, int64_t off_in,
+ int fd_out, int64_t off_out,
+ unsigned int nbytes,
+ unsigned int splice_flags)
+{
+ io_uring_prep_rw(IORING_OP_SPLICE, sqe, fd_out, NULL, nbytes,
+ (__u64) off_out);
+ sqe->splice_off_in = (__u64) off_in;
+ sqe->splice_fd_in = fd_in;
+ sqe->splice_flags = splice_flags;
+}
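The comment above sketches how a sendfile-like copy is built from two splices through an intermediate pipe. One way to express that with linked SQEs (an editorial sketch, assuming 'ring' is initialized and 'len' fits in the pipe; error handling omitted):

#include <unistd.h>
#include "liburing.h"

static int queue_sendfile(struct io_uring *ring, int file_fd, int sock_fd,
                          unsigned int len)
{
    struct io_uring_sqe *sqe;
    int pipefd[2];

    if (pipe(pipefd) < 0)
        return -1;

    /* file -> pipe: off_in is a real file offset, off_out must be -1 (pipe) */
    sqe = io_uring_get_sqe(ring);
    io_uring_prep_splice(sqe, file_fd, 0, pipefd[1], -1, len, 0);
    sqe->flags |= IOSQE_IO_LINK;   /* run the second splice only after this one */

    /* pipe -> socket: both offsets are -1, neither side is a regular file */
    sqe = io_uring_get_sqe(ring);
    io_uring_prep_splice(sqe, pipefd[0], -1, sock_fd, -1, len, 0);

    return io_uring_submit(ring);
}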
+
+static inline void io_uring_prep_tee(struct io_uring_sqe *sqe,
+ int fd_in, int fd_out,
+ unsigned int nbytes,
+ unsigned int splice_flags)
+{
+ io_uring_prep_rw(IORING_OP_TEE, sqe, fd_out, NULL, nbytes, 0);
+ sqe->splice_off_in = 0;
+ sqe->splice_fd_in = fd_in;
+ sqe->splice_flags = splice_flags;
+}
+
+static inline void io_uring_prep_readv(struct io_uring_sqe *sqe, int fd,
+ const struct iovec *iovecs,
+ unsigned nr_vecs, __u64 offset)
+{
+ io_uring_prep_rw(IORING_OP_READV, sqe, fd, iovecs, nr_vecs, offset);
+}
+
+static inline void io_uring_prep_readv2(struct io_uring_sqe *sqe, int fd,
+ const struct iovec *iovecs,
+ unsigned nr_vecs, __u64 offset,
+ int flags)
+{
+ io_uring_prep_readv(sqe, fd, iovecs, nr_vecs, offset);
+ sqe->rw_flags = flags;
+}
+
+static inline void io_uring_prep_read_fixed(struct io_uring_sqe *sqe, int fd,
+ void *buf, unsigned nbytes,
+ __u64 offset, int buf_index)
+{
+ io_uring_prep_rw(IORING_OP_READ_FIXED, sqe, fd, buf, nbytes, offset);
+ sqe->buf_index = (__u16) buf_index;
+}
+
+static inline void io_uring_prep_writev(struct io_uring_sqe *sqe, int fd,
+ const struct iovec *iovecs,
+ unsigned nr_vecs, __u64 offset)
+{
+ io_uring_prep_rw(IORING_OP_WRITEV, sqe, fd, iovecs, nr_vecs, offset);
+}
+
+static inline void io_uring_prep_writev2(struct io_uring_sqe *sqe, int fd,
+ const struct iovec *iovecs,
+ unsigned nr_vecs, __u64 offset,
+ int flags)
+{
+ io_uring_prep_writev(sqe, fd, iovecs, nr_vecs, offset);
+ sqe->rw_flags = flags;
+}
+
+static inline void io_uring_prep_write_fixed(struct io_uring_sqe *sqe, int fd,
+ const void *buf, unsigned nbytes,
+ __u64 offset, int buf_index)
+{
+ io_uring_prep_rw(IORING_OP_WRITE_FIXED, sqe, fd, buf, nbytes, offset);
+ sqe->buf_index = (__u16) buf_index;
+}
+
+static inline void io_uring_prep_recvmsg(struct io_uring_sqe *sqe, int fd,
+ struct msghdr *msg, unsigned flags)
+{
+ io_uring_prep_rw(IORING_OP_RECVMSG, sqe, fd, msg, 1, 0);
+ sqe->msg_flags = flags;
+}
+
+static inline void io_uring_prep_recvmsg_multishot(struct io_uring_sqe *sqe, int fd,
+ struct msghdr *msg, unsigned flags)
+{
+ io_uring_prep_recvmsg(sqe, fd, msg, flags);
+ sqe->ioprio |= IORING_RECV_MULTISHOT;
+}
+
+static inline void io_uring_prep_sendmsg(struct io_uring_sqe *sqe, int fd,
+ const struct msghdr *msg,
+ unsigned flags)
+{
+ io_uring_prep_rw(IORING_OP_SENDMSG, sqe, fd, msg, 1, 0);
+ sqe->msg_flags = flags;
+}
+
+static inline unsigned __io_uring_prep_poll_mask(unsigned poll_mask)
+{
+#if __BYTE_ORDER == __BIG_ENDIAN
+ poll_mask = __swahw32(poll_mask);
+#endif
+ return poll_mask;
+}
+
+static inline void io_uring_prep_poll_add(struct io_uring_sqe *sqe, int fd,
+ unsigned poll_mask)
+{
+ io_uring_prep_rw(IORING_OP_POLL_ADD, sqe, fd, NULL, 0, 0);
+ sqe->poll32_events = __io_uring_prep_poll_mask(poll_mask);
+}
+
+static inline void io_uring_prep_poll_multishot(struct io_uring_sqe *sqe,
+ int fd, unsigned poll_mask)
+{
+ io_uring_prep_poll_add(sqe, fd, poll_mask);
+ sqe->len = IORING_POLL_ADD_MULTI;
+}
+
+static inline void io_uring_prep_poll_remove(struct io_uring_sqe *sqe,
+ __u64 user_data)
+{
+ io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, NULL, 0, 0);
+ sqe->addr = user_data;
+}
+
+static inline void io_uring_prep_poll_update(struct io_uring_sqe *sqe,
+ __u64 old_user_data,
+ __u64 new_user_data,
+ unsigned poll_mask, unsigned flags)
+{
+ io_uring_prep_rw(IORING_OP_POLL_REMOVE, sqe, -1, NULL, flags,
+ new_user_data);
+ sqe->addr = old_user_data;
+ sqe->poll32_events = __io_uring_prep_poll_mask(poll_mask);
+}
+
+static inline void io_uring_prep_fsync(struct io_uring_sqe *sqe, int fd,
+ unsigned fsync_flags)
+{
+ io_uring_prep_rw(IORING_OP_FSYNC, sqe, fd, NULL, 0, 0);
+ sqe->fsync_flags = fsync_flags;
+}
+
+static inline void io_uring_prep_nop(struct io_uring_sqe *sqe)
+{
+ io_uring_prep_rw(IORING_OP_NOP, sqe, -1, NULL, 0, 0);
+}
+
+static inline void io_uring_prep_timeout(struct io_uring_sqe *sqe,
+ struct __kernel_timespec *ts,
+ unsigned count, unsigned flags)
+{
+ io_uring_prep_rw(IORING_OP_TIMEOUT, sqe, -1, ts, 1, count);
+ sqe->timeout_flags = flags;
+}
+
+static inline void io_uring_prep_timeout_remove(struct io_uring_sqe *sqe,
+ __u64 user_data, unsigned flags)
+{
+ io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1, NULL, 0, 0);
+ sqe->addr = user_data;
+ sqe->timeout_flags = flags;
+}
+
+static inline void io_uring_prep_timeout_update(struct io_uring_sqe *sqe,
+ struct __kernel_timespec *ts,
+ __u64 user_data, unsigned flags)
+{
+ io_uring_prep_rw(IORING_OP_TIMEOUT_REMOVE, sqe, -1, NULL, 0,
+ (uintptr_t) ts);
+ sqe->addr = user_data;
+ sqe->timeout_flags = flags | IORING_TIMEOUT_UPDATE;
+}
+
+static inline void io_uring_prep_accept(struct io_uring_sqe *sqe, int fd,
+ struct sockaddr *addr,
+ socklen_t *addrlen, int flags)
+{
+ io_uring_prep_rw(IORING_OP_ACCEPT, sqe, fd, addr, 0,
+ (__u64) (unsigned long) addrlen);
+ sqe->accept_flags = (__u32) flags;
+}
+
+/* accept directly into the fixed file table */
+static inline void io_uring_prep_accept_direct(struct io_uring_sqe *sqe, int fd,
+ struct sockaddr *addr,
+ socklen_t *addrlen, int flags,
+ unsigned int file_index)
+{
+ io_uring_prep_accept(sqe, fd, addr, addrlen, flags);
+ __io_uring_set_target_fixed_file(sqe, file_index);
+}
+
+static inline void io_uring_prep_multishot_accept(struct io_uring_sqe *sqe,
+ int fd, struct sockaddr *addr,
+ socklen_t *addrlen, int flags)
+{
+ io_uring_prep_accept(sqe, fd, addr, addrlen, flags);
+ sqe->ioprio |= IORING_ACCEPT_MULTISHOT;
+}
+
+/* multishot accept directly into the fixed file table */
+static inline void io_uring_prep_multishot_accept_direct(struct io_uring_sqe *sqe,
+ int fd,
+ struct sockaddr *addr,
+ socklen_t *addrlen,
+ int flags)
+{
+ io_uring_prep_multishot_accept(sqe, fd, addr, addrlen, flags);
+ __io_uring_set_target_fixed_file(sqe, IORING_FILE_INDEX_ALLOC - 1);
+}
+
+static inline void io_uring_prep_cancel64(struct io_uring_sqe *sqe,
+ __u64 user_data, int flags)
+{
+ io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, -1, NULL, 0, 0);
+ sqe->addr = user_data;
+ sqe->cancel_flags = (__u32) flags;
+}
+
+static inline void io_uring_prep_cancel(struct io_uring_sqe *sqe,
+ void *user_data, int flags)
+{
+ io_uring_prep_cancel64(sqe, (__u64) (uintptr_t) user_data, flags);
+}
+
+static inline void io_uring_prep_cancel_fd(struct io_uring_sqe *sqe, int fd,
+ unsigned int flags)
+{
+ io_uring_prep_rw(IORING_OP_ASYNC_CANCEL, sqe, fd, NULL, 0, 0);
+ sqe->cancel_flags = (__u32) flags | IORING_ASYNC_CANCEL_FD;
+}
+
+static inline void io_uring_prep_link_timeout(struct io_uring_sqe *sqe,
+ struct __kernel_timespec *ts,
+ unsigned flags)
+{
+ io_uring_prep_rw(IORING_OP_LINK_TIMEOUT, sqe, -1, ts, 1, 0);
+ sqe->timeout_flags = flags;
+}
+
+static inline void io_uring_prep_connect(struct io_uring_sqe *sqe, int fd,
+ const struct sockaddr *addr,
+ socklen_t addrlen)
+{
+ io_uring_prep_rw(IORING_OP_CONNECT, sqe, fd, addr, 0, addrlen);
+}
+
+static inline void io_uring_prep_files_update(struct io_uring_sqe *sqe,
+ int *fds, unsigned nr_fds,
+ int offset)
+{
+ io_uring_prep_rw(IORING_OP_FILES_UPDATE, sqe, -1, fds, nr_fds,
+ (__u64) offset);
+}
+
+static inline void io_uring_prep_fallocate(struct io_uring_sqe *sqe, int fd,
+ int mode, off_t offset, off_t len)
+{
+ io_uring_prep_rw(IORING_OP_FALLOCATE, sqe, fd,
+ 0, (unsigned int) mode, (__u64) offset);
+ sqe->addr = (__u64) len;
+}
+
+static inline void io_uring_prep_openat(struct io_uring_sqe *sqe, int dfd,
+ const char *path, int flags,
+ mode_t mode)
+{
+ io_uring_prep_rw(IORING_OP_OPENAT, sqe, dfd, path, mode, 0);
+ sqe->open_flags = (__u32) flags;
+}
+
+/* open directly into the fixed file table */
+static inline void io_uring_prep_openat_direct(struct io_uring_sqe *sqe,
+ int dfd, const char *path,
+ int flags, mode_t mode,
+ unsigned file_index)
+{
+ io_uring_prep_openat(sqe, dfd, path, flags, mode);
+ __io_uring_set_target_fixed_file(sqe, file_index);
+}
+
+static inline void io_uring_prep_close(struct io_uring_sqe *sqe, int fd)
+{
+ io_uring_prep_rw(IORING_OP_CLOSE, sqe, fd, NULL, 0, 0);
+}
+
+static inline void io_uring_prep_close_direct(struct io_uring_sqe *sqe,
+ unsigned file_index)
+{
+ io_uring_prep_close(sqe, 0);
+ __io_uring_set_target_fixed_file(sqe, file_index);
+}
+
+static inline void io_uring_prep_read(struct io_uring_sqe *sqe, int fd,
+ void *buf, unsigned nbytes, __u64 offset)
+{
+ io_uring_prep_rw(IORING_OP_READ, sqe, fd, buf, nbytes, offset);
+}
+
+static inline void io_uring_prep_write(struct io_uring_sqe *sqe, int fd,
+ const void *buf, unsigned nbytes,
+ __u64 offset)
+{
+ io_uring_prep_rw(IORING_OP_WRITE, sqe, fd, buf, nbytes, offset);
+}
+
+struct statx;
+static inline void io_uring_prep_statx(struct io_uring_sqe *sqe, int dfd,
+ const char *path, int flags, unsigned mask,
+ struct statx *statxbuf)
+{
+ io_uring_prep_rw(IORING_OP_STATX, sqe, dfd, path, mask,
+ (__u64) (unsigned long) statxbuf);
+ sqe->statx_flags = (__u32) flags;
+}
+
+static inline void io_uring_prep_fadvise(struct io_uring_sqe *sqe, int fd,
+ __u64 offset, off_t len, int advice)
+{
+ io_uring_prep_rw(IORING_OP_FADVISE, sqe, fd, NULL, (__u32) len, offset);
+ sqe->fadvise_advice = (__u32) advice;
+}
+
+static inline void io_uring_prep_madvise(struct io_uring_sqe *sqe, void *addr,
+ off_t length, int advice)
+{
+ io_uring_prep_rw(IORING_OP_MADVISE, sqe, -1, addr, (__u32) length, 0);
+ sqe->fadvise_advice = (__u32) advice;
+}
+
+static inline void io_uring_prep_send(struct io_uring_sqe *sqe, int sockfd,
+ const void *buf, size_t len, int flags)
+{
+ io_uring_prep_rw(IORING_OP_SEND, sqe, sockfd, buf, (__u32) len, 0);
+ sqe->msg_flags = (__u32) flags;
+}
+
+static inline void io_uring_prep_send_zc(struct io_uring_sqe *sqe, int sockfd,
+ const void *buf, size_t len, int flags,
+ unsigned zc_flags)
+{
+ io_uring_prep_rw(IORING_OP_SEND_ZC, sqe, sockfd, buf, (__u32) len, 0);
+ sqe->msg_flags = (__u32) flags;
+ sqe->ioprio = zc_flags;
+}
+
+static inline void io_uring_prep_send_zc_fixed(struct io_uring_sqe *sqe,
+ int sockfd, const void *buf,
+ size_t len, int flags,
+ unsigned zc_flags,
+ unsigned buf_index)
+{
+ io_uring_prep_send_zc(sqe, sockfd, buf, len, flags, zc_flags);
+ sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
+ sqe->buf_index = buf_index;
+}
+
+static inline void io_uring_prep_sendmsg_zc(struct io_uring_sqe *sqe, int fd,
+ const struct msghdr *msg,
+ unsigned flags)
+{
+ io_uring_prep_sendmsg(sqe, fd, msg, flags);
+ sqe->opcode = IORING_OP_SENDMSG_ZC;
+}
+
+static inline void io_uring_prep_send_set_addr(struct io_uring_sqe *sqe,
+ const struct sockaddr *dest_addr,
+ __u16 addr_len)
+{
+ sqe->addr2 = (unsigned long)(const void *)dest_addr;
+ sqe->addr_len = addr_len;
+}
+
+static inline void io_uring_prep_recv(struct io_uring_sqe *sqe, int sockfd,
+ void *buf, size_t len, int flags)
+{
+ io_uring_prep_rw(IORING_OP_RECV, sqe, sockfd, buf, (__u32) len, 0);
+ sqe->msg_flags = (__u32) flags;
+}
+
+static inline void io_uring_prep_recv_multishot(struct io_uring_sqe *sqe,
+ int sockfd, void *buf,
+ size_t len, int flags)
+{
+ io_uring_prep_recv(sqe, sockfd, buf, len, flags);
+ sqe->ioprio |= IORING_RECV_MULTISHOT;
+}
+
+static inline struct io_uring_recvmsg_out *
+io_uring_recvmsg_validate(void *buf, int buf_len, struct msghdr *msgh)
+{
+ unsigned long header = msgh->msg_controllen + msgh->msg_namelen +
+ sizeof(struct io_uring_recvmsg_out);
+ if (buf_len < 0 || (unsigned long)buf_len < header)
+ return NULL;
+ return (struct io_uring_recvmsg_out *)buf;
+}
+
+static inline void *io_uring_recvmsg_name(struct io_uring_recvmsg_out *o)
+{
+ return (void *) &o[1];
+}
+
+static inline struct cmsghdr *
+io_uring_recvmsg_cmsg_firsthdr(struct io_uring_recvmsg_out *o,
+ struct msghdr *msgh)
+{
+ if (o->controllen < sizeof(struct cmsghdr))
+ return NULL;
+
+ return (struct cmsghdr *)((unsigned char *) io_uring_recvmsg_name(o) +
+ msgh->msg_namelen);
+}
+
+static inline struct cmsghdr *
+io_uring_recvmsg_cmsg_nexthdr(struct io_uring_recvmsg_out *o, struct msghdr *msgh,
+ struct cmsghdr *cmsg)
+{
+ unsigned char *end;
+
+ if (cmsg->cmsg_len < sizeof(struct cmsghdr))
+ return NULL;
+ end = (unsigned char *) io_uring_recvmsg_cmsg_firsthdr(o, msgh) +
+ o->controllen;
+ cmsg = (struct cmsghdr *)((unsigned char *) cmsg +
+ CMSG_ALIGN(cmsg->cmsg_len));
+
+ if ((unsigned char *) (cmsg + 1) > end)
+ return NULL;
+ if (((unsigned char *) cmsg) + CMSG_ALIGN(cmsg->cmsg_len) > end)
+ return NULL;
+
+ return cmsg;
+}
+
+static inline void *io_uring_recvmsg_payload(struct io_uring_recvmsg_out *o,
+ struct msghdr *msgh)
+{
+ return (void *)((unsigned char *)io_uring_recvmsg_name(o) +
+ msgh->msg_namelen + msgh->msg_controllen);
+}
+
+static inline unsigned int
+io_uring_recvmsg_payload_length(struct io_uring_recvmsg_out *o,
+ int buf_len, struct msghdr *msgh)
+{
+ unsigned long payload_start, payload_end;
+
+ payload_start = (unsigned long) io_uring_recvmsg_payload(o, msgh);
+ payload_end = (unsigned long) o + buf_len;
+ return (unsigned int) (payload_end - payload_start);
+}
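These helpers unpack the buffer layout produced by a multishot IORING_OP_RECVMSG: an io_uring_recvmsg_out header, then the source address, the control messages, and finally the payload. A sketch of walking one completion (editorial, error paths trimmed; 'msg' is the msghdr passed at prep time and 'buf' is the provided buffer the kernel picked):

#include <stdio.h>
#include "liburing.h"

static void handle_recvmsg_cqe(struct io_uring_cqe *cqe, void *buf,
                               struct msghdr *msg)
{
    struct io_uring_recvmsg_out *out;
    struct cmsghdr *cmsg;

    out = io_uring_recvmsg_validate(buf, cqe->res, msg);
    if (!out)
        return;

    for (cmsg = io_uring_recvmsg_cmsg_firsthdr(out, msg); cmsg;
         cmsg = io_uring_recvmsg_cmsg_nexthdr(out, msg, cmsg))
        printf("cmsg level=%d type=%d\n", cmsg->cmsg_level, cmsg->cmsg_type);

    printf("payload bytes: %u\n",
           io_uring_recvmsg_payload_length(out, cqe->res, msg));
}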
+
+static inline void io_uring_prep_openat2(struct io_uring_sqe *sqe, int dfd,
+ const char *path, struct open_how *how)
+{
+ io_uring_prep_rw(IORING_OP_OPENAT2, sqe, dfd, path, sizeof(*how),
+ (uint64_t) (uintptr_t) how);
+}
+
+/* open directly into the fixed file table */
+static inline void io_uring_prep_openat2_direct(struct io_uring_sqe *sqe,
+ int dfd, const char *path,
+ struct open_how *how,
+ unsigned file_index)
+{
+ io_uring_prep_openat2(sqe, dfd, path, how);
+ __io_uring_set_target_fixed_file(sqe, file_index);
+}
+
+struct epoll_event;
+static inline void io_uring_prep_epoll_ctl(struct io_uring_sqe *sqe, int epfd,
+ int fd, int op,
+ struct epoll_event *ev)
+{
+ io_uring_prep_rw(IORING_OP_EPOLL_CTL, sqe, epfd, ev,
+ (__u32) op, (__u32) fd);
+}
+
+static inline void io_uring_prep_provide_buffers(struct io_uring_sqe *sqe,
+ void *addr, int len, int nr,
+ int bgid, int bid)
+{
+ io_uring_prep_rw(IORING_OP_PROVIDE_BUFFERS, sqe, nr, addr, (__u32) len,
+ (__u64) bid);
+ sqe->buf_group = (__u16) bgid;
+}
+
+static inline void io_uring_prep_remove_buffers(struct io_uring_sqe *sqe,
+ int nr, int bgid)
+{
+ io_uring_prep_rw(IORING_OP_REMOVE_BUFFERS, sqe, nr, NULL, 0, 0);
+ sqe->buf_group = (__u16) bgid;
+}
+
+static inline void io_uring_prep_shutdown(struct io_uring_sqe *sqe, int fd,
+ int how)
+{
+ io_uring_prep_rw(IORING_OP_SHUTDOWN, sqe, fd, NULL, (__u32) how, 0);
+}
+
+static inline void io_uring_prep_unlinkat(struct io_uring_sqe *sqe, int dfd,
+ const char *path, int flags)
+{
+ io_uring_prep_rw(IORING_OP_UNLINKAT, sqe, dfd, path, 0, 0);
+ sqe->unlink_flags = (__u32) flags;
+}
+
+static inline void io_uring_prep_unlink(struct io_uring_sqe *sqe,
+ const char *path, int flags)
+{
+ io_uring_prep_unlinkat(sqe, AT_FDCWD, path, flags);
+}
+
+static inline void io_uring_prep_renameat(struct io_uring_sqe *sqe, int olddfd,
+ const char *oldpath, int newdfd,
+ const char *newpath, unsigned int flags)
+{
+ io_uring_prep_rw(IORING_OP_RENAMEAT, sqe, olddfd, oldpath,
+ (__u32) newdfd,
+ (uint64_t) (uintptr_t) newpath);
+ sqe->rename_flags = (__u32) flags;
+}
+
+static inline void io_uring_prep_rename(struct io_uring_sqe *sqe,
+ const char *oldpath, const char *newpath)
+{
+ io_uring_prep_renameat(sqe, AT_FDCWD, oldpath, AT_FDCWD, newpath, 0);
+}
+
+static inline void io_uring_prep_sync_file_range(struct io_uring_sqe *sqe,
+ int fd, unsigned len,
+ __u64 offset, int flags)
+{
+ io_uring_prep_rw(IORING_OP_SYNC_FILE_RANGE, sqe, fd, NULL, len, offset);
+ sqe->sync_range_flags = (__u32) flags;
+}
+
+static inline void io_uring_prep_mkdirat(struct io_uring_sqe *sqe, int dfd,
+ const char *path, mode_t mode)
+{
+ io_uring_prep_rw(IORING_OP_MKDIRAT, sqe, dfd, path, mode, 0);
+}
+
+static inline void io_uring_prep_mkdir(struct io_uring_sqe *sqe,
+ const char *path, mode_t mode)
+{
+ io_uring_prep_mkdirat(sqe, AT_FDCWD, path, mode);
+}
+
+static inline void io_uring_prep_symlinkat(struct io_uring_sqe *sqe,
+ const char *target, int newdirfd,
+ const char *linkpath)
+{
+ io_uring_prep_rw(IORING_OP_SYMLINKAT, sqe, newdirfd, target, 0,
+ (uint64_t) (uintptr_t) linkpath);
+}
+
+static inline void io_uring_prep_symlink(struct io_uring_sqe *sqe,
+ const char *target, const char *linkpath)
+{
+ io_uring_prep_symlinkat(sqe, target, AT_FDCWD, linkpath);
+}
+
+static inline void io_uring_prep_linkat(struct io_uring_sqe *sqe, int olddfd,
+ const char *oldpath, int newdfd,
+ const char *newpath, int flags)
+{
+ io_uring_prep_rw(IORING_OP_LINKAT, sqe, olddfd, oldpath, (__u32) newdfd,
+ (uint64_t) (uintptr_t) newpath);
+ sqe->hardlink_flags = (__u32) flags;
+}
+
+static inline void io_uring_prep_link(struct io_uring_sqe *sqe,
+ const char *oldpath, const char *newpath, int flags)
+{
+ io_uring_prep_linkat(sqe, AT_FDCWD, oldpath, AT_FDCWD, newpath, flags);
+}
+
+static inline void io_uring_prep_msg_ring(struct io_uring_sqe *sqe, int fd,
+ unsigned int len, __u64 data,
+ unsigned int flags)
+{
+ io_uring_prep_rw(IORING_OP_MSG_RING, sqe, fd, NULL, len, data);
+ sqe->rw_flags = flags;
+}
+
+static inline void io_uring_prep_getxattr(struct io_uring_sqe *sqe,
+ const char *name,
+ char *value,
+ const char *path,
+ unsigned int len)
+{
+ io_uring_prep_rw(IORING_OP_GETXATTR, sqe, 0, name, len,
+ (__u64) (uintptr_t) value);
+ sqe->addr3 = (__u64) (uintptr_t) path;
+ sqe->xattr_flags = 0;
+}
+
+static inline void io_uring_prep_setxattr(struct io_uring_sqe *sqe,
+ const char *name,
+ const char *value,
+ const char *path,
+ int flags,
+ unsigned int len)
+{
+ io_uring_prep_rw(IORING_OP_SETXATTR, sqe, 0, name, len,
+ (__u64) (uintptr_t) value);
+ sqe->addr3 = (__u64) (uintptr_t) path;
+ sqe->xattr_flags = flags;
+}
+
+static inline void io_uring_prep_fgetxattr(struct io_uring_sqe *sqe,
+ int fd,
+ const char *name,
+ char *value,
+ unsigned int len)
+{
+ io_uring_prep_rw(IORING_OP_FGETXATTR, sqe, fd, name, len,
+ (__u64) (uintptr_t) value);
+ sqe->xattr_flags = 0;
+}
+
+static inline void io_uring_prep_fsetxattr(struct io_uring_sqe *sqe,
+ int fd,
+ const char *name,
+ const char *value,
+ int flags,
+ unsigned int len)
+{
+ io_uring_prep_rw(IORING_OP_FSETXATTR, sqe, fd, name, len,
+ (__u64) (uintptr_t) value);
+ sqe->xattr_flags = flags;
+}
+
+static inline void io_uring_prep_socket(struct io_uring_sqe *sqe, int domain,
+ int type, int protocol,
+ unsigned int flags)
+{
+ io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type);
+ sqe->rw_flags = flags;
+}
+
+static inline void io_uring_prep_socket_direct(struct io_uring_sqe *sqe,
+ int domain, int type,
+ int protocol,
+ unsigned file_index,
+ unsigned int flags)
+{
+ io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type);
+ sqe->rw_flags = flags;
+ __io_uring_set_target_fixed_file(sqe, file_index);
+}
+
+static inline void io_uring_prep_socket_direct_alloc(struct io_uring_sqe *sqe,
+ int domain, int type, int protocol,
+ unsigned int flags)
+{
+ io_uring_prep_rw(IORING_OP_SOCKET, sqe, domain, NULL, protocol, type);
+ sqe->rw_flags = flags;
+ __io_uring_set_target_fixed_file(sqe, IORING_FILE_INDEX_ALLOC - 1);
+}
+
+/*
+ * Returns the number of unconsumed (if SQPOLL) or unsubmitted entries in
+ * the SQ ring
+ */
+static inline unsigned io_uring_sq_ready(const struct io_uring *ring)
+{
+ unsigned khead = *ring->sq.khead;
+
+ /*
+ * Without a barrier, we could miss an update and think the SQ wasn't
+ * ready. We don't need the load acquire for non-SQPOLL since then we
+ * drive updates.
+ */
+ if (ring->flags & IORING_SETUP_SQPOLL)
+ khead = io_uring_smp_load_acquire(ring->sq.khead);
+
+ /* always use real head, to avoid losing sync for short submit */
+ return ring->sq.sqe_tail - khead;
+}
+
+/*
+ * Returns how much space is left in the SQ ring.
+ */
+static inline unsigned io_uring_sq_space_left(const struct io_uring *ring)
+{
+ return ring->sq.ring_entries - io_uring_sq_ready(ring);
+}
+
+/*
+ * Only applicable when using SQPOLL - allows the caller to wait for space
+ * to free up in the SQ ring, which happens when the kernel side thread has
+ * consumed one or more entries. If the SQ ring is currently non-full, no
+ * action is taken. Note: may return -EINVAL if the kernel doesn't support
+ * this feature.
+ */
+static inline int io_uring_sqring_wait(struct io_uring *ring)
+{
+ if (!(ring->flags & IORING_SETUP_SQPOLL))
+ return 0;
+ if (io_uring_sq_space_left(ring))
+ return 0;
+
+ return __io_uring_sqring_wait(ring);
+}
+
+/*
+ * Returns how many unconsumed entries are ready in the CQ ring
+ */
+static inline unsigned io_uring_cq_ready(const struct io_uring *ring)
+{
+ return io_uring_smp_load_acquire(ring->cq.ktail) - *ring->cq.khead;
+}
+
+/*
+ * Returns true if there are overflow entries waiting to be flushed onto
+ * the CQ ring
+ */
+static inline bool io_uring_cq_has_overflow(const struct io_uring *ring)
+{
+ return IO_URING_READ_ONCE(*ring->sq.kflags) & IORING_SQ_CQ_OVERFLOW;
+}
+
+/*
+ * Returns true if the eventfd notification is currently enabled
+ */
+static inline bool io_uring_cq_eventfd_enabled(const struct io_uring *ring)
+{
+ if (!ring->cq.kflags)
+ return true;
+
+ return !(*ring->cq.kflags & IORING_CQ_EVENTFD_DISABLED);
+}
+
+/*
+ * Toggle eventfd notification on or off, if an eventfd is registered with
+ * the ring.
+ */
+static inline int io_uring_cq_eventfd_toggle(struct io_uring *ring,
+ bool enabled)
+{
+ uint32_t flags;
+
+ if (!!enabled == io_uring_cq_eventfd_enabled(ring))
+ return 0;
+
+ if (!ring->cq.kflags)
+ return -EOPNOTSUPP;
+
+ flags = *ring->cq.kflags;
+
+ if (enabled)
+ flags &= ~IORING_CQ_EVENTFD_DISABLED;
+ else
+ flags |= IORING_CQ_EVENTFD_DISABLED;
+
+ IO_URING_WRITE_ONCE(*ring->cq.kflags, flags);
+
+ return 0;
+}
+
+/*
+ * Return an IO completion, waiting for 'wait_nr' completions if one isn't
+ * readily available. Returns 0 with cqe_ptr filled in on success, -errno on
+ * failure.
+ */
+static inline int io_uring_wait_cqe_nr(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr,
+ unsigned wait_nr)
+{
+ return __io_uring_get_cqe(ring, cqe_ptr, 0, wait_nr, NULL);
+}
+
+/*
+ * Internal helper, don't use directly in applications. Use one of the
+ * "official" versions of this, io_uring_peek_cqe(), io_uring_wait_cqe(),
+ * or io_uring_wait_cqes*().
+ */
+static inline int __io_uring_peek_cqe(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr,
+ unsigned *nr_available)
+{
+ struct io_uring_cqe *cqe;
+ int err = 0;
+ unsigned available;
+ unsigned mask = ring->cq.ring_mask;
+ int shift = 0;
+
+ if (ring->flags & IORING_SETUP_CQE32)
+ shift = 1;
+
+ do {
+ unsigned tail = io_uring_smp_load_acquire(ring->cq.ktail);
+ unsigned head = *ring->cq.khead;
+
+ cqe = NULL;
+ available = tail - head;
+ if (!available)
+ break;
+
+ cqe = &ring->cq.cqes[(head & mask) << shift];
+ if (!(ring->features & IORING_FEAT_EXT_ARG) &&
+ cqe->user_data == LIBURING_UDATA_TIMEOUT) {
+ if (cqe->res < 0)
+ err = cqe->res;
+ io_uring_cq_advance(ring, 1);
+ if (!err)
+ continue;
+ cqe = NULL;
+ }
+
+ break;
+ } while (1);
+
+ *cqe_ptr = cqe;
+ if (nr_available)
+ *nr_available = available;
+ return err;
+}
+
+/*
+ * Return an IO completion, if one is readily available. Returns 0 with
+ * cqe_ptr filled in on success, -errno on failure.
+ */
+static inline int io_uring_peek_cqe(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr)
+{
+ if (!__io_uring_peek_cqe(ring, cqe_ptr, NULL) && *cqe_ptr)
+ return 0;
+
+ return io_uring_wait_cqe_nr(ring, cqe_ptr, 0);
+}
+
+/*
+ * Return an IO completion, waiting for it if necessary. Returns 0 with
+ * cqe_ptr filled in on success, -errno on failure.
+ */
+static inline int io_uring_wait_cqe(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr)
+{
+ if (!__io_uring_peek_cqe(ring, cqe_ptr, NULL) && *cqe_ptr)
+ return 0;
+
+ return io_uring_wait_cqe_nr(ring, cqe_ptr, 1);
+}
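Put together, the helpers above give the canonical liburing flow: grab an SQE, prep it, tag it, submit, wait for the CQE, mark it seen. A minimal sketch (editorial; buffer and fd management elided):

#include <errno.h>
#include "liburing.h"

/* Read 'len' bytes from 'fd' at offset 0 and wait for the result. */
static int read_once(struct io_uring *ring, int fd, void *buf, unsigned len)
{
    struct io_uring_sqe *sqe;
    struct io_uring_cqe *cqe;
    int ret;

    sqe = io_uring_get_sqe(ring);
    if (!sqe)
        return -EBUSY;                      /* SQ ring is full */
    io_uring_prep_read(sqe, fd, buf, len, 0);
    io_uring_sqe_set_data64(sqe, 0x1234);   /* arbitrary completion tag */

    io_uring_submit(ring);

    ret = io_uring_wait_cqe(ring, &cqe);
    if (ret < 0)
        return ret;
    ret = cqe->res;                         /* bytes read, or -errno */
    io_uring_cqe_seen(ring, cqe);
    return ret;
}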
+
+/*
+ * Return an sqe to fill. Application must later call io_uring_submit()
+ * when it's ready to tell the kernel about it. The caller may call this
+ * function multiple times before calling io_uring_submit().
+ *
+ * Returns a vacant sqe, or NULL if we're full.
+ */
+static inline struct io_uring_sqe *_io_uring_get_sqe(struct io_uring *ring)
+{
+ struct io_uring_sq *sq = &ring->sq;
+ unsigned int head, next = sq->sqe_tail + 1;
+ int shift = 0;
+
+ if (ring->flags & IORING_SETUP_SQE128)
+ shift = 1;
+ if (!(ring->flags & IORING_SETUP_SQPOLL))
+ head = IO_URING_READ_ONCE(*sq->khead);
+ else
+ head = io_uring_smp_load_acquire(sq->khead);
+
+ if (next - head <= sq->ring_entries) {
+ struct io_uring_sqe *sqe;
+
+ sqe = &sq->sqes[(sq->sqe_tail & sq->ring_mask) << shift];
+ sq->sqe_tail = next;
+ return sqe;
+ }
+
+ return NULL;
+}
+
+/*
+ * Return the appropriate mask for a buffer ring of size 'ring_entries'
+ */
+static inline int io_uring_buf_ring_mask(__u32 ring_entries)
+{
+ return ring_entries - 1;
+}
+
+static inline void io_uring_buf_ring_init(struct io_uring_buf_ring *br)
+{
+ br->tail = 0;
+}
+
+/*
+ * Assign 'buf' with the addr/len/buffer ID supplied
+ */
+static inline void io_uring_buf_ring_add(struct io_uring_buf_ring *br,
+ void *addr, unsigned int len,
+ unsigned short bid, int mask,
+ int buf_offset)
+{
+ struct io_uring_buf *buf = &br->bufs[(br->tail + buf_offset) & mask];
+
+ buf->addr = (unsigned long) (uintptr_t) addr;
+ buf->len = len;
+ buf->bid = bid;
+}
+
+/*
+ * Make 'count' new buffers visible to the kernel. Called after
+ * io_uring_buf_ring_add() has been called 'count' times to fill in new
+ * buffers.
+ */
+static inline void io_uring_buf_ring_advance(struct io_uring_buf_ring *br,
+ int count)
+{
+ unsigned short new_tail = br->tail + count;
+
+ io_uring_smp_store_release(&br->tail, new_tail);
+}
+
+/*
+ * Make 'count' new buffers visible to the kernel while at the same time
+ * advancing the CQ ring seen entries. This can be used when the application
+ * is using ring provided buffers and returns buffers while processing CQEs,
+ * avoiding an extra atomic when needing to increment both the CQ ring and
+ * the ring buffer index at the same time.
+ */
+static inline void io_uring_buf_ring_cq_advance(struct io_uring *ring,
+ struct io_uring_buf_ring *br,
+ int count)
+{
+ br->tail += count;
+ io_uring_cq_advance(ring, count);
+}
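The buffer-ring helpers above assume the ring has been registered with io_uring_register_buf_ring(). A sketch of wiring one up (editorial; the group id, sizes and posix_memalign allocation are arbitrary choices, error handling trimmed):

#include <stdlib.h>
#include <string.h>
#include "liburing.h"

#define BUFS     8
#define BUF_SIZE 4096
#define BGID     0   /* arbitrary buffer group id */

static struct io_uring_buf_ring *setup_buf_ring(struct io_uring *ring,
                                                char bufs[BUFS][BUF_SIZE])
{
    struct io_uring_buf_ring *br;
    struct io_uring_buf_reg reg;
    int i;

    /* the ring memory must be page aligned; posix_memalign keeps this simple */
    if (posix_memalign((void **) &br, 4096,
                       BUFS * sizeof(struct io_uring_buf)))
        return NULL;

    memset(&reg, 0, sizeof(reg));
    reg.ring_addr = (unsigned long) br;
    reg.ring_entries = BUFS;
    reg.bgid = BGID;
    if (io_uring_register_buf_ring(ring, &reg, 0) < 0)
        return NULL;

    io_uring_buf_ring_init(br);
    for (i = 0; i < BUFS; i++)
        io_uring_buf_ring_add(br, bufs[i], BUF_SIZE, i,
                              io_uring_buf_ring_mask(BUFS), i);
    io_uring_buf_ring_advance(br, BUFS);   /* publish all BUFS buffers at once */
    return br;
}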
+
+#ifndef LIBURING_INTERNAL
+static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
+{
+ return _io_uring_get_sqe(ring);
+}
+#else
+struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring);
+#endif
+
+ssize_t io_uring_mlock_size(unsigned entries, unsigned flags);
+ssize_t io_uring_mlock_size_params(unsigned entries, struct io_uring_params *p);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/libs/liburing/src/include/liburing/barrier.h b/contrib/libs/liburing/src/include/liburing/barrier.h
new file mode 100644
index 00000000000..aedeb47663d
--- /dev/null
+++ b/contrib/libs/liburing/src/include/liburing/barrier.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIBURING_BARRIER_H
+#define LIBURING_BARRIER_H
+
+/*
+From the kernel documentation file refcount-vs-atomic.rst:
+
+A RELEASE memory ordering guarantees that all prior loads and
+stores (all po-earlier instructions) on the same CPU are completed
+before the operation. It also guarantees that all po-earlier
+stores on the same CPU and all propagated stores from other CPUs
+must propagate to all other CPUs before the release operation
+(A-cumulative property). This is implemented using
+:c:func:`smp_store_release`.
+
+An ACQUIRE memory ordering guarantees that all post loads and
+stores (all po-later instructions) on the same CPU are
+completed after the acquire operation. It also guarantees that all
+po-later stores on the same CPU must propagate to all other CPUs
+after the acquire operation executes. This is implemented using
+:c:func:`smp_acquire__after_ctrl_dep`.
+*/
+
+#ifdef __cplusplus
+#include <atomic>
+
+template <typename T>
+static inline void IO_URING_WRITE_ONCE(T &var, T val)
+{
+ std::atomic_store_explicit(reinterpret_cast<std::atomic<T> *>(&var),
+ val, std::memory_order_relaxed);
+}
+template <typename T>
+static inline T IO_URING_READ_ONCE(const T &var)
+{
+ return std::atomic_load_explicit(
+ reinterpret_cast<const std::atomic<T> *>(&var),
+ std::memory_order_relaxed);
+}
+
+template <typename T>
+static inline void io_uring_smp_store_release(T *p, T v)
+{
+ std::atomic_store_explicit(reinterpret_cast<std::atomic<T> *>(p), v,
+ std::memory_order_release);
+}
+
+template <typename T>
+static inline T io_uring_smp_load_acquire(const T *p)
+{
+ return std::atomic_load_explicit(
+ reinterpret_cast<const std::atomic<T> *>(p),
+ std::memory_order_acquire);
+}
+
+static inline void io_uring_smp_mb()
+{
+ std::atomic_thread_fence(std::memory_order_seq_cst);
+}
+#else
+#include <stdatomic.h>
+
+#define IO_URING_WRITE_ONCE(var, val) \
+ atomic_store_explicit((_Atomic __typeof__(var) *)&(var), \
+ (val), memory_order_relaxed)
+#define IO_URING_READ_ONCE(var) \
+ atomic_load_explicit((_Atomic __typeof__(var) *)&(var), \
+ memory_order_relaxed)
+
+#define io_uring_smp_store_release(p, v) \
+ atomic_store_explicit((_Atomic __typeof__(*(p)) *)(p), (v), \
+ memory_order_release)
+#define io_uring_smp_load_acquire(p) \
+ atomic_load_explicit((_Atomic __typeof__(*(p)) *)(p), \
+ memory_order_acquire)
+
+#define io_uring_smp_mb() \
+ atomic_thread_fence(memory_order_seq_cst)
+#endif
+
+#endif /* defined(LIBURING_BARRIER_H) */
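
A minimal sketch of how the acquire/release pair above is used against the shared completion ring: the load-acquire on the CQ tail pairs with the kernel's store-release, and the store-release on the head publishes consumed entries back. Field names follow struct io_uring_cq from liburing.h; the sketch assumes the ring was not set up with IORING_SETUP_CQE32.

#include <liburing.h>

/* Sketch: drain whatever CQEs are currently visible. The head is only ever
 * written by the application, so a plain read of *khead is fine; the tail
 * is written by the kernel and must be read with acquire semantics. */
static unsigned drain_visible_cqes(struct io_uring *ring)
{
	struct io_uring_cq *cq = &ring->cq;
	unsigned head = *cq->khead;
	unsigned tail = io_uring_smp_load_acquire(cq->ktail);
	unsigned seen = tail - head;

	for (; head != tail; head++) {
		struct io_uring_cqe *cqe = &cq->cqes[head & cq->ring_mask];

		(void)cqe;	/* consume cqe->user_data / cqe->res here */
	}
	/* publish the new head so the kernel may reuse the slots */
	io_uring_smp_store_release(cq->khead, head);
	return seen;
}
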
diff --git a/contrib/libs/liburing/src/include/liburing/compat.h b/contrib/libs/liburing/src/include/liburing/compat.h
new file mode 100644
index 00000000000..e5794395781
--- /dev/null
+++ b/contrib/libs/liburing/src/include/liburing/compat.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIBURING_COMPAT_H
+#define LIBURING_COMPAT_H
+
+#include <linux/time_types.h>
+
+#include <linux/openat2.h>
+
+#endif
diff --git a/contrib/libs/liburing/src/include/liburing/io_uring.h b/contrib/libs/liburing/src/include/liburing/io_uring.h
new file mode 100644
index 00000000000..a3e09208df7
--- /dev/null
+++ b/contrib/libs/liburing/src/include/liburing/io_uring.h
@@ -0,0 +1,682 @@
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR MIT */
+/*
+ * Header file for the io_uring interface.
+ *
+ * Copyright (C) 2019 Jens Axboe
+ * Copyright (C) 2019 Christoph Hellwig
+ */
+#ifndef LINUX_IO_URING_H
+#define LINUX_IO_URING_H
+
+#include <linux/fs.h>
+#include <linux/types.h>
+/*
+ * this file is shared with liburing and that has to autodetect
+ * if linux/time_types.h is available
+ */
+#ifdef __KERNEL__
+#define HAVE_LINUX_TIME_TYPES_H 1
+#endif
+#ifdef HAVE_LINUX_TIME_TYPES_H
+#include <linux/time_types.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * IO submission data structure (Submission Queue Entry)
+ */
+struct io_uring_sqe {
+ __u8 opcode; /* type of operation for this sqe */
+ __u8 flags; /* IOSQE_ flags */
+ __u16 ioprio; /* ioprio for the request */
+ __s32 fd; /* file descriptor to do IO on */
+ union {
+ __u64 off; /* offset into file */
+ __u64 addr2;
+ struct {
+ __u32 cmd_op;
+ __u32 __pad1;
+ };
+ };
+ union {
+ __u64 addr; /* pointer to buffer or iovecs */
+ __u64 splice_off_in;
+ };
+ __u32 len; /* buffer size or number of iovecs */
+ union {
+ __kernel_rwf_t rw_flags;
+ __u32 fsync_flags;
+ __u16 poll_events; /* compatibility */
+ __u32 poll32_events; /* word-reversed for BE */
+ __u32 sync_range_flags;
+ __u32 msg_flags;
+ __u32 timeout_flags;
+ __u32 accept_flags;
+ __u32 cancel_flags;
+ __u32 open_flags;
+ __u32 statx_flags;
+ __u32 fadvise_advice;
+ __u32 splice_flags;
+ __u32 rename_flags;
+ __u32 unlink_flags;
+ __u32 hardlink_flags;
+ __u32 xattr_flags;
+ __u32 msg_ring_flags;
+ __u32 uring_cmd_flags;
+ };
+ __u64 user_data; /* data to be passed back at completion time */
+ /* pack this to avoid bogus arm OABI complaints */
+ union {
+ /* index into fixed buffers, if used */
+ __u16 buf_index;
+ /* for grouped buffer selection */
+ __u16 buf_group;
+ } __attribute__((packed));
+ /* personality to use, if used */
+ __u16 personality;
+ union {
+ __s32 splice_fd_in;
+ __u32 file_index;
+ struct {
+ __u16 addr_len;
+ __u16 __pad3[1];
+ };
+ };
+ union {
+ struct {
+ __u64 addr3;
+ __u64 __pad2[1];
+ };
+ /*
+ * If the ring is initialized with IORING_SETUP_SQE128, then
+ * this field is used for 80 bytes of arbitrary command data
+ */
+ __u8 cmd[0];
+ };
+};
+
+/*
+ * If sqe->file_index is set to this for opcodes that instantiate a new
+ * direct descriptor (like openat/openat2/accept), then io_uring will allocate
+ * an available direct descriptor instead of having the application pass one
+ * in. The picked direct descriptor will be returned in cqe->res, or -ENFILE
+ * if the space is full.
+ */
+#define IORING_FILE_INDEX_ALLOC (~0U)
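
As a hedged illustration of the auto-allocation path: the sketch below asks the kernel to pick a slot in a previously registered (sparse) fixed-file table. It assumes a liburing version whose io_uring_prep_openat_direct() accepts IORING_FILE_INDEX_ALLOC and a kernel new enough (roughly 5.19+) to support allocation; 'open_direct_auto' and the O_RDONLY flags are illustrative.

#include <fcntl.h>
#include <liburing.h>

/* Sketch: open a file straight into an auto-allocated direct-descriptor
 * slot. The chosen slot comes back in cqe->res; -ENFILE means the table
 * is full. Requires io_uring_register_files_sparse() to have been called. */
static int open_direct_auto(struct io_uring *ring, const char *path)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);
	struct io_uring_cqe *cqe;
	int ret;

	if (!sqe)
		return -EAGAIN;
	io_uring_prep_openat_direct(sqe, AT_FDCWD, path, O_RDONLY, 0,
				    IORING_FILE_INDEX_ALLOC);
	io_uring_submit(ring);

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0)
		return ret;
	ret = cqe->res;			/* allocated slot, or -errno */
	io_uring_cqe_seen(ring, cqe);
	return ret;
}
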
+
+enum {
+ IOSQE_FIXED_FILE_BIT,
+ IOSQE_IO_DRAIN_BIT,
+ IOSQE_IO_LINK_BIT,
+ IOSQE_IO_HARDLINK_BIT,
+ IOSQE_ASYNC_BIT,
+ IOSQE_BUFFER_SELECT_BIT,
+ IOSQE_CQE_SKIP_SUCCESS_BIT,
+};
+
+/*
+ * sqe->flags
+ */
+/* use fixed fileset */
+#define IOSQE_FIXED_FILE (1U << IOSQE_FIXED_FILE_BIT)
+/* issue after inflight IO */
+#define IOSQE_IO_DRAIN (1U << IOSQE_IO_DRAIN_BIT)
+/* links next sqe */
+#define IOSQE_IO_LINK (1U << IOSQE_IO_LINK_BIT)
+/* like LINK, but stronger */
+#define IOSQE_IO_HARDLINK (1U << IOSQE_IO_HARDLINK_BIT)
+/* always go async */
+#define IOSQE_ASYNC (1U << IOSQE_ASYNC_BIT)
+/* select buffer from sqe->buf_group */
+#define IOSQE_BUFFER_SELECT (1U << IOSQE_BUFFER_SELECT_BIT)
+/* don't post CQE if request succeeded */
+#define IOSQE_CQE_SKIP_SUCCESS (1U << IOSQE_CQE_SKIP_SUCCESS_BIT)
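
A short sketch of the most common flag, IOSQE_IO_LINK: chain a write and an fsync so the fsync only runs if the write succeeds. The helper name and the zero file offset are illustrative, and error handling is reduced to the minimum.

#include <liburing.h>

/* Sketch: two linked SQEs, one submission. IOSQE_IO_LINK is set on every
 * SQE in the chain except the last one. */
static int write_then_fsync(struct io_uring *ring, int fd,
			    const void *buf, unsigned len)
{
	struct io_uring_sqe *sqe;

	sqe = io_uring_get_sqe(ring);
	if (!sqe)
		return -EAGAIN;
	io_uring_prep_write(sqe, fd, buf, len, 0);
	io_uring_sqe_set_flags(sqe, IOSQE_IO_LINK);

	sqe = io_uring_get_sqe(ring);
	if (!sqe)
		return -EAGAIN;	/* simplified: leaves the first SQE queued */
	io_uring_prep_fsync(sqe, fd, 0);

	return io_uring_submit(ring);
}
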
+
+/*
+ * io_uring_setup() flags
+ */
+#define IORING_SETUP_IOPOLL (1U << 0) /* io_context is polled */
+#define IORING_SETUP_SQPOLL (1U << 1) /* SQ poll thread */
+#define IORING_SETUP_SQ_AFF (1U << 2) /* sq_thread_cpu is valid */
+#define IORING_SETUP_CQSIZE (1U << 3) /* app defines CQ size */
+#define IORING_SETUP_CLAMP (1U << 4) /* clamp SQ/CQ ring sizes */
+#define IORING_SETUP_ATTACH_WQ (1U << 5) /* attach to existing wq */
+#define IORING_SETUP_R_DISABLED (1U << 6) /* start with ring disabled */
+#define IORING_SETUP_SUBMIT_ALL (1U << 7) /* continue submit on error */
+/*
+ * Cooperative task running. When requests complete, they often require
+ * forcing the submitter to transition to the kernel to complete. If this
+ * flag is set, work will be done when the task transitions anyway, rather
+ * than force an inter-processor interrupt reschedule. This avoids interrupting
+ * a task running in userspace, and saves an IPI.
+ */
+#define IORING_SETUP_COOP_TASKRUN (1U << 8)
+/*
+ * If COOP_TASKRUN is set, get notified if task work is available for
+ * running and a kernel transition would be needed to run it. This sets
+ * IORING_SQ_TASKRUN in the sq ring flags. Not valid with COOP_TASKRUN.
+ */
+#define IORING_SETUP_TASKRUN_FLAG (1U << 9)
+#define IORING_SETUP_SQE128 (1U << 10) /* SQEs are 128 byte */
+#define IORING_SETUP_CQE32 (1U << 11) /* CQEs are 32 byte */
+/*
+ * Only one task is allowed to submit requests
+ */
+#define IORING_SETUP_SINGLE_ISSUER (1U << 12)
+
+/*
+ * Defer running task work to get events.
+ * Rather than running bits of task work whenever the task transitions,
+ * try to do it just before it is needed.
+ */
+#define IORING_SETUP_DEFER_TASKRUN (1U << 13)
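
The setup flags are passed through io_uring_params at ring creation. A minimal sketch for the SQPOLL case; the idle timeout, CPU number and function name are illustrative, and kernels that do not recognize a given flag fail the setup with -EINVAL.

#include <liburing.h>

/* Sketch: SQPOLL ring whose kernel submission thread is pinned to CPU 0
 * and goes idle after 2000 ms without new SQEs. */
static int setup_sqpoll_ring(struct io_uring *ring, unsigned entries)
{
	struct io_uring_params p = { };

	p.flags = IORING_SETUP_SQPOLL | IORING_SETUP_SQ_AFF;
	p.sq_thread_cpu = 0;
	p.sq_thread_idle = 2000;

	return io_uring_queue_init_params(entries, ring, &p);
}
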
+
+enum io_uring_op {
+ IORING_OP_NOP,
+ IORING_OP_READV,
+ IORING_OP_WRITEV,
+ IORING_OP_FSYNC,
+ IORING_OP_READ_FIXED,
+ IORING_OP_WRITE_FIXED,
+ IORING_OP_POLL_ADD,
+ IORING_OP_POLL_REMOVE,
+ IORING_OP_SYNC_FILE_RANGE,
+ IORING_OP_SENDMSG,
+ IORING_OP_RECVMSG,
+ IORING_OP_TIMEOUT,
+ IORING_OP_TIMEOUT_REMOVE,
+ IORING_OP_ACCEPT,
+ IORING_OP_ASYNC_CANCEL,
+ IORING_OP_LINK_TIMEOUT,
+ IORING_OP_CONNECT,
+ IORING_OP_FALLOCATE,
+ IORING_OP_OPENAT,
+ IORING_OP_CLOSE,
+ IORING_OP_FILES_UPDATE,
+ IORING_OP_STATX,
+ IORING_OP_READ,
+ IORING_OP_WRITE,
+ IORING_OP_FADVISE,
+ IORING_OP_MADVISE,
+ IORING_OP_SEND,
+ IORING_OP_RECV,
+ IORING_OP_OPENAT2,
+ IORING_OP_EPOLL_CTL,
+ IORING_OP_SPLICE,
+ IORING_OP_PROVIDE_BUFFERS,
+ IORING_OP_REMOVE_BUFFERS,
+ IORING_OP_TEE,
+ IORING_OP_SHUTDOWN,
+ IORING_OP_RENAMEAT,
+ IORING_OP_UNLINKAT,
+ IORING_OP_MKDIRAT,
+ IORING_OP_SYMLINKAT,
+ IORING_OP_LINKAT,
+ IORING_OP_MSG_RING,
+ IORING_OP_FSETXATTR,
+ IORING_OP_SETXATTR,
+ IORING_OP_FGETXATTR,
+ IORING_OP_GETXATTR,
+ IORING_OP_SOCKET,
+ IORING_OP_URING_CMD,
+ IORING_OP_SEND_ZC,
+ IORING_OP_SENDMSG_ZC,
+
+ /* this goes last, obviously */
+ IORING_OP_LAST,
+};
+
+/*
+ * sqe->uring_cmd_flags
+ * IORING_URING_CMD_FIXED	use registered buffer; pass this flag
+ * along with setting sqe->buf_index.
+ */
+#define IORING_URING_CMD_FIXED (1U << 0)
+
+
+/*
+ * sqe->fsync_flags
+ */
+#define IORING_FSYNC_DATASYNC (1U << 0)
+
+/*
+ * sqe->timeout_flags
+ */
+#define IORING_TIMEOUT_ABS (1U << 0)
+#define IORING_TIMEOUT_UPDATE (1U << 1)
+#define IORING_TIMEOUT_BOOTTIME (1U << 2)
+#define IORING_TIMEOUT_REALTIME (1U << 3)
+#define IORING_LINK_TIMEOUT_UPDATE (1U << 4)
+#define IORING_TIMEOUT_ETIME_SUCCESS (1U << 5)
+#define IORING_TIMEOUT_CLOCK_MASK (IORING_TIMEOUT_BOOTTIME | IORING_TIMEOUT_REALTIME)
+#define IORING_TIMEOUT_UPDATE_MASK (IORING_TIMEOUT_UPDATE | IORING_LINK_TIMEOUT_UPDATE)
+/*
+ * sqe->splice_flags
+ * extends splice(2) flags
+ */
+#define SPLICE_F_FD_IN_FIXED (1U << 31) /* the last bit of __u32 */
+
+/*
+ * POLL_ADD flags. Note that since sqe->poll_events is the flag space, the
+ * command flags for POLL_ADD are stored in sqe->len.
+ *
+ * IORING_POLL_ADD_MULTI Multishot poll. Sets IORING_CQE_F_MORE if
+ * the poll handler will continue to report
+ * CQEs on behalf of the same SQE.
+ *
+ * IORING_POLL_UPDATE Update existing poll request, matching
+ * sqe->addr as the old user_data field.
+ *
+ * IORING_POLL_ADD_LEVEL	Level triggered poll.
+ */
+#define IORING_POLL_ADD_MULTI (1U << 0)
+#define IORING_POLL_UPDATE_EVENTS (1U << 1)
+#define IORING_POLL_UPDATE_USER_DATA (1U << 2)
+#define IORING_POLL_ADD_LEVEL (1U << 3)
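
A hedged sketch of the multishot case: one SQE keeps posting CQEs for as long as IORING_CQE_F_MORE is set in them. It assumes the io_uring_prep_poll_multishot() helper from liburing.h and a kernel with multishot poll support; 'arm_multishot_poll' is an illustrative name.

#include <poll.h>
#include <liburing.h>

/* Sketch: arm a multishot POLLIN poll on a socket. Re-arm only when a CQE
 * arrives without IORING_CQE_F_MORE set. */
static int arm_multishot_poll(struct io_uring *ring, int sockfd,
			      __u64 user_data)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	if (!sqe)
		return -EAGAIN;
	io_uring_prep_poll_multishot(sqe, sockfd, POLLIN);
	io_uring_sqe_set_data64(sqe, user_data);
	return io_uring_submit(ring);
}
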
+
+/*
+ * ASYNC_CANCEL flags.
+ *
+ * IORING_ASYNC_CANCEL_ALL Cancel all requests that match the given key
+ * IORING_ASYNC_CANCEL_FD Key off 'fd' for cancelation rather than the
+ * request 'user_data'
+ * IORING_ASYNC_CANCEL_ANY Match any request
+ * IORING_ASYNC_CANCEL_FD_FIXED 'fd' passed in is a fixed descriptor
+ */
+#define IORING_ASYNC_CANCEL_ALL (1U << 0)
+#define IORING_ASYNC_CANCEL_FD (1U << 1)
+#define IORING_ASYNC_CANCEL_ANY (1U << 2)
+#define IORING_ASYNC_CANCEL_FD_FIXED (1U << 3)
+
+/*
+ * send/sendmsg and recv/recvmsg flags (sqe->ioprio)
+ *
+ * IORING_RECVSEND_POLL_FIRST If set, instead of first attempting to send
+ * or receive and arm poll if that yields an
+ * -EAGAIN result, arm poll upfront and skip
+ * the initial transfer attempt.
+ *
+ * IORING_RECV_MULTISHOT Multishot recv. Sets IORING_CQE_F_MORE if
+ * the handler will continue to report
+ * CQEs on behalf of the same SQE.
+ *
+ * IORING_RECVSEND_FIXED_BUF Use registered buffers, the index is stored in
+ * the buf_index field.
+ */
+#define IORING_RECVSEND_POLL_FIRST (1U << 0)
+#define IORING_RECV_MULTISHOT (1U << 1)
+#define IORING_RECVSEND_FIXED_BUF (1U << 2)
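
A sketch combining IORING_RECV_MULTISHOT with buffer selection: the request carries no buffer of its own and instead draws from provided-buffer group 'bgid'. This assumes the io_uring_prep_recv_multishot() helper and a kernel new enough for multishot receive (roughly 6.0+); the group id is an assumed, pre-registered value.

#include <liburing.h>

/* Sketch: multishot receive using ring-provided buffers. Each CQE reports
 * the picked buffer in the upper bits of cqe->flags (IORING_CQE_BUFFER_SHIFT
 * below) and keeps IORING_CQE_F_MORE set while the request stays armed. */
static int arm_multishot_recv(struct io_uring *ring, int sockfd,
			      unsigned short bgid)
{
	struct io_uring_sqe *sqe = io_uring_get_sqe(ring);

	if (!sqe)
		return -EAGAIN;
	io_uring_prep_recv_multishot(sqe, sockfd, NULL, 0, 0);
	sqe->flags |= IOSQE_BUFFER_SELECT;
	sqe->buf_group = bgid;
	return io_uring_submit(ring);
}
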
+
+/*
+ * accept flags stored in sqe->ioprio
+ */
+#define IORING_ACCEPT_MULTISHOT (1U << 0)
+
+/*
+ * IORING_OP_MSG_RING command types, stored in sqe->addr
+ */
+enum {
+ IORING_MSG_DATA, /* pass sqe->len as 'res' and off as user_data */
+ IORING_MSG_SEND_FD, /* send a registered fd to another ring */
+};
+
+/*
+ * IORING_OP_MSG_RING flags (sqe->msg_ring_flags)
+ *
+ * IORING_MSG_RING_CQE_SKIP Don't post a CQE to the target ring. Not
+ * applicable for IORING_MSG_DATA, obviously.
+ */
+#define IORING_MSG_RING_CQE_SKIP (1U << 0)
+
+/*
+ * IO completion data structure (Completion Queue Entry)
+ */
+struct io_uring_cqe {
+ __u64 user_data; /* sqe->data submission passed back */
+ __s32 res; /* result code for this event */
+ __u32 flags;
+
+ /*
+ * If the ring is initialized with IORING_SETUP_CQE32, then this field
+ * contains 16-bytes of padding, doubling the size of the CQE.
+ */
+ __u64 big_cqe[];
+};
+
+/*
+ * cqe->flags
+ *
+ * IORING_CQE_F_BUFFER If set, the upper 16 bits are the buffer ID
+ * IORING_CQE_F_MORE If set, parent SQE will generate more CQE entries
+ * IORING_CQE_F_SOCK_NONEMPTY If set, more data to read after socket recv
+ * IORING_CQE_F_NOTIF	Set for notification CQEs. Can be used to distinguish
+ * them from sends.
+ */
+#define IORING_CQE_F_BUFFER (1U << 0)
+#define IORING_CQE_F_MORE (1U << 1)
+#define IORING_CQE_F_SOCK_NONEMPTY (1U << 2)
+#define IORING_CQE_F_NOTIF (1U << 3)
+
+enum {
+ IORING_CQE_BUFFER_SHIFT = 16,
+};
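
Decoding a completion from a buffer-select request then looks like the short sketch below; the handler name is illustrative.

#include <liburing.h>

/* Sketch: when IORING_CQE_F_BUFFER is set, the upper 16 bits of cqe->flags
 * carry the ID of the buffer the kernel picked; cqe->res is the byte count. */
static void handle_buffer_cqe(const struct io_uring_cqe *cqe)
{
	if (cqe->flags & IORING_CQE_F_BUFFER) {
		unsigned short bid = cqe->flags >> IORING_CQE_BUFFER_SHIFT;
		int bytes = cqe->res;

		(void)bid;	/* consume 'bytes' from buffer 'bid', then re-add it */
		(void)bytes;
	}
}
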
+
+/*
+ * Magic offsets for the application to mmap the data it needs
+ */
+#define IORING_OFF_SQ_RING 0ULL
+#define IORING_OFF_CQ_RING 0x8000000ULL
+#define IORING_OFF_SQES 0x10000000ULL
+
+/*
+ * Filled with the offset for mmap(2)
+ */
+struct io_sqring_offsets {
+ __u32 head;
+ __u32 tail;
+ __u32 ring_mask;
+ __u32 ring_entries;
+ __u32 flags;
+ __u32 dropped;
+ __u32 array;
+ __u32 resv1;
+ __u64 resv2;
+};
+
+/*
+ * sq_ring->flags
+ */
+#define IORING_SQ_NEED_WAKEUP (1U << 0) /* needs io_uring_enter wakeup */
+#define IORING_SQ_CQ_OVERFLOW (1U << 1) /* CQ ring is overflown */
+#define IORING_SQ_TASKRUN (1U << 2) /* task should enter the kernel */
+
+struct io_cqring_offsets {
+ __u32 head;
+ __u32 tail;
+ __u32 ring_mask;
+ __u32 ring_entries;
+ __u32 overflow;
+ __u32 cqes;
+ __u32 flags;
+ __u32 resv1;
+ __u64 resv2;
+};
+
+/*
+ * cq_ring->flags
+ */
+
+/* disable eventfd notifications */
+#define IORING_CQ_EVENTFD_DISABLED (1U << 0)
+
+/*
+ * io_uring_enter(2) flags
+ */
+#define IORING_ENTER_GETEVENTS (1U << 0)
+#define IORING_ENTER_SQ_WAKEUP (1U << 1)
+#define IORING_ENTER_SQ_WAIT (1U << 2)
+#define IORING_ENTER_EXT_ARG (1U << 3)
+#define IORING_ENTER_REGISTERED_RING (1U << 4)
+
+/*
+ * Passed in for io_uring_setup(2). Copied back with updated info on success
+ */
+struct io_uring_params {
+ __u32 sq_entries;
+ __u32 cq_entries;
+ __u32 flags;
+ __u32 sq_thread_cpu;
+ __u32 sq_thread_idle;
+ __u32 features;
+ __u32 wq_fd;
+ __u32 resv[3];
+ struct io_sqring_offsets sq_off;
+ struct io_cqring_offsets cq_off;
+};
+
+/*
+ * io_uring_params->features flags
+ */
+#define IORING_FEAT_SINGLE_MMAP (1U << 0)
+#define IORING_FEAT_NODROP (1U << 1)
+#define IORING_FEAT_SUBMIT_STABLE (1U << 2)
+#define IORING_FEAT_RW_CUR_POS (1U << 3)
+#define IORING_FEAT_CUR_PERSONALITY (1U << 4)
+#define IORING_FEAT_FAST_POLL (1U << 5)
+#define IORING_FEAT_POLL_32BITS (1U << 6)
+#define IORING_FEAT_SQPOLL_NONFIXED (1U << 7)
+#define IORING_FEAT_EXT_ARG (1U << 8)
+#define IORING_FEAT_NATIVE_WORKERS (1U << 9)
+#define IORING_FEAT_RSRC_TAGS (1U << 10)
+#define IORING_FEAT_CQE_SKIP (1U << 11)
+#define IORING_FEAT_LINKED_FILE (1U << 12)
+
+/*
+ * io_uring_register(2) opcodes and arguments
+ */
+enum {
+ IORING_REGISTER_BUFFERS = 0,
+ IORING_UNREGISTER_BUFFERS = 1,
+ IORING_REGISTER_FILES = 2,
+ IORING_UNREGISTER_FILES = 3,
+ IORING_REGISTER_EVENTFD = 4,
+ IORING_UNREGISTER_EVENTFD = 5,
+ IORING_REGISTER_FILES_UPDATE = 6,
+ IORING_REGISTER_EVENTFD_ASYNC = 7,
+ IORING_REGISTER_PROBE = 8,
+ IORING_REGISTER_PERSONALITY = 9,
+ IORING_UNREGISTER_PERSONALITY = 10,
+ IORING_REGISTER_RESTRICTIONS = 11,
+ IORING_REGISTER_ENABLE_RINGS = 12,
+
+ /* extended with tagging */
+ IORING_REGISTER_FILES2 = 13,
+ IORING_REGISTER_FILES_UPDATE2 = 14,
+ IORING_REGISTER_BUFFERS2 = 15,
+ IORING_REGISTER_BUFFERS_UPDATE = 16,
+
+ /* set/clear io-wq thread affinities */
+ IORING_REGISTER_IOWQ_AFF = 17,
+ IORING_UNREGISTER_IOWQ_AFF = 18,
+
+ /* set/get max number of io-wq workers */
+ IORING_REGISTER_IOWQ_MAX_WORKERS = 19,
+
+ /* register/unregister io_uring fd with the ring */
+ IORING_REGISTER_RING_FDS = 20,
+ IORING_UNREGISTER_RING_FDS = 21,
+
+ /* register ring based provide buffer group */
+ IORING_REGISTER_PBUF_RING = 22,
+ IORING_UNREGISTER_PBUF_RING = 23,
+
+ /* sync cancelation API */
+ IORING_REGISTER_SYNC_CANCEL = 24,
+
+ /* register a range of fixed file slots for automatic slot allocation */
+ IORING_REGISTER_FILE_ALLOC_RANGE = 25,
+
+ /* this goes last */
+ IORING_REGISTER_LAST
+};
+
+/* io-wq worker categories */
+enum {
+ IO_WQ_BOUND,
+ IO_WQ_UNBOUND,
+};
+
+/* deprecated, see struct io_uring_rsrc_update */
+struct io_uring_files_update {
+ __u32 offset;
+ __u32 resv;
+ __aligned_u64 /* __s32 * */ fds;
+};
+
+/*
+ * Register a fully sparse file space, rather than pass in an array of all
+ * -1 file descriptors.
+ */
+#define IORING_RSRC_REGISTER_SPARSE (1U << 0)
+
+struct io_uring_rsrc_register {
+ __u32 nr;
+ __u32 flags;
+ __u64 resv2;
+ __aligned_u64 data;
+ __aligned_u64 tags;
+};
+
+struct io_uring_rsrc_update {
+ __u32 offset;
+ __u32 resv;
+ __aligned_u64 data;
+};
+
+struct io_uring_rsrc_update2 {
+ __u32 offset;
+ __u32 resv;
+ __aligned_u64 data;
+ __aligned_u64 tags;
+ __u32 nr;
+ __u32 resv2;
+};
+
+struct io_uring_notification_slot {
+ __u64 tag;
+ __u64 resv[3];
+};
+
+struct io_uring_notification_register {
+ __u32 nr_slots;
+ __u32 resv;
+ __u64 resv2;
+ __u64 data;
+ __u64 resv3;
+};
+
+/* Skip updating fd indexes set to this value in the fd table */
+#define IORING_REGISTER_FILES_SKIP (-2)
+
+#define IO_URING_OP_SUPPORTED (1U << 0)
+
+struct io_uring_probe_op {
+ __u8 op;
+ __u8 resv;
+ __u16 flags; /* IO_URING_OP_* flags */
+ __u32 resv2;
+};
+
+struct io_uring_probe {
+ __u8 last_op; /* last opcode supported */
+ __u8 ops_len; /* length of ops[] array below */
+ __u16 resv;
+ __u32 resv2[3];
+ struct io_uring_probe_op ops[];
+};
+
+struct io_uring_restriction {
+ __u16 opcode;
+ union {
+ __u8 register_op; /* IORING_RESTRICTION_REGISTER_OP */
+ __u8 sqe_op; /* IORING_RESTRICTION_SQE_OP */
+ __u8 sqe_flags; /* IORING_RESTRICTION_SQE_FLAGS_* */
+ };
+ __u8 resv;
+ __u32 resv2[3];
+};
+
+struct io_uring_buf {
+ __u64 addr;
+ __u32 len;
+ __u16 bid;
+ __u16 resv;
+};
+
+struct io_uring_buf_ring {
+ union {
+ /*
+ * To avoid spilling into more pages than we need to, the
+ * ring tail is overlaid with the io_uring_buf->resv field.
+ */
+ struct {
+ __u64 resv1;
+ __u32 resv2;
+ __u16 resv3;
+ __u16 tail;
+ };
+ struct io_uring_buf bufs[0];
+ };
+};
+
+/* argument for IORING_(UN)REGISTER_PBUF_RING */
+struct io_uring_buf_reg {
+ __u64 ring_addr;
+ __u32 ring_entries;
+ __u16 bgid;
+ __u16 pad;
+ __u64 resv[3];
+};
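
A minimal sketch of registering such a ring: allocate page-aligned, zeroed memory sized for 'ring_entries' (a power of two) io_uring_buf entries, describe it with io_uring_buf_reg and hand it to io_uring_register_buf_ring() from register.c below. The 128-entry size is illustrative, and a kernel with IORING_REGISTER_PBUF_RING support (roughly 5.19+) is assumed.

#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <liburing.h>

/* Sketch: set up a 128-entry provided-buffer ring for group 'bgid'. Buffers
 * still have to be added with io_uring_buf_ring_add()/advance() afterwards. */
static struct io_uring_buf_ring *setup_buf_ring(struct io_uring *ring,
						unsigned short bgid)
{
	struct io_uring_buf_ring *br;
	struct io_uring_buf_reg reg = { };
	unsigned int entries = 128;

	if (posix_memalign((void **)&br, sysconf(_SC_PAGESIZE),
			   entries * sizeof(struct io_uring_buf)))
		return NULL;
	memset(br, 0, entries * sizeof(struct io_uring_buf));	/* tail = 0 */

	reg.ring_addr = (unsigned long)br;
	reg.ring_entries = entries;
	reg.bgid = bgid;
	if (io_uring_register_buf_ring(ring, &reg, 0)) {
		free(br);
		return NULL;
	}
	return br;
}
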
+
+/*
+ * io_uring_restriction->opcode values
+ */
+enum {
+ /* Allow an io_uring_register(2) opcode */
+ IORING_RESTRICTION_REGISTER_OP = 0,
+
+ /* Allow an sqe opcode */
+ IORING_RESTRICTION_SQE_OP = 1,
+
+ /* Allow sqe flags */
+ IORING_RESTRICTION_SQE_FLAGS_ALLOWED = 2,
+
+ /* Require sqe flags (these flags must be set on each submission) */
+ IORING_RESTRICTION_SQE_FLAGS_REQUIRED = 3,
+
+ IORING_RESTRICTION_LAST
+};
+
+struct io_uring_getevents_arg {
+ __u64 sigmask;
+ __u32 sigmask_sz;
+ __u32 pad;
+ __u64 ts;
+};
+
+/*
+ * Argument for IORING_REGISTER_SYNC_CANCEL
+ */
+struct io_uring_sync_cancel_reg {
+ __u64 addr;
+ __s32 fd;
+ __u32 flags;
+ struct __kernel_timespec timeout;
+ __u64 pad[4];
+};
+
+/*
+ * Argument for IORING_REGISTER_FILE_ALLOC_RANGE
+ * The range is specified as [off, off + len)
+ */
+struct io_uring_file_index_range {
+ __u32 off;
+ __u32 len;
+ __u64 resv;
+};
+
+struct io_uring_recvmsg_out {
+ __u32 namelen;
+ __u32 controllen;
+ __u32 payloadlen;
+ __u32 flags;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/contrib/libs/liburing/src/int_flags.h b/contrib/libs/liburing/src/int_flags.h
new file mode 100644
index 00000000000..90505ec340b
--- /dev/null
+++ b/contrib/libs/liburing/src/int_flags.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIBURING_INT_FLAGS
+#define LIBURING_INT_FLAGS
+
+enum {
+ INT_FLAG_REG_RING = 1,
+};
+
+#endif
diff --git a/contrib/libs/liburing/src/lib.h b/contrib/libs/liburing/src/lib.h
new file mode 100644
index 00000000000..82c0ab10d3f
--- /dev/null
+++ b/contrib/libs/liburing/src/lib.h
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIBURING_LIB_H
+#define LIBURING_LIB_H
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "arch/x86/lib.h"
+#elif defined(__aarch64__)
+#include "arch/aarch64/lib.h"
+#else
+/*
+ * We don't have nolibc support for this arch. Must use libc!
+ */
+#ifdef CONFIG_NOLIBC
+#error "This arch doesn't support building liburing without libc"
+#endif
+/* libc wrappers. */
+#error #include "arch/generic/lib.h"
+#endif
+
+
+#ifndef offsetof
+#define offsetof(TYPE, FIELD) ((size_t) &((TYPE *)0)->FIELD)
+#endif
+
+#ifndef container_of
+#define container_of(PTR, TYPE, FIELD) ({ \
+ __typeof__(((TYPE *)0)->FIELD) *__FIELD_PTR = (PTR); \
+ (TYPE *)((char *) __FIELD_PTR - offsetof(TYPE, FIELD)); \
+})
+#endif
+
+#define __maybe_unused __attribute__((__unused__))
+#define __hot __attribute__((__hot__))
+#define __cold __attribute__((__cold__))
+
+void *__uring_malloc(size_t len);
+void __uring_free(void *p);
+
+static inline void *uring_malloc(size_t len)
+{
+#ifdef CONFIG_NOLIBC
+ return __uring_malloc(len);
+#else
+ return malloc(len);
+#endif
+}
+
+static inline void uring_free(void *ptr)
+{
+#ifdef CONFIG_NOLIBC
+ __uring_free(ptr);
+#else
+ free(ptr);
+#endif
+}
+
+#endif /* #ifndef LIBURING_LIB_H */
diff --git a/contrib/libs/liburing/src/queue.c b/contrib/libs/liburing/src/queue.c
new file mode 100644
index 00000000000..4b4220782d6
--- /dev/null
+++ b/contrib/libs/liburing/src/queue.c
@@ -0,0 +1,436 @@
+#include "../config-host.h"
+/* SPDX-License-Identifier: MIT */
+#define _POSIX_C_SOURCE 200112L
+
+#include "lib.h"
+#include "syscall.h"
+#include "liburing.h"
+#include "int_flags.h"
+#include "liburing/compat.h"
+#include "liburing/io_uring.h"
+
+/*
+ * Returns true if we're not using SQ thread (thus nobody submits but us)
+ * or if IORING_SQ_NEED_WAKEUP is set, meaning the SQ poll thread must be
+ * explicitly woken. For the latter case, we set the thread wakeup flag.
+ * If no SQEs are ready for submission, returns false.
+ */
+static inline bool sq_ring_needs_enter(struct io_uring *ring,
+ unsigned submit,
+ unsigned *flags)
+{
+ if (!submit)
+ return false;
+
+ if (!(ring->flags & IORING_SETUP_SQPOLL))
+ return true;
+
+ /*
+ * Ensure the kernel can see the store to the SQ tail before we read
+ * the flags.
+ */
+ io_uring_smp_mb();
+
+ if (uring_unlikely(IO_URING_READ_ONCE(*ring->sq.kflags) &
+ IORING_SQ_NEED_WAKEUP)) {
+ *flags |= IORING_ENTER_SQ_WAKEUP;
+ return true;
+ }
+
+ return false;
+}
+
+static inline bool cq_ring_needs_flush(struct io_uring *ring)
+{
+ return IO_URING_READ_ONCE(*ring->sq.kflags) &
+ (IORING_SQ_CQ_OVERFLOW | IORING_SQ_TASKRUN);
+}
+
+static inline bool cq_ring_needs_enter(struct io_uring *ring)
+{
+ return (ring->flags & IORING_SETUP_IOPOLL) || cq_ring_needs_flush(ring);
+}
+
+struct get_data {
+ unsigned submit;
+ unsigned wait_nr;
+ unsigned get_flags;
+ int sz;
+ int has_ts;
+ void *arg;
+};
+
+static int _io_uring_get_cqe(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr,
+ struct get_data *data)
+{
+ struct io_uring_cqe *cqe = NULL;
+ bool looped = false;
+ int err = 0;
+
+ do {
+ bool need_enter = false;
+ unsigned flags = 0;
+ unsigned nr_available;
+ int ret;
+
+ ret = __io_uring_peek_cqe(ring, &cqe, &nr_available);
+ if (ret) {
+ if (!err)
+ err = ret;
+ break;
+ }
+ if (!cqe && !data->wait_nr && !data->submit) {
+ /*
+			 * If we already looped once, we already entered
+ * the kernel. Since there's nothing to submit or
+ * wait for, don't keep retrying.
+ */
+ if (looped || !cq_ring_needs_enter(ring)) {
+ if (!err)
+ err = -EAGAIN;
+ break;
+ }
+ need_enter = true;
+ }
+ if (data->wait_nr > nr_available || need_enter) {
+ flags = IORING_ENTER_GETEVENTS | data->get_flags;
+ need_enter = true;
+ }
+ if (sq_ring_needs_enter(ring, data->submit, &flags))
+ need_enter = true;
+ if (!need_enter)
+ break;
+ if (looped && data->has_ts) {
+ struct io_uring_getevents_arg *arg = data->arg;
+
+ if (!cqe && arg->ts && !err)
+ err = -ETIME;
+ break;
+ }
+
+ if (ring->int_flags & INT_FLAG_REG_RING)
+ flags |= IORING_ENTER_REGISTERED_RING;
+ ret = __sys_io_uring_enter2(ring->enter_ring_fd, data->submit,
+ data->wait_nr, flags, data->arg,
+ data->sz);
+ if (ret < 0) {
+ if (!err)
+ err = ret;
+ break;
+ }
+
+ data->submit -= ret;
+ if (cqe)
+ break;
+ if (!looped) {
+ looped = true;
+ err = ret;
+ }
+ } while (1);
+
+ *cqe_ptr = cqe;
+ return err;
+}
+
+int __io_uring_get_cqe(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
+ unsigned submit, unsigned wait_nr, sigset_t *sigmask)
+{
+ struct get_data data = {
+ .submit = submit,
+ .wait_nr = wait_nr,
+ .get_flags = 0,
+ .sz = _NSIG / 8,
+ .arg = sigmask,
+ };
+
+ return _io_uring_get_cqe(ring, cqe_ptr, &data);
+}
+
+int io_uring_get_events(struct io_uring *ring)
+{
+ int flags = IORING_ENTER_GETEVENTS;
+
+ if (ring->int_flags & INT_FLAG_REG_RING)
+ flags |= IORING_ENTER_REGISTERED_RING;
+ return __sys_io_uring_enter(ring->enter_ring_fd, 0, 0, flags, NULL);
+}
+
+/*
+ * Fill in an array of IO completions up to count, if any are available.
+ * Returns the number of IO completions filled.
+ */
+unsigned io_uring_peek_batch_cqe(struct io_uring *ring,
+ struct io_uring_cqe **cqes, unsigned count)
+{
+ unsigned ready;
+ bool overflow_checked = false;
+ int shift = 0;
+
+ if (ring->flags & IORING_SETUP_CQE32)
+ shift = 1;
+
+again:
+ ready = io_uring_cq_ready(ring);
+ if (ready) {
+ unsigned head = *ring->cq.khead;
+ unsigned mask = ring->cq.ring_mask;
+ unsigned last;
+ int i = 0;
+
+ count = count > ready ? ready : count;
+ last = head + count;
+ for (;head != last; head++, i++)
+ cqes[i] = &ring->cq.cqes[(head & mask) << shift];
+
+ return count;
+ }
+
+ if (overflow_checked)
+ return 0;
+
+ if (cq_ring_needs_flush(ring)) {
+ io_uring_get_events(ring);
+ overflow_checked = true;
+ goto again;
+ }
+
+ return 0;
+}
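
A sketch of the intended batch pattern: peek a batch, handle it, then retire the whole batch with a single io_uring_cq_advance(). The batch size of 32 and the loop shape are illustrative.

#include <liburing.h>

/* Sketch: drain completions in batches instead of one wait/seen pair per CQE. */
static void drain_in_batches(struct io_uring *ring)
{
	struct io_uring_cqe *cqes[32];
	unsigned got, i;

	while ((got = io_uring_peek_batch_cqe(ring, cqes, 32)) != 0) {
		for (i = 0; i < got; i++) {
			/* handle cqes[i]->user_data / cqes[i]->res */
		}
		io_uring_cq_advance(ring, got);
	}
}
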
+
+/*
+ * Sync internal state with kernel ring state on the SQ side. Returns the
+ * number of pending items in the SQ ring, for the shared ring.
+ */
+unsigned __io_uring_flush_sq(struct io_uring *ring)
+{
+ struct io_uring_sq *sq = &ring->sq;
+ unsigned tail = sq->sqe_tail;
+
+ if (sq->sqe_head != tail) {
+ sq->sqe_head = tail;
+ /*
+ * Ensure kernel sees the SQE updates before the tail update.
+ */
+ if (!(ring->flags & IORING_SETUP_SQPOLL))
+ IO_URING_WRITE_ONCE(*sq->ktail, tail);
+ else
+ io_uring_smp_store_release(sq->ktail, tail);
+ }
+ /*
+ * This _may_ look problematic, as we're not supposed to be reading
+ * SQ->head without acquire semantics. When we're in SQPOLL mode, the
+ * kernel submitter could be updating this right now. For non-SQPOLL,
+ * task itself does it, and there's no potential race. But even for
+ * SQPOLL, the load is going to be potentially out-of-date the very
+	 * instant it's done, regardless of whether or not it's done
+ * atomically. Worst case, we're going to be over-estimating what
+ * we can submit. The point is, we need to be able to deal with this
+ * situation regardless of any perceived atomicity.
+ */
+ return tail - *sq->khead;
+}
+
+/*
+ * If we have kernel support for IORING_ENTER_EXT_ARG, then we can use that
+ * more efficiently than queueing an internal timeout command.
+ */
+static int io_uring_wait_cqes_new(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr,
+ unsigned wait_nr,
+ struct __kernel_timespec *ts,
+ sigset_t *sigmask)
+{
+ struct io_uring_getevents_arg arg = {
+ .sigmask = (unsigned long) sigmask,
+ .sigmask_sz = _NSIG / 8,
+ .ts = (unsigned long) ts
+ };
+ struct get_data data = {
+ .wait_nr = wait_nr,
+ .get_flags = IORING_ENTER_EXT_ARG,
+ .sz = sizeof(arg),
+ .has_ts = ts != NULL,
+ .arg = &arg
+ };
+
+ return _io_uring_get_cqe(ring, cqe_ptr, &data);
+}
+
+/*
+ * Like io_uring_wait_cqe(), except it accepts a timeout value as well. Note
+ * that an sqe is used internally to handle the timeout. On kernels without
+ * IORING_FEAT_EXT_ARG support, applications using this function must never
+ * set sqe->user_data to LIBURING_UDATA_TIMEOUT!
+ *
+ * For kernels without IORING_FEAT_EXT_ARG (5.10 and older), if 'ts' is
+ * specified, the application need not call io_uring_submit() before
+ * calling this function, as we will do that on its behalf. From this it also
+ * follows that this function isn't safe to use for applications that split SQ
+ * and CQ handling between two threads and expect that to work without
+ * synchronization, as this function manipulates both the SQ and CQ side.
+ *
+ * For kernels with IORING_FEAT_EXT_ARG, no implicit submission is done and
+ * hence this function is safe to use for applications that split SQ and CQ
+ * handling between two threads.
+ */
+static int __io_uring_submit_timeout(struct io_uring *ring, unsigned wait_nr,
+ struct __kernel_timespec *ts)
+{
+ struct io_uring_sqe *sqe;
+ int ret;
+
+ /*
+ * If the SQ ring is full, we may need to submit IO first
+ */
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe) {
+ ret = io_uring_submit(ring);
+ if (ret < 0)
+ return ret;
+ sqe = io_uring_get_sqe(ring);
+ if (!sqe)
+ return -EAGAIN;
+ }
+ io_uring_prep_timeout(sqe, ts, wait_nr, 0);
+ sqe->user_data = LIBURING_UDATA_TIMEOUT;
+ return __io_uring_flush_sq(ring);
+}
+
+int io_uring_wait_cqes(struct io_uring *ring, struct io_uring_cqe **cqe_ptr,
+ unsigned wait_nr, struct __kernel_timespec *ts,
+ sigset_t *sigmask)
+{
+ int to_submit = 0;
+
+ if (ts) {
+ if (ring->features & IORING_FEAT_EXT_ARG)
+ return io_uring_wait_cqes_new(ring, cqe_ptr, wait_nr,
+ ts, sigmask);
+ to_submit = __io_uring_submit_timeout(ring, wait_nr, ts);
+ if (to_submit < 0)
+ return to_submit;
+ }
+
+ return __io_uring_get_cqe(ring, cqe_ptr, to_submit, wait_nr, sigmask);
+}
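
A minimal usage sketch: wait up to one second for at least one completion. -ETIME (timeout fired) and -EINTR (signal) are expected outcomes rather than hard errors; the wrapper name is illustrative.

#include <liburing.h>

/* Sketch: bounded wait for a single CQE. */
static int wait_one_second(struct io_uring *ring, struct io_uring_cqe **cqe)
{
	struct __kernel_timespec ts = { .tv_sec = 1, .tv_nsec = 0 };

	return io_uring_wait_cqes(ring, cqe, 1, &ts, NULL);
}
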
+
+int io_uring_submit_and_wait_timeout(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr,
+ unsigned wait_nr,
+ struct __kernel_timespec *ts,
+ sigset_t *sigmask)
+{
+ int to_submit;
+
+ if (ts) {
+ if (ring->features & IORING_FEAT_EXT_ARG) {
+ struct io_uring_getevents_arg arg = {
+ .sigmask = (unsigned long) sigmask,
+ .sigmask_sz = _NSIG / 8,
+ .ts = (unsigned long) ts
+ };
+ struct get_data data = {
+ .submit = __io_uring_flush_sq(ring),
+ .wait_nr = wait_nr,
+ .get_flags = IORING_ENTER_EXT_ARG,
+ .sz = sizeof(arg),
+ .has_ts = ts != NULL,
+ .arg = &arg
+ };
+
+ return _io_uring_get_cqe(ring, cqe_ptr, &data);
+ }
+ to_submit = __io_uring_submit_timeout(ring, wait_nr, ts);
+ if (to_submit < 0)
+ return to_submit;
+ } else
+ to_submit = __io_uring_flush_sq(ring);
+
+ return __io_uring_get_cqe(ring, cqe_ptr, to_submit, wait_nr, sigmask);
+}
+
+/*
+ * See io_uring_wait_cqes() - this function is the same, it just always uses
+ * '1' as the wait_nr.
+ */
+int io_uring_wait_cqe_timeout(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr,
+ struct __kernel_timespec *ts)
+{
+ return io_uring_wait_cqes(ring, cqe_ptr, 1, ts, NULL);
+}
+
+/*
+ * Submit sqes acquired from io_uring_get_sqe() to the kernel.
+ *
+ * Returns number of sqes submitted
+ */
+static int __io_uring_submit(struct io_uring *ring, unsigned submitted,
+ unsigned wait_nr, bool getevents)
+{
+ bool cq_needs_enter = getevents || wait_nr || cq_ring_needs_enter(ring);
+ unsigned flags;
+ int ret;
+
+ flags = 0;
+ if (sq_ring_needs_enter(ring, submitted, &flags) || cq_needs_enter) {
+ if (cq_needs_enter)
+ flags |= IORING_ENTER_GETEVENTS;
+ if (ring->int_flags & INT_FLAG_REG_RING)
+ flags |= IORING_ENTER_REGISTERED_RING;
+
+ ret = __sys_io_uring_enter(ring->enter_ring_fd, submitted,
+ wait_nr, flags, NULL);
+ } else
+ ret = submitted;
+
+ return ret;
+}
+
+static int __io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
+{
+ return __io_uring_submit(ring, __io_uring_flush_sq(ring), wait_nr, false);
+}
+
+/*
+ * Submit sqes acquired from io_uring_get_sqe() to the kernel.
+ *
+ * Returns number of sqes submitted
+ */
+int io_uring_submit(struct io_uring *ring)
+{
+ return __io_uring_submit_and_wait(ring, 0);
+}
+
+/*
+ * Like io_uring_submit(), but allows waiting for events as well.
+ *
+ * Returns number of sqes submitted
+ */
+int io_uring_submit_and_wait(struct io_uring *ring, unsigned wait_nr)
+{
+ return __io_uring_submit_and_wait(ring, wait_nr);
+}
+
+int io_uring_submit_and_get_events(struct io_uring *ring)
+{
+ return __io_uring_submit(ring, __io_uring_flush_sq(ring), 0, true);
+}
+
+#ifdef LIBURING_INTERNAL
+struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
+{
+ return _io_uring_get_sqe(ring);
+}
+#endif
+
+int __io_uring_sqring_wait(struct io_uring *ring)
+{
+ int flags = IORING_ENTER_SQ_WAIT;
+
+ if (ring->int_flags & INT_FLAG_REG_RING)
+ flags |= IORING_ENTER_REGISTERED_RING;
+
+ return __sys_io_uring_enter(ring->enter_ring_fd, 0, 0, flags, NULL);
+}
diff --git a/contrib/libs/liburing/src/register.c b/contrib/libs/liburing/src/register.c
new file mode 100644
index 00000000000..13e80a5811e
--- /dev/null
+++ b/contrib/libs/liburing/src/register.c
@@ -0,0 +1,370 @@
+#include "../config-host.h"
+/* SPDX-License-Identifier: MIT */
+#define _POSIX_C_SOURCE 200112L
+
+#include "lib.h"
+#include "syscall.h"
+#include "liburing.h"
+#include "int_flags.h"
+#include "liburing/compat.h"
+#include "liburing/io_uring.h"
+
+int io_uring_register_buffers_update_tag(struct io_uring *ring, unsigned off,
+ const struct iovec *iovecs,
+ const __u64 *tags,
+ unsigned nr)
+{
+ struct io_uring_rsrc_update2 up = {
+ .offset = off,
+ .data = (unsigned long)iovecs,
+ .tags = (unsigned long)tags,
+ .nr = nr,
+ };
+
+	return __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_BUFFERS_UPDATE, &up,
+ sizeof(up));
+}
+
+int io_uring_register_buffers_tags(struct io_uring *ring,
+ const struct iovec *iovecs,
+ const __u64 *tags,
+ unsigned nr)
+{
+ struct io_uring_rsrc_register reg = {
+ .nr = nr,
+ .data = (unsigned long)iovecs,
+ .tags = (unsigned long)tags,
+ };
+
+ return __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_BUFFERS2, &reg,
+ sizeof(reg));
+}
+
+int io_uring_register_buffers_sparse(struct io_uring *ring, unsigned nr)
+{
+ struct io_uring_rsrc_register reg = {
+ .flags = IORING_RSRC_REGISTER_SPARSE,
+ .nr = nr,
+ };
+
+ return __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_BUFFERS2,
+ &reg, sizeof(reg));
+}
+
+int io_uring_register_buffers(struct io_uring *ring, const struct iovec *iovecs,
+ unsigned nr_iovecs)
+{
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_BUFFERS,
+ iovecs, nr_iovecs);
+ return (ret < 0) ? ret : 0;
+}
+
+int io_uring_unregister_buffers(struct io_uring *ring)
+{
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd, IORING_UNREGISTER_BUFFERS,
+ NULL, 0);
+ return (ret < 0) ? ret : 0;
+}
+
+int io_uring_register_files_update_tag(struct io_uring *ring, unsigned off,
+ const int *files, const __u64 *tags,
+ unsigned nr_files)
+{
+ struct io_uring_rsrc_update2 up = {
+ .offset = off,
+ .data = (unsigned long)files,
+ .tags = (unsigned long)tags,
+ .nr = nr_files,
+ };
+
+ return __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_FILES_UPDATE2, &up,
+ sizeof(up));
+}
+
+/*
+ * Register an update for an existing file set. The updates will start at
+ * 'off' in the original array, and 'nr_files' is the number of files we'll
+ * update.
+ *
+ * Returns number of files updated on success, -ERROR on failure.
+ */
+int io_uring_register_files_update(struct io_uring *ring, unsigned off,
+ const int *files, unsigned nr_files)
+{
+ struct io_uring_files_update up = {
+ .offset = off,
+ .fds = (unsigned long) files,
+ };
+
+ return __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_FILES_UPDATE, &up,
+ nr_files);
+}
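
A sketch tying the update call to a sparse table: reserve slots once, then install descriptors as they become available; requests can then reference the slot index with IOSQE_FIXED_FILE instead of the raw fd. The 64-slot size is illustrative, and sparse registration assumes a kernel with IORING_RSRC_REGISTER_SPARSE support.

#include <errno.h>
#include <liburing.h>

/* Sketch: register a 64-slot sparse fixed-file table (once), then put 'fd'
 * into slot 0. Returns the number of files updated (1 here) on success,
 * -errno on failure. */
static int install_fixed_file(struct io_uring *ring, int fd)
{
	int ret;

	ret = io_uring_register_files_sparse(ring, 64);
	if (ret && ret != -EBUSY)	/* -EBUSY: a table is already registered */
		return ret;

	return io_uring_register_files_update(ring, 0, &fd, 1);
}
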
+
+static int increase_rlimit_nofile(unsigned nr)
+{
+ int ret;
+ struct rlimit rlim;
+
+ ret = __sys_getrlimit(RLIMIT_NOFILE, &rlim);
+ if (ret < 0)
+ return ret;
+
+ if (rlim.rlim_cur < nr) {
+ rlim.rlim_cur += nr;
+ __sys_setrlimit(RLIMIT_NOFILE, &rlim);
+ }
+
+ return 0;
+}
+
+int io_uring_register_files_sparse(struct io_uring *ring, unsigned nr)
+{
+ struct io_uring_rsrc_register reg = {
+ .flags = IORING_RSRC_REGISTER_SPARSE,
+ .nr = nr,
+ };
+ int ret, did_increase = 0;
+
+ do {
+ ret = __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_FILES2, &reg,
+ sizeof(reg));
+ if (ret >= 0)
+ break;
+ if (ret == -EMFILE && !did_increase) {
+ did_increase = 1;
+ increase_rlimit_nofile(nr);
+ continue;
+ }
+ break;
+ } while (1);
+
+ return ret;
+}
+
+int io_uring_register_files_tags(struct io_uring *ring, const int *files,
+ const __u64 *tags, unsigned nr)
+{
+ struct io_uring_rsrc_register reg = {
+ .nr = nr,
+ .data = (unsigned long)files,
+ .tags = (unsigned long)tags,
+ };
+ int ret, did_increase = 0;
+
+ do {
+ ret = __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_FILES2, &reg,
+ sizeof(reg));
+ if (ret >= 0)
+ break;
+ if (ret == -EMFILE && !did_increase) {
+ did_increase = 1;
+ increase_rlimit_nofile(nr);
+ continue;
+ }
+ break;
+ } while (1);
+
+ return ret;
+}
+
+int io_uring_register_files(struct io_uring *ring, const int *files,
+ unsigned nr_files)
+{
+ int ret, did_increase = 0;
+
+ do {
+ ret = __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_FILES, files,
+ nr_files);
+ if (ret >= 0)
+ break;
+ if (ret == -EMFILE && !did_increase) {
+ did_increase = 1;
+ increase_rlimit_nofile(nr_files);
+ continue;
+ }
+ break;
+ } while (1);
+
+ return ret;
+}
+
+int io_uring_unregister_files(struct io_uring *ring)
+{
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd, IORING_UNREGISTER_FILES,
+ NULL, 0);
+ return (ret < 0) ? ret : 0;
+}
+
+int io_uring_register_eventfd(struct io_uring *ring, int event_fd)
+{
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_EVENTFD,
+ &event_fd, 1);
+ return (ret < 0) ? ret : 0;
+}
+
+int io_uring_unregister_eventfd(struct io_uring *ring)
+{
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd, IORING_UNREGISTER_EVENTFD,
+ NULL, 0);
+ return (ret < 0) ? ret : 0;
+}
+
+int io_uring_register_eventfd_async(struct io_uring *ring, int event_fd)
+{
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_EVENTFD_ASYNC, &event_fd,
+ 1);
+ return (ret < 0) ? ret : 0;
+}
+
+int io_uring_register_probe(struct io_uring *ring, struct io_uring_probe *p,
+ unsigned int nr_ops)
+{
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_PROBE, p,
+ nr_ops);
+ return (ret < 0) ? ret : 0;
+}
+
+int io_uring_register_personality(struct io_uring *ring)
+{
+ return __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_PERSONALITY, NULL, 0);
+}
+
+int io_uring_unregister_personality(struct io_uring *ring, int id)
+{
+ return __sys_io_uring_register(ring->ring_fd,
+ IORING_UNREGISTER_PERSONALITY, NULL, id);
+}
+
+int io_uring_register_restrictions(struct io_uring *ring,
+ struct io_uring_restriction *res,
+ unsigned int nr_res)
+{
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_RESTRICTIONS, res,
+ nr_res);
+ return (ret < 0) ? ret : 0;
+}
+
+int io_uring_enable_rings(struct io_uring *ring)
+{
+ return __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_ENABLE_RINGS, NULL, 0);
+}
+
+int io_uring_register_iowq_aff(struct io_uring *ring, size_t cpusz,
+ const cpu_set_t *mask)
+{
+ if (cpusz >= (1U << 31))
+ return -EINVAL;
+
+ return __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_IOWQ_AFF,
+ mask, (int) cpusz);
+}
+
+int io_uring_unregister_iowq_aff(struct io_uring *ring)
+{
+ return __sys_io_uring_register(ring->ring_fd,
+ IORING_UNREGISTER_IOWQ_AFF, NULL, 0);
+}
+
+int io_uring_register_iowq_max_workers(struct io_uring *ring, unsigned int *val)
+{
+ return __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_IOWQ_MAX_WORKERS, val,
+ 2);
+}
+
+int io_uring_register_ring_fd(struct io_uring *ring)
+{
+ struct io_uring_rsrc_update up = {
+ .data = ring->ring_fd,
+ .offset = -1U,
+ };
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_RING_FDS,
+ &up, 1);
+ if (ret == 1) {
+ ring->enter_ring_fd = up.offset;
+ ring->int_flags |= INT_FLAG_REG_RING;
+ }
+ return ret;
+}
+
+
+int io_uring_unregister_ring_fd(struct io_uring *ring)
+{
+ struct io_uring_rsrc_update up = {
+ .offset = ring->enter_ring_fd,
+ };
+ int ret;
+
+ ret = __sys_io_uring_register(ring->ring_fd, IORING_UNREGISTER_RING_FDS,
+ &up, 1);
+ if (ret == 1) {
+ ring->enter_ring_fd = ring->ring_fd;
+ ring->int_flags &= ~INT_FLAG_REG_RING;
+ }
+ return ret;
+}
+
+int io_uring_register_buf_ring(struct io_uring *ring,
+ struct io_uring_buf_reg *reg,
+ unsigned int __maybe_unused flags)
+{
+ return __sys_io_uring_register(ring->ring_fd, IORING_REGISTER_PBUF_RING,
+ reg, 1);
+}
+
+int io_uring_unregister_buf_ring(struct io_uring *ring, int bgid)
+{
+ struct io_uring_buf_reg reg = { .bgid = bgid };
+
+ return __sys_io_uring_register(ring->ring_fd,
+ IORING_UNREGISTER_PBUF_RING, &reg, 1);
+}
+
+int io_uring_register_sync_cancel(struct io_uring *ring,
+ struct io_uring_sync_cancel_reg *reg)
+{
+ return __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_SYNC_CANCEL, reg, 1);
+}
+
+int io_uring_register_file_alloc_range(struct io_uring *ring,
+ unsigned off, unsigned len)
+{
+ struct io_uring_file_index_range range;
+
+ memset(&range, 0, sizeof(range));
+ range.off = off;
+ range.len = len;
+
+ return __sys_io_uring_register(ring->ring_fd,
+ IORING_REGISTER_FILE_ALLOC_RANGE, &range,
+ 0);
+}
diff --git a/contrib/libs/liburing/src/setup.c b/contrib/libs/liburing/src/setup.c
new file mode 100644
index 00000000000..0c15e75c1c4
--- /dev/null
+++ b/contrib/libs/liburing/src/setup.c
@@ -0,0 +1,370 @@
+#include "../config-host.h"
+/* SPDX-License-Identifier: MIT */
+#define _DEFAULT_SOURCE
+
+#include "lib.h"
+#include "syscall.h"
+#include "liburing.h"
+#include "int_flags.h"
+#include "liburing/compat.h"
+#include "liburing/io_uring.h"
+
+static void io_uring_unmap_rings(struct io_uring_sq *sq, struct io_uring_cq *cq)
+{
+ __sys_munmap(sq->ring_ptr, sq->ring_sz);
+ if (cq->ring_ptr && cq->ring_ptr != sq->ring_ptr)
+ __sys_munmap(cq->ring_ptr, cq->ring_sz);
+}
+
+static int io_uring_mmap(int fd, struct io_uring_params *p,
+ struct io_uring_sq *sq, struct io_uring_cq *cq)
+{
+ size_t size;
+ int ret;
+
+ size = sizeof(struct io_uring_cqe);
+ if (p->flags & IORING_SETUP_CQE32)
+ size += sizeof(struct io_uring_cqe);
+
+ sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned);
+ cq->ring_sz = p->cq_off.cqes + p->cq_entries * size;
+
+ if (p->features & IORING_FEAT_SINGLE_MMAP) {
+ if (cq->ring_sz > sq->ring_sz)
+ sq->ring_sz = cq->ring_sz;
+ cq->ring_sz = sq->ring_sz;
+ }
+ sq->ring_ptr = __sys_mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd,
+ IORING_OFF_SQ_RING);
+ if (IS_ERR(sq->ring_ptr))
+ return PTR_ERR(sq->ring_ptr);
+
+ if (p->features & IORING_FEAT_SINGLE_MMAP) {
+ cq->ring_ptr = sq->ring_ptr;
+ } else {
+ cq->ring_ptr = __sys_mmap(0, cq->ring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd,
+ IORING_OFF_CQ_RING);
+ if (IS_ERR(cq->ring_ptr)) {
+ ret = PTR_ERR(cq->ring_ptr);
+ cq->ring_ptr = NULL;
+ goto err;
+ }
+ }
+
+ sq->khead = sq->ring_ptr + p->sq_off.head;
+ sq->ktail = sq->ring_ptr + p->sq_off.tail;
+ sq->kring_mask = sq->ring_ptr + p->sq_off.ring_mask;
+ sq->kring_entries = sq->ring_ptr + p->sq_off.ring_entries;
+ sq->kflags = sq->ring_ptr + p->sq_off.flags;
+ sq->kdropped = sq->ring_ptr + p->sq_off.dropped;
+ sq->array = sq->ring_ptr + p->sq_off.array;
+
+ size = sizeof(struct io_uring_sqe);
+ if (p->flags & IORING_SETUP_SQE128)
+ size += 64;
+ sq->sqes = __sys_mmap(0, size * p->sq_entries, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQES);
+ if (IS_ERR(sq->sqes)) {
+ ret = PTR_ERR(sq->sqes);
+err:
+ io_uring_unmap_rings(sq, cq);
+ return ret;
+ }
+
+ cq->khead = cq->ring_ptr + p->cq_off.head;
+ cq->ktail = cq->ring_ptr + p->cq_off.tail;
+ cq->kring_mask = cq->ring_ptr + p->cq_off.ring_mask;
+ cq->kring_entries = cq->ring_ptr + p->cq_off.ring_entries;
+ cq->koverflow = cq->ring_ptr + p->cq_off.overflow;
+ cq->cqes = cq->ring_ptr + p->cq_off.cqes;
+ if (p->cq_off.flags)
+ cq->kflags = cq->ring_ptr + p->cq_off.flags;
+
+ sq->ring_mask = *sq->kring_mask;
+ sq->ring_entries = *sq->kring_entries;
+ cq->ring_mask = *cq->kring_mask;
+ cq->ring_entries = *cq->kring_entries;
+ return 0;
+}
+
+/*
+ * For users that want to specify sq_thread_cpu or sq_thread_idle, this
+ * interface is a convenient helper for mmap()ing the rings.
+ * Returns -errno on error, or zero on success. On success, 'ring'
+ * contains the necessary information to read/write to the rings.
+ */
+__cold int io_uring_queue_mmap(int fd, struct io_uring_params *p,
+ struct io_uring *ring)
+{
+ int ret;
+
+ memset(ring, 0, sizeof(*ring));
+ ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq);
+ if (!ret) {
+ ring->flags = p->flags;
+ ring->ring_fd = ring->enter_ring_fd = fd;
+ ring->int_flags = 0;
+ }
+ return ret;
+}
+
+/*
+ * Ensure that the mmap'ed rings aren't available to a child after a fork(2).
+ * This uses madvise(..., MADV_DONTFORK) on the mmap'ed ranges.
+ */
+__cold int io_uring_ring_dontfork(struct io_uring *ring)
+{
+ size_t len;
+ int ret;
+
+ if (!ring->sq.ring_ptr || !ring->sq.sqes || !ring->cq.ring_ptr)
+ return -EINVAL;
+
+ len = sizeof(struct io_uring_sqe);
+ if (ring->flags & IORING_SETUP_SQE128)
+ len += 64;
+ len *= ring->sq.ring_entries;
+ ret = __sys_madvise(ring->sq.sqes, len, MADV_DONTFORK);
+ if (ret < 0)
+ return ret;
+
+ len = ring->sq.ring_sz;
+ ret = __sys_madvise(ring->sq.ring_ptr, len, MADV_DONTFORK);
+ if (ret < 0)
+ return ret;
+
+ if (ring->cq.ring_ptr != ring->sq.ring_ptr) {
+ len = ring->cq.ring_sz;
+ ret = __sys_madvise(ring->cq.ring_ptr, len, MADV_DONTFORK);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+__cold int io_uring_queue_init_params(unsigned entries, struct io_uring *ring,
+ struct io_uring_params *p)
+{
+ int fd, ret;
+ unsigned *sq_array;
+ unsigned sq_entries, index;
+
+ fd = __sys_io_uring_setup(entries, p);
+ if (fd < 0)
+ return fd;
+
+ ret = io_uring_queue_mmap(fd, p, ring);
+ if (ret) {
+ __sys_close(fd);
+ return ret;
+ }
+
+ /*
+ * Directly map SQ slots to SQEs
+ */
+ sq_array = ring->sq.array;
+ sq_entries = ring->sq.ring_entries;
+ for (index = 0; index < sq_entries; index++)
+ sq_array[index] = index;
+
+ ring->features = p->features;
+ return 0;
+}
+
+/*
+ * Returns -errno on error, or zero on success. On success, 'ring'
+ * contains the necessary information to read/write to the rings.
+ */
+__cold int io_uring_queue_init(unsigned entries, struct io_uring *ring,
+ unsigned flags)
+{
+ struct io_uring_params p;
+
+ memset(&p, 0, sizeof(p));
+ p.flags = flags;
+
+ return io_uring_queue_init_params(entries, ring, &p);
+}
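
Putting the setup helpers together, the smallest complete lifecycle looks like the sketch below: init, one NOP request, wait, teardown. The function name is illustrative.

#include <liburing.h>

/* Sketch: init / submit / wait / exit round trip with a single NOP. */
static int ring_smoke_test(void)
{
	struct io_uring ring;
	struct io_uring_sqe *sqe;
	struct io_uring_cqe *cqe;
	int ret;

	ret = io_uring_queue_init(8, &ring, 0);
	if (ret)
		return ret;

	sqe = io_uring_get_sqe(&ring);
	io_uring_prep_nop(sqe);	/* fresh 8-entry ring: get_sqe cannot fail here */
	io_uring_submit(&ring);

	ret = io_uring_wait_cqe(&ring, &cqe);
	if (!ret)
		io_uring_cqe_seen(&ring, cqe);

	io_uring_queue_exit(&ring);
	return ret;
}
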
+
+__cold void io_uring_queue_exit(struct io_uring *ring)
+{
+ struct io_uring_sq *sq = &ring->sq;
+ struct io_uring_cq *cq = &ring->cq;
+ size_t sqe_size;
+
+ sqe_size = sizeof(struct io_uring_sqe);
+ if (ring->flags & IORING_SETUP_SQE128)
+ sqe_size += 64;
+ __sys_munmap(sq->sqes, sqe_size * sq->ring_entries);
+ io_uring_unmap_rings(sq, cq);
+ /*
+ * Not strictly required, but frees up the slot we used now rather
+ * than at process exit time.
+ */
+ if (ring->int_flags & INT_FLAG_REG_RING)
+ io_uring_unregister_ring_fd(ring);
+ __sys_close(ring->ring_fd);
+}
+
+__cold struct io_uring_probe *io_uring_get_probe_ring(struct io_uring *ring)
+{
+ struct io_uring_probe *probe;
+ size_t len;
+ int r;
+
+ len = sizeof(*probe) + 256 * sizeof(struct io_uring_probe_op);
+ probe = uring_malloc(len);
+ if (!probe)
+ return NULL;
+ memset(probe, 0, len);
+
+ r = io_uring_register_probe(ring, probe, 256);
+ if (r >= 0)
+ return probe;
+
+ uring_free(probe);
+ return NULL;
+}
+
+__cold struct io_uring_probe *io_uring_get_probe(void)
+{
+ struct io_uring ring;
+ struct io_uring_probe *probe;
+ int r;
+
+ r = io_uring_queue_init(2, &ring, 0);
+ if (r < 0)
+ return NULL;
+
+ probe = io_uring_get_probe_ring(&ring);
+ io_uring_queue_exit(&ring);
+ return probe;
+}
+
+__cold void io_uring_free_probe(struct io_uring_probe *probe)
+{
+ uring_free(probe);
+}
+
+static inline int __fls(unsigned long x)
+{
+ if (!x)
+ return 0;
+ return 8 * sizeof(x) - __builtin_clzl(x);
+}
+
+static unsigned roundup_pow2(unsigned depth)
+{
+ return 1U << __fls(depth - 1);
+}
+
+static size_t npages(size_t size, long page_size)
+{
+ size--;
+ size /= page_size;
+ return __fls((int) size);
+}
+
+#define KRING_SIZE 320
+
+static size_t rings_size(struct io_uring_params *p, unsigned entries,
+ unsigned cq_entries, long page_size)
+{
+ size_t pages, sq_size, cq_size;
+
+ cq_size = sizeof(struct io_uring_cqe);
+ if (p->flags & IORING_SETUP_CQE32)
+ cq_size += sizeof(struct io_uring_cqe);
+ cq_size *= cq_entries;
+ cq_size += KRING_SIZE;
+ cq_size = (cq_size + 63) & ~63UL;
+ pages = (size_t) 1 << npages(cq_size, page_size);
+
+ sq_size = sizeof(struct io_uring_sqe);
+ if (p->flags & IORING_SETUP_SQE128)
+ sq_size += 64;
+ sq_size *= entries;
+ pages += (size_t) 1 << npages(sq_size, page_size);
+ return pages * page_size;
+}
+
+#define KERN_MAX_ENTRIES 32768
+#define KERN_MAX_CQ_ENTRIES (2 * KERN_MAX_ENTRIES)
+
+/*
+ * Return the ulimit -l memlock memory required for a given ring
+ * setup, in bytes. May return -errno on error. On newer (5.12+) kernels,
+ * io_uring no longer requires any memlock memory, and hence this function
+ * will return 0 for that case. On older (5.11 and prior) kernels, this will
+ * return the required memory so that the caller can ensure that enough space
+ * is available before setting up a ring with the specified parameters.
+ */
+__cold ssize_t io_uring_mlock_size_params(unsigned entries,
+ struct io_uring_params *p)
+{
+ struct io_uring_params lp = { };
+ struct io_uring ring;
+ unsigned cq_entries;
+ long page_size;
+ ssize_t ret;
+
+ /*
+ * We only really use this inited ring to see if the kernel is newer
+ * or not. Newer kernels don't require memlocked memory. If we fail,
+ * it's most likely because it's an older kernel and we have no
+ * available memlock space. Just continue on, lp.features will still
+ * be zeroed at this point and we'll do the right thing.
+ */
+ ret = io_uring_queue_init_params(entries, &ring, &lp);
+ if (!ret)
+ io_uring_queue_exit(&ring);
+
+ /*
+ * Native workers imply using cgroup memory accounting, and hence no
+ * memlock memory is needed for the ring allocations.
+ */
+ if (lp.features & IORING_FEAT_NATIVE_WORKERS)
+ return 0;
+
+ if (!entries)
+ return -EINVAL;
+ if (entries > KERN_MAX_ENTRIES) {
+ if (!(p->flags & IORING_SETUP_CLAMP))
+ return -EINVAL;
+ entries = KERN_MAX_ENTRIES;
+ }
+
+ entries = roundup_pow2(entries);
+ if (p->flags & IORING_SETUP_CQSIZE) {
+ if (!p->cq_entries)
+ return -EINVAL;
+ cq_entries = p->cq_entries;
+ if (cq_entries > KERN_MAX_CQ_ENTRIES) {
+ if (!(p->flags & IORING_SETUP_CLAMP))
+ return -EINVAL;
+ cq_entries = KERN_MAX_CQ_ENTRIES;
+ }
+ cq_entries = roundup_pow2(cq_entries);
+ if (cq_entries < entries)
+ return -EINVAL;
+ } else {
+ cq_entries = 2 * entries;
+ }
+
+ page_size = get_page_size();
+ return rings_size(p, entries, cq_entries, page_size);
+}
+
+/*
+ * Return required ulimit -l memory space for a given ring setup. See
+ * @io_uring_mlock_size_params().
+ */
+__cold ssize_t io_uring_mlock_size(unsigned entries, unsigned flags)
+{
+ struct io_uring_params p = { .flags = flags, };
+
+ return io_uring_mlock_size_params(entries, &p);
+}
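
A short sketch of probing that requirement before committing to a ring configuration; the 4096-entry IOPOLL setup is illustrative.

#include <stdio.h>
#include <sys/types.h>
#include <liburing.h>

/* Sketch: report the memlock bytes a 4096-entry IOPOLL ring would need.
 * 0 means the running kernel (5.12+) does not charge ring memory at all. */
static void report_mlock_need(void)
{
	ssize_t need = io_uring_mlock_size(4096, IORING_SETUP_IOPOLL);

	if (need < 0)
		fprintf(stderr, "probe failed: %zd\n", need);
	else
		printf("memlock bytes required: %zd\n", need);
}
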
diff --git a/contrib/libs/liburing/src/syscall.c b/contrib/libs/liburing/src/syscall.c
new file mode 100644
index 00000000000..0e38a4eac7b
--- /dev/null
+++ b/contrib/libs/liburing/src/syscall.c
@@ -0,0 +1,30 @@
+#include "../config-host.h"
+/* SPDX-License-Identifier: MIT */
+
+#include "syscall.h"
+#include <liburing.h>
+
+int io_uring_enter(unsigned int fd, unsigned int to_submit,
+ unsigned int min_complete, unsigned int flags, sigset_t *sig)
+{
+ return __sys_io_uring_enter(fd, to_submit, min_complete, flags, sig);
+}
+
+int io_uring_enter2(unsigned int fd, unsigned int to_submit,
+ unsigned int min_complete, unsigned int flags,
+ sigset_t *sig, size_t sz)
+{
+ return __sys_io_uring_enter2(fd, to_submit, min_complete, flags, sig,
+ sz);
+}
+
+int io_uring_setup(unsigned int entries, struct io_uring_params *p)
+{
+ return __sys_io_uring_setup(entries, p);
+}
+
+int io_uring_register(unsigned int fd, unsigned int opcode, const void *arg,
+ unsigned int nr_args)
+{
+ return __sys_io_uring_register(fd, opcode, arg, nr_args);
+}
diff --git a/contrib/libs/liburing/src/syscall.h b/contrib/libs/liburing/src/syscall.h
new file mode 100644
index 00000000000..4fa77e60ed3
--- /dev/null
+++ b/contrib/libs/liburing/src/syscall.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef LIBURING_SYSCALL_H
+#define LIBURING_SYSCALL_H
+
+#include <errno.h>
+#include <signal.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <stdbool.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/resource.h>
+#include <liburing.h>
+
+/*
+ * Don't put this below the #include "arch/$arch/syscall.h", that
+ * file may need it.
+ */
+struct io_uring_params;
+
+static inline void *ERR_PTR(intptr_t n)
+{
+ return (void *) n;
+}
+
+static inline int PTR_ERR(const void *ptr)
+{
+ return (int) (intptr_t) ptr;
+}
+
+static inline bool IS_ERR(const void *ptr)
+{
+ return uring_unlikely((uintptr_t) ptr >= (uintptr_t) -4095UL);
+}
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "arch/x86/syscall.h"
+#elif defined(__aarch64__)
+#include "arch/aarch64/syscall.h"
+#else
+/*
+ * We don't have native syscall wrappers
+ * for this arch. Must use libc!
+ */
+#ifdef CONFIG_NOLIBC
+ #error "This arch doesn't support building liburing without libc"
+#endif
+/* libc syscall wrappers. */
+#error #include "arch/generic/syscall.h"
+#endif
+#endif