| author | Mikhail Borisov <borisov.mikhail@gmail.com> | 2022-02-10 16:45:39 +0300 |
|---|---|---|
| committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:39 +0300 |
| commit | a6a92afe03e02795227d2641b49819b687f088f8 (patch) | |
| tree | f6984a1d27d5a7ec88a6fdd6e20cd5b7693b6ece /contrib/restricted/libffi/src/aarch64 | |
| parent | c6dc8b8bd530985bc4cce0137e9a5de32f1087cb (diff) | |
| download | ydb-a6a92afe03e02795227d2641b49819b687f088f8.tar.gz | |
Restoring authorship annotation for Mikhail Borisov <borisov.mikhail@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'contrib/restricted/libffi/src/aarch64')

| -rw-r--r-- | contrib/restricted/libffi/src/aarch64/ffi.c | 2018 |
| -rw-r--r-- | contrib/restricted/libffi/src/aarch64/ffitarget.h | 184 |
| -rw-r--r-- | contrib/restricted/libffi/src/aarch64/internal.h | 134 |
| -rw-r--r-- | contrib/restricted/libffi/src/aarch64/sysv.S | 880 |
| -rw-r--r-- | contrib/restricted/libffi/src/aarch64/win64_armasm.S | 1012 |

5 files changed, 2114 insertions, 2114 deletions
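The files below make up libffi's AArch64 (AAPCS64) port: internal.h defines the AARCH64_RET_* return-type encoding, ffi.c classifies homogeneous floating-point aggregates (HFAs) and marshals arguments into a call context of eight X and eight V registers, and sysv.S / win64_armasm.S provide the ffi_call_SYSV and ffi_closure_SYSV entry points. As a minimal sketch of how client code drives this machinery through libffi's public API (the target function add2 is hypothetical, added here only for illustration):

```c
#include <ffi.h>
#include <stdio.h>

/* Hypothetical native function we want to call dynamically. */
static double add2(double a, double b) { return a + b; }

int main(void)
{
  ffi_cif cif;
  ffi_type *args[2] = { &ffi_type_double, &ffi_type_double };

  /* ffi_prep_cif ends up in ffi_prep_cif_machdep (ffi.c below),
     which encodes the return kind into cif->flags. */
  if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2,
                   &ffi_type_double, args) != FFI_OK)
    return 1;

  double a = 1.5, b = 2.25, result;
  void *values[2] = { &a, &b };

  /* ffi_call marshals a and b into the call context and enters
     the target through ffi_call_SYSV (sysv.S below). */
  ffi_call(&cif, FFI_FN(add2), &result, values);
  printf("%g\n", result);   /* prints 3.75 */
  return 0;
}
```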
diff --git a/contrib/restricted/libffi/src/aarch64/ffi.c b/contrib/restricted/libffi/src/aarch64/ffi.c index 1ebf43c192..84d44ab74a 100644 --- a/contrib/restricted/libffi/src/aarch64/ffi.c +++ b/contrib/restricted/libffi/src/aarch64/ffi.c @@ -1,1009 +1,1009 @@ -/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -``Software''), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#if defined(__aarch64__) || defined(__arm64__)|| defined (_M_ARM64) -#include <stdio.h> -#include <stdlib.h> -#include <stdint.h> -#include <fficonfig.h> -#include <ffi.h> -#include <ffi_common.h> -#include "internal.h" -#ifdef _M_ARM64 -#include <windows.h> /* FlushInstructionCache */ -#endif - -/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; - all further uses in this file will refer to the 128-bit type. */ -#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE -# if FFI_TYPE_LONGDOUBLE != 4 -# error FFI_TYPE_LONGDOUBLE out of date -# endif -#else -# undef FFI_TYPE_LONGDOUBLE -# define FFI_TYPE_LONGDOUBLE 4 -#endif - -union _d -{ - UINT64 d; - UINT32 s[2]; -}; - -struct _v -{ - union _d d[2] __attribute__((aligned(16))); -}; - -struct call_context -{ - struct _v v[N_V_ARG_REG]; - UINT64 x[N_X_ARG_REG]; -}; - -#if FFI_EXEC_TRAMPOLINE_TABLE - -#ifdef __MACH__ -#include <mach/vm_param.h> -#endif - -#else - -#if defined (__clang__) && defined (__APPLE__) -extern void sys_icache_invalidate (void *start, size_t len); -#endif - -static inline void -ffi_clear_cache (void *start, void *end) -{ -#if defined (__clang__) && defined (__APPLE__) - sys_icache_invalidate (start, (char *)end - (char *)start); -#elif defined (__GNUC__) - __builtin___clear_cache (start, end); -#elif defined (_M_ARM64) - FlushInstructionCache(GetCurrentProcess(), start, (char*)end - (char*)start); -#else -#error "Missing builtin to flush instruction cache" -#endif -} - -#endif - -/* A subroutine of is_vfp_type. Given a structure type, return the type code - of the first non-structure element. Recurse for structure elements. - Return -1 if the structure is in fact empty, i.e. no nested elements. */ - -static int -is_hfa0 (const ffi_type *ty) -{ - ffi_type **elements = ty->elements; - int i, ret = -1; - - if (elements != NULL) - for (i = 0; elements[i]; ++i) - { - ret = elements[i]->type; - if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX) - { - ret = is_hfa0 (elements[i]); - if (ret < 0) - continue; - } - break; - } - - return ret; -} - -/* A subroutine of is_vfp_type. Given a structure type, return true if all - of the non-structure elements are the same as CANDIDATE. 
*/ - -static int -is_hfa1 (const ffi_type *ty, int candidate) -{ - ffi_type **elements = ty->elements; - int i; - - if (elements != NULL) - for (i = 0; elements[i]; ++i) - { - int t = elements[i]->type; - if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX) - { - if (!is_hfa1 (elements[i], candidate)) - return 0; - } - else if (t != candidate) - return 0; - } - - return 1; -} - -/* Determine if TY may be allocated to the FP registers. This is both an - fp scalar type as well as an homogenous floating point aggregate (HFA). - That is, a structure consisting of 1 to 4 members of all the same type, - where that type is an fp scalar. - - Returns non-zero iff TY is an HFA. The result is the AARCH64_RET_* - constant for the type. */ - -static int -is_vfp_type (const ffi_type *ty) -{ - ffi_type **elements; - int candidate, i; - size_t size, ele_count; - - /* Quickest tests first. */ - candidate = ty->type; - switch (candidate) - { - default: - return 0; - case FFI_TYPE_FLOAT: - case FFI_TYPE_DOUBLE: - case FFI_TYPE_LONGDOUBLE: - ele_count = 1; - goto done; - case FFI_TYPE_COMPLEX: - candidate = ty->elements[0]->type; - switch (candidate) - { - case FFI_TYPE_FLOAT: - case FFI_TYPE_DOUBLE: - case FFI_TYPE_LONGDOUBLE: - ele_count = 2; - goto done; - } - return 0; - case FFI_TYPE_STRUCT: - break; - } - - /* No HFA types are smaller than 4 bytes, or larger than 64 bytes. */ - size = ty->size; - if (size < 4 || size > 64) - return 0; - - /* Find the type of the first non-structure member. */ - elements = ty->elements; - candidate = elements[0]->type; - if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX) - { - for (i = 0; ; ++i) - { - candidate = is_hfa0 (elements[i]); - if (candidate >= 0) - break; - } - } - - /* If the first member is not a floating point type, it's not an HFA. - Also quickly re-check the size of the structure. */ - switch (candidate) - { - case FFI_TYPE_FLOAT: - ele_count = size / sizeof(float); - if (size != ele_count * sizeof(float)) - return 0; - break; - case FFI_TYPE_DOUBLE: - ele_count = size / sizeof(double); - if (size != ele_count * sizeof(double)) - return 0; - break; - case FFI_TYPE_LONGDOUBLE: - ele_count = size / sizeof(long double); - if (size != ele_count * sizeof(long double)) - return 0; - break; - default: - return 0; - } - if (ele_count > 4) - return 0; - - /* Finally, make sure that all scalar elements are the same type. */ - for (i = 0; elements[i]; ++i) - { - int t = elements[i]->type; - if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX) - { - if (!is_hfa1 (elements[i], candidate)) - return 0; - } - else if (t != candidate) - return 0; - } - - /* All tests succeeded. Encode the result. */ - done: - return candidate * 4 + (4 - (int)ele_count); -} - -/* Representation of the procedure call argument marshalling - state. - - The terse state variable names match the names used in the AARCH64 - PCS. */ - -struct arg_state -{ - unsigned ngrn; /* Next general-purpose register number. */ - unsigned nsrn; /* Next vector register number. */ - size_t nsaa; /* Next stack offset. */ - -#if defined (__APPLE__) - unsigned allocating_variadic; -#endif -}; - -/* Initialize a procedure call argument marshalling state. */ -static void -arg_init (struct arg_state *state) -{ - state->ngrn = 0; - state->nsrn = 0; - state->nsaa = 0; -#if defined (__APPLE__) - state->allocating_variadic = 0; -#endif -} - -/* Allocate an aligned slot on the stack and return a pointer to it. 
*/ -static void * -allocate_to_stack (struct arg_state *state, void *stack, - size_t alignment, size_t size) -{ - size_t nsaa = state->nsaa; - - /* Round up the NSAA to the larger of 8 or the natural - alignment of the argument's type. */ -#if defined (__APPLE__) - if (state->allocating_variadic && alignment < 8) - alignment = 8; -#else - if (alignment < 8) - alignment = 8; -#endif - - nsaa = FFI_ALIGN (nsaa, alignment); - state->nsaa = nsaa + size; - - return (char *)stack + nsaa; -} - -static ffi_arg -extend_integer_type (void *source, int type) -{ - switch (type) - { - case FFI_TYPE_UINT8: - return *(UINT8 *) source; - case FFI_TYPE_SINT8: - return *(SINT8 *) source; - case FFI_TYPE_UINT16: - return *(UINT16 *) source; - case FFI_TYPE_SINT16: - return *(SINT16 *) source; - case FFI_TYPE_UINT32: - return *(UINT32 *) source; - case FFI_TYPE_INT: - case FFI_TYPE_SINT32: - return *(SINT32 *) source; - case FFI_TYPE_UINT64: - case FFI_TYPE_SINT64: - return *(UINT64 *) source; - break; - case FFI_TYPE_POINTER: - return *(uintptr_t *) source; - default: - abort(); - } -} - -#if defined(_MSC_VER) -void extend_hfa_type (void *dest, void *src, int h); -#else -static void -extend_hfa_type (void *dest, void *src, int h) -{ - ssize_t f = h - AARCH64_RET_S4; - void *x0; - - asm volatile ( - "adr %0, 0f\n" -" add %0, %0, %1\n" -" br %0\n" -"0: ldp s16, s17, [%3]\n" /* S4 */ -" ldp s18, s19, [%3, #8]\n" -" b 4f\n" -" ldp s16, s17, [%3]\n" /* S3 */ -" ldr s18, [%3, #8]\n" -" b 3f\n" -" ldp s16, s17, [%3]\n" /* S2 */ -" b 2f\n" -" nop\n" -" ldr s16, [%3]\n" /* S1 */ -" b 1f\n" -" nop\n" -" ldp d16, d17, [%3]\n" /* D4 */ -" ldp d18, d19, [%3, #16]\n" -" b 4f\n" -" ldp d16, d17, [%3]\n" /* D3 */ -" ldr d18, [%3, #16]\n" -" b 3f\n" -" ldp d16, d17, [%3]\n" /* D2 */ -" b 2f\n" -" nop\n" -" ldr d16, [%3]\n" /* D1 */ -" b 1f\n" -" nop\n" -" ldp q16, q17, [%3]\n" /* Q4 */ -" ldp q18, q19, [%3, #32]\n" -" b 4f\n" -" ldp q16, q17, [%3]\n" /* Q3 */ -" ldr q18, [%3, #32]\n" -" b 3f\n" -" ldp q16, q17, [%3]\n" /* Q2 */ -" b 2f\n" -" nop\n" -" ldr q16, [%3]\n" /* Q1 */ -" b 1f\n" -"4: str q19, [%2, #48]\n" -"3: str q18, [%2, #32]\n" -"2: str q17, [%2, #16]\n" -"1: str q16, [%2]" - : "=&r"(x0) - : "r"(f * 12), "r"(dest), "r"(src) - : "memory", "v16", "v17", "v18", "v19"); -} -#endif - -#if defined(_MSC_VER) -void* compress_hfa_type (void *dest, void *src, int h); -#else -static void * -compress_hfa_type (void *dest, void *reg, int h) -{ - switch (h) - { - case AARCH64_RET_S1: - if (dest == reg) - { -#ifdef __AARCH64EB__ - dest += 12; -#endif - } - else - *(float *)dest = *(float *)reg; - break; - case AARCH64_RET_S2: - asm ("ldp q16, q17, [%1]\n\t" - "st2 { v16.s, v17.s }[0], [%0]" - : : "r"(dest), "r"(reg) : "memory", "v16", "v17"); - break; - case AARCH64_RET_S3: - asm ("ldp q16, q17, [%1]\n\t" - "ldr q18, [%1, #32]\n\t" - "st3 { v16.s, v17.s, v18.s }[0], [%0]" - : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18"); - break; - case AARCH64_RET_S4: - asm ("ldp q16, q17, [%1]\n\t" - "ldp q18, q19, [%1, #32]\n\t" - "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]" - : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19"); - break; - - case AARCH64_RET_D1: - if (dest == reg) - { -#ifdef __AARCH64EB__ - dest += 8; -#endif - } - else - *(double *)dest = *(double *)reg; - break; - case AARCH64_RET_D2: - asm ("ldp q16, q17, [%1]\n\t" - "st2 { v16.d, v17.d }[0], [%0]" - : : "r"(dest), "r"(reg) : "memory", "v16", "v17"); - break; - case AARCH64_RET_D3: - asm ("ldp q16, q17, [%1]\n\t" - "ldr q18, [%1, #32]\n\t" - "st3 
{ v16.d, v17.d, v18.d }[0], [%0]" - : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18"); - break; - case AARCH64_RET_D4: - asm ("ldp q16, q17, [%1]\n\t" - "ldp q18, q19, [%1, #32]\n\t" - "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]" - : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19"); - break; - - default: - if (dest != reg) - return memcpy (dest, reg, 16 * (4 - (h & 3))); - break; - } - return dest; -} -#endif - -/* Either allocate an appropriate register for the argument type, or if - none are available, allocate a stack slot and return a pointer - to the allocated space. */ - -static void * -allocate_int_to_reg_or_stack (struct call_context *context, - struct arg_state *state, - void *stack, size_t size) -{ - if (state->ngrn < N_X_ARG_REG) - return &context->x[state->ngrn++]; - - state->ngrn = N_X_ARG_REG; - return allocate_to_stack (state, stack, size, size); -} - -ffi_status FFI_HIDDEN -ffi_prep_cif_machdep (ffi_cif *cif) -{ - ffi_type *rtype = cif->rtype; - size_t bytes = cif->bytes; - int flags, i, n; - - switch (rtype->type) - { - case FFI_TYPE_VOID: - flags = AARCH64_RET_VOID; - break; - case FFI_TYPE_UINT8: - flags = AARCH64_RET_UINT8; - break; - case FFI_TYPE_UINT16: - flags = AARCH64_RET_UINT16; - break; - case FFI_TYPE_UINT32: - flags = AARCH64_RET_UINT32; - break; - case FFI_TYPE_SINT8: - flags = AARCH64_RET_SINT8; - break; - case FFI_TYPE_SINT16: - flags = AARCH64_RET_SINT16; - break; - case FFI_TYPE_INT: - case FFI_TYPE_SINT32: - flags = AARCH64_RET_SINT32; - break; - case FFI_TYPE_SINT64: - case FFI_TYPE_UINT64: - flags = AARCH64_RET_INT64; - break; - case FFI_TYPE_POINTER: - flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64); - break; - - case FFI_TYPE_FLOAT: - case FFI_TYPE_DOUBLE: - case FFI_TYPE_LONGDOUBLE: - case FFI_TYPE_STRUCT: - case FFI_TYPE_COMPLEX: - flags = is_vfp_type (rtype); - if (flags == 0) - { - size_t s = rtype->size; - if (s > 16) - { - flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM; - bytes += 8; - } - else if (s == 16) - flags = AARCH64_RET_INT128; - else if (s == 8) - flags = AARCH64_RET_INT64; - else - flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY; - } - break; - - default: - abort(); - } - - for (i = 0, n = cif->nargs; i < n; i++) - if (is_vfp_type (cif->arg_types[i])) - { - flags |= AARCH64_FLAG_ARG_V; - break; - } - - /* Round the stack up to a multiple of the stack alignment requirement. */ - cif->bytes = (unsigned) FFI_ALIGN(bytes, 16); - cif->flags = flags; -#if defined (__APPLE__) - cif->aarch64_nfixedargs = 0; -#endif - - return FFI_OK; -} - -#if defined (__APPLE__) -/* Perform Apple-specific cif processing for variadic calls */ -ffi_status FFI_HIDDEN -ffi_prep_cif_machdep_var(ffi_cif *cif, unsigned int nfixedargs, - unsigned int ntotalargs) -{ - ffi_status status = ffi_prep_cif_machdep (cif); - cif->aarch64_nfixedargs = nfixedargs; - return status; -} -#endif /* __APPLE__ */ - -extern void ffi_call_SYSV (struct call_context *context, void *frame, - void (*fn)(void), void *rvalue, int flags, - void *closure) FFI_HIDDEN; - -/* Call a function with the provided arguments and capture the return - value. 
*/ -static void -ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue, - void **avalue, void *closure) -{ - struct call_context *context; - void *stack, *frame, *rvalue; - struct arg_state state; - size_t stack_bytes, rtype_size, rsize; - int i, nargs, flags; - ffi_type *rtype; - - flags = cif->flags; - rtype = cif->rtype; - rtype_size = rtype->size; - stack_bytes = cif->bytes; - - /* If the target function returns a structure via hidden pointer, - then we cannot allow a null rvalue. Otherwise, mash a null - rvalue to void return type. */ - rsize = 0; - if (flags & AARCH64_RET_IN_MEM) - { - if (orig_rvalue == NULL) - rsize = rtype_size; - } - else if (orig_rvalue == NULL) - flags &= AARCH64_FLAG_ARG_V; - else if (flags & AARCH64_RET_NEED_COPY) - rsize = 16; - - /* Allocate consectutive stack for everything we'll need. */ - context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize); - stack = context + 1; - frame = (void*)((uintptr_t)stack + (uintptr_t)stack_bytes); - rvalue = (rsize ? (void*)((uintptr_t)frame + 32) : orig_rvalue); - - arg_init (&state); - for (i = 0, nargs = cif->nargs; i < nargs; i++) - { - ffi_type *ty = cif->arg_types[i]; - size_t s = ty->size; - void *a = avalue[i]; - int h, t; - - t = ty->type; - switch (t) - { - case FFI_TYPE_VOID: - FFI_ASSERT (0); - break; - - /* If the argument is a basic type the argument is allocated to an - appropriate register, or if none are available, to the stack. */ - case FFI_TYPE_INT: - case FFI_TYPE_UINT8: - case FFI_TYPE_SINT8: - case FFI_TYPE_UINT16: - case FFI_TYPE_SINT16: - case FFI_TYPE_UINT32: - case FFI_TYPE_SINT32: - case FFI_TYPE_UINT64: - case FFI_TYPE_SINT64: - case FFI_TYPE_POINTER: - do_pointer: - { - ffi_arg ext = extend_integer_type (a, t); - if (state.ngrn < N_X_ARG_REG) - context->x[state.ngrn++] = ext; - else - { - void *d = allocate_to_stack (&state, stack, ty->alignment, s); - state.ngrn = N_X_ARG_REG; - /* Note that the default abi extends each argument - to a full 64-bit slot, while the iOS abi allocates - only enough space. */ -#ifdef __APPLE__ - memcpy(d, a, s); -#else - *(ffi_arg *)d = ext; -#endif - } - } - break; - - case FFI_TYPE_FLOAT: - case FFI_TYPE_DOUBLE: - case FFI_TYPE_LONGDOUBLE: - case FFI_TYPE_STRUCT: - case FFI_TYPE_COMPLEX: - { - void *dest; - - h = is_vfp_type (ty); - if (h) - { - int elems = 4 - (h & 3); -#ifdef _M_ARM64 /* for handling armasm calling convention */ - if (cif->is_variadic) - { - if (state.ngrn + elems <= N_X_ARG_REG) - { - dest = &context->x[state.ngrn]; - state.ngrn += elems; - extend_hfa_type(dest, a, h); - break; - } - state.nsrn = N_X_ARG_REG; - dest = allocate_to_stack(&state, stack, ty->alignment, s); - } - else - { -#endif /* for handling armasm calling convention */ - if (state.nsrn + elems <= N_V_ARG_REG) - { - dest = &context->v[state.nsrn]; - state.nsrn += elems; - extend_hfa_type (dest, a, h); - break; - } - state.nsrn = N_V_ARG_REG; - dest = allocate_to_stack (&state, stack, ty->alignment, s); -#ifdef _M_ARM64 /* for handling armasm calling convention */ - } -#endif /* for handling armasm calling convention */ - } - else if (s > 16) - { - /* If the argument is a composite type that is larger than 16 - bytes, then the argument has been copied to memory, and - the argument is replaced by a pointer to the copy. 
*/ - a = &avalue[i]; - t = FFI_TYPE_POINTER; - s = sizeof (void *); - goto do_pointer; - } - else - { - size_t n = (s + 7) / 8; - if (state.ngrn + n <= N_X_ARG_REG) - { - /* If the argument is a composite type and the size in - double-words is not more than the number of available - X registers, then the argument is copied into - consecutive X registers. */ - dest = &context->x[state.ngrn]; - state.ngrn += (unsigned int)n; - } - else - { - /* Otherwise, there are insufficient X registers. Further - X register allocations are prevented, the NSAA is - adjusted and the argument is copied to memory at the - adjusted NSAA. */ - state.ngrn = N_X_ARG_REG; - dest = allocate_to_stack (&state, stack, ty->alignment, s); - } - } - memcpy (dest, a, s); - } - break; - - default: - abort(); - } - -#if defined (__APPLE__) - if (i + 1 == cif->aarch64_nfixedargs) - { - state.ngrn = N_X_ARG_REG; - state.nsrn = N_V_ARG_REG; - state.allocating_variadic = 1; - } -#endif - } - - ffi_call_SYSV (context, frame, fn, rvalue, flags, closure); - - if (flags & AARCH64_RET_NEED_COPY) - memcpy (orig_rvalue, rvalue, rtype_size); -} - -void -ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue) -{ - ffi_call_int (cif, fn, rvalue, avalue, NULL); -} - -#ifdef FFI_GO_CLOSURES -void -ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, - void **avalue, void *closure) -{ - ffi_call_int (cif, fn, rvalue, avalue, closure); -} -#endif /* FFI_GO_CLOSURES */ - -/* Build a trampoline. */ - -extern void ffi_closure_SYSV (void) FFI_HIDDEN; -extern void ffi_closure_SYSV_V (void) FFI_HIDDEN; - -ffi_status -ffi_prep_closure_loc (ffi_closure *closure, - ffi_cif* cif, - void (*fun)(ffi_cif*,void*,void**,void*), - void *user_data, - void *codeloc) -{ - if (cif->abi != FFI_SYSV) - return FFI_BAD_ABI; - - void (*start)(void); - - if (cif->flags & AARCH64_FLAG_ARG_V) - start = ffi_closure_SYSV_V; - else - start = ffi_closure_SYSV; - -#if FFI_EXEC_TRAMPOLINE_TABLE -#ifdef __MACH__ - void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE); - config[0] = closure; - config[1] = start; -#endif -#else - static const unsigned char trampoline[16] = { - 0x90, 0x00, 0x00, 0x58, /* ldr x16, tramp+16 */ - 0xf1, 0xff, 0xff, 0x10, /* adr x17, tramp+0 */ - 0x00, 0x02, 0x1f, 0xd6 /* br x16 */ - }; - char *tramp = closure->tramp; - - memcpy (tramp, trampoline, sizeof(trampoline)); - - *(UINT64 *)(tramp + 16) = (uintptr_t)start; - - ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE); - - /* Also flush the cache for code mapping. 
*/ -#ifdef _M_ARM64 - // Not using dlmalloc.c for Windows ARM64 builds - // so calling ffi_data_to_code_pointer() isn't necessary - unsigned char *tramp_code = tramp; - #else - unsigned char *tramp_code = ffi_data_to_code_pointer (tramp); - #endif - ffi_clear_cache (tramp_code, tramp_code + FFI_TRAMPOLINE_SIZE); -#endif - - closure->cif = cif; - closure->fun = fun; - closure->user_data = user_data; - - return FFI_OK; -} - -#ifdef FFI_GO_CLOSURES -extern void ffi_go_closure_SYSV (void) FFI_HIDDEN; -extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN; - -ffi_status -ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif, - void (*fun)(ffi_cif*,void*,void**,void*)) -{ - void (*start)(void); - - if (cif->abi != FFI_SYSV) - return FFI_BAD_ABI; - - if (cif->flags & AARCH64_FLAG_ARG_V) - start = ffi_go_closure_SYSV_V; - else - start = ffi_go_closure_SYSV; - - closure->tramp = start; - closure->cif = cif; - closure->fun = fun; - - return FFI_OK; -} -#endif /* FFI_GO_CLOSURES */ - -/* Primary handler to setup and invoke a function within a closure. - - A closure when invoked enters via the assembler wrapper - ffi_closure_SYSV(). The wrapper allocates a call context on the - stack, saves the interesting registers (from the perspective of - the calling convention) into the context then passes control to - ffi_closure_SYSV_inner() passing the saved context and a pointer to - the stack at the point ffi_closure_SYSV() was invoked. - - On the return path the assembler wrapper will reload call context - registers. - - ffi_closure_SYSV_inner() marshalls the call context into ffi value - descriptors, invokes the wrapped function, then marshalls the return - value back into the call context. */ - -int FFI_HIDDEN -ffi_closure_SYSV_inner (ffi_cif *cif, - void (*fun)(ffi_cif*,void*,void**,void*), - void *user_data, - struct call_context *context, - void *stack, void *rvalue, void *struct_rvalue) -{ - void **avalue = (void**) alloca (cif->nargs * sizeof (void*)); - int i, h, nargs, flags; - struct arg_state state; - - arg_init (&state); - - for (i = 0, nargs = cif->nargs; i < nargs; i++) - { - ffi_type *ty = cif->arg_types[i]; - int t = ty->type; - size_t n, s = ty->size; - - switch (t) - { - case FFI_TYPE_VOID: - FFI_ASSERT (0); - break; - - case FFI_TYPE_INT: - case FFI_TYPE_UINT8: - case FFI_TYPE_SINT8: - case FFI_TYPE_UINT16: - case FFI_TYPE_SINT16: - case FFI_TYPE_UINT32: - case FFI_TYPE_SINT32: - case FFI_TYPE_UINT64: - case FFI_TYPE_SINT64: - case FFI_TYPE_POINTER: - avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s); - break; - - case FFI_TYPE_FLOAT: - case FFI_TYPE_DOUBLE: - case FFI_TYPE_LONGDOUBLE: - case FFI_TYPE_STRUCT: - case FFI_TYPE_COMPLEX: - h = is_vfp_type (ty); - if (h) - { - n = 4 - (h & 3); -#ifdef _M_ARM64 /* for handling armasm calling convention */ - if (cif->is_variadic) - { - if (state.ngrn + n <= N_X_ARG_REG) - { - void *reg = &context->x[state.ngrn]; - state.ngrn += (unsigned int)n; - - /* Eeek! We need a pointer to the structure, however the - homogeneous float elements are being passed in individual - registers, therefore for float and double the structure - is not represented as a contiguous sequence of bytes in - our saved register context. We don't need the original - contents of the register storage, so we reformat the - structure into the same memory. 
*/ - avalue[i] = compress_hfa_type(reg, reg, h); - } - else - { - state.ngrn = N_X_ARG_REG; - state.nsrn = N_V_ARG_REG; - avalue[i] = allocate_to_stack(&state, stack, - ty->alignment, s); - } - } - else - { -#endif /* for handling armasm calling convention */ - if (state.nsrn + n <= N_V_ARG_REG) - { - void *reg = &context->v[state.nsrn]; - state.nsrn += (unsigned int)n; - avalue[i] = compress_hfa_type(reg, reg, h); - } - else - { - state.nsrn = N_V_ARG_REG; - avalue[i] = allocate_to_stack(&state, stack, - ty->alignment, s); - } -#ifdef _M_ARM64 /* for handling armasm calling convention */ - } -#endif /* for handling armasm calling convention */ - } - else if (s > 16) - { - /* Replace Composite type of size greater than 16 with a - pointer. */ - avalue[i] = *(void **) - allocate_int_to_reg_or_stack (context, &state, stack, - sizeof (void *)); - } - else - { - n = (s + 7) / 8; - if (state.ngrn + n <= N_X_ARG_REG) - { - avalue[i] = &context->x[state.ngrn]; - state.ngrn += (unsigned int)n; - } - else - { - state.ngrn = N_X_ARG_REG; - avalue[i] = allocate_to_stack(&state, stack, - ty->alignment, s); - } - } - break; - - default: - abort(); - } - -#if defined (__APPLE__) - if (i + 1 == cif->aarch64_nfixedargs) - { - state.ngrn = N_X_ARG_REG; - state.nsrn = N_V_ARG_REG; - state.allocating_variadic = 1; - } -#endif - } - - flags = cif->flags; - if (flags & AARCH64_RET_IN_MEM) - rvalue = struct_rvalue; - - fun (cif, rvalue, avalue, user_data); - - return flags; -} - -#endif /* (__aarch64__) || defined(__arm64__)|| defined (_M_ARM64)*/ +/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +``Software''), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#if defined(__aarch64__) || defined(__arm64__)|| defined (_M_ARM64) +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <fficonfig.h> +#include <ffi.h> +#include <ffi_common.h> +#include "internal.h" +#ifdef _M_ARM64 +#include <windows.h> /* FlushInstructionCache */ +#endif + +/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; + all further uses in this file will refer to the 128-bit type. 
*/ +#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE +# if FFI_TYPE_LONGDOUBLE != 4 +# error FFI_TYPE_LONGDOUBLE out of date +# endif +#else +# undef FFI_TYPE_LONGDOUBLE +# define FFI_TYPE_LONGDOUBLE 4 +#endif + +union _d +{ + UINT64 d; + UINT32 s[2]; +}; + +struct _v +{ + union _d d[2] __attribute__((aligned(16))); +}; + +struct call_context +{ + struct _v v[N_V_ARG_REG]; + UINT64 x[N_X_ARG_REG]; +}; + +#if FFI_EXEC_TRAMPOLINE_TABLE + +#ifdef __MACH__ +#include <mach/vm_param.h> +#endif + +#else + +#if defined (__clang__) && defined (__APPLE__) +extern void sys_icache_invalidate (void *start, size_t len); +#endif + +static inline void +ffi_clear_cache (void *start, void *end) +{ +#if defined (__clang__) && defined (__APPLE__) + sys_icache_invalidate (start, (char *)end - (char *)start); +#elif defined (__GNUC__) + __builtin___clear_cache (start, end); +#elif defined (_M_ARM64) + FlushInstructionCache(GetCurrentProcess(), start, (char*)end - (char*)start); +#else +#error "Missing builtin to flush instruction cache" +#endif +} + +#endif + +/* A subroutine of is_vfp_type. Given a structure type, return the type code + of the first non-structure element. Recurse for structure elements. + Return -1 if the structure is in fact empty, i.e. no nested elements. */ + +static int +is_hfa0 (const ffi_type *ty) +{ + ffi_type **elements = ty->elements; + int i, ret = -1; + + if (elements != NULL) + for (i = 0; elements[i]; ++i) + { + ret = elements[i]->type; + if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX) + { + ret = is_hfa0 (elements[i]); + if (ret < 0) + continue; + } + break; + } + + return ret; +} + +/* A subroutine of is_vfp_type. Given a structure type, return true if all + of the non-structure elements are the same as CANDIDATE. */ + +static int +is_hfa1 (const ffi_type *ty, int candidate) +{ + ffi_type **elements = ty->elements; + int i; + + if (elements != NULL) + for (i = 0; elements[i]; ++i) + { + int t = elements[i]->type; + if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX) + { + if (!is_hfa1 (elements[i], candidate)) + return 0; + } + else if (t != candidate) + return 0; + } + + return 1; +} + +/* Determine if TY may be allocated to the FP registers. This is both an + fp scalar type as well as an homogenous floating point aggregate (HFA). + That is, a structure consisting of 1 to 4 members of all the same type, + where that type is an fp scalar. + + Returns non-zero iff TY is an HFA. The result is the AARCH64_RET_* + constant for the type. */ + +static int +is_vfp_type (const ffi_type *ty) +{ + ffi_type **elements; + int candidate, i; + size_t size, ele_count; + + /* Quickest tests first. */ + candidate = ty->type; + switch (candidate) + { + default: + return 0; + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + ele_count = 1; + goto done; + case FFI_TYPE_COMPLEX: + candidate = ty->elements[0]->type; + switch (candidate) + { + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + ele_count = 2; + goto done; + } + return 0; + case FFI_TYPE_STRUCT: + break; + } + + /* No HFA types are smaller than 4 bytes, or larger than 64 bytes. */ + size = ty->size; + if (size < 4 || size > 64) + return 0; + + /* Find the type of the first non-structure member. 
*/ + elements = ty->elements; + candidate = elements[0]->type; + if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX) + { + for (i = 0; ; ++i) + { + candidate = is_hfa0 (elements[i]); + if (candidate >= 0) + break; + } + } + + /* If the first member is not a floating point type, it's not an HFA. + Also quickly re-check the size of the structure. */ + switch (candidate) + { + case FFI_TYPE_FLOAT: + ele_count = size / sizeof(float); + if (size != ele_count * sizeof(float)) + return 0; + break; + case FFI_TYPE_DOUBLE: + ele_count = size / sizeof(double); + if (size != ele_count * sizeof(double)) + return 0; + break; + case FFI_TYPE_LONGDOUBLE: + ele_count = size / sizeof(long double); + if (size != ele_count * sizeof(long double)) + return 0; + break; + default: + return 0; + } + if (ele_count > 4) + return 0; + + /* Finally, make sure that all scalar elements are the same type. */ + for (i = 0; elements[i]; ++i) + { + int t = elements[i]->type; + if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX) + { + if (!is_hfa1 (elements[i], candidate)) + return 0; + } + else if (t != candidate) + return 0; + } + + /* All tests succeeded. Encode the result. */ + done: + return candidate * 4 + (4 - (int)ele_count); +} + +/* Representation of the procedure call argument marshalling + state. + + The terse state variable names match the names used in the AARCH64 + PCS. */ + +struct arg_state +{ + unsigned ngrn; /* Next general-purpose register number. */ + unsigned nsrn; /* Next vector register number. */ + size_t nsaa; /* Next stack offset. */ + +#if defined (__APPLE__) + unsigned allocating_variadic; +#endif +}; + +/* Initialize a procedure call argument marshalling state. */ +static void +arg_init (struct arg_state *state) +{ + state->ngrn = 0; + state->nsrn = 0; + state->nsaa = 0; +#if defined (__APPLE__) + state->allocating_variadic = 0; +#endif +} + +/* Allocate an aligned slot on the stack and return a pointer to it. */ +static void * +allocate_to_stack (struct arg_state *state, void *stack, + size_t alignment, size_t size) +{ + size_t nsaa = state->nsaa; + + /* Round up the NSAA to the larger of 8 or the natural + alignment of the argument's type. 
*/ +#if defined (__APPLE__) + if (state->allocating_variadic && alignment < 8) + alignment = 8; +#else + if (alignment < 8) + alignment = 8; +#endif + + nsaa = FFI_ALIGN (nsaa, alignment); + state->nsaa = nsaa + size; + + return (char *)stack + nsaa; +} + +static ffi_arg +extend_integer_type (void *source, int type) +{ + switch (type) + { + case FFI_TYPE_UINT8: + return *(UINT8 *) source; + case FFI_TYPE_SINT8: + return *(SINT8 *) source; + case FFI_TYPE_UINT16: + return *(UINT16 *) source; + case FFI_TYPE_SINT16: + return *(SINT16 *) source; + case FFI_TYPE_UINT32: + return *(UINT32 *) source; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + return *(SINT32 *) source; + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + return *(UINT64 *) source; + break; + case FFI_TYPE_POINTER: + return *(uintptr_t *) source; + default: + abort(); + } +} + +#if defined(_MSC_VER) +void extend_hfa_type (void *dest, void *src, int h); +#else +static void +extend_hfa_type (void *dest, void *src, int h) +{ + ssize_t f = h - AARCH64_RET_S4; + void *x0; + + asm volatile ( + "adr %0, 0f\n" +" add %0, %0, %1\n" +" br %0\n" +"0: ldp s16, s17, [%3]\n" /* S4 */ +" ldp s18, s19, [%3, #8]\n" +" b 4f\n" +" ldp s16, s17, [%3]\n" /* S3 */ +" ldr s18, [%3, #8]\n" +" b 3f\n" +" ldp s16, s17, [%3]\n" /* S2 */ +" b 2f\n" +" nop\n" +" ldr s16, [%3]\n" /* S1 */ +" b 1f\n" +" nop\n" +" ldp d16, d17, [%3]\n" /* D4 */ +" ldp d18, d19, [%3, #16]\n" +" b 4f\n" +" ldp d16, d17, [%3]\n" /* D3 */ +" ldr d18, [%3, #16]\n" +" b 3f\n" +" ldp d16, d17, [%3]\n" /* D2 */ +" b 2f\n" +" nop\n" +" ldr d16, [%3]\n" /* D1 */ +" b 1f\n" +" nop\n" +" ldp q16, q17, [%3]\n" /* Q4 */ +" ldp q18, q19, [%3, #32]\n" +" b 4f\n" +" ldp q16, q17, [%3]\n" /* Q3 */ +" ldr q18, [%3, #32]\n" +" b 3f\n" +" ldp q16, q17, [%3]\n" /* Q2 */ +" b 2f\n" +" nop\n" +" ldr q16, [%3]\n" /* Q1 */ +" b 1f\n" +"4: str q19, [%2, #48]\n" +"3: str q18, [%2, #32]\n" +"2: str q17, [%2, #16]\n" +"1: str q16, [%2]" + : "=&r"(x0) + : "r"(f * 12), "r"(dest), "r"(src) + : "memory", "v16", "v17", "v18", "v19"); +} +#endif + +#if defined(_MSC_VER) +void* compress_hfa_type (void *dest, void *src, int h); +#else +static void * +compress_hfa_type (void *dest, void *reg, int h) +{ + switch (h) + { + case AARCH64_RET_S1: + if (dest == reg) + { +#ifdef __AARCH64EB__ + dest += 12; +#endif + } + else + *(float *)dest = *(float *)reg; + break; + case AARCH64_RET_S2: + asm ("ldp q16, q17, [%1]\n\t" + "st2 { v16.s, v17.s }[0], [%0]" + : : "r"(dest), "r"(reg) : "memory", "v16", "v17"); + break; + case AARCH64_RET_S3: + asm ("ldp q16, q17, [%1]\n\t" + "ldr q18, [%1, #32]\n\t" + "st3 { v16.s, v17.s, v18.s }[0], [%0]" + : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18"); + break; + case AARCH64_RET_S4: + asm ("ldp q16, q17, [%1]\n\t" + "ldp q18, q19, [%1, #32]\n\t" + "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]" + : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19"); + break; + + case AARCH64_RET_D1: + if (dest == reg) + { +#ifdef __AARCH64EB__ + dest += 8; +#endif + } + else + *(double *)dest = *(double *)reg; + break; + case AARCH64_RET_D2: + asm ("ldp q16, q17, [%1]\n\t" + "st2 { v16.d, v17.d }[0], [%0]" + : : "r"(dest), "r"(reg) : "memory", "v16", "v17"); + break; + case AARCH64_RET_D3: + asm ("ldp q16, q17, [%1]\n\t" + "ldr q18, [%1, #32]\n\t" + "st3 { v16.d, v17.d, v18.d }[0], [%0]" + : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18"); + break; + case AARCH64_RET_D4: + asm ("ldp q16, q17, [%1]\n\t" + "ldp q18, q19, [%1, #32]\n\t" + "st4 { v16.d, v17.d, v18.d, v19.d }[0], 
[%0]" + : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19"); + break; + + default: + if (dest != reg) + return memcpy (dest, reg, 16 * (4 - (h & 3))); + break; + } + return dest; +} +#endif + +/* Either allocate an appropriate register for the argument type, or if + none are available, allocate a stack slot and return a pointer + to the allocated space. */ + +static void * +allocate_int_to_reg_or_stack (struct call_context *context, + struct arg_state *state, + void *stack, size_t size) +{ + if (state->ngrn < N_X_ARG_REG) + return &context->x[state->ngrn++]; + + state->ngrn = N_X_ARG_REG; + return allocate_to_stack (state, stack, size, size); +} + +ffi_status FFI_HIDDEN +ffi_prep_cif_machdep (ffi_cif *cif) +{ + ffi_type *rtype = cif->rtype; + size_t bytes = cif->bytes; + int flags, i, n; + + switch (rtype->type) + { + case FFI_TYPE_VOID: + flags = AARCH64_RET_VOID; + break; + case FFI_TYPE_UINT8: + flags = AARCH64_RET_UINT8; + break; + case FFI_TYPE_UINT16: + flags = AARCH64_RET_UINT16; + break; + case FFI_TYPE_UINT32: + flags = AARCH64_RET_UINT32; + break; + case FFI_TYPE_SINT8: + flags = AARCH64_RET_SINT8; + break; + case FFI_TYPE_SINT16: + flags = AARCH64_RET_SINT16; + break; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + flags = AARCH64_RET_SINT32; + break; + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + flags = AARCH64_RET_INT64; + break; + case FFI_TYPE_POINTER: + flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64); + break; + + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + case FFI_TYPE_STRUCT: + case FFI_TYPE_COMPLEX: + flags = is_vfp_type (rtype); + if (flags == 0) + { + size_t s = rtype->size; + if (s > 16) + { + flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM; + bytes += 8; + } + else if (s == 16) + flags = AARCH64_RET_INT128; + else if (s == 8) + flags = AARCH64_RET_INT64; + else + flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY; + } + break; + + default: + abort(); + } + + for (i = 0, n = cif->nargs; i < n; i++) + if (is_vfp_type (cif->arg_types[i])) + { + flags |= AARCH64_FLAG_ARG_V; + break; + } + + /* Round the stack up to a multiple of the stack alignment requirement. */ + cif->bytes = (unsigned) FFI_ALIGN(bytes, 16); + cif->flags = flags; +#if defined (__APPLE__) + cif->aarch64_nfixedargs = 0; +#endif + + return FFI_OK; +} + +#if defined (__APPLE__) +/* Perform Apple-specific cif processing for variadic calls */ +ffi_status FFI_HIDDEN +ffi_prep_cif_machdep_var(ffi_cif *cif, unsigned int nfixedargs, + unsigned int ntotalargs) +{ + ffi_status status = ffi_prep_cif_machdep (cif); + cif->aarch64_nfixedargs = nfixedargs; + return status; +} +#endif /* __APPLE__ */ + +extern void ffi_call_SYSV (struct call_context *context, void *frame, + void (*fn)(void), void *rvalue, int flags, + void *closure) FFI_HIDDEN; + +/* Call a function with the provided arguments and capture the return + value. */ +static void +ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue, + void **avalue, void *closure) +{ + struct call_context *context; + void *stack, *frame, *rvalue; + struct arg_state state; + size_t stack_bytes, rtype_size, rsize; + int i, nargs, flags; + ffi_type *rtype; + + flags = cif->flags; + rtype = cif->rtype; + rtype_size = rtype->size; + stack_bytes = cif->bytes; + + /* If the target function returns a structure via hidden pointer, + then we cannot allow a null rvalue. Otherwise, mash a null + rvalue to void return type. 
*/ + rsize = 0; + if (flags & AARCH64_RET_IN_MEM) + { + if (orig_rvalue == NULL) + rsize = rtype_size; + } + else if (orig_rvalue == NULL) + flags &= AARCH64_FLAG_ARG_V; + else if (flags & AARCH64_RET_NEED_COPY) + rsize = 16; + + /* Allocate consectutive stack for everything we'll need. */ + context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize); + stack = context + 1; + frame = (void*)((uintptr_t)stack + (uintptr_t)stack_bytes); + rvalue = (rsize ? (void*)((uintptr_t)frame + 32) : orig_rvalue); + + arg_init (&state); + for (i = 0, nargs = cif->nargs; i < nargs; i++) + { + ffi_type *ty = cif->arg_types[i]; + size_t s = ty->size; + void *a = avalue[i]; + int h, t; + + t = ty->type; + switch (t) + { + case FFI_TYPE_VOID: + FFI_ASSERT (0); + break; + + /* If the argument is a basic type the argument is allocated to an + appropriate register, or if none are available, to the stack. */ + case FFI_TYPE_INT: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_POINTER: + do_pointer: + { + ffi_arg ext = extend_integer_type (a, t); + if (state.ngrn < N_X_ARG_REG) + context->x[state.ngrn++] = ext; + else + { + void *d = allocate_to_stack (&state, stack, ty->alignment, s); + state.ngrn = N_X_ARG_REG; + /* Note that the default abi extends each argument + to a full 64-bit slot, while the iOS abi allocates + only enough space. */ +#ifdef __APPLE__ + memcpy(d, a, s); +#else + *(ffi_arg *)d = ext; +#endif + } + } + break; + + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + case FFI_TYPE_STRUCT: + case FFI_TYPE_COMPLEX: + { + void *dest; + + h = is_vfp_type (ty); + if (h) + { + int elems = 4 - (h & 3); +#ifdef _M_ARM64 /* for handling armasm calling convention */ + if (cif->is_variadic) + { + if (state.ngrn + elems <= N_X_ARG_REG) + { + dest = &context->x[state.ngrn]; + state.ngrn += elems; + extend_hfa_type(dest, a, h); + break; + } + state.nsrn = N_X_ARG_REG; + dest = allocate_to_stack(&state, stack, ty->alignment, s); + } + else + { +#endif /* for handling armasm calling convention */ + if (state.nsrn + elems <= N_V_ARG_REG) + { + dest = &context->v[state.nsrn]; + state.nsrn += elems; + extend_hfa_type (dest, a, h); + break; + } + state.nsrn = N_V_ARG_REG; + dest = allocate_to_stack (&state, stack, ty->alignment, s); +#ifdef _M_ARM64 /* for handling armasm calling convention */ + } +#endif /* for handling armasm calling convention */ + } + else if (s > 16) + { + /* If the argument is a composite type that is larger than 16 + bytes, then the argument has been copied to memory, and + the argument is replaced by a pointer to the copy. */ + a = &avalue[i]; + t = FFI_TYPE_POINTER; + s = sizeof (void *); + goto do_pointer; + } + else + { + size_t n = (s + 7) / 8; + if (state.ngrn + n <= N_X_ARG_REG) + { + /* If the argument is a composite type and the size in + double-words is not more than the number of available + X registers, then the argument is copied into + consecutive X registers. */ + dest = &context->x[state.ngrn]; + state.ngrn += (unsigned int)n; + } + else + { + /* Otherwise, there are insufficient X registers. Further + X register allocations are prevented, the NSAA is + adjusted and the argument is copied to memory at the + adjusted NSAA. 
*/ + state.ngrn = N_X_ARG_REG; + dest = allocate_to_stack (&state, stack, ty->alignment, s); + } + } + memcpy (dest, a, s); + } + break; + + default: + abort(); + } + +#if defined (__APPLE__) + if (i + 1 == cif->aarch64_nfixedargs) + { + state.ngrn = N_X_ARG_REG; + state.nsrn = N_V_ARG_REG; + state.allocating_variadic = 1; + } +#endif + } + + ffi_call_SYSV (context, frame, fn, rvalue, flags, closure); + + if (flags & AARCH64_RET_NEED_COPY) + memcpy (orig_rvalue, rvalue, rtype_size); +} + +void +ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue) +{ + ffi_call_int (cif, fn, rvalue, avalue, NULL); +} + +#ifdef FFI_GO_CLOSURES +void +ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, + void **avalue, void *closure) +{ + ffi_call_int (cif, fn, rvalue, avalue, closure); +} +#endif /* FFI_GO_CLOSURES */ + +/* Build a trampoline. */ + +extern void ffi_closure_SYSV (void) FFI_HIDDEN; +extern void ffi_closure_SYSV_V (void) FFI_HIDDEN; + +ffi_status +ffi_prep_closure_loc (ffi_closure *closure, + ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + void *codeloc) +{ + if (cif->abi != FFI_SYSV) + return FFI_BAD_ABI; + + void (*start)(void); + + if (cif->flags & AARCH64_FLAG_ARG_V) + start = ffi_closure_SYSV_V; + else + start = ffi_closure_SYSV; + +#if FFI_EXEC_TRAMPOLINE_TABLE +#ifdef __MACH__ + void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE); + config[0] = closure; + config[1] = start; +#endif +#else + static const unsigned char trampoline[16] = { + 0x90, 0x00, 0x00, 0x58, /* ldr x16, tramp+16 */ + 0xf1, 0xff, 0xff, 0x10, /* adr x17, tramp+0 */ + 0x00, 0x02, 0x1f, 0xd6 /* br x16 */ + }; + char *tramp = closure->tramp; + + memcpy (tramp, trampoline, sizeof(trampoline)); + + *(UINT64 *)(tramp + 16) = (uintptr_t)start; + + ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE); + + /* Also flush the cache for code mapping. */ +#ifdef _M_ARM64 + // Not using dlmalloc.c for Windows ARM64 builds + // so calling ffi_data_to_code_pointer() isn't necessary + unsigned char *tramp_code = tramp; + #else + unsigned char *tramp_code = ffi_data_to_code_pointer (tramp); + #endif + ffi_clear_cache (tramp_code, tramp_code + FFI_TRAMPOLINE_SIZE); +#endif + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +#ifdef FFI_GO_CLOSURES +extern void ffi_go_closure_SYSV (void) FFI_HIDDEN; +extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN; + +ffi_status +ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*)) +{ + void (*start)(void); + + if (cif->abi != FFI_SYSV) + return FFI_BAD_ABI; + + if (cif->flags & AARCH64_FLAG_ARG_V) + start = ffi_go_closure_SYSV_V; + else + start = ffi_go_closure_SYSV; + + closure->tramp = start; + closure->cif = cif; + closure->fun = fun; + + return FFI_OK; +} +#endif /* FFI_GO_CLOSURES */ + +/* Primary handler to setup and invoke a function within a closure. + + A closure when invoked enters via the assembler wrapper + ffi_closure_SYSV(). The wrapper allocates a call context on the + stack, saves the interesting registers (from the perspective of + the calling convention) into the context then passes control to + ffi_closure_SYSV_inner() passing the saved context and a pointer to + the stack at the point ffi_closure_SYSV() was invoked. + + On the return path the assembler wrapper will reload call context + registers. 
+ + ffi_closure_SYSV_inner() marshalls the call context into ffi value + descriptors, invokes the wrapped function, then marshalls the return + value back into the call context. */ + +int FFI_HIDDEN +ffi_closure_SYSV_inner (ffi_cif *cif, + void (*fun)(ffi_cif*,void*,void**,void*), + void *user_data, + struct call_context *context, + void *stack, void *rvalue, void *struct_rvalue) +{ + void **avalue = (void**) alloca (cif->nargs * sizeof (void*)); + int i, h, nargs, flags; + struct arg_state state; + + arg_init (&state); + + for (i = 0, nargs = cif->nargs; i < nargs; i++) + { + ffi_type *ty = cif->arg_types[i]; + int t = ty->type; + size_t n, s = ty->size; + + switch (t) + { + case FFI_TYPE_VOID: + FFI_ASSERT (0); + break; + + case FFI_TYPE_INT: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_POINTER: + avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s); + break; + + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + case FFI_TYPE_STRUCT: + case FFI_TYPE_COMPLEX: + h = is_vfp_type (ty); + if (h) + { + n = 4 - (h & 3); +#ifdef _M_ARM64 /* for handling armasm calling convention */ + if (cif->is_variadic) + { + if (state.ngrn + n <= N_X_ARG_REG) + { + void *reg = &context->x[state.ngrn]; + state.ngrn += (unsigned int)n; + + /* Eeek! We need a pointer to the structure, however the + homogeneous float elements are being passed in individual + registers, therefore for float and double the structure + is not represented as a contiguous sequence of bytes in + our saved register context. We don't need the original + contents of the register storage, so we reformat the + structure into the same memory. */ + avalue[i] = compress_hfa_type(reg, reg, h); + } + else + { + state.ngrn = N_X_ARG_REG; + state.nsrn = N_V_ARG_REG; + avalue[i] = allocate_to_stack(&state, stack, + ty->alignment, s); + } + } + else + { +#endif /* for handling armasm calling convention */ + if (state.nsrn + n <= N_V_ARG_REG) + { + void *reg = &context->v[state.nsrn]; + state.nsrn += (unsigned int)n; + avalue[i] = compress_hfa_type(reg, reg, h); + } + else + { + state.nsrn = N_V_ARG_REG; + avalue[i] = allocate_to_stack(&state, stack, + ty->alignment, s); + } +#ifdef _M_ARM64 /* for handling armasm calling convention */ + } +#endif /* for handling armasm calling convention */ + } + else if (s > 16) + { + /* Replace Composite type of size greater than 16 with a + pointer. 
*/ + avalue[i] = *(void **) + allocate_int_to_reg_or_stack (context, &state, stack, + sizeof (void *)); + } + else + { + n = (s + 7) / 8; + if (state.ngrn + n <= N_X_ARG_REG) + { + avalue[i] = &context->x[state.ngrn]; + state.ngrn += (unsigned int)n; + } + else + { + state.ngrn = N_X_ARG_REG; + avalue[i] = allocate_to_stack(&state, stack, + ty->alignment, s); + } + } + break; + + default: + abort(); + } + +#if defined (__APPLE__) + if (i + 1 == cif->aarch64_nfixedargs) + { + state.ngrn = N_X_ARG_REG; + state.nsrn = N_V_ARG_REG; + state.allocating_variadic = 1; + } +#endif + } + + flags = cif->flags; + if (flags & AARCH64_RET_IN_MEM) + rvalue = struct_rvalue; + + fun (cif, rvalue, avalue, user_data); + + return flags; +} + +#endif /* (__aarch64__) || defined(__arm64__)|| defined (_M_ARM64)*/ diff --git a/contrib/restricted/libffi/src/aarch64/ffitarget.h b/contrib/restricted/libffi/src/aarch64/ffitarget.h index ecb6d2deae..ddce8f21eb 100644 --- a/contrib/restricted/libffi/src/aarch64/ffitarget.h +++ b/contrib/restricted/libffi/src/aarch64/ffitarget.h @@ -1,92 +1,92 @@ -/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -``Software''), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#ifndef LIBFFI_TARGET_H -#define LIBFFI_TARGET_H - -#ifndef LIBFFI_H -#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead." 
-#endif - -#ifndef LIBFFI_ASM -#ifdef __ILP32__ -#define FFI_SIZEOF_ARG 8 -#define FFI_SIZEOF_JAVA_RAW 4 -typedef unsigned long long ffi_arg; -typedef signed long long ffi_sarg; -#elif defined(_M_ARM64) -#define FFI_SIZEOF_ARG 8 -typedef unsigned long long ffi_arg; -typedef signed long long ffi_sarg; -#else -typedef unsigned long ffi_arg; -typedef signed long ffi_sarg; -#endif - -typedef enum ffi_abi - { - FFI_FIRST_ABI = 0, - FFI_SYSV, - FFI_LAST_ABI, - FFI_DEFAULT_ABI = FFI_SYSV - } ffi_abi; -#endif - -/* ---- Definitions for closures ----------------------------------------- */ - -#define FFI_CLOSURES 1 -#define FFI_NATIVE_RAW_API 0 - -#if defined (FFI_EXEC_TRAMPOLINE_TABLE) && FFI_EXEC_TRAMPOLINE_TABLE - -#ifdef __MACH__ -#define FFI_TRAMPOLINE_SIZE 16 -#define FFI_TRAMPOLINE_CLOSURE_OFFSET 16 -#else -#error "No trampoline table implementation" -#endif - -#else -#define FFI_TRAMPOLINE_SIZE 24 -#define FFI_TRAMPOLINE_CLOSURE_OFFSET FFI_TRAMPOLINE_SIZE -#endif - -#ifdef _M_ARM64 -#define FFI_EXTRA_CIF_FIELDS unsigned is_variadic -#endif - -/* ---- Internal ---- */ - -#if defined (__APPLE__) -#define FFI_TARGET_SPECIFIC_VARIADIC -#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_nfixedargs -#elif !defined(_M_ARM64) -/* iOS and Windows reserve x18 for the system. Disable Go closures until - a new static chain is chosen. */ -#define FFI_GO_CLOSURES 1 -#endif - -#ifndef _M_ARM64 -/* No complex type on Windows */ -#define FFI_TARGET_HAS_COMPLEX_TYPE -#endif - -#endif +/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +``Software''), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_H +#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead." 
+#endif + +#ifndef LIBFFI_ASM +#ifdef __ILP32__ +#define FFI_SIZEOF_ARG 8 +#define FFI_SIZEOF_JAVA_RAW 4 +typedef unsigned long long ffi_arg; +typedef signed long long ffi_sarg; +#elif defined(_M_ARM64) +#define FFI_SIZEOF_ARG 8 +typedef unsigned long long ffi_arg; +typedef signed long long ffi_sarg; +#else +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; +#endif + +typedef enum ffi_abi + { + FFI_FIRST_ABI = 0, + FFI_SYSV, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_SYSV + } ffi_abi; +#endif + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_NATIVE_RAW_API 0 + +#if defined (FFI_EXEC_TRAMPOLINE_TABLE) && FFI_EXEC_TRAMPOLINE_TABLE + +#ifdef __MACH__ +#define FFI_TRAMPOLINE_SIZE 16 +#define FFI_TRAMPOLINE_CLOSURE_OFFSET 16 +#else +#error "No trampoline table implementation" +#endif + +#else +#define FFI_TRAMPOLINE_SIZE 24 +#define FFI_TRAMPOLINE_CLOSURE_OFFSET FFI_TRAMPOLINE_SIZE +#endif + +#ifdef _M_ARM64 +#define FFI_EXTRA_CIF_FIELDS unsigned is_variadic +#endif + +/* ---- Internal ---- */ + +#if defined (__APPLE__) +#define FFI_TARGET_SPECIFIC_VARIADIC +#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_nfixedargs +#elif !defined(_M_ARM64) +/* iOS and Windows reserve x18 for the system. Disable Go closures until + a new static chain is chosen. */ +#define FFI_GO_CLOSURES 1 +#endif + +#ifndef _M_ARM64 +/* No complex type on Windows */ +#define FFI_TARGET_HAS_COMPLEX_TYPE +#endif + +#endif diff --git a/contrib/restricted/libffi/src/aarch64/internal.h b/contrib/restricted/libffi/src/aarch64/internal.h index 9c3e07725a..2691dafa98 100644 --- a/contrib/restricted/libffi/src/aarch64/internal.h +++ b/contrib/restricted/libffi/src/aarch64/internal.h @@ -1,67 +1,67 @@ -/* -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -``Software''), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#define AARCH64_RET_VOID 0 -#define AARCH64_RET_INT64 1 -#define AARCH64_RET_INT128 2 - -#define AARCH64_RET_UNUSED3 3 -#define AARCH64_RET_UNUSED4 4 -#define AARCH64_RET_UNUSED5 5 -#define AARCH64_RET_UNUSED6 6 -#define AARCH64_RET_UNUSED7 7 - -/* Note that FFI_TYPE_FLOAT == 2, _DOUBLE == 3, _LONGDOUBLE == 4, - so _S4 through _Q1 are layed out as (TYPE * 4) + (4 - COUNT). 
*/ -#define AARCH64_RET_S4 8 -#define AARCH64_RET_S3 9 -#define AARCH64_RET_S2 10 -#define AARCH64_RET_S1 11 - -#define AARCH64_RET_D4 12 -#define AARCH64_RET_D3 13 -#define AARCH64_RET_D2 14 -#define AARCH64_RET_D1 15 - -#define AARCH64_RET_Q4 16 -#define AARCH64_RET_Q3 17 -#define AARCH64_RET_Q2 18 -#define AARCH64_RET_Q1 19 - -/* Note that each of the sub-64-bit integers gets two entries. */ -#define AARCH64_RET_UINT8 20 -#define AARCH64_RET_UINT16 22 -#define AARCH64_RET_UINT32 24 - -#define AARCH64_RET_SINT8 26 -#define AARCH64_RET_SINT16 28 -#define AARCH64_RET_SINT32 30 - -#define AARCH64_RET_MASK 31 - -#define AARCH64_RET_IN_MEM (1 << 5) -#define AARCH64_RET_NEED_COPY (1 << 6) - -#define AARCH64_FLAG_ARG_V_BIT 7 -#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT) - -#define N_X_ARG_REG 8 -#define N_V_ARG_REG 8 -#define CALL_CONTEXT_SIZE (N_V_ARG_REG * 16 + N_X_ARG_REG * 8) +/* +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +``Software''), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#define AARCH64_RET_VOID 0 +#define AARCH64_RET_INT64 1 +#define AARCH64_RET_INT128 2 + +#define AARCH64_RET_UNUSED3 3 +#define AARCH64_RET_UNUSED4 4 +#define AARCH64_RET_UNUSED5 5 +#define AARCH64_RET_UNUSED6 6 +#define AARCH64_RET_UNUSED7 7 + +/* Note that FFI_TYPE_FLOAT == 2, _DOUBLE == 3, _LONGDOUBLE == 4, + so _S4 through _Q1 are layed out as (TYPE * 4) + (4 - COUNT). */ +#define AARCH64_RET_S4 8 +#define AARCH64_RET_S3 9 +#define AARCH64_RET_S2 10 +#define AARCH64_RET_S1 11 + +#define AARCH64_RET_D4 12 +#define AARCH64_RET_D3 13 +#define AARCH64_RET_D2 14 +#define AARCH64_RET_D1 15 + +#define AARCH64_RET_Q4 16 +#define AARCH64_RET_Q3 17 +#define AARCH64_RET_Q2 18 +#define AARCH64_RET_Q1 19 + +/* Note that each of the sub-64-bit integers gets two entries. */ +#define AARCH64_RET_UINT8 20 +#define AARCH64_RET_UINT16 22 +#define AARCH64_RET_UINT32 24 + +#define AARCH64_RET_SINT8 26 +#define AARCH64_RET_SINT16 28 +#define AARCH64_RET_SINT32 30 + +#define AARCH64_RET_MASK 31 + +#define AARCH64_RET_IN_MEM (1 << 5) +#define AARCH64_RET_NEED_COPY (1 << 6) + +#define AARCH64_FLAG_ARG_V_BIT 7 +#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT) + +#define N_X_ARG_REG 8 +#define N_V_ARG_REG 8 +#define CALL_CONTEXT_SIZE (N_V_ARG_REG * 16 + N_X_ARG_REG * 8) diff --git a/contrib/restricted/libffi/src/aarch64/sysv.S b/contrib/restricted/libffi/src/aarch64/sysv.S index 6761ee1ea9..4d8d85139a 100644 --- a/contrib/restricted/libffi/src/aarch64/sysv.S +++ b/contrib/restricted/libffi/src/aarch64/sysv.S @@ -1,440 +1,440 @@ -/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. 
- -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -``Software''), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#if defined(__aarch64__) || defined(__arm64__) -#define LIBFFI_ASM -#include <fficonfig.h> -#include <ffi.h> -#include <ffi_cfi.h> -#include "internal.h" - -#ifdef HAVE_MACHINE_ASM_H -#include <machine/asm.h> -#else -#ifdef __USER_LABEL_PREFIX__ -#define CONCAT1(a, b) CONCAT2(a, b) -#define CONCAT2(a, b) a ## b - -/* Use the right prefix for global labels. */ -#define CNAME(x) CONCAT1 (__USER_LABEL_PREFIX__, x) -#else -#define CNAME(x) x -#endif -#endif - -#ifdef __AARCH64EB__ -# define BE(X) X -#else -# define BE(X) 0 -#endif - -#ifdef __ILP32__ -#define PTR_REG(n) w##n -#else -#define PTR_REG(n) x##n -#endif - -#ifdef __ILP32__ -#define PTR_SIZE 4 -#else -#define PTR_SIZE 8 -#endif - - .text - .align 4 - -/* ffi_call_SYSV - extern void ffi_call_SYSV (void *stack, void *frame, - void (*fn)(void), void *rvalue, - int flags, void *closure); - - Therefore on entry we have: - - x0 stack - x1 frame - x2 fn - x3 rvalue - x4 flags - x5 closure -*/ - - cfi_startproc -CNAME(ffi_call_SYSV): - /* Use a stack frame allocated by our caller. */ - cfi_def_cfa(x1, 32); - stp x29, x30, [x1] - mov x29, x1 - mov sp, x0 - cfi_def_cfa_register(x29) - cfi_rel_offset (x29, 0) - cfi_rel_offset (x30, 8) - - mov x9, x2 /* save fn */ - mov x8, x3 /* install structure return */ -#ifdef FFI_GO_CLOSURES - mov x18, x5 /* install static chain */ -#endif - stp x3, x4, [x29, #16] /* save rvalue and flags */ - - /* Load the vector argument passing registers, if necessary. */ - tbz w4, #AARCH64_FLAG_ARG_V_BIT, 1f - ldp q0, q1, [sp, #0] - ldp q2, q3, [sp, #32] - ldp q4, q5, [sp, #64] - ldp q6, q7, [sp, #96] -1: - /* Load the core argument passing registers, including - the structure return pointer. */ - ldp x0, x1, [sp, #16*N_V_ARG_REG + 0] - ldp x2, x3, [sp, #16*N_V_ARG_REG + 16] - ldp x4, x5, [sp, #16*N_V_ARG_REG + 32] - ldp x6, x7, [sp, #16*N_V_ARG_REG + 48] - - /* Deallocate the context, leaving the stacked arguments. */ - add sp, sp, #CALL_CONTEXT_SIZE - - blr x9 /* call fn */ - - ldp x3, x4, [x29, #16] /* reload rvalue and flags */ - - /* Partially deconstruct the stack frame. */ - mov sp, x29 - cfi_def_cfa_register (sp) - ldp x29, x30, [x29] - - /* Save the return value as directed. */ - adr x5, 0f - and w4, w4, #AARCH64_RET_MASK - add x5, x5, x4, lsl #3 - br x5 - - /* Note that each table entry is 2 insns, and thus 8 bytes. 
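The computed branch just above (adr; and; add ...lsl #3; br) indexes a table of fixed-size stubs. The same address arithmetic, modeled in C purely for illustration (no such helper exists in libffi):

#include <stdint.h>

#define AARCH64_RET_MASK 31            /* low five bits pick the entry */

/* Models: and w4, w4, #AARCH64_RET_MASK; add x5, x5, x4, lsl #3 */
static inline uintptr_t
dispatch_target (uintptr_t table_base, unsigned flags)
{
  unsigned index = flags & AARCH64_RET_MASK;
  return table_base + ((uintptr_t) index << 3);  /* 8 bytes per entry */
}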
- For integer data, note that we're storing into ffi_arg - and therefore we want to extend to 64 bits; these types - have two consecutive entries allocated for them. */ - .align 4 -0: ret /* VOID */ - nop -1: str x0, [x3] /* INT64 */ - ret -2: stp x0, x1, [x3] /* INT128 */ - ret -3: brk #1000 /* UNUSED */ - ret -4: brk #1000 /* UNUSED */ - ret -5: brk #1000 /* UNUSED */ - ret -6: brk #1000 /* UNUSED */ - ret -7: brk #1000 /* UNUSED */ - ret -8: st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] /* S4 */ - ret -9: st3 { v0.s, v1.s, v2.s }[0], [x3] /* S3 */ - ret -10: stp s0, s1, [x3] /* S2 */ - ret -11: str s0, [x3] /* S1 */ - ret -12: st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] /* D4 */ - ret -13: st3 { v0.d, v1.d, v2.d }[0], [x3] /* D3 */ - ret -14: stp d0, d1, [x3] /* D2 */ - ret -15: str d0, [x3] /* D1 */ - ret -16: str q3, [x3, #48] /* Q4 */ - nop -17: str q2, [x3, #32] /* Q3 */ - nop -18: stp q0, q1, [x3] /* Q2 */ - ret -19: str q0, [x3] /* Q1 */ - ret -20: uxtb w0, w0 /* UINT8 */ - str x0, [x3] -21: ret /* reserved */ - nop -22: uxth w0, w0 /* UINT16 */ - str x0, [x3] -23: ret /* reserved */ - nop -24: mov w0, w0 /* UINT32 */ - str x0, [x3] -25: ret /* reserved */ - nop -26: sxtb x0, w0 /* SINT8 */ - str x0, [x3] -27: ret /* reserved */ - nop -28: sxth x0, w0 /* SINT16 */ - str x0, [x3] -29: ret /* reserved */ - nop -30: sxtw x0, w0 /* SINT32 */ - str x0, [x3] -31: ret /* reserved */ - nop - - cfi_endproc - - .globl CNAME(ffi_call_SYSV) - FFI_HIDDEN(CNAME(ffi_call_SYSV)) -#ifdef __ELF__ - .type CNAME(ffi_call_SYSV), #function - .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV) -#endif - -/* ffi_closure_SYSV - - Closure invocation glue. This is the low level code invoked directly by - the closure trampoline to setup and call a closure. - - On entry x17 points to a struct ffi_closure, x16 has been clobbered - all other registers are preserved. - - We allocate a call context and save the argument passing registers, - then invoked the generic C ffi_closure_SYSV_inner() function to do all - the real work, on return we load the result passing registers back from - the call context. -*/ - -#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64) - - .align 4 -CNAME(ffi_closure_SYSV_V): - cfi_startproc - stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! - cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) - cfi_rel_offset (x29, 0) - cfi_rel_offset (x30, 8) - - /* Save the argument passing vector registers. */ - stp q0, q1, [sp, #16 + 0] - stp q2, q3, [sp, #16 + 32] - stp q4, q5, [sp, #16 + 64] - stp q6, q7, [sp, #16 + 96] - b 0f - cfi_endproc - - .globl CNAME(ffi_closure_SYSV_V) - FFI_HIDDEN(CNAME(ffi_closure_SYSV_V)) -#ifdef __ELF__ - .type CNAME(ffi_closure_SYSV_V), #function - .size CNAME(ffi_closure_SYSV_V), . - CNAME(ffi_closure_SYSV_V) -#endif - - .align 4 - cfi_startproc -CNAME(ffi_closure_SYSV): - stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! - cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) - cfi_rel_offset (x29, 0) - cfi_rel_offset (x30, 8) -0: - mov x29, sp - - /* Save the argument passing core registers. */ - stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0] - stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16] - stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32] - stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48] - - /* Load ffi_closure_inner arguments. 
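The loads that follow fetch cif, fn and user_data relative to x17. They correspond to the pointer fields that sit just past the trampoline bytes in struct ffi_closure; a sketch of that layout, reconstructed from ffi.h (the real definition has platform-specific variants, so treat this only as an outline):

#include <ffi.h>

/* Outline of the fields the closure glue reads; illustrative name. */
typedef struct sketch_ffi_closure
{
  char tramp[FFI_TRAMPOLINE_SIZE];  /* trampoline; x17 points at the closure  */
  ffi_cif *cif;                     /* [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET]  */
  void (*fun) (ffi_cif *, void *, void **, void *); /* next pointer slot      */
  void *user_data;                  /* [x17, #...OFFSET + PTR_SIZE*2]         */
} sketch_ffi_closure;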
*/ - ldp PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET] /* load cif, fn */ - ldr PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2] /* load user_data */ -.Ldo_closure: - add x3, sp, #16 /* load context */ - add x4, sp, #ffi_closure_SYSV_FS /* load stack */ - add x5, sp, #16+CALL_CONTEXT_SIZE /* load rvalue */ - mov x6, x8 /* load struct_rval */ - bl CNAME(ffi_closure_SYSV_inner) - - /* Load the return value as directed. */ - adr x1, 0f - and w0, w0, #AARCH64_RET_MASK - add x1, x1, x0, lsl #3 - add x3, sp, #16+CALL_CONTEXT_SIZE - br x1 - - /* Note that each table entry is 2 insns, and thus 8 bytes. */ - .align 4 -0: b 99f /* VOID */ - nop -1: ldr x0, [x3] /* INT64 */ - b 99f -2: ldp x0, x1, [x3] /* INT128 */ - b 99f -3: brk #1000 /* UNUSED */ - nop -4: brk #1000 /* UNUSED */ - nop -5: brk #1000 /* UNUSED */ - nop -6: brk #1000 /* UNUSED */ - nop -7: brk #1000 /* UNUSED */ - nop -8: ldr s3, [x3, #12] /* S4 */ - nop -9: ldr s2, [x3, #8] /* S3 */ - nop -10: ldp s0, s1, [x3] /* S2 */ - b 99f -11: ldr s0, [x3] /* S1 */ - b 99f -12: ldr d3, [x3, #24] /* D4 */ - nop -13: ldr d2, [x3, #16] /* D3 */ - nop -14: ldp d0, d1, [x3] /* D2 */ - b 99f -15: ldr d0, [x3] /* D1 */ - b 99f -16: ldr q3, [x3, #48] /* Q4 */ - nop -17: ldr q2, [x3, #32] /* Q3 */ - nop -18: ldp q0, q1, [x3] /* Q2 */ - b 99f -19: ldr q0, [x3] /* Q1 */ - b 99f -20: ldrb w0, [x3, #BE(7)] /* UINT8 */ - b 99f -21: brk #1000 /* reserved */ - nop -22: ldrh w0, [x3, #BE(6)] /* UINT16 */ - b 99f -23: brk #1000 /* reserved */ - nop -24: ldr w0, [x3, #BE(4)] /* UINT32 */ - b 99f -25: brk #1000 /* reserved */ - nop -26: ldrsb x0, [x3, #BE(7)] /* SINT8 */ - b 99f -27: brk #1000 /* reserved */ - nop -28: ldrsh x0, [x3, #BE(6)] /* SINT16 */ - b 99f -29: brk #1000 /* reserved */ - nop -30: ldrsw x0, [x3, #BE(4)] /* SINT32 */ - nop -31: /* reserved */ -99: ldp x29, x30, [sp], #ffi_closure_SYSV_FS - cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS) - cfi_restore (x29) - cfi_restore (x30) - ret - cfi_endproc - - .globl CNAME(ffi_closure_SYSV) - FFI_HIDDEN(CNAME(ffi_closure_SYSV)) -#ifdef __ELF__ - .type CNAME(ffi_closure_SYSV), #function - .size CNAME(ffi_closure_SYSV), . - CNAME(ffi_closure_SYSV) -#endif - -#if FFI_EXEC_TRAMPOLINE_TABLE - -#ifdef __MACH__ -#include <mach/machine/vm_param.h> - .align PAGE_MAX_SHIFT -CNAME(ffi_closure_trampoline_table_page): - .rept PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE - adr x16, -PAGE_MAX_SIZE - ldp x17, x16, [x16] - br x16 - nop /* each entry in the trampoline config page is 2*sizeof(void*) so the trampoline itself cannot be smaller that 16 bytes */ - .endr - - .globl CNAME(ffi_closure_trampoline_table_page) - FFI_HIDDEN(CNAME(ffi_closure_trampoline_table_page)) - #ifdef __ELF__ - .type CNAME(ffi_closure_trampoline_table_page), #function - .size CNAME(ffi_closure_trampoline_table_page), . - CNAME(ffi_closure_trampoline_table_page) - #endif -#endif - -#endif /* FFI_EXEC_TRAMPOLINE_TABLE */ - -#ifdef FFI_GO_CLOSURES - .align 4 -CNAME(ffi_go_closure_SYSV_V): - cfi_startproc - stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! - cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) - cfi_rel_offset (x29, 0) - cfi_rel_offset (x30, 8) - - /* Save the argument passing vector registers. */ - stp q0, q1, [sp, #16 + 0] - stp q2, q3, [sp, #16 + 32] - stp q4, q5, [sp, #16 + 64] - stp q6, q7, [sp, #16 + 96] - b 0f - cfi_endproc - - .globl CNAME(ffi_go_closure_SYSV_V) - FFI_HIDDEN(CNAME(ffi_go_closure_SYSV_V)) -#ifdef __ELF__ - .type CNAME(ffi_go_closure_SYSV_V), #function - .size CNAME(ffi_go_closure_SYSV_V), . 
- CNAME(ffi_go_closure_SYSV_V) -#endif - - .align 4 - cfi_startproc -CNAME(ffi_go_closure_SYSV): - stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! - cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) - cfi_rel_offset (x29, 0) - cfi_rel_offset (x30, 8) -0: - mov x29, sp - - /* Save the argument passing core registers. */ - stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0] - stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16] - stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32] - stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48] - - /* Load ffi_closure_inner arguments. */ - ldp PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]/* load cif, fn */ - mov x2, x18 /* load user_data */ - b .Ldo_closure - cfi_endproc - - .globl CNAME(ffi_go_closure_SYSV) - FFI_HIDDEN(CNAME(ffi_go_closure_SYSV)) -#ifdef __ELF__ - .type CNAME(ffi_go_closure_SYSV), #function - .size CNAME(ffi_go_closure_SYSV), . - CNAME(ffi_go_closure_SYSV) -#endif -#endif /* FFI_GO_CLOSURES */ -#endif /* __arm64__ */ - -#if defined __ELF__ && defined __linux__ - .section .note.GNU-stack,"",%progbits -#endif - +/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +``Software''), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#if defined(__aarch64__) || defined(__arm64__) +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#include <ffi_cfi.h> +#include "internal.h" + +#ifdef HAVE_MACHINE_ASM_H +#include <machine/asm.h> +#else +#ifdef __USER_LABEL_PREFIX__ +#define CONCAT1(a, b) CONCAT2(a, b) +#define CONCAT2(a, b) a ## b + +/* Use the right prefix for global labels. */ +#define CNAME(x) CONCAT1 (__USER_LABEL_PREFIX__, x) +#else +#define CNAME(x) x +#endif +#endif + +#ifdef __AARCH64EB__ +# define BE(X) X +#else +# define BE(X) 0 +#endif + +#ifdef __ILP32__ +#define PTR_REG(n) w##n +#else +#define PTR_REG(n) x##n +#endif + +#ifdef __ILP32__ +#define PTR_SIZE 4 +#else +#define PTR_SIZE 8 +#endif + + .text + .align 4 + +/* ffi_call_SYSV + extern void ffi_call_SYSV (void *stack, void *frame, + void (*fn)(void), void *rvalue, + int flags, void *closure); + + Therefore on entry we have: + + x0 stack + x1 frame + x2 fn + x3 rvalue + x4 flags + x5 closure +*/ + + cfi_startproc +CNAME(ffi_call_SYSV): + /* Use a stack frame allocated by our caller. 
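ffi_call_SYSV is reached through the portable ffi_prep_cif/ffi_call entry points, whose C side builds the stack, frame and flags arguments described above. A minimal usage sketch against the public libffi API (the function add and the variable names are illustrative):

#include <ffi.h>
#include <stdio.h>

static int add (int a, int b) { return a + b; }

int main (void)
{
  ffi_cif cif;
  ffi_type *args[2] = { &ffi_type_sint, &ffi_type_sint };
  int a = 2, b = 3;
  void *values[2] = { &a, &b };
  ffi_arg result;  /* sub-64-bit integer results are widened to ffi_arg */

  if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2, &ffi_type_sint, args) == FFI_OK)
    {
      ffi_call (&cif, FFI_FN (add), &result, values);
      printf ("%d\n", (int) result);  /* prints 5 */
    }
  return 0;
}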
*/ + cfi_def_cfa(x1, 32); + stp x29, x30, [x1] + mov x29, x1 + mov sp, x0 + cfi_def_cfa_register(x29) + cfi_rel_offset (x29, 0) + cfi_rel_offset (x30, 8) + + mov x9, x2 /* save fn */ + mov x8, x3 /* install structure return */ +#ifdef FFI_GO_CLOSURES + mov x18, x5 /* install static chain */ +#endif + stp x3, x4, [x29, #16] /* save rvalue and flags */ + + /* Load the vector argument passing registers, if necessary. */ + tbz w4, #AARCH64_FLAG_ARG_V_BIT, 1f + ldp q0, q1, [sp, #0] + ldp q2, q3, [sp, #32] + ldp q4, q5, [sp, #64] + ldp q6, q7, [sp, #96] +1: + /* Load the core argument passing registers, including + the structure return pointer. */ + ldp x0, x1, [sp, #16*N_V_ARG_REG + 0] + ldp x2, x3, [sp, #16*N_V_ARG_REG + 16] + ldp x4, x5, [sp, #16*N_V_ARG_REG + 32] + ldp x6, x7, [sp, #16*N_V_ARG_REG + 48] + + /* Deallocate the context, leaving the stacked arguments. */ + add sp, sp, #CALL_CONTEXT_SIZE + + blr x9 /* call fn */ + + ldp x3, x4, [x29, #16] /* reload rvalue and flags */ + + /* Partially deconstruct the stack frame. */ + mov sp, x29 + cfi_def_cfa_register (sp) + ldp x29, x30, [x29] + + /* Save the return value as directed. */ + adr x5, 0f + and w4, w4, #AARCH64_RET_MASK + add x5, x5, x4, lsl #3 + br x5 + + /* Note that each table entry is 2 insns, and thus 8 bytes. + For integer data, note that we're storing into ffi_arg + and therefore we want to extend to 64 bits; these types + have two consecutive entries allocated for them. */ + .align 4 +0: ret /* VOID */ + nop +1: str x0, [x3] /* INT64 */ + ret +2: stp x0, x1, [x3] /* INT128 */ + ret +3: brk #1000 /* UNUSED */ + ret +4: brk #1000 /* UNUSED */ + ret +5: brk #1000 /* UNUSED */ + ret +6: brk #1000 /* UNUSED */ + ret +7: brk #1000 /* UNUSED */ + ret +8: st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] /* S4 */ + ret +9: st3 { v0.s, v1.s, v2.s }[0], [x3] /* S3 */ + ret +10: stp s0, s1, [x3] /* S2 */ + ret +11: str s0, [x3] /* S1 */ + ret +12: st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] /* D4 */ + ret +13: st3 { v0.d, v1.d, v2.d }[0], [x3] /* D3 */ + ret +14: stp d0, d1, [x3] /* D2 */ + ret +15: str d0, [x3] /* D1 */ + ret +16: str q3, [x3, #48] /* Q4 */ + nop +17: str q2, [x3, #32] /* Q3 */ + nop +18: stp q0, q1, [x3] /* Q2 */ + ret +19: str q0, [x3] /* Q1 */ + ret +20: uxtb w0, w0 /* UINT8 */ + str x0, [x3] +21: ret /* reserved */ + nop +22: uxth w0, w0 /* UINT16 */ + str x0, [x3] +23: ret /* reserved */ + nop +24: mov w0, w0 /* UINT32 */ + str x0, [x3] +25: ret /* reserved */ + nop +26: sxtb x0, w0 /* SINT8 */ + str x0, [x3] +27: ret /* reserved */ + nop +28: sxth x0, w0 /* SINT16 */ + str x0, [x3] +29: ret /* reserved */ + nop +30: sxtw x0, w0 /* SINT32 */ + str x0, [x3] +31: ret /* reserved */ + nop + + cfi_endproc + + .globl CNAME(ffi_call_SYSV) + FFI_HIDDEN(CNAME(ffi_call_SYSV)) +#ifdef __ELF__ + .type CNAME(ffi_call_SYSV), #function + .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV) +#endif + +/* ffi_closure_SYSV + + Closure invocation glue. This is the low level code invoked directly by + the closure trampoline to setup and call a closure. + + On entry x17 points to a struct ffi_closure, x16 has been clobbered + all other registers are preserved. + + We allocate a call context and save the argument passing registers, + then invoked the generic C ffi_closure_SYSV_inner() function to do all + the real work, on return we load the result passing registers back from + the call context. 
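On the closure side, the trampoline that lands here is normally produced with the public closure API; the handler below is what ffi_closure_SYSV_inner eventually invokes. A minimal sketch (handler and bias are illustrative names):

#include <ffi.h>
#include <stdio.h>

static void
handler (ffi_cif *cif, void *ret, void **args, void *user_data)
{
  *(ffi_arg *) ret = *(int *) args[0] + *(int *) user_data;
}

int main (void)
{
  ffi_cif cif;
  ffi_type *args[1] = { &ffi_type_sint };
  void *code;
  ffi_closure *closure = ffi_closure_alloc (sizeof (ffi_closure), &code);
  int bias = 40;

  if (closure
      && ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1, &ffi_type_sint, args) == FFI_OK
      && ffi_prep_closure_loc (closure, &cif, handler, &bias, code) == FFI_OK)
    printf ("%d\n", ((int (*)(int)) code) (2));  /* prints 42 */

  ffi_closure_free (closure);
  return 0;
}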
+*/ + +#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64) + + .align 4 +CNAME(ffi_closure_SYSV_V): + cfi_startproc + stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! + cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) + cfi_rel_offset (x29, 0) + cfi_rel_offset (x30, 8) + + /* Save the argument passing vector registers. */ + stp q0, q1, [sp, #16 + 0] + stp q2, q3, [sp, #16 + 32] + stp q4, q5, [sp, #16 + 64] + stp q6, q7, [sp, #16 + 96] + b 0f + cfi_endproc + + .globl CNAME(ffi_closure_SYSV_V) + FFI_HIDDEN(CNAME(ffi_closure_SYSV_V)) +#ifdef __ELF__ + .type CNAME(ffi_closure_SYSV_V), #function + .size CNAME(ffi_closure_SYSV_V), . - CNAME(ffi_closure_SYSV_V) +#endif + + .align 4 + cfi_startproc +CNAME(ffi_closure_SYSV): + stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! + cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) + cfi_rel_offset (x29, 0) + cfi_rel_offset (x30, 8) +0: + mov x29, sp + + /* Save the argument passing core registers. */ + stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0] + stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16] + stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32] + stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48] + + /* Load ffi_closure_inner arguments. */ + ldp PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET] /* load cif, fn */ + ldr PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2] /* load user_data */ +.Ldo_closure: + add x3, sp, #16 /* load context */ + add x4, sp, #ffi_closure_SYSV_FS /* load stack */ + add x5, sp, #16+CALL_CONTEXT_SIZE /* load rvalue */ + mov x6, x8 /* load struct_rval */ + bl CNAME(ffi_closure_SYSV_inner) + + /* Load the return value as directed. */ + adr x1, 0f + and w0, w0, #AARCH64_RET_MASK + add x1, x1, x0, lsl #3 + add x3, sp, #16+CALL_CONTEXT_SIZE + br x1 + + /* Note that each table entry is 2 insns, and thus 8 bytes. */ + .align 4 +0: b 99f /* VOID */ + nop +1: ldr x0, [x3] /* INT64 */ + b 99f +2: ldp x0, x1, [x3] /* INT128 */ + b 99f +3: brk #1000 /* UNUSED */ + nop +4: brk #1000 /* UNUSED */ + nop +5: brk #1000 /* UNUSED */ + nop +6: brk #1000 /* UNUSED */ + nop +7: brk #1000 /* UNUSED */ + nop +8: ldr s3, [x3, #12] /* S4 */ + nop +9: ldr s2, [x3, #8] /* S3 */ + nop +10: ldp s0, s1, [x3] /* S2 */ + b 99f +11: ldr s0, [x3] /* S1 */ + b 99f +12: ldr d3, [x3, #24] /* D4 */ + nop +13: ldr d2, [x3, #16] /* D3 */ + nop +14: ldp d0, d1, [x3] /* D2 */ + b 99f +15: ldr d0, [x3] /* D1 */ + b 99f +16: ldr q3, [x3, #48] /* Q4 */ + nop +17: ldr q2, [x3, #32] /* Q3 */ + nop +18: ldp q0, q1, [x3] /* Q2 */ + b 99f +19: ldr q0, [x3] /* Q1 */ + b 99f +20: ldrb w0, [x3, #BE(7)] /* UINT8 */ + b 99f +21: brk #1000 /* reserved */ + nop +22: ldrh w0, [x3, #BE(6)] /* UINT16 */ + b 99f +23: brk #1000 /* reserved */ + nop +24: ldr w0, [x3, #BE(4)] /* UINT32 */ + b 99f +25: brk #1000 /* reserved */ + nop +26: ldrsb x0, [x3, #BE(7)] /* SINT8 */ + b 99f +27: brk #1000 /* reserved */ + nop +28: ldrsh x0, [x3, #BE(6)] /* SINT16 */ + b 99f +29: brk #1000 /* reserved */ + nop +30: ldrsw x0, [x3, #BE(4)] /* SINT32 */ + nop +31: /* reserved */ +99: ldp x29, x30, [sp], #ffi_closure_SYSV_FS + cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS) + cfi_restore (x29) + cfi_restore (x30) + ret + cfi_endproc + + .globl CNAME(ffi_closure_SYSV) + FFI_HIDDEN(CNAME(ffi_closure_SYSV)) +#ifdef __ELF__ + .type CNAME(ffi_closure_SYSV), #function + .size CNAME(ffi_closure_SYSV), . 
- CNAME(ffi_closure_SYSV) +#endif + +#if FFI_EXEC_TRAMPOLINE_TABLE + +#ifdef __MACH__ +#include <mach/machine/vm_param.h> + .align PAGE_MAX_SHIFT +CNAME(ffi_closure_trampoline_table_page): + .rept PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE + adr x16, -PAGE_MAX_SIZE + ldp x17, x16, [x16] + br x16 + nop /* each entry in the trampoline config page is 2*sizeof(void*) so the trampoline itself cannot be smaller that 16 bytes */ + .endr + + .globl CNAME(ffi_closure_trampoline_table_page) + FFI_HIDDEN(CNAME(ffi_closure_trampoline_table_page)) + #ifdef __ELF__ + .type CNAME(ffi_closure_trampoline_table_page), #function + .size CNAME(ffi_closure_trampoline_table_page), . - CNAME(ffi_closure_trampoline_table_page) + #endif +#endif + +#endif /* FFI_EXEC_TRAMPOLINE_TABLE */ + +#ifdef FFI_GO_CLOSURES + .align 4 +CNAME(ffi_go_closure_SYSV_V): + cfi_startproc + stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! + cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) + cfi_rel_offset (x29, 0) + cfi_rel_offset (x30, 8) + + /* Save the argument passing vector registers. */ + stp q0, q1, [sp, #16 + 0] + stp q2, q3, [sp, #16 + 32] + stp q4, q5, [sp, #16 + 64] + stp q6, q7, [sp, #16 + 96] + b 0f + cfi_endproc + + .globl CNAME(ffi_go_closure_SYSV_V) + FFI_HIDDEN(CNAME(ffi_go_closure_SYSV_V)) +#ifdef __ELF__ + .type CNAME(ffi_go_closure_SYSV_V), #function + .size CNAME(ffi_go_closure_SYSV_V), . - CNAME(ffi_go_closure_SYSV_V) +#endif + + .align 4 + cfi_startproc +CNAME(ffi_go_closure_SYSV): + stp x29, x30, [sp, #-ffi_closure_SYSV_FS]! + cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) + cfi_rel_offset (x29, 0) + cfi_rel_offset (x30, 8) +0: + mov x29, sp + + /* Save the argument passing core registers. */ + stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0] + stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16] + stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32] + stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48] + + /* Load ffi_closure_inner arguments. */ + ldp PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]/* load cif, fn */ + mov x2, x18 /* load user_data */ + b .Ldo_closure + cfi_endproc + + .globl CNAME(ffi_go_closure_SYSV) + FFI_HIDDEN(CNAME(ffi_go_closure_SYSV)) +#ifdef __ELF__ + .type CNAME(ffi_go_closure_SYSV), #function + .size CNAME(ffi_go_closure_SYSV), . - CNAME(ffi_go_closure_SYSV) +#endif +#endif /* FFI_GO_CLOSURES */ +#endif /* __arm64__ */ + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",%progbits +#endif + diff --git a/contrib/restricted/libffi/src/aarch64/win64_armasm.S b/contrib/restricted/libffi/src/aarch64/win64_armasm.S index a79f8a8aa9..90b95def5c 100644 --- a/contrib/restricted/libffi/src/aarch64/win64_armasm.S +++ b/contrib/restricted/libffi/src/aarch64/win64_armasm.S @@ -1,506 +1,506 @@ -/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -``Software''), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. -THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY -CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, -TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE -SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ - -#define LIBFFI_ASM -#include <fficonfig.h> -#include <ffi.h> -#include <ffi_cfi.h> -#include "internal.h" - - OPT 2 /*disable listing */ -/* For some macros to add unwind information */ -#include "ksarm64.h" - OPT 1 /*re-enable listing */ - -#define BE(X) 0 -#define PTR_REG(n) x##n -#define PTR_SIZE 8 - - IMPORT ffi_closure_SYSV_inner - EXPORT ffi_call_SYSV - EXPORT ffi_closure_SYSV_V - EXPORT ffi_closure_SYSV - EXPORT extend_hfa_type - EXPORT compress_hfa_type -#ifdef FFI_GO_CLOSURES - EXPORT ffi_go_closure_SYSV_V - EXPORT ffi_go_closure_SYSV -#endif - - TEXTAREA, ALLIGN=8 - -/* ffi_call_SYSV - extern void ffi_call_SYSV (void *stack, void *frame, - void (*fn)(void), void *rvalue, - int flags, void *closure); - Therefore on entry we have: - x0 stack - x1 frame - x2 fn - x3 rvalue - x4 flags - x5 closure -*/ - - NESTED_ENTRY ffi_call_SYSV_fake - - /* For unwind information, Windows has to store fp and lr */ - PROLOG_SAVE_REG_PAIR x29, x30, #-32! - - ALTERNATE_ENTRY ffi_call_SYSV - /* Use a stack frame allocated by our caller. */ - stp x29, x30, [x1] - mov x29, x1 - mov sp, x0 - - mov x9, x2 /* save fn */ - mov x8, x3 /* install structure return */ -#ifdef FFI_GO_CLOSURES - /*mov x18, x5 install static chain */ -#endif - stp x3, x4, [x29, #16] /* save rvalue and flags */ - - /* Load the vector argument passing registers, if necessary. */ - tbz x4, #AARCH64_FLAG_ARG_V_BIT, ffi_call_SYSV_L1 - ldp q0, q1, [sp, #0] - ldp q2, q3, [sp, #32] - ldp q4, q5, [sp, #64] - ldp q6, q7, [sp, #96] - -ffi_call_SYSV_L1 - /* Load the core argument passing registers, including - the structure return pointer. */ - ldp x0, x1, [sp, #16*N_V_ARG_REG + 0] - ldp x2, x3, [sp, #16*N_V_ARG_REG + 16] - ldp x4, x5, [sp, #16*N_V_ARG_REG + 32] - ldp x6, x7, [sp, #16*N_V_ARG_REG + 48] - - /* Deallocate the context, leaving the stacked arguments. */ - add sp, sp, #CALL_CONTEXT_SIZE - - blr x9 /* call fn */ - - ldp x3, x4, [x29, #16] /* reload rvalue and flags */ - - /* Partially deconstruct the stack frame. */ - mov sp, x29 - ldp x29, x30, [x29] - - /* Save the return value as directed. */ - adr x5, ffi_call_SYSV_return - and w4, w4, #AARCH64_RET_MASK - add x5, x5, x4, lsl #3 - br x5 - - /* Note that each table entry is 2 insns, and thus 8 bytes. - For integer data, note that we're storing into ffi_arg - and therefore we want to extend to 64 bits; these types - have two consecutive entries allocated for them. 
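The doubled table slots exist because widening a small integer takes an extra instruction: extend, store, return is three instructions, one more than a two-instruction slot holds. The widening itself, modeled in C for illustration only:

#include <stdint.h>

/* Models the SINT8 entry (sxtb x0, w0): the result is sign-extended
   to the full 64-bit ffi_arg before being stored through x3. */
static uint64_t
widen_sint8 (uint64_t x0)
{
  return (uint64_t) (int64_t) (int8_t) x0;
}
/* widen_sint8 (0xFF) == UINT64_MAX, i.e. -1 when read back as ffi_arg. */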
*/ - ALIGN 4 -ffi_call_SYSV_return - ret /* VOID */ - nop - str x0, [x3] /* INT64 */ - ret - stp x0, x1, [x3] /* INT128 */ - ret - brk #1000 /* UNUSED */ - ret - brk #1000 /* UNUSED */ - ret - brk #1000 /* UNUSED */ - ret - brk #1000 /* UNUSED */ - ret - brk #1000 /* UNUSED */ - ret - st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] /* S4 */ - ret - st3 { v0.s, v1.s, v2.s }[0], [x3] /* S3 */ - ret - stp s0, s1, [x3] /* S2 */ - ret - str s0, [x3] /* S1 */ - ret - st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] /* D4 */ - ret - st3 { v0.d, v1.d, v2.d }[0], [x3] /* D3 */ - ret - stp d0, d1, [x3] /* D2 */ - ret - str d0, [x3] /* D1 */ - ret - str q3, [x3, #48] /* Q4 */ - nop - str q2, [x3, #32] /* Q3 */ - nop - stp q0, q1, [x3] /* Q2 */ - ret - str q0, [x3] /* Q1 */ - ret - uxtb w0, w0 /* UINT8 */ - str x0, [x3] - ret /* reserved */ - nop - uxth w0, w0 /* UINT16 */ - str x0, [x3] - ret /* reserved */ - nop - mov w0, w0 /* UINT32 */ - str x0, [x3] - ret /* reserved */ - nop - sxtb x0, w0 /* SINT8 */ - str x0, [x3] - ret /* reserved */ - nop - sxth x0, w0 /* SINT16 */ - str x0, [x3] - ret /* reserved */ - nop - sxtw x0, w0 /* SINT32 */ - str x0, [x3] - ret /* reserved */ - nop - - - NESTED_END ffi_call_SYSV_fake - - -/* ffi_closure_SYSV - Closure invocation glue. This is the low level code invoked directly by - the closure trampoline to setup and call a closure. - On entry x17 points to a struct ffi_closure, x16 has been clobbered - all other registers are preserved. - We allocate a call context and save the argument passing registers, - then invoked the generic C ffi_closure_SYSV_inner() function to do all - the real work, on return we load the result passing registers back from - the call context. -*/ - -#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64) - - NESTED_ENTRY ffi_closure_SYSV_V - PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS! - - /* Save the argument passing vector registers. */ - stp q0, q1, [sp, #16 + 0] - stp q2, q3, [sp, #16 + 32] - stp q4, q5, [sp, #16 + 64] - stp q6, q7, [sp, #16 + 96] - - b ffi_closure_SYSV_save_argument - NESTED_END ffi_closure_SYSV_V - - NESTED_ENTRY ffi_closure_SYSV - PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS! - -ffi_closure_SYSV_save_argument - /* Save the argument passing core registers. */ - stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0] - stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16] - stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32] - stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48] - - /* Load ffi_closure_inner arguments. */ - ldp PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET] /* load cif, fn */ - ldr PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2] /* load user_data */ - -do_closure - add x3, sp, #16 /* load context */ - add x4, sp, #ffi_closure_SYSV_FS /* load stack */ - add x5, sp, #16+CALL_CONTEXT_SIZE /* load rvalue */ - mov x6, x8 /* load struct_rval */ - - bl ffi_closure_SYSV_inner - - /* Load the return value as directed. */ - adr x1, ffi_closure_SYSV_return_base - and w0, w0, #AARCH64_RET_MASK - add x1, x1, x0, lsl #3 - add x3, sp, #16+CALL_CONTEXT_SIZE - br x1 - - /* Note that each table entry is 2 insns, and thus 8 bytes. 
*/ - ALIGN 8 -ffi_closure_SYSV_return_base - b ffi_closure_SYSV_epilog /* VOID */ - nop - ldr x0, [x3] /* INT64 */ - b ffi_closure_SYSV_epilog - ldp x0, x1, [x3] /* INT128 */ - b ffi_closure_SYSV_epilog - brk #1000 /* UNUSED */ - nop - brk #1000 /* UNUSED */ - nop - brk #1000 /* UNUSED */ - nop - brk #1000 /* UNUSED */ - nop - brk #1000 /* UNUSED */ - nop - ldr s3, [x3, #12] /* S4 */ - nop - ldr s2, [x3, #8] /* S3 */ - nop - ldp s0, s1, [x3] /* S2 */ - b ffi_closure_SYSV_epilog - ldr s0, [x3] /* S1 */ - b ffi_closure_SYSV_epilog - ldr d3, [x3, #24] /* D4 */ - nop - ldr d2, [x3, #16] /* D3 */ - nop - ldp d0, d1, [x3] /* D2 */ - b ffi_closure_SYSV_epilog - ldr d0, [x3] /* D1 */ - b ffi_closure_SYSV_epilog - ldr q3, [x3, #48] /* Q4 */ - nop - ldr q2, [x3, #32] /* Q3 */ - nop - ldp q0, q1, [x3] /* Q2 */ - b ffi_closure_SYSV_epilog - ldr q0, [x3] /* Q1 */ - b ffi_closure_SYSV_epilog - ldrb w0, [x3, #BE(7)] /* UINT8 */ - b ffi_closure_SYSV_epilog - brk #1000 /* reserved */ - nop - ldrh w0, [x3, #BE(6)] /* UINT16 */ - b ffi_closure_SYSV_epilog - brk #1000 /* reserved */ - nop - ldr w0, [x3, #BE(4)] /* UINT32 */ - b ffi_closure_SYSV_epilog - brk #1000 /* reserved */ - nop - ldrsb x0, [x3, #BE(7)] /* SINT8 */ - b ffi_closure_SYSV_epilog - brk #1000 /* reserved */ - nop - ldrsh x0, [x3, #BE(6)] /* SINT16 */ - b ffi_closure_SYSV_epilog - brk #1000 /* reserved */ - nop - ldrsw x0, [x3, #BE(4)] /* SINT32 */ - nop - /* reserved */ - -ffi_closure_SYSV_epilog - EPILOG_RESTORE_REG_PAIR x29, x30, #ffi_closure_SYSV_FS! - EPILOG_RETURN - NESTED_END ffi_closure_SYSV - - -#ifdef FFI_GO_CLOSURES - NESTED_ENTRY ffi_go_closure_SYSV_V - PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS! - - /* Save the argument passing vector registers. */ - stp q0, q1, [sp, #16 + 0] - stp q2, q3, [sp, #16 + 32] - stp q4, q5, [sp, #16 + 64] - stp q6, q7, [sp, #16 + 96] - b ffi_go_closure_SYSV_save_argument - NESTED_END ffi_go_closure_SYSV_V - - NESTED_ENTRY ffi_go_closure_SYSV - PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS! - -ffi_go_closure_SYSV_save_argument - /* Save the argument passing core registers. */ - stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0] - stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16] - stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32] - stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48] - - /* Load ffi_closure_inner arguments. 
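For the Go-closure variant above there is no writable trampoline: the static chain register carries the ffi_go_closure address, and user_data becomes that same pointer. A hedged sketch against the public API (only meaningful where FFI_GO_CLOSURES is enabled, which per ffitarget.h above excludes Windows ARM64; names are illustrative):

#include <ffi.h>

#ifdef FFI_GO_CLOSURES
static void
go_handler (ffi_cif *cif, void *ret, void **args, void *user_data)
{
  /* user_data is the address of the ffi_go_closure itself. */
  *(ffi_arg *) ret = 0;
}

static ffi_go_closure gc;

void
call_through_go_closure (ffi_cif *cif, void (*fn) (void),
                         void *rvalue, void **avalue)
{
  ffi_prep_go_closure (&gc, cif, go_handler);
  /* ffi_call_go installs &gc in the static chain before calling fn. */
  ffi_call_go (cif, fn, rvalue, avalue, &gc);
}
#endif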
*/ - ldp PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]/* load cif, fn */ - mov x2, x18 /* load user_data */ - b do_closure - NESTED_END ffi_go_closure_SYSV - -#endif /* FFI_GO_CLOSURES */ - - -/* void extend_hfa_type (void *dest, void *src, int h) */ - - LEAF_ENTRY extend_hfa_type - - adr x3, extend_hfa_type_jump_base - and w2, w2, #AARCH64_RET_MASK - sub x2, x2, #AARCH64_RET_S4 - add x3, x3, x2, lsl #4 - br x3 - - ALIGN 4 -extend_hfa_type_jump_base - ldp s16, s17, [x1] /* S4 */ - ldp s18, s19, [x1, #8] - b extend_hfa_type_store_4 - nop - - ldp s16, s17, [x1] /* S3 */ - ldr s18, [x1, #8] - b extend_hfa_type_store_3 - nop - - ldp s16, s17, [x1] /* S2 */ - b extend_hfa_type_store_2 - nop - nop - - ldr s16, [x1] /* S1 */ - b extend_hfa_type_store_1 - nop - nop - - ldp d16, d17, [x1] /* D4 */ - ldp d18, d19, [x1, #16] - b extend_hfa_type_store_4 - nop - - ldp d16, d17, [x1] /* D3 */ - ldr d18, [x1, #16] - b extend_hfa_type_store_3 - nop - - ldp d16, d17, [x1] /* D2 */ - b extend_hfa_type_store_2 - nop - nop - - ldr d16, [x1] /* D1 */ - b extend_hfa_type_store_1 - nop - nop - - ldp q16, q17, [x1] /* Q4 */ - ldp q18, q19, [x1, #16] - b extend_hfa_type_store_4 - nop - - ldp q16, q17, [x1] /* Q3 */ - ldr q18, [x1, #16] - b extend_hfa_type_store_3 - nop - - ldp q16, q17, [x1] /* Q2 */ - b extend_hfa_type_store_2 - nop - nop - - ldr q16, [x1] /* Q1 */ - b extend_hfa_type_store_1 - -extend_hfa_type_store_4 - str q19, [x0, #48] -extend_hfa_type_store_3 - str q18, [x0, #32] -extend_hfa_type_store_2 - str q17, [x0, #16] -extend_hfa_type_store_1 - str q16, [x0] - ret - - LEAF_END extend_hfa_type - - -/* void compress_hfa_type (void *dest, void *reg, int h) */ - - LEAF_ENTRY compress_hfa_type - - adr x3, compress_hfa_type_jump_base - and w2, w2, #AARCH64_RET_MASK - sub x2, x2, #AARCH64_RET_S4 - add x3, x3, x2, lsl #4 - br x3 - - ALIGN 4 -compress_hfa_type_jump_base - ldp q16, q17, [x1] /* S4 */ - ldp q18, q19, [x1, #32] - st4 { v16.s, v17.s, v18.s, v19.s }[0], [x0] - ret - - ldp q16, q17, [x1] /* S3 */ - ldr q18, [x1, #32] - st3 { v16.s, v17.s, v18.s }[0], [x0] - ret - - ldp q16, q17, [x1] /* S2 */ - st2 { v16.s, v17.s }[0], [x0] - ret - nop - - ldr q16, [x1] /* S1 */ - st1 { v16.s }[0], [x0] - ret - nop - - ldp q16, q17, [x1] /* D4 */ - ldp q18, q19, [x1, #32] - st4 { v16.d, v17.d, v18.d, v19.d }[0], [x0] - ret - - ldp q16, q17, [x1] /* D3 */ - ldr q18, [x1, #32] - st3 { v16.d, v17.d, v18.d }[0], [x0] - ret - - ldp q16, q17, [x1] /* D2 */ - st2 { v16.d, v17.d }[0], [x0] - ret - nop - - ldr q16, [x1] /* D1 */ - st1 { v16.d }[0], [x0] - ret - nop - - ldp q16, q17, [x1] /* Q4 */ - ldp q18, q19, [x1, #32] - b compress_hfa_type_store_q4 - nop - - ldp q16, q17, [x1] /* Q3 */ - ldr q18, [x1, #32] - b compress_hfa_type_store_q3 - nop - - ldp q16, q17, [x1] /* Q2 */ - stp q16, q17, [x0] - ret - nop - - ldr q16, [x1] /* Q1 */ - str q16, [x0] - ret - -compress_hfa_type_store_q4 - str q19, [x0, #48] -compress_hfa_type_store_q3 - str q18, [x0, #32] - stp q16, q17, [x0] - ret - - LEAF_END compress_hfa_type - - END
\ No newline at end of file +/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +``Software''), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. +THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#include <ffi_cfi.h> +#include "internal.h" + + OPT 2 /*disable listing */ +/* For some macros to add unwind information */ +#include "ksarm64.h" + OPT 1 /*re-enable listing */ + +#define BE(X) 0 +#define PTR_REG(n) x##n +#define PTR_SIZE 8 + + IMPORT ffi_closure_SYSV_inner + EXPORT ffi_call_SYSV + EXPORT ffi_closure_SYSV_V + EXPORT ffi_closure_SYSV + EXPORT extend_hfa_type + EXPORT compress_hfa_type +#ifdef FFI_GO_CLOSURES + EXPORT ffi_go_closure_SYSV_V + EXPORT ffi_go_closure_SYSV +#endif + + TEXTAREA, ALLIGN=8 + +/* ffi_call_SYSV + extern void ffi_call_SYSV (void *stack, void *frame, + void (*fn)(void), void *rvalue, + int flags, void *closure); + Therefore on entry we have: + x0 stack + x1 frame + x2 fn + x3 rvalue + x4 flags + x5 closure +*/ + + NESTED_ENTRY ffi_call_SYSV_fake + + /* For unwind information, Windows has to store fp and lr */ + PROLOG_SAVE_REG_PAIR x29, x30, #-32! + + ALTERNATE_ENTRY ffi_call_SYSV + /* Use a stack frame allocated by our caller. */ + stp x29, x30, [x1] + mov x29, x1 + mov sp, x0 + + mov x9, x2 /* save fn */ + mov x8, x3 /* install structure return */ +#ifdef FFI_GO_CLOSURES + /*mov x18, x5 install static chain */ +#endif + stp x3, x4, [x29, #16] /* save rvalue and flags */ + + /* Load the vector argument passing registers, if necessary. */ + tbz x4, #AARCH64_FLAG_ARG_V_BIT, ffi_call_SYSV_L1 + ldp q0, q1, [sp, #0] + ldp q2, q3, [sp, #32] + ldp q4, q5, [sp, #64] + ldp q6, q7, [sp, #96] + +ffi_call_SYSV_L1 + /* Load the core argument passing registers, including + the structure return pointer. */ + ldp x0, x1, [sp, #16*N_V_ARG_REG + 0] + ldp x2, x3, [sp, #16*N_V_ARG_REG + 16] + ldp x4, x5, [sp, #16*N_V_ARG_REG + 32] + ldp x6, x7, [sp, #16*N_V_ARG_REG + 48] + + /* Deallocate the context, leaving the stacked arguments. */ + add sp, sp, #CALL_CONTEXT_SIZE + + blr x9 /* call fn */ + + ldp x3, x4, [x29, #16] /* reload rvalue and flags */ + + /* Partially deconstruct the stack frame. */ + mov sp, x29 + ldp x29, x30, [x29] + + /* Save the return value as directed. */ + adr x5, ffi_call_SYSV_return + and w4, w4, #AARCH64_RET_MASK + add x5, x5, x4, lsl #3 + br x5 + + /* Note that each table entry is 2 insns, and thus 8 bytes. + For integer data, note that we're storing into ffi_arg + and therefore we want to extend to 64 bits; these types + have two consecutive entries allocated for them. 
*/ + ALIGN 4 +ffi_call_SYSV_return + ret /* VOID */ + nop + str x0, [x3] /* INT64 */ + ret + stp x0, x1, [x3] /* INT128 */ + ret + brk #1000 /* UNUSED */ + ret + brk #1000 /* UNUSED */ + ret + brk #1000 /* UNUSED */ + ret + brk #1000 /* UNUSED */ + ret + brk #1000 /* UNUSED */ + ret + st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] /* S4 */ + ret + st3 { v0.s, v1.s, v2.s }[0], [x3] /* S3 */ + ret + stp s0, s1, [x3] /* S2 */ + ret + str s0, [x3] /* S1 */ + ret + st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] /* D4 */ + ret + st3 { v0.d, v1.d, v2.d }[0], [x3] /* D3 */ + ret + stp d0, d1, [x3] /* D2 */ + ret + str d0, [x3] /* D1 */ + ret + str q3, [x3, #48] /* Q4 */ + nop + str q2, [x3, #32] /* Q3 */ + nop + stp q0, q1, [x3] /* Q2 */ + ret + str q0, [x3] /* Q1 */ + ret + uxtb w0, w0 /* UINT8 */ + str x0, [x3] + ret /* reserved */ + nop + uxth w0, w0 /* UINT16 */ + str x0, [x3] + ret /* reserved */ + nop + mov w0, w0 /* UINT32 */ + str x0, [x3] + ret /* reserved */ + nop + sxtb x0, w0 /* SINT8 */ + str x0, [x3] + ret /* reserved */ + nop + sxth x0, w0 /* SINT16 */ + str x0, [x3] + ret /* reserved */ + nop + sxtw x0, w0 /* SINT32 */ + str x0, [x3] + ret /* reserved */ + nop + + + NESTED_END ffi_call_SYSV_fake + + +/* ffi_closure_SYSV + Closure invocation glue. This is the low level code invoked directly by + the closure trampoline to setup and call a closure. + On entry x17 points to a struct ffi_closure, x16 has been clobbered + all other registers are preserved. + We allocate a call context and save the argument passing registers, + then invoked the generic C ffi_closure_SYSV_inner() function to do all + the real work, on return we load the result passing registers back from + the call context. +*/ + +#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64) + + NESTED_ENTRY ffi_closure_SYSV_V + PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS! + + /* Save the argument passing vector registers. */ + stp q0, q1, [sp, #16 + 0] + stp q2, q3, [sp, #16 + 32] + stp q4, q5, [sp, #16 + 64] + stp q6, q7, [sp, #16 + 96] + + b ffi_closure_SYSV_save_argument + NESTED_END ffi_closure_SYSV_V + + NESTED_ENTRY ffi_closure_SYSV + PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS! + +ffi_closure_SYSV_save_argument + /* Save the argument passing core registers. */ + stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0] + stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16] + stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32] + stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48] + + /* Load ffi_closure_inner arguments. */ + ldp PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET] /* load cif, fn */ + ldr PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2] /* load user_data */ + +do_closure + add x3, sp, #16 /* load context */ + add x4, sp, #ffi_closure_SYSV_FS /* load stack */ + add x5, sp, #16+CALL_CONTEXT_SIZE /* load rvalue */ + mov x6, x8 /* load struct_rval */ + + bl ffi_closure_SYSV_inner + + /* Load the return value as directed. */ + adr x1, ffi_closure_SYSV_return_base + and w0, w0, #AARCH64_RET_MASK + add x1, x1, x0, lsl #3 + add x3, sp, #16+CALL_CONTEXT_SIZE + br x1 + + /* Note that each table entry is 2 insns, and thus 8 bytes. 
*/ + ALIGN 8 +ffi_closure_SYSV_return_base + b ffi_closure_SYSV_epilog /* VOID */ + nop + ldr x0, [x3] /* INT64 */ + b ffi_closure_SYSV_epilog + ldp x0, x1, [x3] /* INT128 */ + b ffi_closure_SYSV_epilog + brk #1000 /* UNUSED */ + nop + brk #1000 /* UNUSED */ + nop + brk #1000 /* UNUSED */ + nop + brk #1000 /* UNUSED */ + nop + brk #1000 /* UNUSED */ + nop + ldr s3, [x3, #12] /* S4 */ + nop + ldr s2, [x3, #8] /* S3 */ + nop + ldp s0, s1, [x3] /* S2 */ + b ffi_closure_SYSV_epilog + ldr s0, [x3] /* S1 */ + b ffi_closure_SYSV_epilog + ldr d3, [x3, #24] /* D4 */ + nop + ldr d2, [x3, #16] /* D3 */ + nop + ldp d0, d1, [x3] /* D2 */ + b ffi_closure_SYSV_epilog + ldr d0, [x3] /* D1 */ + b ffi_closure_SYSV_epilog + ldr q3, [x3, #48] /* Q4 */ + nop + ldr q2, [x3, #32] /* Q3 */ + nop + ldp q0, q1, [x3] /* Q2 */ + b ffi_closure_SYSV_epilog + ldr q0, [x3] /* Q1 */ + b ffi_closure_SYSV_epilog + ldrb w0, [x3, #BE(7)] /* UINT8 */ + b ffi_closure_SYSV_epilog + brk #1000 /* reserved */ + nop + ldrh w0, [x3, #BE(6)] /* UINT16 */ + b ffi_closure_SYSV_epilog + brk #1000 /* reserved */ + nop + ldr w0, [x3, #BE(4)] /* UINT32 */ + b ffi_closure_SYSV_epilog + brk #1000 /* reserved */ + nop + ldrsb x0, [x3, #BE(7)] /* SINT8 */ + b ffi_closure_SYSV_epilog + brk #1000 /* reserved */ + nop + ldrsh x0, [x3, #BE(6)] /* SINT16 */ + b ffi_closure_SYSV_epilog + brk #1000 /* reserved */ + nop + ldrsw x0, [x3, #BE(4)] /* SINT32 */ + nop + /* reserved */ + +ffi_closure_SYSV_epilog + EPILOG_RESTORE_REG_PAIR x29, x30, #ffi_closure_SYSV_FS! + EPILOG_RETURN + NESTED_END ffi_closure_SYSV + + +#ifdef FFI_GO_CLOSURES + NESTED_ENTRY ffi_go_closure_SYSV_V + PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS! + + /* Save the argument passing vector registers. */ + stp q0, q1, [sp, #16 + 0] + stp q2, q3, [sp, #16 + 32] + stp q4, q5, [sp, #16 + 64] + stp q6, q7, [sp, #16 + 96] + b ffi_go_closure_SYSV_save_argument + NESTED_END ffi_go_closure_SYSV_V + + NESTED_ENTRY ffi_go_closure_SYSV + PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS! + +ffi_go_closure_SYSV_save_argument + /* Save the argument passing core registers. */ + stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0] + stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16] + stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32] + stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48] + + /* Load ffi_closure_inner arguments. 
*/ + ldp PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]/* load cif, fn */ + mov x2, x18 /* load user_data */ + b do_closure + NESTED_END ffi_go_closure_SYSV + +#endif /* FFI_GO_CLOSURES */ + + +/* void extend_hfa_type (void *dest, void *src, int h) */ + + LEAF_ENTRY extend_hfa_type + + adr x3, extend_hfa_type_jump_base + and w2, w2, #AARCH64_RET_MASK + sub x2, x2, #AARCH64_RET_S4 + add x3, x3, x2, lsl #4 + br x3 + + ALIGN 4 +extend_hfa_type_jump_base + ldp s16, s17, [x1] /* S4 */ + ldp s18, s19, [x1, #8] + b extend_hfa_type_store_4 + nop + + ldp s16, s17, [x1] /* S3 */ + ldr s18, [x1, #8] + b extend_hfa_type_store_3 + nop + + ldp s16, s17, [x1] /* S2 */ + b extend_hfa_type_store_2 + nop + nop + + ldr s16, [x1] /* S1 */ + b extend_hfa_type_store_1 + nop + nop + + ldp d16, d17, [x1] /* D4 */ + ldp d18, d19, [x1, #16] + b extend_hfa_type_store_4 + nop + + ldp d16, d17, [x1] /* D3 */ + ldr d18, [x1, #16] + b extend_hfa_type_store_3 + nop + + ldp d16, d17, [x1] /* D2 */ + b extend_hfa_type_store_2 + nop + nop + + ldr d16, [x1] /* D1 */ + b extend_hfa_type_store_1 + nop + nop + + ldp q16, q17, [x1] /* Q4 */ + ldp q18, q19, [x1, #16] + b extend_hfa_type_store_4 + nop + + ldp q16, q17, [x1] /* Q3 */ + ldr q18, [x1, #16] + b extend_hfa_type_store_3 + nop + + ldp q16, q17, [x1] /* Q2 */ + b extend_hfa_type_store_2 + nop + nop + + ldr q16, [x1] /* Q1 */ + b extend_hfa_type_store_1 + +extend_hfa_type_store_4 + str q19, [x0, #48] +extend_hfa_type_store_3 + str q18, [x0, #32] +extend_hfa_type_store_2 + str q17, [x0, #16] +extend_hfa_type_store_1 + str q16, [x0] + ret + + LEAF_END extend_hfa_type + + +/* void compress_hfa_type (void *dest, void *reg, int h) */ + + LEAF_ENTRY compress_hfa_type + + adr x3, compress_hfa_type_jump_base + and w2, w2, #AARCH64_RET_MASK + sub x2, x2, #AARCH64_RET_S4 + add x3, x3, x2, lsl #4 + br x3 + + ALIGN 4 +compress_hfa_type_jump_base + ldp q16, q17, [x1] /* S4 */ + ldp q18, q19, [x1, #32] + st4 { v16.s, v17.s, v18.s, v19.s }[0], [x0] + ret + + ldp q16, q17, [x1] /* S3 */ + ldr q18, [x1, #32] + st3 { v16.s, v17.s, v18.s }[0], [x0] + ret + + ldp q16, q17, [x1] /* S2 */ + st2 { v16.s, v17.s }[0], [x0] + ret + nop + + ldr q16, [x1] /* S1 */ + st1 { v16.s }[0], [x0] + ret + nop + + ldp q16, q17, [x1] /* D4 */ + ldp q18, q19, [x1, #32] + st4 { v16.d, v17.d, v18.d, v19.d }[0], [x0] + ret + + ldp q16, q17, [x1] /* D3 */ + ldr q18, [x1, #32] + st3 { v16.d, v17.d, v18.d }[0], [x0] + ret + + ldp q16, q17, [x1] /* D2 */ + st2 { v16.d, v17.d }[0], [x0] + ret + nop + + ldr q16, [x1] /* D1 */ + st1 { v16.d }[0], [x0] + ret + nop + + ldp q16, q17, [x1] /* Q4 */ + ldp q18, q19, [x1, #32] + b compress_hfa_type_store_q4 + nop + + ldp q16, q17, [x1] /* Q3 */ + ldr q18, [x1, #32] + b compress_hfa_type_store_q3 + nop + + ldp q16, q17, [x1] /* Q2 */ + stp q16, q17, [x0] + ret + nop + + ldr q16, [x1] /* Q1 */ + str q16, [x0] + ret + +compress_hfa_type_store_q4 + str q19, [x0, #48] +compress_hfa_type_store_q3 + str q18, [x0, #32] + stp q16, q17, [x0] + ret + + LEAF_END compress_hfa_type + + END
\ No newline at end of file
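Finally, the extend_hfa_type/compress_hfa_type helpers exist because AAPCS64 returns homogeneous floating-point aggregates of up to four members in v0..v3 rather than in memory. A small example of such a type (vec3 is an illustrative name):

typedef struct { float x, y, z; } vec3;  /* HFA: three floats */

/* Under AAPCS64 this is returned in s0..s2; in the dispatch tables
   above it is the AARCH64_RET_S3 case (2*4 + (4-3) == 9), and
   compress_hfa_type packs s0..s2 back into the 12-byte struct. */
vec3
make_vec3 (void)
{
  vec3 v = { 1.0f, 2.0f, 3.0f };
  return v;
}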