path: root/contrib/restricted/libffi/src/aarch64
author     Mikhail Borisov <borisov.mikhail@gmail.com>   2022-02-10 16:45:39 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru>  2022-02-10 16:45:39 +0300
commit     a6a92afe03e02795227d2641b49819b687f088f8 (patch)
tree       f6984a1d27d5a7ec88a6fdd6e20cd5b7693b6ece /contrib/restricted/libffi/src/aarch64
parent     c6dc8b8bd530985bc4cce0137e9a5de32f1087cb (diff)
download   ydb-a6a92afe03e02795227d2641b49819b687f088f8.tar.gz
Restoring authorship annotation for Mikhail Borisov <borisov.mikhail@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'contrib/restricted/libffi/src/aarch64')
-rw-r--r--  contrib/restricted/libffi/src/aarch64/ffi.c           2018
-rw-r--r--  contrib/restricted/libffi/src/aarch64/ffitarget.h      184
-rw-r--r--  contrib/restricted/libffi/src/aarch64/internal.h       134
-rw-r--r--  contrib/restricted/libffi/src/aarch64/sysv.S           880
-rw-r--r--  contrib/restricted/libffi/src/aarch64/win64_armasm.S  1012
5 files changed, 2114 insertions, 2114 deletions
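
For orientation before the diff: ffi.c below implements the machine-dependent half of the portable libffi call interface for aarch64. A minimal sketch of how that public API is typically driven follows (illustrative only; add_ints is an invented example function, not part of this tree):

#include <ffi.h>
#include <stdio.h>

/* Example target function to call through libffi. */
static int add_ints (int a, int b) { return a + b; }

int
main (void)
{
  ffi_cif cif;
  ffi_type *arg_types[2] = { &ffi_type_sint, &ffi_type_sint };
  int a = 2, b = 3;
  void *arg_values[2] = { &a, &b };
  ffi_arg result;

  /* Describe the call: default ABI, two int args, int return.
     On this target, ffi_prep_cif ends up in ffi_prep_cif_machdep below. */
  if (ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 2,
                    &ffi_type_sint, arg_types) != FFI_OK)
    return 1;

  /* Dispatches through ffi_call_int / ffi_call_SYSV in this port. */
  ffi_call (&cif, FFI_FN (add_ints), &result, arg_values);
  printf ("%d\n", (int) result);   /* prints 5 */
  return 0;
}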
diff --git a/contrib/restricted/libffi/src/aarch64/ffi.c b/contrib/restricted/libffi/src/aarch64/ffi.c
index 1ebf43c192..84d44ab74a 100644
--- a/contrib/restricted/libffi/src/aarch64/ffi.c
+++ b/contrib/restricted/libffi/src/aarch64/ffi.c
@@ -1,1009 +1,1009 @@
-/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-``Software''), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#if defined(__aarch64__) || defined(__arm64__)|| defined (_M_ARM64)
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <fficonfig.h>
-#include <ffi.h>
-#include <ffi_common.h>
-#include "internal.h"
-#ifdef _M_ARM64
-#include <windows.h> /* FlushInstructionCache */
-#endif
-
-/* Force FFI_TYPE_LONGDOUBLE to be different from FFI_TYPE_DOUBLE;
- all further uses in this file will refer to the 128-bit type. */
-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
-# if FFI_TYPE_LONGDOUBLE != 4
-# error FFI_TYPE_LONGDOUBLE out of date
-# endif
-#else
-# undef FFI_TYPE_LONGDOUBLE
-# define FFI_TYPE_LONGDOUBLE 4
-#endif
-
-union _d
-{
- UINT64 d;
- UINT32 s[2];
-};
-
-struct _v
-{
- union _d d[2] __attribute__((aligned(16)));
-};
-
-struct call_context
-{
- struct _v v[N_V_ARG_REG];
- UINT64 x[N_X_ARG_REG];
-};
-
-#if FFI_EXEC_TRAMPOLINE_TABLE
-
-#ifdef __MACH__
-#include <mach/vm_param.h>
-#endif
-
-#else
-
-#if defined (__clang__) && defined (__APPLE__)
-extern void sys_icache_invalidate (void *start, size_t len);
-#endif
-
-static inline void
-ffi_clear_cache (void *start, void *end)
-{
-#if defined (__clang__) && defined (__APPLE__)
- sys_icache_invalidate (start, (char *)end - (char *)start);
-#elif defined (__GNUC__)
- __builtin___clear_cache (start, end);
-#elif defined (_M_ARM64)
- FlushInstructionCache(GetCurrentProcess(), start, (char*)end - (char*)start);
-#else
-#error "Missing builtin to flush instruction cache"
-#endif
-}
-
-#endif
-
-/* A subroutine of is_vfp_type. Given a structure type, return the type code
- of the first non-structure element. Recurse for structure elements.
- Return -1 if the structure is in fact empty, i.e. no nested elements. */
-
-static int
-is_hfa0 (const ffi_type *ty)
-{
- ffi_type **elements = ty->elements;
- int i, ret = -1;
-
- if (elements != NULL)
- for (i = 0; elements[i]; ++i)
- {
- ret = elements[i]->type;
- if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX)
- {
- ret = is_hfa0 (elements[i]);
- if (ret < 0)
- continue;
- }
- break;
- }
-
- return ret;
-}
-
-/* A subroutine of is_vfp_type. Given a structure type, return true if all
- of the non-structure elements are the same as CANDIDATE. */
-
-static int
-is_hfa1 (const ffi_type *ty, int candidate)
-{
- ffi_type **elements = ty->elements;
- int i;
-
- if (elements != NULL)
- for (i = 0; elements[i]; ++i)
- {
- int t = elements[i]->type;
- if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
- {
- if (!is_hfa1 (elements[i], candidate))
- return 0;
- }
- else if (t != candidate)
- return 0;
- }
-
- return 1;
-}
-
-/* Determine if TY may be allocated to the FP registers.  This is true for
-   both fp scalar types and homogeneous floating-point aggregates (HFAs).
-   An HFA is a structure consisting of 1 to 4 members, all of the same
-   fp scalar type.
-
- Returns non-zero iff TY is an HFA. The result is the AARCH64_RET_*
- constant for the type. */
-
-static int
-is_vfp_type (const ffi_type *ty)
-{
- ffi_type **elements;
- int candidate, i;
- size_t size, ele_count;
-
- /* Quickest tests first. */
- candidate = ty->type;
- switch (candidate)
- {
- default:
- return 0;
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- case FFI_TYPE_LONGDOUBLE:
- ele_count = 1;
- goto done;
- case FFI_TYPE_COMPLEX:
- candidate = ty->elements[0]->type;
- switch (candidate)
- {
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- case FFI_TYPE_LONGDOUBLE:
- ele_count = 2;
- goto done;
- }
- return 0;
- case FFI_TYPE_STRUCT:
- break;
- }
-
- /* No HFA types are smaller than 4 bytes, or larger than 64 bytes. */
- size = ty->size;
- if (size < 4 || size > 64)
- return 0;
-
- /* Find the type of the first non-structure member. */
- elements = ty->elements;
- candidate = elements[0]->type;
- if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX)
- {
- for (i = 0; ; ++i)
- {
- candidate = is_hfa0 (elements[i]);
- if (candidate >= 0)
- break;
- }
- }
-
- /* If the first member is not a floating point type, it's not an HFA.
- Also quickly re-check the size of the structure. */
- switch (candidate)
- {
- case FFI_TYPE_FLOAT:
- ele_count = size / sizeof(float);
- if (size != ele_count * sizeof(float))
- return 0;
- break;
- case FFI_TYPE_DOUBLE:
- ele_count = size / sizeof(double);
- if (size != ele_count * sizeof(double))
- return 0;
- break;
- case FFI_TYPE_LONGDOUBLE:
- ele_count = size / sizeof(long double);
- if (size != ele_count * sizeof(long double))
- return 0;
- break;
- default:
- return 0;
- }
- if (ele_count > 4)
- return 0;
-
- /* Finally, make sure that all scalar elements are the same type. */
- for (i = 0; elements[i]; ++i)
- {
- int t = elements[i]->type;
- if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
- {
- if (!is_hfa1 (elements[i], candidate))
- return 0;
- }
- else if (t != candidate)
- return 0;
- }
-
- /* All tests succeeded. Encode the result. */
- done:
- return candidate * 4 + (4 - (int)ele_count);
-}
-
-/* Representation of the procedure call argument marshalling
- state.
-
- The terse state variable names match the names used in the AARCH64
- PCS. */
-
-struct arg_state
-{
- unsigned ngrn; /* Next general-purpose register number. */
- unsigned nsrn; /* Next vector register number. */
- size_t nsaa; /* Next stack offset. */
-
-#if defined (__APPLE__)
- unsigned allocating_variadic;
-#endif
-};
-
-/* Initialize a procedure call argument marshalling state. */
-static void
-arg_init (struct arg_state *state)
-{
- state->ngrn = 0;
- state->nsrn = 0;
- state->nsaa = 0;
-#if defined (__APPLE__)
- state->allocating_variadic = 0;
-#endif
-}
-
-/* Allocate an aligned slot on the stack and return a pointer to it. */
-static void *
-allocate_to_stack (struct arg_state *state, void *stack,
- size_t alignment, size_t size)
-{
- size_t nsaa = state->nsaa;
-
- /* Round up the NSAA to the larger of 8 or the natural
- alignment of the argument's type. */
-#if defined (__APPLE__)
- if (state->allocating_variadic && alignment < 8)
- alignment = 8;
-#else
- if (alignment < 8)
- alignment = 8;
-#endif
-
- nsaa = FFI_ALIGN (nsaa, alignment);
- state->nsaa = nsaa + size;
-
- return (char *)stack + nsaa;
-}
-
-static ffi_arg
-extend_integer_type (void *source, int type)
-{
- switch (type)
- {
- case FFI_TYPE_UINT8:
- return *(UINT8 *) source;
- case FFI_TYPE_SINT8:
- return *(SINT8 *) source;
- case FFI_TYPE_UINT16:
- return *(UINT16 *) source;
- case FFI_TYPE_SINT16:
- return *(SINT16 *) source;
- case FFI_TYPE_UINT32:
- return *(UINT32 *) source;
- case FFI_TYPE_INT:
- case FFI_TYPE_SINT32:
- return *(SINT32 *) source;
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT64:
- return *(UINT64 *) source;
- break;
- case FFI_TYPE_POINTER:
- return *(uintptr_t *) source;
- default:
- abort();
- }
-}
-
-#if defined(_MSC_VER)
-void extend_hfa_type (void *dest, void *src, int h);
-#else
-static void
-extend_hfa_type (void *dest, void *src, int h)
-{
- ssize_t f = h - AARCH64_RET_S4;
- void *x0;
-
- asm volatile (
- "adr %0, 0f\n"
-" add %0, %0, %1\n"
-" br %0\n"
-"0: ldp s16, s17, [%3]\n" /* S4 */
-" ldp s18, s19, [%3, #8]\n"
-" b 4f\n"
-" ldp s16, s17, [%3]\n" /* S3 */
-" ldr s18, [%3, #8]\n"
-" b 3f\n"
-" ldp s16, s17, [%3]\n" /* S2 */
-" b 2f\n"
-" nop\n"
-" ldr s16, [%3]\n" /* S1 */
-" b 1f\n"
-" nop\n"
-" ldp d16, d17, [%3]\n" /* D4 */
-" ldp d18, d19, [%3, #16]\n"
-" b 4f\n"
-" ldp d16, d17, [%3]\n" /* D3 */
-" ldr d18, [%3, #16]\n"
-" b 3f\n"
-" ldp d16, d17, [%3]\n" /* D2 */
-" b 2f\n"
-" nop\n"
-" ldr d16, [%3]\n" /* D1 */
-" b 1f\n"
-" nop\n"
-" ldp q16, q17, [%3]\n" /* Q4 */
-" ldp q18, q19, [%3, #32]\n"
-" b 4f\n"
-" ldp q16, q17, [%3]\n" /* Q3 */
-" ldr q18, [%3, #32]\n"
-" b 3f\n"
-" ldp q16, q17, [%3]\n" /* Q2 */
-" b 2f\n"
-" nop\n"
-" ldr q16, [%3]\n" /* Q1 */
-" b 1f\n"
-"4: str q19, [%2, #48]\n"
-"3: str q18, [%2, #32]\n"
-"2: str q17, [%2, #16]\n"
-"1: str q16, [%2]"
- : "=&r"(x0)
- : "r"(f * 12), "r"(dest), "r"(src)
- : "memory", "v16", "v17", "v18", "v19");
-}
-#endif
-
-#if defined(_MSC_VER)
-void* compress_hfa_type (void *dest, void *src, int h);
-#else
-static void *
-compress_hfa_type (void *dest, void *reg, int h)
-{
- switch (h)
- {
- case AARCH64_RET_S1:
- if (dest == reg)
- {
-#ifdef __AARCH64EB__
- dest += 12;
-#endif
- }
- else
- *(float *)dest = *(float *)reg;
- break;
- case AARCH64_RET_S2:
- asm ("ldp q16, q17, [%1]\n\t"
- "st2 { v16.s, v17.s }[0], [%0]"
- : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
- break;
- case AARCH64_RET_S3:
- asm ("ldp q16, q17, [%1]\n\t"
- "ldr q18, [%1, #32]\n\t"
- "st3 { v16.s, v17.s, v18.s }[0], [%0]"
- : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
- break;
- case AARCH64_RET_S4:
- asm ("ldp q16, q17, [%1]\n\t"
- "ldp q18, q19, [%1, #32]\n\t"
- "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]"
- : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
- break;
-
- case AARCH64_RET_D1:
- if (dest == reg)
- {
-#ifdef __AARCH64EB__
- dest += 8;
-#endif
- }
- else
- *(double *)dest = *(double *)reg;
- break;
- case AARCH64_RET_D2:
- asm ("ldp q16, q17, [%1]\n\t"
- "st2 { v16.d, v17.d }[0], [%0]"
- : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
- break;
- case AARCH64_RET_D3:
- asm ("ldp q16, q17, [%1]\n\t"
- "ldr q18, [%1, #32]\n\t"
- "st3 { v16.d, v17.d, v18.d }[0], [%0]"
- : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
- break;
- case AARCH64_RET_D4:
- asm ("ldp q16, q17, [%1]\n\t"
- "ldp q18, q19, [%1, #32]\n\t"
- "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]"
- : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
- break;
-
- default:
- if (dest != reg)
- return memcpy (dest, reg, 16 * (4 - (h & 3)));
- break;
- }
- return dest;
-}
-#endif
-
-/* Either allocate an appropriate register for the argument type, or if
- none are available, allocate a stack slot and return a pointer
- to the allocated space. */
-
-static void *
-allocate_int_to_reg_or_stack (struct call_context *context,
- struct arg_state *state,
- void *stack, size_t size)
-{
- if (state->ngrn < N_X_ARG_REG)
- return &context->x[state->ngrn++];
-
- state->ngrn = N_X_ARG_REG;
- return allocate_to_stack (state, stack, size, size);
-}
-
-ffi_status FFI_HIDDEN
-ffi_prep_cif_machdep (ffi_cif *cif)
-{
- ffi_type *rtype = cif->rtype;
- size_t bytes = cif->bytes;
- int flags, i, n;
-
- switch (rtype->type)
- {
- case FFI_TYPE_VOID:
- flags = AARCH64_RET_VOID;
- break;
- case FFI_TYPE_UINT8:
- flags = AARCH64_RET_UINT8;
- break;
- case FFI_TYPE_UINT16:
- flags = AARCH64_RET_UINT16;
- break;
- case FFI_TYPE_UINT32:
- flags = AARCH64_RET_UINT32;
- break;
- case FFI_TYPE_SINT8:
- flags = AARCH64_RET_SINT8;
- break;
- case FFI_TYPE_SINT16:
- flags = AARCH64_RET_SINT16;
- break;
- case FFI_TYPE_INT:
- case FFI_TYPE_SINT32:
- flags = AARCH64_RET_SINT32;
- break;
- case FFI_TYPE_SINT64:
- case FFI_TYPE_UINT64:
- flags = AARCH64_RET_INT64;
- break;
- case FFI_TYPE_POINTER:
- flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
- break;
-
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- case FFI_TYPE_LONGDOUBLE:
- case FFI_TYPE_STRUCT:
- case FFI_TYPE_COMPLEX:
- flags = is_vfp_type (rtype);
- if (flags == 0)
- {
- size_t s = rtype->size;
- if (s > 16)
- {
- flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
- bytes += 8;
- }
- else if (s == 16)
- flags = AARCH64_RET_INT128;
- else if (s == 8)
- flags = AARCH64_RET_INT64;
- else
- flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
- }
- break;
-
- default:
- abort();
- }
-
- for (i = 0, n = cif->nargs; i < n; i++)
- if (is_vfp_type (cif->arg_types[i]))
- {
- flags |= AARCH64_FLAG_ARG_V;
- break;
- }
-
- /* Round the stack up to a multiple of the stack alignment requirement. */
- cif->bytes = (unsigned) FFI_ALIGN(bytes, 16);
- cif->flags = flags;
-#if defined (__APPLE__)
- cif->aarch64_nfixedargs = 0;
-#endif
-
- return FFI_OK;
-}
-
-#if defined (__APPLE__)
-/* Perform Apple-specific cif processing for variadic calls.  */
-ffi_status FFI_HIDDEN
-ffi_prep_cif_machdep_var(ffi_cif *cif, unsigned int nfixedargs,
- unsigned int ntotalargs)
-{
- ffi_status status = ffi_prep_cif_machdep (cif);
- cif->aarch64_nfixedargs = nfixedargs;
- return status;
-}
-#endif /* __APPLE__ */
-
-extern void ffi_call_SYSV (struct call_context *context, void *frame,
- void (*fn)(void), void *rvalue, int flags,
- void *closure) FFI_HIDDEN;
-
-/* Call a function with the provided arguments and capture the return
- value. */
-static void
-ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
- void **avalue, void *closure)
-{
- struct call_context *context;
- void *stack, *frame, *rvalue;
- struct arg_state state;
- size_t stack_bytes, rtype_size, rsize;
- int i, nargs, flags;
- ffi_type *rtype;
-
- flags = cif->flags;
- rtype = cif->rtype;
- rtype_size = rtype->size;
- stack_bytes = cif->bytes;
-
- /* If the target function returns a structure via hidden pointer,
- then we cannot allow a null rvalue. Otherwise, mash a null
- rvalue to void return type. */
- rsize = 0;
- if (flags & AARCH64_RET_IN_MEM)
- {
- if (orig_rvalue == NULL)
- rsize = rtype_size;
- }
- else if (orig_rvalue == NULL)
- flags &= AARCH64_FLAG_ARG_V;
- else if (flags & AARCH64_RET_NEED_COPY)
- rsize = 16;
-
-  /* Allocate consecutive stack space for everything we'll need. */
- context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
- stack = context + 1;
- frame = (void*)((uintptr_t)stack + (uintptr_t)stack_bytes);
- rvalue = (rsize ? (void*)((uintptr_t)frame + 32) : orig_rvalue);
-
- arg_init (&state);
- for (i = 0, nargs = cif->nargs; i < nargs; i++)
- {
- ffi_type *ty = cif->arg_types[i];
- size_t s = ty->size;
- void *a = avalue[i];
- int h, t;
-
- t = ty->type;
- switch (t)
- {
- case FFI_TYPE_VOID:
- FFI_ASSERT (0);
- break;
-
- /* If the argument is a basic type the argument is allocated to an
- appropriate register, or if none are available, to the stack. */
- case FFI_TYPE_INT:
- case FFI_TYPE_UINT8:
- case FFI_TYPE_SINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT64:
- case FFI_TYPE_POINTER:
- do_pointer:
- {
- ffi_arg ext = extend_integer_type (a, t);
- if (state.ngrn < N_X_ARG_REG)
- context->x[state.ngrn++] = ext;
- else
- {
- void *d = allocate_to_stack (&state, stack, ty->alignment, s);
- state.ngrn = N_X_ARG_REG;
- /* Note that the default abi extends each argument
- to a full 64-bit slot, while the iOS abi allocates
- only enough space. */
-#ifdef __APPLE__
- memcpy(d, a, s);
-#else
- *(ffi_arg *)d = ext;
-#endif
- }
- }
- break;
-
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- case FFI_TYPE_LONGDOUBLE:
- case FFI_TYPE_STRUCT:
- case FFI_TYPE_COMPLEX:
- {
- void *dest;
-
- h = is_vfp_type (ty);
- if (h)
- {
- int elems = 4 - (h & 3);
-#ifdef _M_ARM64 /* for handling armasm calling convention */
- if (cif->is_variadic)
- {
- if (state.ngrn + elems <= N_X_ARG_REG)
- {
- dest = &context->x[state.ngrn];
- state.ngrn += elems;
- extend_hfa_type(dest, a, h);
- break;
- }
- state.nsrn = N_X_ARG_REG;
- dest = allocate_to_stack(&state, stack, ty->alignment, s);
- }
- else
- {
-#endif /* for handling armasm calling convention */
- if (state.nsrn + elems <= N_V_ARG_REG)
- {
- dest = &context->v[state.nsrn];
- state.nsrn += elems;
- extend_hfa_type (dest, a, h);
- break;
- }
- state.nsrn = N_V_ARG_REG;
- dest = allocate_to_stack (&state, stack, ty->alignment, s);
-#ifdef _M_ARM64 /* for handling armasm calling convention */
- }
-#endif /* for handling armasm calling convention */
- }
- else if (s > 16)
- {
- /* If the argument is a composite type that is larger than 16
- bytes, then the argument has been copied to memory, and
- the argument is replaced by a pointer to the copy. */
- a = &avalue[i];
- t = FFI_TYPE_POINTER;
- s = sizeof (void *);
- goto do_pointer;
- }
- else
- {
- size_t n = (s + 7) / 8;
- if (state.ngrn + n <= N_X_ARG_REG)
- {
- /* If the argument is a composite type and the size in
- double-words is not more than the number of available
- X registers, then the argument is copied into
- consecutive X registers. */
- dest = &context->x[state.ngrn];
- state.ngrn += (unsigned int)n;
- }
- else
- {
- /* Otherwise, there are insufficient X registers. Further
- X register allocations are prevented, the NSAA is
- adjusted and the argument is copied to memory at the
- adjusted NSAA. */
- state.ngrn = N_X_ARG_REG;
- dest = allocate_to_stack (&state, stack, ty->alignment, s);
- }
- }
- memcpy (dest, a, s);
- }
- break;
-
- default:
- abort();
- }
-
-#if defined (__APPLE__)
- if (i + 1 == cif->aarch64_nfixedargs)
- {
- state.ngrn = N_X_ARG_REG;
- state.nsrn = N_V_ARG_REG;
- state.allocating_variadic = 1;
- }
-#endif
- }
-
- ffi_call_SYSV (context, frame, fn, rvalue, flags, closure);
-
- if (flags & AARCH64_RET_NEED_COPY)
- memcpy (orig_rvalue, rvalue, rtype_size);
-}
-
-void
-ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
-{
- ffi_call_int (cif, fn, rvalue, avalue, NULL);
-}
-
-#ifdef FFI_GO_CLOSURES
-void
-ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
- void **avalue, void *closure)
-{
- ffi_call_int (cif, fn, rvalue, avalue, closure);
-}
-#endif /* FFI_GO_CLOSURES */
-
-/* Build a trampoline. */
-
-extern void ffi_closure_SYSV (void) FFI_HIDDEN;
-extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;
-
-ffi_status
-ffi_prep_closure_loc (ffi_closure *closure,
- ffi_cif* cif,
- void (*fun)(ffi_cif*,void*,void**,void*),
- void *user_data,
- void *codeloc)
-{
- if (cif->abi != FFI_SYSV)
- return FFI_BAD_ABI;
-
- void (*start)(void);
-
- if (cif->flags & AARCH64_FLAG_ARG_V)
- start = ffi_closure_SYSV_V;
- else
- start = ffi_closure_SYSV;
-
-#if FFI_EXEC_TRAMPOLINE_TABLE
-#ifdef __MACH__
- void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE);
- config[0] = closure;
- config[1] = start;
-#endif
-#else
- static const unsigned char trampoline[16] = {
- 0x90, 0x00, 0x00, 0x58, /* ldr x16, tramp+16 */
- 0xf1, 0xff, 0xff, 0x10, /* adr x17, tramp+0 */
- 0x00, 0x02, 0x1f, 0xd6 /* br x16 */
- };
- char *tramp = closure->tramp;
-
- memcpy (tramp, trampoline, sizeof(trampoline));
-
- *(UINT64 *)(tramp + 16) = (uintptr_t)start;
-
- ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);
-
- /* Also flush the cache for code mapping. */
-#ifdef _M_ARM64
- // Not using dlmalloc.c for Windows ARM64 builds
- // so calling ffi_data_to_code_pointer() isn't necessary
- unsigned char *tramp_code = tramp;
- #else
- unsigned char *tramp_code = ffi_data_to_code_pointer (tramp);
- #endif
- ffi_clear_cache (tramp_code, tramp_code + FFI_TRAMPOLINE_SIZE);
-#endif
-
- closure->cif = cif;
- closure->fun = fun;
- closure->user_data = user_data;
-
- return FFI_OK;
-}
-
-#ifdef FFI_GO_CLOSURES
-extern void ffi_go_closure_SYSV (void) FFI_HIDDEN;
-extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN;
-
-ffi_status
-ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif,
- void (*fun)(ffi_cif*,void*,void**,void*))
-{
- void (*start)(void);
-
- if (cif->abi != FFI_SYSV)
- return FFI_BAD_ABI;
-
- if (cif->flags & AARCH64_FLAG_ARG_V)
- start = ffi_go_closure_SYSV_V;
- else
- start = ffi_go_closure_SYSV;
-
- closure->tramp = start;
- closure->cif = cif;
- closure->fun = fun;
-
- return FFI_OK;
-}
-#endif /* FFI_GO_CLOSURES */
-
-/* Primary handler to setup and invoke a function within a closure.
-
- A closure when invoked enters via the assembler wrapper
- ffi_closure_SYSV(). The wrapper allocates a call context on the
- stack, saves the interesting registers (from the perspective of
- the calling convention) into the context then passes control to
- ffi_closure_SYSV_inner() passing the saved context and a pointer to
- the stack at the point ffi_closure_SYSV() was invoked.
-
- On the return path the assembler wrapper will reload call context
- registers.
-
-   ffi_closure_SYSV_inner() marshals the call context into ffi value
-   descriptors, invokes the wrapped function, then marshals the return
- value back into the call context. */
-
-int FFI_HIDDEN
-ffi_closure_SYSV_inner (ffi_cif *cif,
- void (*fun)(ffi_cif*,void*,void**,void*),
- void *user_data,
- struct call_context *context,
- void *stack, void *rvalue, void *struct_rvalue)
-{
- void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
- int i, h, nargs, flags;
- struct arg_state state;
-
- arg_init (&state);
-
- for (i = 0, nargs = cif->nargs; i < nargs; i++)
- {
- ffi_type *ty = cif->arg_types[i];
- int t = ty->type;
- size_t n, s = ty->size;
-
- switch (t)
- {
- case FFI_TYPE_VOID:
- FFI_ASSERT (0);
- break;
-
- case FFI_TYPE_INT:
- case FFI_TYPE_UINT8:
- case FFI_TYPE_SINT8:
- case FFI_TYPE_UINT16:
- case FFI_TYPE_SINT16:
- case FFI_TYPE_UINT32:
- case FFI_TYPE_SINT32:
- case FFI_TYPE_UINT64:
- case FFI_TYPE_SINT64:
- case FFI_TYPE_POINTER:
- avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s);
- break;
-
- case FFI_TYPE_FLOAT:
- case FFI_TYPE_DOUBLE:
- case FFI_TYPE_LONGDOUBLE:
- case FFI_TYPE_STRUCT:
- case FFI_TYPE_COMPLEX:
- h = is_vfp_type (ty);
- if (h)
- {
- n = 4 - (h & 3);
-#ifdef _M_ARM64 /* for handling armasm calling convention */
- if (cif->is_variadic)
- {
- if (state.ngrn + n <= N_X_ARG_REG)
- {
- void *reg = &context->x[state.ngrn];
- state.ngrn += (unsigned int)n;
-
- /* Eeek! We need a pointer to the structure, however the
- homogeneous float elements are being passed in individual
- registers, therefore for float and double the structure
- is not represented as a contiguous sequence of bytes in
- our saved register context. We don't need the original
- contents of the register storage, so we reformat the
- structure into the same memory. */
- avalue[i] = compress_hfa_type(reg, reg, h);
- }
- else
- {
- state.ngrn = N_X_ARG_REG;
- state.nsrn = N_V_ARG_REG;
- avalue[i] = allocate_to_stack(&state, stack,
- ty->alignment, s);
- }
- }
- else
- {
-#endif /* for handling armasm calling convention */
- if (state.nsrn + n <= N_V_ARG_REG)
- {
- void *reg = &context->v[state.nsrn];
- state.nsrn += (unsigned int)n;
- avalue[i] = compress_hfa_type(reg, reg, h);
- }
- else
- {
- state.nsrn = N_V_ARG_REG;
- avalue[i] = allocate_to_stack(&state, stack,
- ty->alignment, s);
- }
-#ifdef _M_ARM64 /* for handling armasm calling convention */
- }
-#endif /* for handling armasm calling convention */
- }
- else if (s > 16)
- {
- /* Replace Composite type of size greater than 16 with a
- pointer. */
- avalue[i] = *(void **)
- allocate_int_to_reg_or_stack (context, &state, stack,
- sizeof (void *));
- }
- else
- {
- n = (s + 7) / 8;
- if (state.ngrn + n <= N_X_ARG_REG)
- {
- avalue[i] = &context->x[state.ngrn];
- state.ngrn += (unsigned int)n;
- }
- else
- {
- state.ngrn = N_X_ARG_REG;
- avalue[i] = allocate_to_stack(&state, stack,
- ty->alignment, s);
- }
- }
- break;
-
- default:
- abort();
- }
-
-#if defined (__APPLE__)
- if (i + 1 == cif->aarch64_nfixedargs)
- {
- state.ngrn = N_X_ARG_REG;
- state.nsrn = N_V_ARG_REG;
- state.allocating_variadic = 1;
- }
-#endif
- }
-
- flags = cif->flags;
- if (flags & AARCH64_RET_IN_MEM)
- rvalue = struct_rvalue;
-
- fun (cif, rvalue, avalue, user_data);
-
- return flags;
-}
-
-#endif /* (__aarch64__) || defined(__arm64__)|| defined (_M_ARM64)*/
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#if defined(__aarch64__) || defined(__arm64__)|| defined (_M_ARM64)
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_common.h>
+#include "internal.h"
+#ifdef _M_ARM64
+#include <windows.h> /* FlushInstructionCache */
+#endif
+
+/* Force FFI_TYPE_LONGDOUBLE to be different from FFI_TYPE_DOUBLE;
+ all further uses in this file will refer to the 128-bit type. */
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+# if FFI_TYPE_LONGDOUBLE != 4
+# error FFI_TYPE_LONGDOUBLE out of date
+# endif
+#else
+# undef FFI_TYPE_LONGDOUBLE
+# define FFI_TYPE_LONGDOUBLE 4
+#endif
+
+union _d
+{
+ UINT64 d;
+ UINT32 s[2];
+};
+
+struct _v
+{
+ union _d d[2] __attribute__((aligned(16)));
+};
+
+struct call_context
+{
+ struct _v v[N_V_ARG_REG];
+ UINT64 x[N_X_ARG_REG];
+};
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+#include <mach/vm_param.h>
+#endif
+
+#else
+
+#if defined (__clang__) && defined (__APPLE__)
+extern void sys_icache_invalidate (void *start, size_t len);
+#endif
+
+static inline void
+ffi_clear_cache (void *start, void *end)
+{
+#if defined (__clang__) && defined (__APPLE__)
+ sys_icache_invalidate (start, (char *)end - (char *)start);
+#elif defined (__GNUC__)
+ __builtin___clear_cache (start, end);
+#elif defined (_M_ARM64)
+ FlushInstructionCache(GetCurrentProcess(), start, (char*)end - (char*)start);
+#else
+#error "Missing builtin to flush instruction cache"
+#endif
+}
+
+#endif
+
+/* A subroutine of is_vfp_type. Given a structure type, return the type code
+ of the first non-structure element. Recurse for structure elements.
+ Return -1 if the structure is in fact empty, i.e. no nested elements. */
+
+static int
+is_hfa0 (const ffi_type *ty)
+{
+ ffi_type **elements = ty->elements;
+ int i, ret = -1;
+
+ if (elements != NULL)
+ for (i = 0; elements[i]; ++i)
+ {
+ ret = elements[i]->type;
+ if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX)
+ {
+ ret = is_hfa0 (elements[i]);
+ if (ret < 0)
+ continue;
+ }
+ break;
+ }
+
+ return ret;
+}
+
+/* A subroutine of is_vfp_type. Given a structure type, return true if all
+ of the non-structure elements are the same as CANDIDATE. */
+
+static int
+is_hfa1 (const ffi_type *ty, int candidate)
+{
+ ffi_type **elements = ty->elements;
+ int i;
+
+ if (elements != NULL)
+ for (i = 0; elements[i]; ++i)
+ {
+ int t = elements[i]->type;
+ if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
+ {
+ if (!is_hfa1 (elements[i], candidate))
+ return 0;
+ }
+ else if (t != candidate)
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Determine if TY may be allocated to the FP registers.  This is true for
+   both fp scalar types and homogeneous floating-point aggregates (HFAs).
+   An HFA is a structure consisting of 1 to 4 members, all of the same
+   fp scalar type.
+
+ Returns non-zero iff TY is an HFA. The result is the AARCH64_RET_*
+ constant for the type. */
+
+static int
+is_vfp_type (const ffi_type *ty)
+{
+ ffi_type **elements;
+ int candidate, i;
+ size_t size, ele_count;
+
+ /* Quickest tests first. */
+ candidate = ty->type;
+ switch (candidate)
+ {
+ default:
+ return 0;
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_LONGDOUBLE:
+ ele_count = 1;
+ goto done;
+ case FFI_TYPE_COMPLEX:
+ candidate = ty->elements[0]->type;
+ switch (candidate)
+ {
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_LONGDOUBLE:
+ ele_count = 2;
+ goto done;
+ }
+ return 0;
+ case FFI_TYPE_STRUCT:
+ break;
+ }
+
+ /* No HFA types are smaller than 4 bytes, or larger than 64 bytes. */
+ size = ty->size;
+ if (size < 4 || size > 64)
+ return 0;
+
+ /* Find the type of the first non-structure member. */
+ elements = ty->elements;
+ candidate = elements[0]->type;
+ if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX)
+ {
+ for (i = 0; ; ++i)
+ {
+ candidate = is_hfa0 (elements[i]);
+ if (candidate >= 0)
+ break;
+ }
+ }
+
+ /* If the first member is not a floating point type, it's not an HFA.
+ Also quickly re-check the size of the structure. */
+ switch (candidate)
+ {
+ case FFI_TYPE_FLOAT:
+ ele_count = size / sizeof(float);
+ if (size != ele_count * sizeof(float))
+ return 0;
+ break;
+ case FFI_TYPE_DOUBLE:
+ ele_count = size / sizeof(double);
+ if (size != ele_count * sizeof(double))
+ return 0;
+ break;
+ case FFI_TYPE_LONGDOUBLE:
+ ele_count = size / sizeof(long double);
+ if (size != ele_count * sizeof(long double))
+ return 0;
+ break;
+ default:
+ return 0;
+ }
+ if (ele_count > 4)
+ return 0;
+
+ /* Finally, make sure that all scalar elements are the same type. */
+ for (i = 0; elements[i]; ++i)
+ {
+ int t = elements[i]->type;
+ if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
+ {
+ if (!is_hfa1 (elements[i], candidate))
+ return 0;
+ }
+ else if (t != candidate)
+ return 0;
+ }
+
+ /* All tests succeeded. Encode the result. */
+ done:
+ return candidate * 4 + (4 - (int)ele_count);
+}
+
+/* Representation of the procedure call argument marshalling
+ state.
+
+ The terse state variable names match the names used in the AARCH64
+ PCS. */
+
+struct arg_state
+{
+ unsigned ngrn; /* Next general-purpose register number. */
+ unsigned nsrn; /* Next vector register number. */
+ size_t nsaa; /* Next stack offset. */
+
+#if defined (__APPLE__)
+ unsigned allocating_variadic;
+#endif
+};
+
+/* Initialize a procedure call argument marshalling state. */
+static void
+arg_init (struct arg_state *state)
+{
+ state->ngrn = 0;
+ state->nsrn = 0;
+ state->nsaa = 0;
+#if defined (__APPLE__)
+ state->allocating_variadic = 0;
+#endif
+}
+
+/* Allocate an aligned slot on the stack and return a pointer to it. */
+static void *
+allocate_to_stack (struct arg_state *state, void *stack,
+ size_t alignment, size_t size)
+{
+ size_t nsaa = state->nsaa;
+
+ /* Round up the NSAA to the larger of 8 or the natural
+ alignment of the argument's type. */
+#if defined (__APPLE__)
+ if (state->allocating_variadic && alignment < 8)
+ alignment = 8;
+#else
+ if (alignment < 8)
+ alignment = 8;
+#endif
+
+ nsaa = FFI_ALIGN (nsaa, alignment);
+ state->nsaa = nsaa + size;
+
+ return (char *)stack + nsaa;
+}
+
+static ffi_arg
+extend_integer_type (void *source, int type)
+{
+ switch (type)
+ {
+ case FFI_TYPE_UINT8:
+ return *(UINT8 *) source;
+ case FFI_TYPE_SINT8:
+ return *(SINT8 *) source;
+ case FFI_TYPE_UINT16:
+ return *(UINT16 *) source;
+ case FFI_TYPE_SINT16:
+ return *(SINT16 *) source;
+ case FFI_TYPE_UINT32:
+ return *(UINT32 *) source;
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ return *(SINT32 *) source;
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ return *(UINT64 *) source;
+ break;
+ case FFI_TYPE_POINTER:
+ return *(uintptr_t *) source;
+ default:
+ abort();
+ }
+}
+
+#if defined(_MSC_VER)
+void extend_hfa_type (void *dest, void *src, int h);
+#else
+static void
+extend_hfa_type (void *dest, void *src, int h)
+{
+ ssize_t f = h - AARCH64_RET_S4;
+ void *x0;
+
+ asm volatile (
+ "adr %0, 0f\n"
+" add %0, %0, %1\n"
+" br %0\n"
+"0: ldp s16, s17, [%3]\n" /* S4 */
+" ldp s18, s19, [%3, #8]\n"
+" b 4f\n"
+" ldp s16, s17, [%3]\n" /* S3 */
+" ldr s18, [%3, #8]\n"
+" b 3f\n"
+" ldp s16, s17, [%3]\n" /* S2 */
+" b 2f\n"
+" nop\n"
+" ldr s16, [%3]\n" /* S1 */
+" b 1f\n"
+" nop\n"
+" ldp d16, d17, [%3]\n" /* D4 */
+" ldp d18, d19, [%3, #16]\n"
+" b 4f\n"
+" ldp d16, d17, [%3]\n" /* D3 */
+" ldr d18, [%3, #16]\n"
+" b 3f\n"
+" ldp d16, d17, [%3]\n" /* D2 */
+" b 2f\n"
+" nop\n"
+" ldr d16, [%3]\n" /* D1 */
+" b 1f\n"
+" nop\n"
+" ldp q16, q17, [%3]\n" /* Q4 */
+" ldp q18, q19, [%3, #32]\n"
+" b 4f\n"
+" ldp q16, q17, [%3]\n" /* Q3 */
+" ldr q18, [%3, #32]\n"
+" b 3f\n"
+" ldp q16, q17, [%3]\n" /* Q2 */
+" b 2f\n"
+" nop\n"
+" ldr q16, [%3]\n" /* Q1 */
+" b 1f\n"
+"4: str q19, [%2, #48]\n"
+"3: str q18, [%2, #32]\n"
+"2: str q17, [%2, #16]\n"
+"1: str q16, [%2]"
+ : "=&r"(x0)
+ : "r"(f * 12), "r"(dest), "r"(src)
+ : "memory", "v16", "v17", "v18", "v19");
+}
+#endif
+
+#if defined(_MSC_VER)
+void* compress_hfa_type (void *dest, void *src, int h);
+#else
+static void *
+compress_hfa_type (void *dest, void *reg, int h)
+{
+ switch (h)
+ {
+ case AARCH64_RET_S1:
+ if (dest == reg)
+ {
+#ifdef __AARCH64EB__
+ dest += 12;
+#endif
+ }
+ else
+ *(float *)dest = *(float *)reg;
+ break;
+ case AARCH64_RET_S2:
+ asm ("ldp q16, q17, [%1]\n\t"
+ "st2 { v16.s, v17.s }[0], [%0]"
+ : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
+ break;
+ case AARCH64_RET_S3:
+ asm ("ldp q16, q17, [%1]\n\t"
+ "ldr q18, [%1, #32]\n\t"
+ "st3 { v16.s, v17.s, v18.s }[0], [%0]"
+ : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
+ break;
+ case AARCH64_RET_S4:
+ asm ("ldp q16, q17, [%1]\n\t"
+ "ldp q18, q19, [%1, #32]\n\t"
+ "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]"
+ : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
+ break;
+
+ case AARCH64_RET_D1:
+ if (dest == reg)
+ {
+#ifdef __AARCH64EB__
+ dest += 8;
+#endif
+ }
+ else
+ *(double *)dest = *(double *)reg;
+ break;
+ case AARCH64_RET_D2:
+ asm ("ldp q16, q17, [%1]\n\t"
+ "st2 { v16.d, v17.d }[0], [%0]"
+ : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
+ break;
+ case AARCH64_RET_D3:
+ asm ("ldp q16, q17, [%1]\n\t"
+ "ldr q18, [%1, #32]\n\t"
+ "st3 { v16.d, v17.d, v18.d }[0], [%0]"
+ : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
+ break;
+ case AARCH64_RET_D4:
+ asm ("ldp q16, q17, [%1]\n\t"
+ "ldp q18, q19, [%1, #32]\n\t"
+ "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]"
+ : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
+ break;
+
+ default:
+ if (dest != reg)
+ return memcpy (dest, reg, 16 * (4 - (h & 3)));
+ break;
+ }
+ return dest;
+}
+#endif
+
+/* Either allocate an appropriate register for the argument type, or if
+ none are available, allocate a stack slot and return a pointer
+ to the allocated space. */
+
+static void *
+allocate_int_to_reg_or_stack (struct call_context *context,
+ struct arg_state *state,
+ void *stack, size_t size)
+{
+ if (state->ngrn < N_X_ARG_REG)
+ return &context->x[state->ngrn++];
+
+ state->ngrn = N_X_ARG_REG;
+ return allocate_to_stack (state, stack, size, size);
+}
+
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+ ffi_type *rtype = cif->rtype;
+ size_t bytes = cif->bytes;
+ int flags, i, n;
+
+ switch (rtype->type)
+ {
+ case FFI_TYPE_VOID:
+ flags = AARCH64_RET_VOID;
+ break;
+ case FFI_TYPE_UINT8:
+ flags = AARCH64_RET_UINT8;
+ break;
+ case FFI_TYPE_UINT16:
+ flags = AARCH64_RET_UINT16;
+ break;
+ case FFI_TYPE_UINT32:
+ flags = AARCH64_RET_UINT32;
+ break;
+ case FFI_TYPE_SINT8:
+ flags = AARCH64_RET_SINT8;
+ break;
+ case FFI_TYPE_SINT16:
+ flags = AARCH64_RET_SINT16;
+ break;
+ case FFI_TYPE_INT:
+ case FFI_TYPE_SINT32:
+ flags = AARCH64_RET_SINT32;
+ break;
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_UINT64:
+ flags = AARCH64_RET_INT64;
+ break;
+ case FFI_TYPE_POINTER:
+ flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
+ break;
+
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_LONGDOUBLE:
+ case FFI_TYPE_STRUCT:
+ case FFI_TYPE_COMPLEX:
+ flags = is_vfp_type (rtype);
+ if (flags == 0)
+ {
+ size_t s = rtype->size;
+ if (s > 16)
+ {
+ flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
+ bytes += 8;
+ }
+ else if (s == 16)
+ flags = AARCH64_RET_INT128;
+ else if (s == 8)
+ flags = AARCH64_RET_INT64;
+ else
+ flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
+ }
+ break;
+
+ default:
+ abort();
+ }
+
+ for (i = 0, n = cif->nargs; i < n; i++)
+ if (is_vfp_type (cif->arg_types[i]))
+ {
+ flags |= AARCH64_FLAG_ARG_V;
+ break;
+ }
+
+ /* Round the stack up to a multiple of the stack alignment requirement. */
+ cif->bytes = (unsigned) FFI_ALIGN(bytes, 16);
+ cif->flags = flags;
+#if defined (__APPLE__)
+ cif->aarch64_nfixedargs = 0;
+#endif
+
+ return FFI_OK;
+}
+
+#if defined (__APPLE__)
+/* Perform Apple-specific cif processing for variadic calls.  */
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep_var(ffi_cif *cif, unsigned int nfixedargs,
+ unsigned int ntotalargs)
+{
+ ffi_status status = ffi_prep_cif_machdep (cif);
+ cif->aarch64_nfixedargs = nfixedargs;
+ return status;
+}
+#endif /* __APPLE__ */
+
+extern void ffi_call_SYSV (struct call_context *context, void *frame,
+ void (*fn)(void), void *rvalue, int flags,
+ void *closure) FFI_HIDDEN;
+
+/* Call a function with the provided arguments and capture the return
+ value. */
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
+ void **avalue, void *closure)
+{
+ struct call_context *context;
+ void *stack, *frame, *rvalue;
+ struct arg_state state;
+ size_t stack_bytes, rtype_size, rsize;
+ int i, nargs, flags;
+ ffi_type *rtype;
+
+ flags = cif->flags;
+ rtype = cif->rtype;
+ rtype_size = rtype->size;
+ stack_bytes = cif->bytes;
+
+ /* If the target function returns a structure via hidden pointer,
+ then we cannot allow a null rvalue. Otherwise, mash a null
+ rvalue to void return type. */
+ rsize = 0;
+ if (flags & AARCH64_RET_IN_MEM)
+ {
+ if (orig_rvalue == NULL)
+ rsize = rtype_size;
+ }
+ else if (orig_rvalue == NULL)
+ flags &= AARCH64_FLAG_ARG_V;
+ else if (flags & AARCH64_RET_NEED_COPY)
+ rsize = 16;
+
+  /* Allocate consecutive stack space for everything we'll need. */
+ context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
+ stack = context + 1;
+ frame = (void*)((uintptr_t)stack + (uintptr_t)stack_bytes);
+ rvalue = (rsize ? (void*)((uintptr_t)frame + 32) : orig_rvalue);
+
+ arg_init (&state);
+ for (i = 0, nargs = cif->nargs; i < nargs; i++)
+ {
+ ffi_type *ty = cif->arg_types[i];
+ size_t s = ty->size;
+ void *a = avalue[i];
+ int h, t;
+
+ t = ty->type;
+ switch (t)
+ {
+ case FFI_TYPE_VOID:
+ FFI_ASSERT (0);
+ break;
+
+ /* If the argument is a basic type the argument is allocated to an
+ appropriate register, or if none are available, to the stack. */
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_POINTER:
+ do_pointer:
+ {
+ ffi_arg ext = extend_integer_type (a, t);
+ if (state.ngrn < N_X_ARG_REG)
+ context->x[state.ngrn++] = ext;
+ else
+ {
+ void *d = allocate_to_stack (&state, stack, ty->alignment, s);
+ state.ngrn = N_X_ARG_REG;
+ /* Note that the default abi extends each argument
+ to a full 64-bit slot, while the iOS abi allocates
+ only enough space. */
+#ifdef __APPLE__
+ memcpy(d, a, s);
+#else
+ *(ffi_arg *)d = ext;
+#endif
+ }
+ }
+ break;
+
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_LONGDOUBLE:
+ case FFI_TYPE_STRUCT:
+ case FFI_TYPE_COMPLEX:
+ {
+ void *dest;
+
+ h = is_vfp_type (ty);
+ if (h)
+ {
+ int elems = 4 - (h & 3);
+#ifdef _M_ARM64 /* for handling armasm calling convention */
+ if (cif->is_variadic)
+ {
+ if (state.ngrn + elems <= N_X_ARG_REG)
+ {
+ dest = &context->x[state.ngrn];
+ state.ngrn += elems;
+ extend_hfa_type(dest, a, h);
+ break;
+ }
+ state.nsrn = N_X_ARG_REG;
+ dest = allocate_to_stack(&state, stack, ty->alignment, s);
+ }
+ else
+ {
+#endif /* for handling armasm calling convention */
+ if (state.nsrn + elems <= N_V_ARG_REG)
+ {
+ dest = &context->v[state.nsrn];
+ state.nsrn += elems;
+ extend_hfa_type (dest, a, h);
+ break;
+ }
+ state.nsrn = N_V_ARG_REG;
+ dest = allocate_to_stack (&state, stack, ty->alignment, s);
+#ifdef _M_ARM64 /* for handling armasm calling convention */
+ }
+#endif /* for handling armasm calling convention */
+ }
+ else if (s > 16)
+ {
+ /* If the argument is a composite type that is larger than 16
+ bytes, then the argument has been copied to memory, and
+ the argument is replaced by a pointer to the copy. */
+ a = &avalue[i];
+ t = FFI_TYPE_POINTER;
+ s = sizeof (void *);
+ goto do_pointer;
+ }
+ else
+ {
+ size_t n = (s + 7) / 8;
+ if (state.ngrn + n <= N_X_ARG_REG)
+ {
+ /* If the argument is a composite type and the size in
+ double-words is not more than the number of available
+ X registers, then the argument is copied into
+ consecutive X registers. */
+ dest = &context->x[state.ngrn];
+ state.ngrn += (unsigned int)n;
+ }
+ else
+ {
+ /* Otherwise, there are insufficient X registers. Further
+ X register allocations are prevented, the NSAA is
+ adjusted and the argument is copied to memory at the
+ adjusted NSAA. */
+ state.ngrn = N_X_ARG_REG;
+ dest = allocate_to_stack (&state, stack, ty->alignment, s);
+ }
+ }
+ memcpy (dest, a, s);
+ }
+ break;
+
+ default:
+ abort();
+ }
+
+#if defined (__APPLE__)
+ if (i + 1 == cif->aarch64_nfixedargs)
+ {
+ state.ngrn = N_X_ARG_REG;
+ state.nsrn = N_V_ARG_REG;
+ state.allocating_variadic = 1;
+ }
+#endif
+ }
+
+ ffi_call_SYSV (context, frame, fn, rvalue, flags, closure);
+
+ if (flags & AARCH64_RET_NEED_COPY)
+ memcpy (orig_rvalue, rvalue, rtype_size);
+}
+
+void
+ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+#ifdef FFI_GO_CLOSURES
+void
+ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
+ void **avalue, void *closure)
+{
+ ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
+#endif /* FFI_GO_CLOSURES */
+
+/* Build a trampoline. */
+
+extern void ffi_closure_SYSV (void) FFI_HIDDEN;
+extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure *closure,
+ ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*),
+ void *user_data,
+ void *codeloc)
+{
+ if (cif->abi != FFI_SYSV)
+ return FFI_BAD_ABI;
+
+ void (*start)(void);
+
+ if (cif->flags & AARCH64_FLAG_ARG_V)
+ start = ffi_closure_SYSV_V;
+ else
+ start = ffi_closure_SYSV;
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+#ifdef __MACH__
+ void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE);
+ config[0] = closure;
+ config[1] = start;
+#endif
+#else
+ static const unsigned char trampoline[16] = {
+ 0x90, 0x00, 0x00, 0x58, /* ldr x16, tramp+16 */
+ 0xf1, 0xff, 0xff, 0x10, /* adr x17, tramp+0 */
+ 0x00, 0x02, 0x1f, 0xd6 /* br x16 */
+ };
+ char *tramp = closure->tramp;
+
+ memcpy (tramp, trampoline, sizeof(trampoline));
+
+ *(UINT64 *)(tramp + 16) = (uintptr_t)start;
+
+ ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);
+
+ /* Also flush the cache for code mapping. */
+#ifdef _M_ARM64
+ // Not using dlmalloc.c for Windows ARM64 builds
+ // so calling ffi_data_to_code_pointer() isn't necessary
+ unsigned char *tramp_code = tramp;
+ #else
+ unsigned char *tramp_code = ffi_data_to_code_pointer (tramp);
+ #endif
+ ffi_clear_cache (tramp_code, tramp_code + FFI_TRAMPOLINE_SIZE);
+#endif
+
+ closure->cif = cif;
+ closure->fun = fun;
+ closure->user_data = user_data;
+
+ return FFI_OK;
+}
+
+#ifdef FFI_GO_CLOSURES
+extern void ffi_go_closure_SYSV (void) FFI_HIDDEN;
+extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif,
+ void (*fun)(ffi_cif*,void*,void**,void*))
+{
+ void (*start)(void);
+
+ if (cif->abi != FFI_SYSV)
+ return FFI_BAD_ABI;
+
+ if (cif->flags & AARCH64_FLAG_ARG_V)
+ start = ffi_go_closure_SYSV_V;
+ else
+ start = ffi_go_closure_SYSV;
+
+ closure->tramp = start;
+ closure->cif = cif;
+ closure->fun = fun;
+
+ return FFI_OK;
+}
+#endif /* FFI_GO_CLOSURES */
+
+/* Primary handler to setup and invoke a function within a closure.
+
+ A closure when invoked enters via the assembler wrapper
+ ffi_closure_SYSV(). The wrapper allocates a call context on the
+ stack, saves the interesting registers (from the perspective of
+ the calling convention) into the context then passes control to
+ ffi_closure_SYSV_inner() passing the saved context and a pointer to
+ the stack at the point ffi_closure_SYSV() was invoked.
+
+ On the return path the assembler wrapper will reload call context
+ registers.
+
+   ffi_closure_SYSV_inner() marshals the call context into ffi value
+   descriptors, invokes the wrapped function, then marshals the return
+ value back into the call context. */
+
+int FFI_HIDDEN
+ffi_closure_SYSV_inner (ffi_cif *cif,
+ void (*fun)(ffi_cif*,void*,void**,void*),
+ void *user_data,
+ struct call_context *context,
+ void *stack, void *rvalue, void *struct_rvalue)
+{
+ void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
+ int i, h, nargs, flags;
+ struct arg_state state;
+
+ arg_init (&state);
+
+ for (i = 0, nargs = cif->nargs; i < nargs; i++)
+ {
+ ffi_type *ty = cif->arg_types[i];
+ int t = ty->type;
+ size_t n, s = ty->size;
+
+ switch (t)
+ {
+ case FFI_TYPE_VOID:
+ FFI_ASSERT (0);
+ break;
+
+ case FFI_TYPE_INT:
+ case FFI_TYPE_UINT8:
+ case FFI_TYPE_SINT8:
+ case FFI_TYPE_UINT16:
+ case FFI_TYPE_SINT16:
+ case FFI_TYPE_UINT32:
+ case FFI_TYPE_SINT32:
+ case FFI_TYPE_UINT64:
+ case FFI_TYPE_SINT64:
+ case FFI_TYPE_POINTER:
+ avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s);
+ break;
+
+ case FFI_TYPE_FLOAT:
+ case FFI_TYPE_DOUBLE:
+ case FFI_TYPE_LONGDOUBLE:
+ case FFI_TYPE_STRUCT:
+ case FFI_TYPE_COMPLEX:
+ h = is_vfp_type (ty);
+ if (h)
+ {
+ n = 4 - (h & 3);
+#ifdef _M_ARM64 /* for handling armasm calling convention */
+ if (cif->is_variadic)
+ {
+ if (state.ngrn + n <= N_X_ARG_REG)
+ {
+ void *reg = &context->x[state.ngrn];
+ state.ngrn += (unsigned int)n;
+
+ /* Eeek! We need a pointer to the structure, however the
+ homogeneous float elements are being passed in individual
+ registers, therefore for float and double the structure
+ is not represented as a contiguous sequence of bytes in
+ our saved register context. We don't need the original
+ contents of the register storage, so we reformat the
+ structure into the same memory. */
+ avalue[i] = compress_hfa_type(reg, reg, h);
+ }
+ else
+ {
+ state.ngrn = N_X_ARG_REG;
+ state.nsrn = N_V_ARG_REG;
+ avalue[i] = allocate_to_stack(&state, stack,
+ ty->alignment, s);
+ }
+ }
+ else
+ {
+#endif /* for handling armasm calling convention */
+ if (state.nsrn + n <= N_V_ARG_REG)
+ {
+ void *reg = &context->v[state.nsrn];
+ state.nsrn += (unsigned int)n;
+ avalue[i] = compress_hfa_type(reg, reg, h);
+ }
+ else
+ {
+ state.nsrn = N_V_ARG_REG;
+ avalue[i] = allocate_to_stack(&state, stack,
+ ty->alignment, s);
+ }
+#ifdef _M_ARM64 /* for handling armasm calling convention */
+ }
+#endif /* for handling armasm calling convention */
+ }
+ else if (s > 16)
+ {
+ /* Replace Composite type of size greater than 16 with a
+ pointer. */
+ avalue[i] = *(void **)
+ allocate_int_to_reg_or_stack (context, &state, stack,
+ sizeof (void *));
+ }
+ else
+ {
+ n = (s + 7) / 8;
+ if (state.ngrn + n <= N_X_ARG_REG)
+ {
+ avalue[i] = &context->x[state.ngrn];
+ state.ngrn += (unsigned int)n;
+ }
+ else
+ {
+ state.ngrn = N_X_ARG_REG;
+ avalue[i] = allocate_to_stack(&state, stack,
+ ty->alignment, s);
+ }
+ }
+ break;
+
+ default:
+ abort();
+ }
+
+#if defined (__APPLE__)
+ if (i + 1 == cif->aarch64_nfixedargs)
+ {
+ state.ngrn = N_X_ARG_REG;
+ state.nsrn = N_V_ARG_REG;
+ state.allocating_variadic = 1;
+ }
+#endif
+ }
+
+ flags = cif->flags;
+ if (flags & AARCH64_RET_IN_MEM)
+ rvalue = struct_rvalue;
+
+ fun (cif, rvalue, avalue, user_data);
+
+ return flags;
+}
+
+#endif /* (__aarch64__) || defined(__arm64__)|| defined (_M_ARM64)*/
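
A note on the encoding computed at the `done' label of is_vfp_type() above: for an aggregate of COUNT elements of scalar type TYPE (FFI_TYPE_FLOAT == 2, _DOUBLE == 3, _LONGDOUBLE == 4), the result is TYPE * 4 + (4 - COUNT), which lands exactly on the AARCH64_RET_S4 .. AARCH64_RET_Q1 constants defined in internal.h further down. A hypothetical decoder, for illustration only and not part of the source:

#include <stdio.h>

/* Invert h = TYPE * 4 + (4 - COUNT) as produced by is_vfp_type(). */
static void
describe_vfp_code (int h)
{
  static const char *const kind[] = { "float", "double", "long double" };
  int count = 4 - (h & 3);   /* the same expression the source uses */
  int type  = h / 4;         /* 2 = float, 3 = double, 4 = long double */

  if (h >= 8 && h <= 19)     /* AARCH64_RET_S4 .. AARCH64_RET_Q1 */
    printf ("HFA of %d x %s\n", count, kind[type - 2]);
  else
    printf ("%d is not a VFP return code\n", h);
}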
diff --git a/contrib/restricted/libffi/src/aarch64/ffitarget.h b/contrib/restricted/libffi/src/aarch64/ffitarget.h
index ecb6d2deae..ddce8f21eb 100644
--- a/contrib/restricted/libffi/src/aarch64/ffitarget.h
+++ b/contrib/restricted/libffi/src/aarch64/ffitarget.h
@@ -1,92 +1,92 @@
-/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-``Software''), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef LIBFFI_TARGET_H
-#define LIBFFI_TARGET_H
-
-#ifndef LIBFFI_H
-#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
-#endif
-
-#ifndef LIBFFI_ASM
-#ifdef __ILP32__
-#define FFI_SIZEOF_ARG 8
-#define FFI_SIZEOF_JAVA_RAW 4
-typedef unsigned long long ffi_arg;
-typedef signed long long ffi_sarg;
-#elif defined(_M_ARM64)
-#define FFI_SIZEOF_ARG 8
-typedef unsigned long long ffi_arg;
-typedef signed long long ffi_sarg;
-#else
-typedef unsigned long ffi_arg;
-typedef signed long ffi_sarg;
-#endif
-
-typedef enum ffi_abi
- {
- FFI_FIRST_ABI = 0,
- FFI_SYSV,
- FFI_LAST_ABI,
- FFI_DEFAULT_ABI = FFI_SYSV
- } ffi_abi;
-#endif
-
-/* ---- Definitions for closures ----------------------------------------- */
-
-#define FFI_CLOSURES 1
-#define FFI_NATIVE_RAW_API 0
-
-#if defined (FFI_EXEC_TRAMPOLINE_TABLE) && FFI_EXEC_TRAMPOLINE_TABLE
-
-#ifdef __MACH__
-#define FFI_TRAMPOLINE_SIZE 16
-#define FFI_TRAMPOLINE_CLOSURE_OFFSET 16
-#else
-#error "No trampoline table implementation"
-#endif
-
-#else
-#define FFI_TRAMPOLINE_SIZE 24
-#define FFI_TRAMPOLINE_CLOSURE_OFFSET FFI_TRAMPOLINE_SIZE
-#endif
-
-#ifdef _M_ARM64
-#define FFI_EXTRA_CIF_FIELDS unsigned is_variadic
-#endif
-
-/* ---- Internal ---- */
-
-#if defined (__APPLE__)
-#define FFI_TARGET_SPECIFIC_VARIADIC
-#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_nfixedargs
-#elif !defined(_M_ARM64)
-/* iOS and Windows reserve x18 for the system. Disable Go closures until
- a new static chain is chosen. */
-#define FFI_GO_CLOSURES 1
-#endif
-
-#ifndef _M_ARM64
-/* No complex type on Windows */
-#define FFI_TARGET_HAS_COMPLEX_TYPE
-#endif
-
-#endif
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source. Use ffi.h instead."
+#endif
+
+#ifndef LIBFFI_ASM
+#ifdef __ILP32__
+#define FFI_SIZEOF_ARG 8
+#define FFI_SIZEOF_JAVA_RAW 4
+typedef unsigned long long ffi_arg;
+typedef signed long long ffi_sarg;
+#elif defined(_M_ARM64)
+#define FFI_SIZEOF_ARG 8
+typedef unsigned long long ffi_arg;
+typedef signed long long ffi_sarg;
+#else
+typedef unsigned long ffi_arg;
+typedef signed long ffi_sarg;
+#endif
+
+typedef enum ffi_abi
+ {
+ FFI_FIRST_ABI = 0,
+ FFI_SYSV,
+ FFI_LAST_ABI,
+ FFI_DEFAULT_ABI = FFI_SYSV
+ } ffi_abi;
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_NATIVE_RAW_API 0
+
+#if defined (FFI_EXEC_TRAMPOLINE_TABLE) && FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+#define FFI_TRAMPOLINE_SIZE 16
+#define FFI_TRAMPOLINE_CLOSURE_OFFSET 16
+#else
+#error "No trampoline table implementation"
+#endif
+
+#else
+#define FFI_TRAMPOLINE_SIZE 24
+#define FFI_TRAMPOLINE_CLOSURE_OFFSET FFI_TRAMPOLINE_SIZE
+#endif
+
+#ifdef _M_ARM64
+#define FFI_EXTRA_CIF_FIELDS unsigned is_variadic
+#endif
+
+/* ---- Internal ---- */
+
+#if defined (__APPLE__)
+#define FFI_TARGET_SPECIFIC_VARIADIC
+#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_nfixedargs
+#elif !defined(_M_ARM64)
+/* iOS and Windows reserve x18 for the system. Disable Go closures until
+ a new static chain is chosen. */
+#define FFI_GO_CLOSURES 1
+#endif
+
+#ifndef _M_ARM64
+/* No complex type on Windows */
+#define FFI_TARGET_HAS_COMPLEX_TYPE
+#endif
+
+#endif
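
ffitarget.h enables closures (FFI_CLOSURES) and fixes the trampoline geometry consumed by ffi_prep_closure_loc() in ffi.c above. A minimal, illustrative sketch of exercising that path through libffi's portable closure-allocation API (the handler and bias value are invented for the example):

#include <ffi.h>
#include <stdio.h>

/* Invoked via the trampoline; adds *user_data to the first argument. */
static void
handler (ffi_cif *cif, void *ret, void **args, void *user_data)
{
  *(ffi_arg *) ret = *(int *) args[0] + *(int *) user_data;
}

int
main (void)
{
  ffi_cif cif;
  ffi_type *arg_types[1] = { &ffi_type_sint };
  void *code;
  ffi_closure *closure = ffi_closure_alloc (sizeof (ffi_closure), &code);
  int bias = 10;

  if (closure != NULL
      && ffi_prep_cif (&cif, FFI_DEFAULT_ABI, 1,
                       &ffi_type_sint, arg_types) == FFI_OK
      && ffi_prep_closure_loc (closure, &cif, handler, &bias, code) == FFI_OK)
    {
      int (*fn) (int) = (int (*)(int)) code;
      printf ("%d\n", fn (32));   /* prints 42 */
    }
  ffi_closure_free (closure);
  return 0;
}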
diff --git a/contrib/restricted/libffi/src/aarch64/internal.h b/contrib/restricted/libffi/src/aarch64/internal.h
index 9c3e07725a..2691dafa98 100644
--- a/contrib/restricted/libffi/src/aarch64/internal.h
+++ b/contrib/restricted/libffi/src/aarch64/internal.h
@@ -1,67 +1,67 @@
-/*
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-``Software''), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#define AARCH64_RET_VOID 0
-#define AARCH64_RET_INT64 1
-#define AARCH64_RET_INT128 2
-
-#define AARCH64_RET_UNUSED3 3
-#define AARCH64_RET_UNUSED4 4
-#define AARCH64_RET_UNUSED5 5
-#define AARCH64_RET_UNUSED6 6
-#define AARCH64_RET_UNUSED7 7
-
-/* Note that FFI_TYPE_FLOAT == 2, _DOUBLE == 3, _LONGDOUBLE == 4,
- so _S4 through _Q1 are laid out as (TYPE * 4) + (4 - COUNT). */
-#define AARCH64_RET_S4 8
-#define AARCH64_RET_S3 9
-#define AARCH64_RET_S2 10
-#define AARCH64_RET_S1 11
-
-#define AARCH64_RET_D4 12
-#define AARCH64_RET_D3 13
-#define AARCH64_RET_D2 14
-#define AARCH64_RET_D1 15
-
-#define AARCH64_RET_Q4 16
-#define AARCH64_RET_Q3 17
-#define AARCH64_RET_Q2 18
-#define AARCH64_RET_Q1 19
-
-/* Note that each of the sub-64-bit integers gets two entries. */
-#define AARCH64_RET_UINT8 20
-#define AARCH64_RET_UINT16 22
-#define AARCH64_RET_UINT32 24
-
-#define AARCH64_RET_SINT8 26
-#define AARCH64_RET_SINT16 28
-#define AARCH64_RET_SINT32 30
-
-#define AARCH64_RET_MASK 31
-
-#define AARCH64_RET_IN_MEM (1 << 5)
-#define AARCH64_RET_NEED_COPY (1 << 6)
-
-#define AARCH64_FLAG_ARG_V_BIT 7
-#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT)
-
-#define N_X_ARG_REG 8
-#define N_V_ARG_REG 8
-#define CALL_CONTEXT_SIZE (N_V_ARG_REG * 16 + N_X_ARG_REG * 8)
+/*
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#define AARCH64_RET_VOID 0
+#define AARCH64_RET_INT64 1
+#define AARCH64_RET_INT128 2
+
+#define AARCH64_RET_UNUSED3 3
+#define AARCH64_RET_UNUSED4 4
+#define AARCH64_RET_UNUSED5 5
+#define AARCH64_RET_UNUSED6 6
+#define AARCH64_RET_UNUSED7 7
+
+/* Note that FFI_TYPE_FLOAT == 2, _DOUBLE == 3, _LONGDOUBLE == 4,
+ so _S4 through _Q1 are laid out as (TYPE * 4) + (4 - COUNT). */
+#define AARCH64_RET_S4 8
+#define AARCH64_RET_S3 9
+#define AARCH64_RET_S2 10
+#define AARCH64_RET_S1 11
+
+#define AARCH64_RET_D4 12
+#define AARCH64_RET_D3 13
+#define AARCH64_RET_D2 14
+#define AARCH64_RET_D1 15
+
+#define AARCH64_RET_Q4 16
+#define AARCH64_RET_Q3 17
+#define AARCH64_RET_Q2 18
+#define AARCH64_RET_Q1 19
+
+/* Note that each of the sub-64-bit integers gets two entries. */
+#define AARCH64_RET_UINT8 20
+#define AARCH64_RET_UINT16 22
+#define AARCH64_RET_UINT32 24
+
+#define AARCH64_RET_SINT8 26
+#define AARCH64_RET_SINT16 28
+#define AARCH64_RET_SINT32 30
+
+#define AARCH64_RET_MASK 31
+
+#define AARCH64_RET_IN_MEM (1 << 5)
+#define AARCH64_RET_NEED_COPY (1 << 6)
+
+#define AARCH64_FLAG_ARG_V_BIT 7
+#define AARCH64_FLAG_ARG_V (1 << AARCH64_FLAG_ARG_V_BIT)
+
+#define N_X_ARG_REG 8
+#define N_V_ARG_REG 8
+#define CALL_CONTEXT_SIZE (N_V_ARG_REG * 16 + N_X_ARG_REG * 8)
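
The scalar return codes above pack both the element type and the element count into one small integer, and CALL_CONTEXT_SIZE is simply the eight 16-byte vector slots plus the eight 8-byte core slots, i.e. 192 bytes. A hedged sketch of the encoding arithmetic; the helper name is illustrative and not part of libffi:

/* Illustrative only: recover the AARCH64_RET_* code for an N-element
   homogeneous float aggregate per the (TYPE * 4) + (4 - COUNT) rule,
   where TYPE is 2 (float), 3 (double) or 4 (long double).  */
static int
aarch64_hfa_ret_code (int type, int count)
{
  /* e.g. three doubles: 3*4 + (4-3) == 13 == AARCH64_RET_D3.  */
  return type * 4 + (4 - count);
}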
diff --git a/contrib/restricted/libffi/src/aarch64/sysv.S b/contrib/restricted/libffi/src/aarch64/sysv.S
index 6761ee1ea9..4d8d85139a 100644
--- a/contrib/restricted/libffi/src/aarch64/sysv.S
+++ b/contrib/restricted/libffi/src/aarch64/sysv.S
@@ -1,440 +1,440 @@
-/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
-
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-``Software''), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#if defined(__aarch64__) || defined(__arm64__)
-#define LIBFFI_ASM
-#include <fficonfig.h>
-#include <ffi.h>
-#include <ffi_cfi.h>
-#include "internal.h"
-
-#ifdef HAVE_MACHINE_ASM_H
-#include <machine/asm.h>
-#else
-#ifdef __USER_LABEL_PREFIX__
-#define CONCAT1(a, b) CONCAT2(a, b)
-#define CONCAT2(a, b) a ## b
-
-/* Use the right prefix for global labels. */
-#define CNAME(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
-#else
-#define CNAME(x) x
-#endif
-#endif
-
-#ifdef __AARCH64EB__
-# define BE(X) X
-#else
-# define BE(X) 0
-#endif
-
-#ifdef __ILP32__
-#define PTR_REG(n) w##n
-#else
-#define PTR_REG(n) x##n
-#endif
-
-#ifdef __ILP32__
-#define PTR_SIZE 4
-#else
-#define PTR_SIZE 8
-#endif
-
- .text
- .align 4
-
-/* ffi_call_SYSV
- extern void ffi_call_SYSV (void *stack, void *frame,
- void (*fn)(void), void *rvalue,
- int flags, void *closure);
-
- Therefore on entry we have:
-
- x0 stack
- x1 frame
- x2 fn
- x3 rvalue
- x4 flags
- x5 closure
-*/
-
- cfi_startproc
-CNAME(ffi_call_SYSV):
- /* Use a stack frame allocated by our caller. */
- cfi_def_cfa(x1, 32);
- stp x29, x30, [x1]
- mov x29, x1
- mov sp, x0
- cfi_def_cfa_register(x29)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
-
- mov x9, x2 /* save fn */
- mov x8, x3 /* install structure return */
-#ifdef FFI_GO_CLOSURES
- mov x18, x5 /* install static chain */
-#endif
- stp x3, x4, [x29, #16] /* save rvalue and flags */
-
- /* Load the vector argument passing registers, if necessary. */
- tbz w4, #AARCH64_FLAG_ARG_V_BIT, 1f
- ldp q0, q1, [sp, #0]
- ldp q2, q3, [sp, #32]
- ldp q4, q5, [sp, #64]
- ldp q6, q7, [sp, #96]
-1:
- /* Load the core argument passing registers, including
- the structure return pointer. */
- ldp x0, x1, [sp, #16*N_V_ARG_REG + 0]
- ldp x2, x3, [sp, #16*N_V_ARG_REG + 16]
- ldp x4, x5, [sp, #16*N_V_ARG_REG + 32]
- ldp x6, x7, [sp, #16*N_V_ARG_REG + 48]
-
- /* Deallocate the context, leaving the stacked arguments. */
- add sp, sp, #CALL_CONTEXT_SIZE
-
- blr x9 /* call fn */
-
- ldp x3, x4, [x29, #16] /* reload rvalue and flags */
-
- /* Partially deconstruct the stack frame. */
- mov sp, x29
- cfi_def_cfa_register (sp)
- ldp x29, x30, [x29]
-
- /* Save the return value as directed. */
- adr x5, 0f
- and w4, w4, #AARCH64_RET_MASK
- add x5, x5, x4, lsl #3
- br x5
-
- /* Note that each table entry is 2 insns, and thus 8 bytes.
- For integer data, note that we're storing into ffi_arg
- and therefore we want to extend to 64 bits; these types
- have two consecutive entries allocated for them. */
- .align 4
-0: ret /* VOID */
- nop
-1: str x0, [x3] /* INT64 */
- ret
-2: stp x0, x1, [x3] /* INT128 */
- ret
-3: brk #1000 /* UNUSED */
- ret
-4: brk #1000 /* UNUSED */
- ret
-5: brk #1000 /* UNUSED */
- ret
-6: brk #1000 /* UNUSED */
- ret
-7: brk #1000 /* UNUSED */
- ret
-8: st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] /* S4 */
- ret
-9: st3 { v0.s, v1.s, v2.s }[0], [x3] /* S3 */
- ret
-10: stp s0, s1, [x3] /* S2 */
- ret
-11: str s0, [x3] /* S1 */
- ret
-12: st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] /* D4 */
- ret
-13: st3 { v0.d, v1.d, v2.d }[0], [x3] /* D3 */
- ret
-14: stp d0, d1, [x3] /* D2 */
- ret
-15: str d0, [x3] /* D1 */
- ret
-16: str q3, [x3, #48] /* Q4 */
- nop
-17: str q2, [x3, #32] /* Q3 */
- nop
-18: stp q0, q1, [x3] /* Q2 */
- ret
-19: str q0, [x3] /* Q1 */
- ret
-20: uxtb w0, w0 /* UINT8 */
- str x0, [x3]
-21: ret /* reserved */
- nop
-22: uxth w0, w0 /* UINT16 */
- str x0, [x3]
-23: ret /* reserved */
- nop
-24: mov w0, w0 /* UINT32 */
- str x0, [x3]
-25: ret /* reserved */
- nop
-26: sxtb x0, w0 /* SINT8 */
- str x0, [x3]
-27: ret /* reserved */
- nop
-28: sxth x0, w0 /* SINT16 */
- str x0, [x3]
-29: ret /* reserved */
- nop
-30: sxtw x0, w0 /* SINT32 */
- str x0, [x3]
-31: ret /* reserved */
- nop
-
- cfi_endproc
-
- .globl CNAME(ffi_call_SYSV)
- FFI_HIDDEN(CNAME(ffi_call_SYSV))
-#ifdef __ELF__
- .type CNAME(ffi_call_SYSV), #function
- .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
-#endif
-
-/* ffi_closure_SYSV
-
- Closure invocation glue. This is the low-level code invoked directly by
- the closure trampoline to set up and call a closure.
-
- On entry x17 points to a struct ffi_closure; x16 has been clobbered and
- all other registers are preserved.
-
- We allocate a call context and save the argument-passing registers,
- then invoke the generic C ffi_closure_SYSV_inner() function to do all
- the real work; on return we load the result-passing registers back from
- the call context.
-*/
-
-#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64)
-
- .align 4
-CNAME(ffi_closure_SYSV_V):
- cfi_startproc
- stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
- cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
-
- /* Save the argument passing vector registers. */
- stp q0, q1, [sp, #16 + 0]
- stp q2, q3, [sp, #16 + 32]
- stp q4, q5, [sp, #16 + 64]
- stp q6, q7, [sp, #16 + 96]
- b 0f
- cfi_endproc
-
- .globl CNAME(ffi_closure_SYSV_V)
- FFI_HIDDEN(CNAME(ffi_closure_SYSV_V))
-#ifdef __ELF__
- .type CNAME(ffi_closure_SYSV_V), #function
- .size CNAME(ffi_closure_SYSV_V), . - CNAME(ffi_closure_SYSV_V)
-#endif
-
- .align 4
- cfi_startproc
-CNAME(ffi_closure_SYSV):
- stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
- cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
-0:
- mov x29, sp
-
- /* Save the argument passing core registers. */
- stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
- stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
- stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
- stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
-
- /* Load ffi_closure_inner arguments. */
- ldp PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET] /* load cif, fn */
- ldr PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2] /* load user_data */
-.Ldo_closure:
- add x3, sp, #16 /* load context */
- add x4, sp, #ffi_closure_SYSV_FS /* load stack */
- add x5, sp, #16+CALL_CONTEXT_SIZE /* load rvalue */
- mov x6, x8 /* load struct_rval */
- bl CNAME(ffi_closure_SYSV_inner)
-
- /* Load the return value as directed. */
- adr x1, 0f
- and w0, w0, #AARCH64_RET_MASK
- add x1, x1, x0, lsl #3
- add x3, sp, #16+CALL_CONTEXT_SIZE
- br x1
-
- /* Note that each table entry is 2 insns, and thus 8 bytes. */
- .align 4
-0: b 99f /* VOID */
- nop
-1: ldr x0, [x3] /* INT64 */
- b 99f
-2: ldp x0, x1, [x3] /* INT128 */
- b 99f
-3: brk #1000 /* UNUSED */
- nop
-4: brk #1000 /* UNUSED */
- nop
-5: brk #1000 /* UNUSED */
- nop
-6: brk #1000 /* UNUSED */
- nop
-7: brk #1000 /* UNUSED */
- nop
-8: ldr s3, [x3, #12] /* S4 */
- nop
-9: ldr s2, [x3, #8] /* S3 */
- nop
-10: ldp s0, s1, [x3] /* S2 */
- b 99f
-11: ldr s0, [x3] /* S1 */
- b 99f
-12: ldr d3, [x3, #24] /* D4 */
- nop
-13: ldr d2, [x3, #16] /* D3 */
- nop
-14: ldp d0, d1, [x3] /* D2 */
- b 99f
-15: ldr d0, [x3] /* D1 */
- b 99f
-16: ldr q3, [x3, #48] /* Q4 */
- nop
-17: ldr q2, [x3, #32] /* Q3 */
- nop
-18: ldp q0, q1, [x3] /* Q2 */
- b 99f
-19: ldr q0, [x3] /* Q1 */
- b 99f
-20: ldrb w0, [x3, #BE(7)] /* UINT8 */
- b 99f
-21: brk #1000 /* reserved */
- nop
-22: ldrh w0, [x3, #BE(6)] /* UINT16 */
- b 99f
-23: brk #1000 /* reserved */
- nop
-24: ldr w0, [x3, #BE(4)] /* UINT32 */
- b 99f
-25: brk #1000 /* reserved */
- nop
-26: ldrsb x0, [x3, #BE(7)] /* SINT8 */
- b 99f
-27: brk #1000 /* reserved */
- nop
-28: ldrsh x0, [x3, #BE(6)] /* SINT16 */
- b 99f
-29: brk #1000 /* reserved */
- nop
-30: ldrsw x0, [x3, #BE(4)] /* SINT32 */
- nop
-31: /* reserved */
-99: ldp x29, x30, [sp], #ffi_closure_SYSV_FS
- cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS)
- cfi_restore (x29)
- cfi_restore (x30)
- ret
- cfi_endproc
-
- .globl CNAME(ffi_closure_SYSV)
- FFI_HIDDEN(CNAME(ffi_closure_SYSV))
-#ifdef __ELF__
- .type CNAME(ffi_closure_SYSV), #function
- .size CNAME(ffi_closure_SYSV), . - CNAME(ffi_closure_SYSV)
-#endif
-
-#if FFI_EXEC_TRAMPOLINE_TABLE
-
-#ifdef __MACH__
-#include <mach/machine/vm_param.h>
- .align PAGE_MAX_SHIFT
-CNAME(ffi_closure_trampoline_table_page):
- .rept PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE
- adr x16, -PAGE_MAX_SIZE
- ldp x17, x16, [x16]
- br x16
- nop /* each entry in the trampoline config page is 2*sizeof(void*) so the trampoline itself cannot be smaller than 16 bytes */
- .endr
-
- .globl CNAME(ffi_closure_trampoline_table_page)
- FFI_HIDDEN(CNAME(ffi_closure_trampoline_table_page))
- #ifdef __ELF__
- .type CNAME(ffi_closure_trampoline_table_page), #function
- .size CNAME(ffi_closure_trampoline_table_page), . - CNAME(ffi_closure_trampoline_table_page)
- #endif
-#endif
-
-#endif /* FFI_EXEC_TRAMPOLINE_TABLE */
-
-#ifdef FFI_GO_CLOSURES
- .align 4
-CNAME(ffi_go_closure_SYSV_V):
- cfi_startproc
- stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
- cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
-
- /* Save the argument passing vector registers. */
- stp q0, q1, [sp, #16 + 0]
- stp q2, q3, [sp, #16 + 32]
- stp q4, q5, [sp, #16 + 64]
- stp q6, q7, [sp, #16 + 96]
- b 0f
- cfi_endproc
-
- .globl CNAME(ffi_go_closure_SYSV_V)
- FFI_HIDDEN(CNAME(ffi_go_closure_SYSV_V))
-#ifdef __ELF__
- .type CNAME(ffi_go_closure_SYSV_V), #function
- .size CNAME(ffi_go_closure_SYSV_V), . - CNAME(ffi_go_closure_SYSV_V)
-#endif
-
- .align 4
- cfi_startproc
-CNAME(ffi_go_closure_SYSV):
- stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
- cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
- cfi_rel_offset (x29, 0)
- cfi_rel_offset (x30, 8)
-0:
- mov x29, sp
-
- /* Save the argument passing core registers. */
- stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
- stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
- stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
- stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
-
- /* Load ffi_closure_inner arguments. */
- ldp PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]/* load cif, fn */
- mov x2, x18 /* load user_data */
- b .Ldo_closure
- cfi_endproc
-
- .globl CNAME(ffi_go_closure_SYSV)
- FFI_HIDDEN(CNAME(ffi_go_closure_SYSV))
-#ifdef __ELF__
- .type CNAME(ffi_go_closure_SYSV), #function
- .size CNAME(ffi_go_closure_SYSV), . - CNAME(ffi_go_closure_SYSV)
-#endif
-#endif /* FFI_GO_CLOSURES */
-#endif /* __arm64__ */
-
-#if defined __ELF__ && defined __linux__
- .section .note.GNU-stack,"",%progbits
-#endif
-
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#if defined(__aarch64__) || defined(__arm64__)
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+#include "internal.h"
+
+#ifdef HAVE_MACHINE_ASM_H
+#include <machine/asm.h>
+#else
+#ifdef __USER_LABEL_PREFIX__
+#define CONCAT1(a, b) CONCAT2(a, b)
+#define CONCAT2(a, b) a ## b
+
+/* Use the right prefix for global labels. */
+#define CNAME(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+#else
+#define CNAME(x) x
+#endif
+#endif
+
+#ifdef __AARCH64EB__
+# define BE(X) X
+#else
+# define BE(X) 0
+#endif
+
+#ifdef __ILP32__
+#define PTR_REG(n) w##n
+#else
+#define PTR_REG(n) x##n
+#endif
+
+#ifdef __ILP32__
+#define PTR_SIZE 4
+#else
+#define PTR_SIZE 8
+#endif
+
+ .text
+ .align 4
+
+/* ffi_call_SYSV
+ extern void ffi_call_SYSV (void *stack, void *frame,
+ void (*fn)(void), void *rvalue,
+ int flags, void *closure);
+
+ Therefore on entry we have:
+
+ x0 stack
+ x1 frame
+ x2 fn
+ x3 rvalue
+ x4 flags
+ x5 closure
+*/
+
+ cfi_startproc
+CNAME(ffi_call_SYSV):
+ /* Use a stack frame allocated by our caller. */
+ cfi_def_cfa(x1, 32);
+ stp x29, x30, [x1]
+ mov x29, x1
+ mov sp, x0
+ cfi_def_cfa_register(x29)
+ cfi_rel_offset (x29, 0)
+ cfi_rel_offset (x30, 8)
+
+ mov x9, x2 /* save fn */
+ mov x8, x3 /* install structure return */
+#ifdef FFI_GO_CLOSURES
+ mov x18, x5 /* install static chain */
+#endif
+ stp x3, x4, [x29, #16] /* save rvalue and flags */
+
+ /* Load the vector argument passing registers, if necessary. */
+ tbz w4, #AARCH64_FLAG_ARG_V_BIT, 1f
+ ldp q0, q1, [sp, #0]
+ ldp q2, q3, [sp, #32]
+ ldp q4, q5, [sp, #64]
+ ldp q6, q7, [sp, #96]
+1:
+ /* Load the core argument passing registers, including
+ the structure return pointer. */
+ ldp x0, x1, [sp, #16*N_V_ARG_REG + 0]
+ ldp x2, x3, [sp, #16*N_V_ARG_REG + 16]
+ ldp x4, x5, [sp, #16*N_V_ARG_REG + 32]
+ ldp x6, x7, [sp, #16*N_V_ARG_REG + 48]
+
+ /* Deallocate the context, leaving the stacked arguments. */
+ add sp, sp, #CALL_CONTEXT_SIZE
+
+ blr x9 /* call fn */
+
+ ldp x3, x4, [x29, #16] /* reload rvalue and flags */
+
+ /* Partially deconstruct the stack frame. */
+ mov sp, x29
+ cfi_def_cfa_register (sp)
+ ldp x29, x30, [x29]
+
+ /* Save the return value as directed. */
+ adr x5, 0f
+ and w4, w4, #AARCH64_RET_MASK
+ add x5, x5, x4, lsl #3
+ br x5
+
+ /* Note that each table entry is 2 insns, and thus 8 bytes.
+ For integer data, note that we're storing into ffi_arg
+ and therefore we want to extend to 64 bits; these types
+ have two consecutive entries allocated for them. */
+ .align 4
+0: ret /* VOID */
+ nop
+1: str x0, [x3] /* INT64 */
+ ret
+2: stp x0, x1, [x3] /* INT128 */
+ ret
+3: brk #1000 /* UNUSED */
+ ret
+4: brk #1000 /* UNUSED */
+ ret
+5: brk #1000 /* UNUSED */
+ ret
+6: brk #1000 /* UNUSED */
+ ret
+7: brk #1000 /* UNUSED */
+ ret
+8: st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] /* S4 */
+ ret
+9: st3 { v0.s, v1.s, v2.s }[0], [x3] /* S3 */
+ ret
+10: stp s0, s1, [x3] /* S2 */
+ ret
+11: str s0, [x3] /* S1 */
+ ret
+12: st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] /* D4 */
+ ret
+13: st3 { v0.d, v1.d, v2.d }[0], [x3] /* D3 */
+ ret
+14: stp d0, d1, [x3] /* D2 */
+ ret
+15: str d0, [x3] /* D1 */
+ ret
+16: str q3, [x3, #48] /* Q4 */
+ nop
+17: str q2, [x3, #32] /* Q3 */
+ nop
+18: stp q0, q1, [x3] /* Q2 */
+ ret
+19: str q0, [x3] /* Q1 */
+ ret
+20: uxtb w0, w0 /* UINT8 */
+ str x0, [x3]
+21: ret /* reserved */
+ nop
+22: uxth w0, w0 /* UINT16 */
+ str x0, [x3]
+23: ret /* reserved */
+ nop
+24: mov w0, w0 /* UINT32 */
+ str x0, [x3]
+25: ret /* reserved */
+ nop
+26: sxtb x0, w0 /* SINT8 */
+ str x0, [x3]
+27: ret /* reserved */
+ nop
+28: sxth x0, w0 /* SINT16 */
+ str x0, [x3]
+29: ret /* reserved */
+ nop
+30: sxtw x0, w0 /* SINT32 */
+ str x0, [x3]
+31: ret /* reserved */
+ nop
+
+ cfi_endproc
+
+ .globl CNAME(ffi_call_SYSV)
+ FFI_HIDDEN(CNAME(ffi_call_SYSV))
+#ifdef __ELF__
+ .type CNAME(ffi_call_SYSV), #function
+ .size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
+#endif
+
+/* ffi_closure_SYSV
+
+ Closure invocation glue. This is the low-level code invoked directly by
+ the closure trampoline to set up and call a closure.
+
+ On entry x17 points to a struct ffi_closure; x16 has been clobbered and
+ all other registers are preserved.
+
+ We allocate a call context and save the argument-passing registers,
+ then invoke the generic C ffi_closure_SYSV_inner() function to do all
+ the real work; on return we load the result-passing registers back from
+ the call context.
+*/
+
+#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64)
+
+ .align 4
+CNAME(ffi_closure_SYSV_V):
+ cfi_startproc
+ stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+ cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+ cfi_rel_offset (x29, 0)
+ cfi_rel_offset (x30, 8)
+
+ /* Save the argument passing vector registers. */
+ stp q0, q1, [sp, #16 + 0]
+ stp q2, q3, [sp, #16 + 32]
+ stp q4, q5, [sp, #16 + 64]
+ stp q6, q7, [sp, #16 + 96]
+ b 0f
+ cfi_endproc
+
+ .globl CNAME(ffi_closure_SYSV_V)
+ FFI_HIDDEN(CNAME(ffi_closure_SYSV_V))
+#ifdef __ELF__
+ .type CNAME(ffi_closure_SYSV_V), #function
+ .size CNAME(ffi_closure_SYSV_V), . - CNAME(ffi_closure_SYSV_V)
+#endif
+
+ .align 4
+ cfi_startproc
+CNAME(ffi_closure_SYSV):
+ stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+ cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+ cfi_rel_offset (x29, 0)
+ cfi_rel_offset (x30, 8)
+0:
+ mov x29, sp
+
+ /* Save the argument passing core registers. */
+ stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
+ stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
+ stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
+ stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
+
+ /* Load ffi_closure_inner arguments. */
+ ldp PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET] /* load cif, fn */
+ ldr PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2] /* load user_data */
+.Ldo_closure:
+ add x3, sp, #16 /* load context */
+ add x4, sp, #ffi_closure_SYSV_FS /* load stack */
+ add x5, sp, #16+CALL_CONTEXT_SIZE /* load rvalue */
+ mov x6, x8 /* load struct_rval */
+ bl CNAME(ffi_closure_SYSV_inner)
+
+ /* Load the return value as directed. */
+ adr x1, 0f
+ and w0, w0, #AARCH64_RET_MASK
+ add x1, x1, x0, lsl #3
+ add x3, sp, #16+CALL_CONTEXT_SIZE
+ br x1
+
+ /* Note that each table entry is 2 insns, and thus 8 bytes. */
+ .align 4
+0: b 99f /* VOID */
+ nop
+1: ldr x0, [x3] /* INT64 */
+ b 99f
+2: ldp x0, x1, [x3] /* INT128 */
+ b 99f
+3: brk #1000 /* UNUSED */
+ nop
+4: brk #1000 /* UNUSED */
+ nop
+5: brk #1000 /* UNUSED */
+ nop
+6: brk #1000 /* UNUSED */
+ nop
+7: brk #1000 /* UNUSED */
+ nop
+8: ldr s3, [x3, #12] /* S4 */
+ nop
+9: ldr s2, [x3, #8] /* S3 */
+ nop
+10: ldp s0, s1, [x3] /* S2 */
+ b 99f
+11: ldr s0, [x3] /* S1 */
+ b 99f
+12: ldr d3, [x3, #24] /* D4 */
+ nop
+13: ldr d2, [x3, #16] /* D3 */
+ nop
+14: ldp d0, d1, [x3] /* D2 */
+ b 99f
+15: ldr d0, [x3] /* D1 */
+ b 99f
+16: ldr q3, [x3, #48] /* Q4 */
+ nop
+17: ldr q2, [x3, #32] /* Q3 */
+ nop
+18: ldp q0, q1, [x3] /* Q2 */
+ b 99f
+19: ldr q0, [x3] /* Q1 */
+ b 99f
+20: ldrb w0, [x3, #BE(7)] /* UINT8 */
+ b 99f
+21: brk #1000 /* reserved */
+ nop
+22: ldrh w0, [x3, #BE(6)] /* UINT16 */
+ b 99f
+23: brk #1000 /* reserved */
+ nop
+24: ldr w0, [x3, #BE(4)] /* UINT32 */
+ b 99f
+25: brk #1000 /* reserved */
+ nop
+26: ldrsb x0, [x3, #BE(7)] /* SINT8 */
+ b 99f
+27: brk #1000 /* reserved */
+ nop
+28: ldrsh x0, [x3, #BE(6)] /* SINT16 */
+ b 99f
+29: brk #1000 /* reserved */
+ nop
+30: ldrsw x0, [x3, #BE(4)] /* SINT32 */
+ nop
+31: /* reserved */
+99: ldp x29, x30, [sp], #ffi_closure_SYSV_FS
+ cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS)
+ cfi_restore (x29)
+ cfi_restore (x30)
+ ret
+ cfi_endproc
+
+ .globl CNAME(ffi_closure_SYSV)
+ FFI_HIDDEN(CNAME(ffi_closure_SYSV))
+#ifdef __ELF__
+ .type CNAME(ffi_closure_SYSV), #function
+ .size CNAME(ffi_closure_SYSV), . - CNAME(ffi_closure_SYSV)
+#endif
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+#include <mach/machine/vm_param.h>
+ .align PAGE_MAX_SHIFT
+CNAME(ffi_closure_trampoline_table_page):
+ .rept PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE
+ adr x16, -PAGE_MAX_SIZE
+ ldp x17, x16, [x16]
+ br x16
+ nop /* each entry in the trampoline config page is 2*sizeof(void*) so the trampoline itself cannot be smaller than 16 bytes */
+ .endr
+
+ .globl CNAME(ffi_closure_trampoline_table_page)
+ FFI_HIDDEN(CNAME(ffi_closure_trampoline_table_page))
+ #ifdef __ELF__
+ .type CNAME(ffi_closure_trampoline_table_page), #function
+ .size CNAME(ffi_closure_trampoline_table_page), . - CNAME(ffi_closure_trampoline_table_page)
+ #endif
+#endif
+
+#endif /* FFI_EXEC_TRAMPOLINE_TABLE */
+
+#ifdef FFI_GO_CLOSURES
+ .align 4
+CNAME(ffi_go_closure_SYSV_V):
+ cfi_startproc
+ stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+ cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+ cfi_rel_offset (x29, 0)
+ cfi_rel_offset (x30, 8)
+
+ /* Save the argument passing vector registers. */
+ stp q0, q1, [sp, #16 + 0]
+ stp q2, q3, [sp, #16 + 32]
+ stp q4, q5, [sp, #16 + 64]
+ stp q6, q7, [sp, #16 + 96]
+ b 0f
+ cfi_endproc
+
+ .globl CNAME(ffi_go_closure_SYSV_V)
+ FFI_HIDDEN(CNAME(ffi_go_closure_SYSV_V))
+#ifdef __ELF__
+ .type CNAME(ffi_go_closure_SYSV_V), #function
+ .size CNAME(ffi_go_closure_SYSV_V), . - CNAME(ffi_go_closure_SYSV_V)
+#endif
+
+ .align 4
+ cfi_startproc
+CNAME(ffi_go_closure_SYSV):
+ stp x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+ cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+ cfi_rel_offset (x29, 0)
+ cfi_rel_offset (x30, 8)
+0:
+ mov x29, sp
+
+ /* Save the argument passing core registers. */
+ stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
+ stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
+ stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
+ stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
+
+ /* Load ffi_closure_inner arguments. */
+ ldp PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]/* load cif, fn */
+ mov x2, x18 /* load user_data */
+ b .Ldo_closure
+ cfi_endproc
+
+ .globl CNAME(ffi_go_closure_SYSV)
+ FFI_HIDDEN(CNAME(ffi_go_closure_SYSV))
+#ifdef __ELF__
+ .type CNAME(ffi_go_closure_SYSV), #function
+ .size CNAME(ffi_go_closure_SYSV), . - CNAME(ffi_go_closure_SYSV)
+#endif
+#endif /* FFI_GO_CLOSURES */
+#endif /* __arm64__ */
+
+#if defined __ELF__ && defined __linux__
+ .section .note.GNU-stack,"",%progbits
+#endif
+
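Both ffi_call_SYSV and ffi_closure_SYSV above return through a computed branch: the low five bits of the flags word select a table slot, and every slot is exactly two A64 instructions, i.e. 8 bytes, which is what the lsl #3 scaling implements. A C model of that indexing, with illustrative names, just to make the arithmetic explicit:

#include <stdint.h>

/* Model of `and w4, w4, #AARCH64_RET_MASK` followed by
   `add x5, x5, x4, lsl #3` / `br x5`: the branch target is
   the table base plus return-code * 8.  */
static uintptr_t
dispatch_target (uintptr_t table_base, uint32_t flags)
{
  uint32_t code = flags & 31u;                   /* AARCH64_RET_MASK */
  return table_base + ((uintptr_t) code << 3);   /* lsl #3 == * 8 */
}
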
diff --git a/contrib/restricted/libffi/src/aarch64/win64_armasm.S b/contrib/restricted/libffi/src/aarch64/win64_armasm.S
index a79f8a8aa9..90b95def5c 100644
--- a/contrib/restricted/libffi/src/aarch64/win64_armasm.S
+++ b/contrib/restricted/libffi/src/aarch64/win64_armasm.S
@@ -1,506 +1,506 @@
-/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
-Permission is hereby granted, free of charge, to any person obtaining
-a copy of this software and associated documentation files (the
-``Software''), to deal in the Software without restriction, including
-without limitation the rights to use, copy, modify, merge, publish,
-distribute, sublicense, and/or sell copies of the Software, and to
-permit persons to whom the Software is furnished to do so, subject to
-the following conditions:
-The above copyright notice and this permission notice shall be
-included in all copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#define LIBFFI_ASM
-#include <fficonfig.h>
-#include <ffi.h>
-#include <ffi_cfi.h>
-#include "internal.h"
-
- OPT 2 /*disable listing */
-/* For some macros to add unwind information */
-#include "ksarm64.h"
- OPT 1 /*re-enable listing */
-
-#define BE(X) 0
-#define PTR_REG(n) x##n
-#define PTR_SIZE 8
-
- IMPORT ffi_closure_SYSV_inner
- EXPORT ffi_call_SYSV
- EXPORT ffi_closure_SYSV_V
- EXPORT ffi_closure_SYSV
- EXPORT extend_hfa_type
- EXPORT compress_hfa_type
-#ifdef FFI_GO_CLOSURES
- EXPORT ffi_go_closure_SYSV_V
- EXPORT ffi_go_closure_SYSV
-#endif
-
- TEXTAREA, ALLIGN=8
-
-/* ffi_call_SYSV
- extern void ffi_call_SYSV (void *stack, void *frame,
- void (*fn)(void), void *rvalue,
- int flags, void *closure);
- Therefore on entry we have:
- x0 stack
- x1 frame
- x2 fn
- x3 rvalue
- x4 flags
- x5 closure
-*/
-
- NESTED_ENTRY ffi_call_SYSV_fake
-
- /* For unwind information, Windows has to store fp and lr */
- PROLOG_SAVE_REG_PAIR x29, x30, #-32!
-
- ALTERNATE_ENTRY ffi_call_SYSV
- /* Use a stack frame allocated by our caller. */
- stp x29, x30, [x1]
- mov x29, x1
- mov sp, x0
-
- mov x9, x2 /* save fn */
- mov x8, x3 /* install structure return */
-#ifdef FFI_GO_CLOSURES
- /*mov x18, x5 install static chain */
-#endif
- stp x3, x4, [x29, #16] /* save rvalue and flags */
-
- /* Load the vector argument passing registers, if necessary. */
- tbz x4, #AARCH64_FLAG_ARG_V_BIT, ffi_call_SYSV_L1
- ldp q0, q1, [sp, #0]
- ldp q2, q3, [sp, #32]
- ldp q4, q5, [sp, #64]
- ldp q6, q7, [sp, #96]
-
-ffi_call_SYSV_L1
- /* Load the core argument passing registers, including
- the structure return pointer. */
- ldp x0, x1, [sp, #16*N_V_ARG_REG + 0]
- ldp x2, x3, [sp, #16*N_V_ARG_REG + 16]
- ldp x4, x5, [sp, #16*N_V_ARG_REG + 32]
- ldp x6, x7, [sp, #16*N_V_ARG_REG + 48]
-
- /* Deallocate the context, leaving the stacked arguments. */
- add sp, sp, #CALL_CONTEXT_SIZE
-
- blr x9 /* call fn */
-
- ldp x3, x4, [x29, #16] /* reload rvalue and flags */
-
- /* Partially deconstruct the stack frame. */
- mov sp, x29
- ldp x29, x30, [x29]
-
- /* Save the return value as directed. */
- adr x5, ffi_call_SYSV_return
- and w4, w4, #AARCH64_RET_MASK
- add x5, x5, x4, lsl #3
- br x5
-
- /* Note that each table entry is 2 insns, and thus 8 bytes.
- For integer data, note that we're storing into ffi_arg
- and therefore we want to extend to 64 bits; these types
- have two consecutive entries allocated for them. */
- ALIGN 4
-ffi_call_SYSV_return
- ret /* VOID */
- nop
- str x0, [x3] /* INT64 */
- ret
- stp x0, x1, [x3] /* INT128 */
- ret
- brk #1000 /* UNUSED */
- ret
- brk #1000 /* UNUSED */
- ret
- brk #1000 /* UNUSED */
- ret
- brk #1000 /* UNUSED */
- ret
- brk #1000 /* UNUSED */
- ret
- st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] /* S4 */
- ret
- st3 { v0.s, v1.s, v2.s }[0], [x3] /* S3 */
- ret
- stp s0, s1, [x3] /* S2 */
- ret
- str s0, [x3] /* S1 */
- ret
- st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] /* D4 */
- ret
- st3 { v0.d, v1.d, v2.d }[0], [x3] /* D3 */
- ret
- stp d0, d1, [x3] /* D2 */
- ret
- str d0, [x3] /* D1 */
- ret
- str q3, [x3, #48] /* Q4 */
- nop
- str q2, [x3, #32] /* Q3 */
- nop
- stp q0, q1, [x3] /* Q2 */
- ret
- str q0, [x3] /* Q1 */
- ret
- uxtb w0, w0 /* UINT8 */
- str x0, [x3]
- ret /* reserved */
- nop
- uxth w0, w0 /* UINT16 */
- str x0, [x3]
- ret /* reserved */
- nop
- mov w0, w0 /* UINT32 */
- str x0, [x3]
- ret /* reserved */
- nop
- sxtb x0, w0 /* SINT8 */
- str x0, [x3]
- ret /* reserved */
- nop
- sxth x0, w0 /* SINT16 */
- str x0, [x3]
- ret /* reserved */
- nop
- sxtw x0, w0 /* SINT32 */
- str x0, [x3]
- ret /* reserved */
- nop
-
-
- NESTED_END ffi_call_SYSV_fake
-
-
-/* ffi_closure_SYSV
- Closure invocation glue. This is the low-level code invoked directly by
- the closure trampoline to set up and call a closure.
- On entry x17 points to a struct ffi_closure; x16 has been clobbered and
- all other registers are preserved.
- We allocate a call context and save the argument-passing registers,
- then invoke the generic C ffi_closure_SYSV_inner() function to do all
- the real work; on return we load the result-passing registers back from
- the call context.
-*/
-
-#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64)
-
- NESTED_ENTRY ffi_closure_SYSV_V
- PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS!
-
- /* Save the argument passing vector registers. */
- stp q0, q1, [sp, #16 + 0]
- stp q2, q3, [sp, #16 + 32]
- stp q4, q5, [sp, #16 + 64]
- stp q6, q7, [sp, #16 + 96]
-
- b ffi_closure_SYSV_save_argument
- NESTED_END ffi_closure_SYSV_V
-
- NESTED_ENTRY ffi_closure_SYSV
- PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS!
-
-ffi_closure_SYSV_save_argument
- /* Save the argument passing core registers. */
- stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
- stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
- stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
- stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
-
- /* Load ffi_closure_inner arguments. */
- ldp PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET] /* load cif, fn */
- ldr PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2] /* load user_data */
-
-do_closure
- add x3, sp, #16 /* load context */
- add x4, sp, #ffi_closure_SYSV_FS /* load stack */
- add x5, sp, #16+CALL_CONTEXT_SIZE /* load rvalue */
- mov x6, x8 /* load struct_rval */
-
- bl ffi_closure_SYSV_inner
-
- /* Load the return value as directed. */
- adr x1, ffi_closure_SYSV_return_base
- and w0, w0, #AARCH64_RET_MASK
- add x1, x1, x0, lsl #3
- add x3, sp, #16+CALL_CONTEXT_SIZE
- br x1
-
- /* Note that each table entry is 2 insns, and thus 8 bytes. */
- ALIGN 8
-ffi_closure_SYSV_return_base
- b ffi_closure_SYSV_epilog /* VOID */
- nop
- ldr x0, [x3] /* INT64 */
- b ffi_closure_SYSV_epilog
- ldp x0, x1, [x3] /* INT128 */
- b ffi_closure_SYSV_epilog
- brk #1000 /* UNUSED */
- nop
- brk #1000 /* UNUSED */
- nop
- brk #1000 /* UNUSED */
- nop
- brk #1000 /* UNUSED */
- nop
- brk #1000 /* UNUSED */
- nop
- ldr s3, [x3, #12] /* S4 */
- nop
- ldr s2, [x3, #8] /* S3 */
- nop
- ldp s0, s1, [x3] /* S2 */
- b ffi_closure_SYSV_epilog
- ldr s0, [x3] /* S1 */
- b ffi_closure_SYSV_epilog
- ldr d3, [x3, #24] /* D4 */
- nop
- ldr d2, [x3, #16] /* D3 */
- nop
- ldp d0, d1, [x3] /* D2 */
- b ffi_closure_SYSV_epilog
- ldr d0, [x3] /* D1 */
- b ffi_closure_SYSV_epilog
- ldr q3, [x3, #48] /* Q4 */
- nop
- ldr q2, [x3, #32] /* Q3 */
- nop
- ldp q0, q1, [x3] /* Q2 */
- b ffi_closure_SYSV_epilog
- ldr q0, [x3] /* Q1 */
- b ffi_closure_SYSV_epilog
- ldrb w0, [x3, #BE(7)] /* UINT8 */
- b ffi_closure_SYSV_epilog
- brk #1000 /* reserved */
- nop
- ldrh w0, [x3, #BE(6)] /* UINT16 */
- b ffi_closure_SYSV_epilog
- brk #1000 /* reserved */
- nop
- ldr w0, [x3, #BE(4)] /* UINT32 */
- b ffi_closure_SYSV_epilog
- brk #1000 /* reserved */
- nop
- ldrsb x0, [x3, #BE(7)] /* SINT8 */
- b ffi_closure_SYSV_epilog
- brk #1000 /* reserved */
- nop
- ldrsh x0, [x3, #BE(6)] /* SINT16 */
- b ffi_closure_SYSV_epilog
- brk #1000 /* reserved */
- nop
- ldrsw x0, [x3, #BE(4)] /* SINT32 */
- nop
- /* reserved */
-
-ffi_closure_SYSV_epilog
- EPILOG_RESTORE_REG_PAIR x29, x30, #ffi_closure_SYSV_FS!
- EPILOG_RETURN
- NESTED_END ffi_closure_SYSV
-
-
-#ifdef FFI_GO_CLOSURES
- NESTED_ENTRY ffi_go_closure_SYSV_V
- PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS!
-
- /* Save the argument passing vector registers. */
- stp q0, q1, [sp, #16 + 0]
- stp q2, q3, [sp, #16 + 32]
- stp q4, q5, [sp, #16 + 64]
- stp q6, q7, [sp, #16 + 96]
- b ffi_go_closure_SYSV_save_argument
- NESTED_END ffi_go_closure_SYSV_V
-
- NESTED_ENTRY ffi_go_closure_SYSV
- PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS!
-
-ffi_go_closure_SYSV_save_argument
- /* Save the argument passing core registers. */
- stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
- stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
- stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
- stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
-
- /* Load ffi_closure_inner arguments. */
- ldp PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]/* load cif, fn */
- mov x2, x18 /* load user_data */
- b do_closure
- NESTED_END ffi_go_closure_SYSV
-
-#endif /* FFI_GO_CLOSURES */
-
-
-/* void extend_hfa_type (void *dest, void *src, int h) */
-
- LEAF_ENTRY extend_hfa_type
-
- adr x3, extend_hfa_type_jump_base
- and w2, w2, #AARCH64_RET_MASK
- sub x2, x2, #AARCH64_RET_S4
- add x3, x3, x2, lsl #4
- br x3
-
- ALIGN 4
-extend_hfa_type_jump_base
- ldp s16, s17, [x1] /* S4 */
- ldp s18, s19, [x1, #8]
- b extend_hfa_type_store_4
- nop
-
- ldp s16, s17, [x1] /* S3 */
- ldr s18, [x1, #8]
- b extend_hfa_type_store_3
- nop
-
- ldp s16, s17, [x1] /* S2 */
- b extend_hfa_type_store_2
- nop
- nop
-
- ldr s16, [x1] /* S1 */
- b extend_hfa_type_store_1
- nop
- nop
-
- ldp d16, d17, [x1] /* D4 */
- ldp d18, d19, [x1, #16]
- b extend_hfa_type_store_4
- nop
-
- ldp d16, d17, [x1] /* D3 */
- ldr d18, [x1, #16]
- b extend_hfa_type_store_3
- nop
-
- ldp d16, d17, [x1] /* D2 */
- b extend_hfa_type_store_2
- nop
- nop
-
- ldr d16, [x1] /* D1 */
- b extend_hfa_type_store_1
- nop
- nop
-
- ldp q16, q17, [x1] /* Q4 */
- ldp q18, q19, [x1, #16]
- b extend_hfa_type_store_4
- nop
-
- ldp q16, q17, [x1] /* Q3 */
- ldr q18, [x1, #16]
- b extend_hfa_type_store_3
- nop
-
- ldp q16, q17, [x1] /* Q2 */
- b extend_hfa_type_store_2
- nop
- nop
-
- ldr q16, [x1] /* Q1 */
- b extend_hfa_type_store_1
-
-extend_hfa_type_store_4
- str q19, [x0, #48]
-extend_hfa_type_store_3
- str q18, [x0, #32]
-extend_hfa_type_store_2
- str q17, [x0, #16]
-extend_hfa_type_store_1
- str q16, [x0]
- ret
-
- LEAF_END extend_hfa_type
-
-
-/* void compress_hfa_type (void *dest, void *reg, int h) */
-
- LEAF_ENTRY compress_hfa_type
-
- adr x3, compress_hfa_type_jump_base
- and w2, w2, #AARCH64_RET_MASK
- sub x2, x2, #AARCH64_RET_S4
- add x3, x3, x2, lsl #4
- br x3
-
- ALIGN 4
-compress_hfa_type_jump_base
- ldp q16, q17, [x1] /* S4 */
- ldp q18, q19, [x1, #32]
- st4 { v16.s, v17.s, v18.s, v19.s }[0], [x0]
- ret
-
- ldp q16, q17, [x1] /* S3 */
- ldr q18, [x1, #32]
- st3 { v16.s, v17.s, v18.s }[0], [x0]
- ret
-
- ldp q16, q17, [x1] /* S2 */
- st2 { v16.s, v17.s }[0], [x0]
- ret
- nop
-
- ldr q16, [x1] /* S1 */
- st1 { v16.s }[0], [x0]
- ret
- nop
-
- ldp q16, q17, [x1] /* D4 */
- ldp q18, q19, [x1, #32]
- st4 { v16.d, v17.d, v18.d, v19.d }[0], [x0]
- ret
-
- ldp q16, q17, [x1] /* D3 */
- ldr q18, [x1, #32]
- st3 { v16.d, v17.d, v18.d }[0], [x0]
- ret
-
- ldp q16, q17, [x1] /* D2 */
- st2 { v16.d, v17.d }[0], [x0]
- ret
- nop
-
- ldr q16, [x1] /* D1 */
- st1 { v16.d }[0], [x0]
- ret
- nop
-
- ldp q16, q17, [x1] /* Q4 */
- ldp q18, q19, [x1, #32]
- b compress_hfa_type_store_q4
- nop
-
- ldp q16, q17, [x1] /* Q3 */
- ldr q18, [x1, #32]
- b compress_hfa_type_store_q3
- nop
-
- ldp q16, q17, [x1] /* Q2 */
- stp q16, q17, [x0]
- ret
- nop
-
- ldr q16, [x1] /* Q1 */
- str q16, [x0]
- ret
-
-compress_hfa_type_store_q4
- str q19, [x0, #48]
-compress_hfa_type_store_q3
- str q18, [x0, #32]
- stp q16, q17, [x0]
- ret
-
- LEAF_END compress_hfa_type
-
- END
\ No newline at end of file
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+#include "internal.h"
+
+ OPT 2 /*disable listing */
+/* For some macros to add unwind information */
+#include "ksarm64.h"
+ OPT 1 /*re-enable listing */
+
+#define BE(X) 0
+#define PTR_REG(n) x##n
+#define PTR_SIZE 8
+
+ IMPORT ffi_closure_SYSV_inner
+ EXPORT ffi_call_SYSV
+ EXPORT ffi_closure_SYSV_V
+ EXPORT ffi_closure_SYSV
+ EXPORT extend_hfa_type
+ EXPORT compress_hfa_type
+#ifdef FFI_GO_CLOSURES
+ EXPORT ffi_go_closure_SYSV_V
+ EXPORT ffi_go_closure_SYSV
+#endif
+
+ TEXTAREA, ALLIGN=8
+
+/* ffi_call_SYSV
+ extern void ffi_call_SYSV (void *stack, void *frame,
+ void (*fn)(void), void *rvalue,
+ int flags, void *closure);
+ Therefore on entry we have:
+ x0 stack
+ x1 frame
+ x2 fn
+ x3 rvalue
+ x4 flags
+ x5 closure
+*/
+
+ NESTED_ENTRY ffi_call_SYSV_fake
+
+ /* For unwind information, Windows has to store fp and lr */
+ PROLOG_SAVE_REG_PAIR x29, x30, #-32!
+
+ ALTERNATE_ENTRY ffi_call_SYSV
+ /* Use a stack frame allocated by our caller. */
+ stp x29, x30, [x1]
+ mov x29, x1
+ mov sp, x0
+
+ mov x9, x2 /* save fn */
+ mov x8, x3 /* install structure return */
+#ifdef FFI_GO_CLOSURES
+ /*mov x18, x5 install static chain */
+#endif
+ stp x3, x4, [x29, #16] /* save rvalue and flags */
+
+ /* Load the vector argument passing registers, if necessary. */
+ tbz x4, #AARCH64_FLAG_ARG_V_BIT, ffi_call_SYSV_L1
+ ldp q0, q1, [sp, #0]
+ ldp q2, q3, [sp, #32]
+ ldp q4, q5, [sp, #64]
+ ldp q6, q7, [sp, #96]
+
+ffi_call_SYSV_L1
+ /* Load the core argument passing registers, including
+ the structure return pointer. */
+ ldp x0, x1, [sp, #16*N_V_ARG_REG + 0]
+ ldp x2, x3, [sp, #16*N_V_ARG_REG + 16]
+ ldp x4, x5, [sp, #16*N_V_ARG_REG + 32]
+ ldp x6, x7, [sp, #16*N_V_ARG_REG + 48]
+
+ /* Deallocate the context, leaving the stacked arguments. */
+ add sp, sp, #CALL_CONTEXT_SIZE
+
+ blr x9 /* call fn */
+
+ ldp x3, x4, [x29, #16] /* reload rvalue and flags */
+
+ /* Partially deconstruct the stack frame. */
+ mov sp, x29
+ ldp x29, x30, [x29]
+
+ /* Save the return value as directed. */
+ adr x5, ffi_call_SYSV_return
+ and w4, w4, #AARCH64_RET_MASK
+ add x5, x5, x4, lsl #3
+ br x5
+
+ /* Note that each table entry is 2 insns, and thus 8 bytes.
+ For integer data, note that we're storing into ffi_arg
+ and therefore we want to extend to 64 bits; these types
+ have two consecutive entries allocated for them. */
+ ALIGN 4
+ffi_call_SYSV_return
+ ret /* VOID */
+ nop
+ str x0, [x3] /* INT64 */
+ ret
+ stp x0, x1, [x3] /* INT128 */
+ ret
+ brk #1000 /* UNUSED */
+ ret
+ brk #1000 /* UNUSED */
+ ret
+ brk #1000 /* UNUSED */
+ ret
+ brk #1000 /* UNUSED */
+ ret
+ brk #1000 /* UNUSED */
+ ret
+ st4 { v0.s, v1.s, v2.s, v3.s }[0], [x3] /* S4 */
+ ret
+ st3 { v0.s, v1.s, v2.s }[0], [x3] /* S3 */
+ ret
+ stp s0, s1, [x3] /* S2 */
+ ret
+ str s0, [x3] /* S1 */
+ ret
+ st4 { v0.d, v1.d, v2.d, v3.d }[0], [x3] /* D4 */
+ ret
+ st3 { v0.d, v1.d, v2.d }[0], [x3] /* D3 */
+ ret
+ stp d0, d1, [x3] /* D2 */
+ ret
+ str d0, [x3] /* D1 */
+ ret
+ str q3, [x3, #48] /* Q4 */
+ nop
+ str q2, [x3, #32] /* Q3 */
+ nop
+ stp q0, q1, [x3] /* Q2 */
+ ret
+ str q0, [x3] /* Q1 */
+ ret
+ uxtb w0, w0 /* UINT8 */
+ str x0, [x3]
+ ret /* reserved */
+ nop
+ uxth w0, w0 /* UINT16 */
+ str x0, [x3]
+ ret /* reserved */
+ nop
+ mov w0, w0 /* UINT32 */
+ str x0, [x3]
+ ret /* reserved */
+ nop
+ sxtb x0, w0 /* SINT8 */
+ str x0, [x3]
+ ret /* reserved */
+ nop
+ sxth x0, w0 /* SINT16 */
+ str x0, [x3]
+ ret /* reserved */
+ nop
+ sxtw x0, w0 /* SINT32 */
+ str x0, [x3]
+ ret /* reserved */
+ nop
+
+
+ NESTED_END ffi_call_SYSV_fake
+
+
+/* ffi_closure_SYSV
+ Closure invocation glue. This is the low-level code invoked directly by
+ the closure trampoline to set up and call a closure.
+ On entry x17 points to a struct ffi_closure; x16 has been clobbered and
+ all other registers are preserved.
+ We allocate a call context and save the argument-passing registers,
+ then invoke the generic C ffi_closure_SYSV_inner() function to do all
+ the real work; on return we load the result-passing registers back from
+ the call context.
+*/
+
+#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64)
+
+ NESTED_ENTRY ffi_closure_SYSV_V
+ PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS!
+
+ /* Save the argument passing vector registers. */
+ stp q0, q1, [sp, #16 + 0]
+ stp q2, q3, [sp, #16 + 32]
+ stp q4, q5, [sp, #16 + 64]
+ stp q6, q7, [sp, #16 + 96]
+
+ b ffi_closure_SYSV_save_argument
+ NESTED_END ffi_closure_SYSV_V
+
+ NESTED_ENTRY ffi_closure_SYSV
+ PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS!
+
+ffi_closure_SYSV_save_argument
+ /* Save the argument passing core registers. */
+ stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
+ stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
+ stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
+ stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
+
+ /* Load ffi_closure_inner arguments. */
+ ldp PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET] /* load cif, fn */
+ ldr PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2] /* load user_data */
+
+do_closure
+ add x3, sp, #16 /* load context */
+ add x4, sp, #ffi_closure_SYSV_FS /* load stack */
+ add x5, sp, #16+CALL_CONTEXT_SIZE /* load rvalue */
+ mov x6, x8 /* load struct_rval */
+
+ bl ffi_closure_SYSV_inner
+
+ /* Load the return value as directed. */
+ adr x1, ffi_closure_SYSV_return_base
+ and w0, w0, #AARCH64_RET_MASK
+ add x1, x1, x0, lsl #3
+ add x3, sp, #16+CALL_CONTEXT_SIZE
+ br x1
+
+ /* Note that each table entry is 2 insns, and thus 8 bytes. */
+ ALIGN 8
+ffi_closure_SYSV_return_base
+ b ffi_closure_SYSV_epilog /* VOID */
+ nop
+ ldr x0, [x3] /* INT64 */
+ b ffi_closure_SYSV_epilog
+ ldp x0, x1, [x3] /* INT128 */
+ b ffi_closure_SYSV_epilog
+ brk #1000 /* UNUSED */
+ nop
+ brk #1000 /* UNUSED */
+ nop
+ brk #1000 /* UNUSED */
+ nop
+ brk #1000 /* UNUSED */
+ nop
+ brk #1000 /* UNUSED */
+ nop
+ ldr s3, [x3, #12] /* S4 */
+ nop
+ ldr s2, [x3, #8] /* S3 */
+ nop
+ ldp s0, s1, [x3] /* S2 */
+ b ffi_closure_SYSV_epilog
+ ldr s0, [x3] /* S1 */
+ b ffi_closure_SYSV_epilog
+ ldr d3, [x3, #24] /* D4 */
+ nop
+ ldr d2, [x3, #16] /* D3 */
+ nop
+ ldp d0, d1, [x3] /* D2 */
+ b ffi_closure_SYSV_epilog
+ ldr d0, [x3] /* D1 */
+ b ffi_closure_SYSV_epilog
+ ldr q3, [x3, #48] /* Q4 */
+ nop
+ ldr q2, [x3, #32] /* Q3 */
+ nop
+ ldp q0, q1, [x3] /* Q2 */
+ b ffi_closure_SYSV_epilog
+ ldr q0, [x3] /* Q1 */
+ b ffi_closure_SYSV_epilog
+ ldrb w0, [x3, #BE(7)] /* UINT8 */
+ b ffi_closure_SYSV_epilog
+ brk #1000 /* reserved */
+ nop
+ ldrh w0, [x3, #BE(6)] /* UINT16 */
+ b ffi_closure_SYSV_epilog
+ brk #1000 /* reserved */
+ nop
+ ldr w0, [x3, #BE(4)] /* UINT32 */
+ b ffi_closure_SYSV_epilog
+ brk #1000 /* reserved */
+ nop
+ ldrsb x0, [x3, #BE(7)] /* SINT8 */
+ b ffi_closure_SYSV_epilog
+ brk #1000 /* reserved */
+ nop
+ ldrsh x0, [x3, #BE(6)] /* SINT16 */
+ b ffi_closure_SYSV_epilog
+ brk #1000 /* reserved */
+ nop
+ ldrsw x0, [x3, #BE(4)] /* SINT32 */
+ nop
+ /* reserved */
+
+ffi_closure_SYSV_epilog
+ EPILOG_RESTORE_REG_PAIR x29, x30, #ffi_closure_SYSV_FS!
+ EPILOG_RETURN
+ NESTED_END ffi_closure_SYSV
+
+
+#ifdef FFI_GO_CLOSURES
+ NESTED_ENTRY ffi_go_closure_SYSV_V
+ PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS!
+
+ /* Save the argument passing vector registers. */
+ stp q0, q1, [sp, #16 + 0]
+ stp q2, q3, [sp, #16 + 32]
+ stp q4, q5, [sp, #16 + 64]
+ stp q6, q7, [sp, #16 + 96]
+ b ffi_go_closure_SYSV_save_argument
+ NESTED_END ffi_go_closure_SYSV_V
+
+ NESTED_ENTRY ffi_go_closure_SYSV
+ PROLOG_SAVE_REG_PAIR x29, x30, #-ffi_closure_SYSV_FS!
+
+ffi_go_closure_SYSV_save_argument
+ /* Save the argument passing core registers. */
+ stp x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
+ stp x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
+ stp x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
+ stp x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
+
+ /* Load ffi_closure_inner arguments. */
+ ldp PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]/* load cif, fn */
+ mov x2, x18 /* load user_data */
+ b do_closure
+ NESTED_END ffi_go_closure_SYSV
+
+#endif /* FFI_GO_CLOSURES */
+
+
+/* void extend_hfa_type (void *dest, void *src, int h) */
+
+ LEAF_ENTRY extend_hfa_type
+
+ adr x3, extend_hfa_type_jump_base
+ and w2, w2, #AARCH64_RET_MASK
+ sub x2, x2, #AARCH64_RET_S4
+ add x3, x3, x2, lsl #4
+ br x3
+
+ ALIGN 4
+extend_hfa_type_jump_base
+ ldp s16, s17, [x1] /* S4 */
+ ldp s18, s19, [x1, #8]
+ b extend_hfa_type_store_4
+ nop
+
+ ldp s16, s17, [x1] /* S3 */
+ ldr s18, [x1, #8]
+ b extend_hfa_type_store_3
+ nop
+
+ ldp s16, s17, [x1] /* S2 */
+ b extend_hfa_type_store_2
+ nop
+ nop
+
+ ldr s16, [x1] /* S1 */
+ b extend_hfa_type_store_1
+ nop
+ nop
+
+ ldp d16, d17, [x1] /* D4 */
+ ldp d18, d19, [x1, #16]
+ b extend_hfa_type_store_4
+ nop
+
+ ldp d16, d17, [x1] /* D3 */
+ ldr d18, [x1, #16]
+ b extend_hfa_type_store_3
+ nop
+
+ ldp d16, d17, [x1] /* D2 */
+ b extend_hfa_type_store_2
+ nop
+ nop
+
+ ldr d16, [x1] /* D1 */
+ b extend_hfa_type_store_1
+ nop
+ nop
+
+ ldp q16, q17, [x1] /* Q4 */
+ ldp q18, q19, [x1, #16]
+ b extend_hfa_type_store_4
+ nop
+
+ ldp q16, q17, [x1] /* Q3 */
+ ldr q18, [x1, #16]
+ b extend_hfa_type_store_3
+ nop
+
+ ldp q16, q17, [x1] /* Q2 */
+ b extend_hfa_type_store_2
+ nop
+ nop
+
+ ldr q16, [x1] /* Q1 */
+ b extend_hfa_type_store_1
+
+extend_hfa_type_store_4
+ str q19, [x0, #48]
+extend_hfa_type_store_3
+ str q18, [x0, #32]
+extend_hfa_type_store_2
+ str q17, [x0, #16]
+extend_hfa_type_store_1
+ str q16, [x0]
+ ret
+
+ LEAF_END extend_hfa_type
+
+
+/* void compress_hfa_type (void *dest, void *reg, int h) */
+
+ LEAF_ENTRY compress_hfa_type
+
+ adr x3, compress_hfa_type_jump_base
+ and w2, w2, #AARCH64_RET_MASK
+ sub x2, x2, #AARCH64_RET_S4
+ add x3, x3, x2, lsl #4
+ br x3
+
+ ALIGN 4
+compress_hfa_type_jump_base
+ ldp q16, q17, [x1] /* S4 */
+ ldp q18, q19, [x1, #32]
+ st4 { v16.s, v17.s, v18.s, v19.s }[0], [x0]
+ ret
+
+ ldp q16, q17, [x1] /* S3 */
+ ldr q18, [x1, #32]
+ st3 { v16.s, v17.s, v18.s }[0], [x0]
+ ret
+
+ ldp q16, q17, [x1] /* S2 */
+ st2 { v16.s, v17.s }[0], [x0]
+ ret
+ nop
+
+ ldr q16, [x1] /* S1 */
+ st1 { v16.s }[0], [x0]
+ ret
+ nop
+
+ ldp q16, q17, [x1] /* D4 */
+ ldp q18, q19, [x1, #32]
+ st4 { v16.d, v17.d, v18.d, v19.d }[0], [x0]
+ ret
+
+ ldp q16, q17, [x1] /* D3 */
+ ldr q18, [x1, #32]
+ st3 { v16.d, v17.d, v18.d }[0], [x0]
+ ret
+
+ ldp q16, q17, [x1] /* D2 */
+ st2 { v16.d, v17.d }[0], [x0]
+ ret
+ nop
+
+ ldr q16, [x1] /* D1 */
+ st1 { v16.d }[0], [x0]
+ ret
+ nop
+
+ ldp q16, q17, [x1] /* Q4 */
+ ldp q18, q19, [x1, #32]
+ b compress_hfa_type_store_q4
+ nop
+
+ ldp q16, q17, [x1] /* Q3 */
+ ldr q18, [x1, #32]
+ b compress_hfa_type_store_q3
+ nop
+
+ ldp q16, q17, [x1] /* Q2 */
+ stp q16, q17, [x0]
+ ret
+ nop
+
+ ldr q16, [x1] /* Q1 */
+ str q16, [x0]
+ ret
+
+compress_hfa_type_store_q4
+ str q19, [x0, #48]
+compress_hfa_type_store_q3
+ str q18, [x0, #32]
+ stp q16, q17, [x0]
+ ret
+
+ LEAF_END compress_hfa_type
+
+ END
\ No newline at end of file
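
extend_hfa_type and compress_hfa_type translate between the packed in-memory form of a homogeneous float aggregate and its register image, in which every element occupies a full 16-byte vector-register slot. A hedged C model of the float case, sketching only the data movement; the real implementation is the jump-table assembly above:

#include <string.h>

/* Sketch: spread `count` packed floats into 16-byte register slots
   (extend), or gather them back into packed memory (compress).  */
static void
extend_hfa_floats (void *dest, const float *src, int count)
{
  for (int i = 0; i < count; ++i)
    memcpy ((char *) dest + 16 * i, &src[i], sizeof (float));
}

static void
compress_hfa_floats (float *dest, const void *reg, int count)
{
  for (int i = 0; i < count; ++i)
    memcpy (&dest[i], (const char *) reg + 16 * i, sizeof (float));
}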