| author | Mikhail Borisov <borisov.mikhail@gmail.com> | 2022-02-10 16:45:40 +0300 |
|---|---|---|
| committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:40 +0300 |
| commit | 5d50718e66d9c037dc587a0211110b7d25a66185 (patch) | |
| tree | e98df59de24d2ef7c77baed9f41e4875a2fef972 /contrib/restricted/libffi/src/x86 | |
| parent | a6a92afe03e02795227d2641b49819b687f088f8 (diff) | |
| download | ydb-5d50718e66d9c037dc587a0211110b7d25a66185.tar.gz | |
Restoring authorship annotation for Mikhail Borisov <borisov.mikhail@gmail.com>. Commit 2 of 2.
Diffstat (limited to 'contrib/restricted/libffi/src/x86')

| mode | path | changed |
|---|---|---|
| -rw-r--r-- | contrib/restricted/libffi/src/x86/asmnames.h | 60 |
| -rw-r--r-- | contrib/restricted/libffi/src/x86/ffi.c | 1258 |
| -rw-r--r-- | contrib/restricted/libffi/src/x86/ffi64.c | 684 |
| -rw-r--r-- | contrib/restricted/libffi/src/x86/ffitarget.h | 98 |
| -rw-r--r-- | contrib/restricted/libffi/src/x86/ffiw64.c | 622 |
| -rw-r--r-- | contrib/restricted/libffi/src/x86/internal.h | 58 |
| -rw-r--r-- | contrib/restricted/libffi/src/x86/internal64.h | 44 |
| -rw-r--r-- | contrib/restricted/libffi/src/x86/sysv.S | 2146 |
| -rw-r--r-- | contrib/restricted/libffi/src/x86/sysv_intel.S | 1990 |
| -rw-r--r-- | contrib/restricted/libffi/src/x86/unix64.S | 884 |
| -rw-r--r-- | contrib/restricted/libffi/src/x86/win64.S | 460 |
| -rw-r--r-- | contrib/restricted/libffi/src/x86/win64_intel.S | 474 |

12 files changed, 4389 insertions(+), 4389 deletions(-)
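For orientation: the files below implement libffi's x86/x86-64 call machinery (`ffi_prep_cif_machdep`, `ffi_call`, and the closure trampolines), and this commit only re-attributes authorship, so insertions and deletions match line for line. As a minimal sketch of the public API these files back (standard libffi usage, not part of this commit; `add_ints` is a hypothetical target function):

```c
/* Call a native function through libffi's x86 call path. */
#include <ffi.h>
#include <stdio.h>

static int add_ints(int a, int b) { return a + b; }

int main(void)
{
  ffi_cif cif;
  ffi_type *arg_types[2] = { &ffi_type_sint, &ffi_type_sint };
  int a = 2, b = 3;
  void *arg_values[2] = { &a, &b };
  ffi_arg result;  /* integer returns are widened to ffi_arg */

  /* ffi_prep_cif ends up in the per-arch ffi_prep_cif_machdep
     defined in ffi.c / ffi64.c below. */
  if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2,
                   &ffi_type_sint, arg_types) != FFI_OK)
    return 1;

  ffi_call(&cif, FFI_FN(add_ints), &result, arg_values);
  printf("add_ints(2, 3) = %d\n", (int)result);  /* prints 5 */
  return 0;
}
```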
diff --git a/contrib/restricted/libffi/src/x86/asmnames.h b/contrib/restricted/libffi/src/x86/asmnames.h index fc5c4cb9e7..7551021e17 100644 --- a/contrib/restricted/libffi/src/x86/asmnames.h +++ b/contrib/restricted/libffi/src/x86/asmnames.h @@ -1,30 +1,30 @@ -#ifndef ASMNAMES_H -#define ASMNAMES_H - -#define C2(X, Y) X ## Y -#define C1(X, Y) C2(X, Y) -#ifdef __USER_LABEL_PREFIX__ -# define C(X) C1(__USER_LABEL_PREFIX__, X) -#else -# define C(X) X -#endif - -#ifdef __APPLE__ -# define L(X) C1(L, X) -#else -# define L(X) C1(.L, X) -#endif - -#if defined(__ELF__) && defined(__PIC__) -# define PLT(X) X@PLT -#else -# define PLT(X) X -#endif - -#ifdef __ELF__ -# define ENDF(X) .type X,@function; .size X, . - X -#else -# define ENDF(X) -#endif - -#endif /* ASMNAMES_H */ +#ifndef ASMNAMES_H +#define ASMNAMES_H + +#define C2(X, Y) X ## Y +#define C1(X, Y) C2(X, Y) +#ifdef __USER_LABEL_PREFIX__ +# define C(X) C1(__USER_LABEL_PREFIX__, X) +#else +# define C(X) X +#endif + +#ifdef __APPLE__ +# define L(X) C1(L, X) +#else +# define L(X) C1(.L, X) +#endif + +#if defined(__ELF__) && defined(__PIC__) +# define PLT(X) X@PLT +#else +# define PLT(X) X +#endif + +#ifdef __ELF__ +# define ENDF(X) .type X,@function; .size X, . - X +#else +# define ENDF(X) +#endif + +#endif /* ASMNAMES_H */ diff --git a/contrib/restricted/libffi/src/x86/ffi.c b/contrib/restricted/libffi/src/x86/ffi.c index 2950ba8fca..9a592185a1 100644 --- a/contrib/restricted/libffi/src/x86/ffi.c +++ b/contrib/restricted/libffi/src/x86/ffi.c @@ -1,6 +1,6 @@ /* ----------------------------------------------------------------------- - ffi.c - Copyright (c) 2017 Anthony Green - Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc. + ffi.c - Copyright (c) 2017 Anthony Green + Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008 Red Hat, Inc. Copyright (c) 2002 Ranjit Mathew Copyright (c) 2002 Bo Thorsen Copyright (c) 2002 Roger Sayle @@ -29,502 +29,502 @@ DEALINGS IN THE SOFTWARE. ----------------------------------------------------------------------- */ -#if defined(__i386__) || defined(_M_IX86) +#if defined(__i386__) || defined(_M_IX86) #include <ffi.h> #include <ffi_common.h> -#include <stdint.h> +#include <stdint.h> #include <stdlib.h> -#include "internal.h" - -/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; - all further uses in this file will refer to the 80-bit type. */ -#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE -# if FFI_TYPE_LONGDOUBLE != 4 -# error FFI_TYPE_LONGDOUBLE out of date -# endif -#else -# undef FFI_TYPE_LONGDOUBLE -# define FFI_TYPE_LONGDOUBLE 4 +#include "internal.h" + +/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; + all further uses in this file will refer to the 80-bit type. */ +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE +# if FFI_TYPE_LONGDOUBLE != 4 +# error FFI_TYPE_LONGDOUBLE out of date +# endif +#else +# undef FFI_TYPE_LONGDOUBLE +# define FFI_TYPE_LONGDOUBLE 4 #endif -#if defined(__GNUC__) && !defined(__declspec) -# define __declspec(x) __attribute__((x)) +#if defined(__GNUC__) && !defined(__declspec) +# define __declspec(x) __attribute__((x)) #endif -#if defined(_MSC_VER) && defined(_M_IX86) -/* Stack is not 16-byte aligned on Windows. */ -#define STACK_ALIGN(bytes) (bytes) -#else -#define STACK_ALIGN(bytes) FFI_ALIGN (bytes, 16) +#if defined(_MSC_VER) && defined(_M_IX86) +/* Stack is not 16-byte aligned on Windows. */ +#define STACK_ALIGN(bytes) (bytes) +#else +#define STACK_ALIGN(bytes) FFI_ALIGN (bytes, 16) #endif -/* Perform machine dependent cif processing. 
*/ -ffi_status FFI_HIDDEN -ffi_prep_cif_machdep(ffi_cif *cif) -{ - size_t bytes = 0; - int i, n, flags, cabi = cif->abi; +/* Perform machine dependent cif processing. */ +ffi_status FFI_HIDDEN +ffi_prep_cif_machdep(ffi_cif *cif) +{ + size_t bytes = 0; + int i, n, flags, cabi = cif->abi; - switch (cabi) + switch (cabi) { - case FFI_SYSV: - case FFI_STDCALL: - case FFI_THISCALL: - case FFI_FASTCALL: - case FFI_MS_CDECL: - case FFI_PASCAL: - case FFI_REGISTER: - break; - default: - return FFI_BAD_ABI; + case FFI_SYSV: + case FFI_STDCALL: + case FFI_THISCALL: + case FFI_FASTCALL: + case FFI_MS_CDECL: + case FFI_PASCAL: + case FFI_REGISTER: + break; + default: + return FFI_BAD_ABI; } switch (cif->rtype->type) { case FFI_TYPE_VOID: - flags = X86_RET_VOID; - break; - case FFI_TYPE_FLOAT: - flags = X86_RET_FLOAT; - break; - case FFI_TYPE_DOUBLE: - flags = X86_RET_DOUBLE; - break; - case FFI_TYPE_LONGDOUBLE: - flags = X86_RET_LDOUBLE; - break; + flags = X86_RET_VOID; + break; + case FFI_TYPE_FLOAT: + flags = X86_RET_FLOAT; + break; + case FFI_TYPE_DOUBLE: + flags = X86_RET_DOUBLE; + break; + case FFI_TYPE_LONGDOUBLE: + flags = X86_RET_LDOUBLE; + break; case FFI_TYPE_UINT8: - flags = X86_RET_UINT8; - break; + flags = X86_RET_UINT8; + break; case FFI_TYPE_UINT16: - flags = X86_RET_UINT16; - break; + flags = X86_RET_UINT16; + break; case FFI_TYPE_SINT8: - flags = X86_RET_SINT8; - break; + flags = X86_RET_SINT8; + break; case FFI_TYPE_SINT16: - flags = X86_RET_SINT16; - break; - case FFI_TYPE_INT: - case FFI_TYPE_SINT32: + flags = X86_RET_SINT16; + break; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: case FFI_TYPE_UINT32: - case FFI_TYPE_POINTER: - flags = X86_RET_INT32; - break; + case FFI_TYPE_POINTER: + flags = X86_RET_INT32; + break; case FFI_TYPE_SINT64: case FFI_TYPE_UINT64: - flags = X86_RET_INT64; + flags = X86_RET_INT64; break; case FFI_TYPE_STRUCT: #ifndef X86 - /* ??? This should be a different ABI rather than an ifdef. */ + /* ??? This should be a different ABI rather than an ifdef. */ if (cif->rtype->size == 1) - flags = X86_RET_STRUCT_1B; + flags = X86_RET_STRUCT_1B; else if (cif->rtype->size == 2) - flags = X86_RET_STRUCT_2B; + flags = X86_RET_STRUCT_2B; else if (cif->rtype->size == 4) - flags = X86_RET_INT32; + flags = X86_RET_INT32; else if (cif->rtype->size == 8) - flags = X86_RET_INT64; + flags = X86_RET_INT64; else #endif - { - do_struct: - switch (cabi) - { - case FFI_THISCALL: - case FFI_FASTCALL: - case FFI_STDCALL: - case FFI_MS_CDECL: - flags = X86_RET_STRUCTARG; - break; - default: - flags = X86_RET_STRUCTPOP; - break; - } - /* Allocate space for return value pointer. */ - bytes += FFI_ALIGN (sizeof(void*), FFI_SIZEOF_ARG); - } + { + do_struct: + switch (cabi) + { + case FFI_THISCALL: + case FFI_FASTCALL: + case FFI_STDCALL: + case FFI_MS_CDECL: + flags = X86_RET_STRUCTARG; + break; + default: + flags = X86_RET_STRUCTPOP; + break; + } + /* Allocate space for return value pointer. 
*/ + bytes += FFI_ALIGN (sizeof(void*), FFI_SIZEOF_ARG); + } + break; + case FFI_TYPE_COMPLEX: + switch (cif->rtype->elements[0]->type) + { + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + goto do_struct; + case FFI_TYPE_FLOAT: + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + flags = X86_RET_INT64; + break; + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + flags = X86_RET_INT32; + break; + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + flags = X86_RET_STRUCT_2B; + break; + default: + return FFI_BAD_TYPEDEF; + } break; - case FFI_TYPE_COMPLEX: - switch (cif->rtype->elements[0]->type) - { - case FFI_TYPE_DOUBLE: - case FFI_TYPE_LONGDOUBLE: - case FFI_TYPE_SINT64: - case FFI_TYPE_UINT64: - goto do_struct; - case FFI_TYPE_FLOAT: - case FFI_TYPE_INT: - case FFI_TYPE_SINT32: - case FFI_TYPE_UINT32: - flags = X86_RET_INT64; - break; - case FFI_TYPE_SINT16: - case FFI_TYPE_UINT16: - flags = X86_RET_INT32; - break; - case FFI_TYPE_SINT8: - case FFI_TYPE_UINT8: - flags = X86_RET_STRUCT_2B; - break; - default: - return FFI_BAD_TYPEDEF; - } - break; default: - return FFI_BAD_TYPEDEF; + return FFI_BAD_TYPEDEF; + } + cif->flags = flags; + + for (i = 0, n = cif->nargs; i < n; i++) + { + ffi_type *t = cif->arg_types[i]; + + bytes = FFI_ALIGN (bytes, t->alignment); + bytes += FFI_ALIGN (t->size, FFI_SIZEOF_ARG); } - cif->flags = flags; + cif->bytes = bytes; + + return FFI_OK; +} - for (i = 0, n = cif->nargs; i < n; i++) +static ffi_arg +extend_basic_type(void *arg, int type) +{ + switch (type) { - ffi_type *t = cif->arg_types[i]; - - bytes = FFI_ALIGN (bytes, t->alignment); - bytes += FFI_ALIGN (t->size, FFI_SIZEOF_ARG); + case FFI_TYPE_SINT8: + return *(SINT8 *)arg; + case FFI_TYPE_UINT8: + return *(UINT8 *)arg; + case FFI_TYPE_SINT16: + return *(SINT16 *)arg; + case FFI_TYPE_UINT16: + return *(UINT16 *)arg; + + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_POINTER: + case FFI_TYPE_FLOAT: + return *(UINT32 *)arg; + + default: + abort(); } - cif->bytes = bytes; - - return FFI_OK; -} - -static ffi_arg -extend_basic_type(void *arg, int type) -{ - switch (type) - { - case FFI_TYPE_SINT8: - return *(SINT8 *)arg; - case FFI_TYPE_UINT8: - return *(UINT8 *)arg; - case FFI_TYPE_SINT16: - return *(SINT16 *)arg; - case FFI_TYPE_UINT16: - return *(UINT16 *)arg; - - case FFI_TYPE_SINT32: - case FFI_TYPE_UINT32: - case FFI_TYPE_POINTER: - case FFI_TYPE_FLOAT: - return *(UINT32 *)arg; - - default: - abort(); - } } -struct call_frame -{ - void *ebp; /* 0 */ - void *retaddr; /* 4 */ - void (*fn)(void); /* 8 */ - int flags; /* 12 */ - void *rvalue; /* 16 */ - unsigned regs[3]; /* 20-28 */ -}; - -struct abi_params -{ - int dir; /* parameter growth direction */ - int static_chain; /* the static chain register used by gcc */ - int nregs; /* number of register parameters */ - int regs[3]; -}; - -static const struct abi_params abi_params[FFI_LAST_ABI] = { - [FFI_SYSV] = { 1, R_ECX, 0 }, - [FFI_THISCALL] = { 1, R_EAX, 1, { R_ECX } }, - [FFI_FASTCALL] = { 1, R_EAX, 2, { R_ECX, R_EDX } }, - [FFI_STDCALL] = { 1, R_ECX, 0 }, - [FFI_PASCAL] = { -1, R_ECX, 0 }, - /* ??? No defined static chain; gcc does not support REGISTER. 
*/ - [FFI_REGISTER] = { -1, R_ECX, 3, { R_EAX, R_EDX, R_ECX } }, - [FFI_MS_CDECL] = { 1, R_ECX, 0 } -}; - -#ifdef HAVE_FASTCALL - #ifdef _MSC_VER - #define FFI_DECLARE_FASTCALL __fastcall - #else - #define FFI_DECLARE_FASTCALL __declspec(fastcall) - #endif +struct call_frame +{ + void *ebp; /* 0 */ + void *retaddr; /* 4 */ + void (*fn)(void); /* 8 */ + int flags; /* 12 */ + void *rvalue; /* 16 */ + unsigned regs[3]; /* 20-28 */ +}; + +struct abi_params +{ + int dir; /* parameter growth direction */ + int static_chain; /* the static chain register used by gcc */ + int nregs; /* number of register parameters */ + int regs[3]; +}; + +static const struct abi_params abi_params[FFI_LAST_ABI] = { + [FFI_SYSV] = { 1, R_ECX, 0 }, + [FFI_THISCALL] = { 1, R_EAX, 1, { R_ECX } }, + [FFI_FASTCALL] = { 1, R_EAX, 2, { R_ECX, R_EDX } }, + [FFI_STDCALL] = { 1, R_ECX, 0 }, + [FFI_PASCAL] = { -1, R_ECX, 0 }, + /* ??? No defined static chain; gcc does not support REGISTER. */ + [FFI_REGISTER] = { -1, R_ECX, 3, { R_EAX, R_EDX, R_ECX } }, + [FFI_MS_CDECL] = { 1, R_ECX, 0 } +}; + +#ifdef HAVE_FASTCALL + #ifdef _MSC_VER + #define FFI_DECLARE_FASTCALL __fastcall + #else + #define FFI_DECLARE_FASTCALL __declspec(fastcall) + #endif #else - #define FFI_DECLARE_FASTCALL + #define FFI_DECLARE_FASTCALL #endif -extern void FFI_DECLARE_FASTCALL ffi_call_i386(struct call_frame *, char *) FFI_HIDDEN; - -static void -ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, - void **avalue, void *closure) +extern void FFI_DECLARE_FASTCALL ffi_call_i386(struct call_frame *, char *) FFI_HIDDEN; + +static void +ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) { - size_t rsize, bytes; - struct call_frame *frame; - char *stack, *argp; - ffi_type **arg_types; - int flags, cabi, i, n, dir, narg_reg; - const struct abi_params *pabi; - - flags = cif->flags; - cabi = cif->abi; - pabi = &abi_params[cabi]; - dir = pabi->dir; - - rsize = 0; - if (rvalue == NULL) + size_t rsize, bytes; + struct call_frame *frame; + char *stack, *argp; + ffi_type **arg_types; + int flags, cabi, i, n, dir, narg_reg; + const struct abi_params *pabi; + + flags = cif->flags; + cabi = cif->abi; + pabi = &abi_params[cabi]; + dir = pabi->dir; + + rsize = 0; + if (rvalue == NULL) { - switch (flags) - { - case X86_RET_FLOAT: - case X86_RET_DOUBLE: - case X86_RET_LDOUBLE: - case X86_RET_STRUCTPOP: - case X86_RET_STRUCTARG: - /* The float cases need to pop the 387 stack. - The struct cases need to pass a valid pointer to the callee. */ - rsize = cif->rtype->size; - break; - default: - /* We can pretend that the callee returns nothing. */ - flags = X86_RET_VOID; - break; - } + switch (flags) + { + case X86_RET_FLOAT: + case X86_RET_DOUBLE: + case X86_RET_LDOUBLE: + case X86_RET_STRUCTPOP: + case X86_RET_STRUCTARG: + /* The float cases need to pop the 387 stack. + The struct cases need to pass a valid pointer to the callee. */ + rsize = cif->rtype->size; + break; + default: + /* We can pretend that the callee returns nothing. */ + flags = X86_RET_VOID; + break; + } } - bytes = STACK_ALIGN (cif->bytes); - stack = alloca(bytes + sizeof(*frame) + rsize); - argp = (dir < 0 ? stack + bytes : stack); - frame = (struct call_frame *)(stack + bytes); - if (rsize) - rvalue = frame + 1; - - frame->fn = fn; - frame->flags = flags; - frame->rvalue = rvalue; - frame->regs[pabi->static_chain] = (unsigned)closure; - - narg_reg = 0; - switch (flags) - { - case X86_RET_STRUCTARG: - /* The pointer is passed as the first argument. 
*/ - if (pabi->nregs > 0) - { - frame->regs[pabi->regs[0]] = (unsigned)rvalue; - narg_reg = 1; - break; - } - /* fallthru */ - case X86_RET_STRUCTPOP: - *(void **)argp = rvalue; - argp += sizeof(void *); + bytes = STACK_ALIGN (cif->bytes); + stack = alloca(bytes + sizeof(*frame) + rsize); + argp = (dir < 0 ? stack + bytes : stack); + frame = (struct call_frame *)(stack + bytes); + if (rsize) + rvalue = frame + 1; + + frame->fn = fn; + frame->flags = flags; + frame->rvalue = rvalue; + frame->regs[pabi->static_chain] = (unsigned)closure; + + narg_reg = 0; + switch (flags) + { + case X86_RET_STRUCTARG: + /* The pointer is passed as the first argument. */ + if (pabi->nregs > 0) + { + frame->regs[pabi->regs[0]] = (unsigned)rvalue; + narg_reg = 1; + break; + } + /* fallthru */ + case X86_RET_STRUCTPOP: + *(void **)argp = rvalue; + argp += sizeof(void *); break; } - arg_types = cif->arg_types; - for (i = 0, n = cif->nargs; i < n; i++) - { - ffi_type *ty = arg_types[i]; - void *valp = avalue[i]; - size_t z = ty->size; - int t = ty->type; - - if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT) - { - ffi_arg val = extend_basic_type (valp, t); - - if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs) - frame->regs[pabi->regs[narg_reg++]] = val; - else if (dir < 0) - { - argp -= 4; - *(ffi_arg *)argp = val; - } - else - { - *(ffi_arg *)argp = val; - argp += 4; - } - } - else - { - size_t za = FFI_ALIGN (z, FFI_SIZEOF_ARG); - size_t align = FFI_SIZEOF_ARG; - - /* Issue 434: For thiscall and fastcall, if the paramter passed - as 64-bit integer or struct, all following integer paramters - will be passed on stack. */ - if ((cabi == FFI_THISCALL || cabi == FFI_FASTCALL) - && (t == FFI_TYPE_SINT64 - || t == FFI_TYPE_UINT64 - || t == FFI_TYPE_STRUCT)) - narg_reg = 2; - - /* Alignment rules for arguments are quite complex. Vectors and - structures with 16 byte alignment get it. Note that long double - on Darwin does have 16 byte alignment, and does not get this - alignment if passed directly; a structure with a long double - inside, however, would get 16 byte alignment. Since libffi does - not support vectors, we need non concern ourselves with other - cases. */ - if (t == FFI_TYPE_STRUCT && ty->alignment >= 16) - align = 16; - - if (dir < 0) - { - /* ??? These reverse argument ABIs are probably too old - to have cared about alignment. Someone should check. */ - argp -= za; - memcpy (argp, valp, z); - } - else - { - argp = (char *)FFI_ALIGN (argp, align); - memcpy (argp, valp, z); - argp += za; - } - } - } - FFI_ASSERT (dir > 0 || argp == stack); - - ffi_call_i386 (frame, stack); -} - -void -ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) -{ - ffi_call_int (cif, fn, rvalue, avalue, NULL); + arg_types = cif->arg_types; + for (i = 0, n = cif->nargs; i < n; i++) + { + ffi_type *ty = arg_types[i]; + void *valp = avalue[i]; + size_t z = ty->size; + int t = ty->type; + + if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT) + { + ffi_arg val = extend_basic_type (valp, t); + + if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs) + frame->regs[pabi->regs[narg_reg++]] = val; + else if (dir < 0) + { + argp -= 4; + *(ffi_arg *)argp = val; + } + else + { + *(ffi_arg *)argp = val; + argp += 4; + } + } + else + { + size_t za = FFI_ALIGN (z, FFI_SIZEOF_ARG); + size_t align = FFI_SIZEOF_ARG; + + /* Issue 434: For thiscall and fastcall, if the paramter passed + as 64-bit integer or struct, all following integer paramters + will be passed on stack. 
*/ + if ((cabi == FFI_THISCALL || cabi == FFI_FASTCALL) + && (t == FFI_TYPE_SINT64 + || t == FFI_TYPE_UINT64 + || t == FFI_TYPE_STRUCT)) + narg_reg = 2; + + /* Alignment rules for arguments are quite complex. Vectors and + structures with 16 byte alignment get it. Note that long double + on Darwin does have 16 byte alignment, and does not get this + alignment if passed directly; a structure with a long double + inside, however, would get 16 byte alignment. Since libffi does + not support vectors, we need non concern ourselves with other + cases. */ + if (t == FFI_TYPE_STRUCT && ty->alignment >= 16) + align = 16; + + if (dir < 0) + { + /* ??? These reverse argument ABIs are probably too old + to have cared about alignment. Someone should check. */ + argp -= za; + memcpy (argp, valp, z); + } + else + { + argp = (char *)FFI_ALIGN (argp, align); + memcpy (argp, valp, z); + argp += za; + } + } + } + FFI_ASSERT (dir > 0 || argp == stack); + + ffi_call_i386 (frame, stack); +} + +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + ffi_call_int (cif, fn, rvalue, avalue, NULL); +} + +void +ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ + ffi_call_int (cif, fn, rvalue, avalue, closure); } -void -ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, - void **avalue, void *closure) +/** private members **/ + +void FFI_HIDDEN ffi_closure_i386(void); +void FFI_HIDDEN ffi_closure_STDCALL(void); +void FFI_HIDDEN ffi_closure_REGISTER(void); + +struct closure_frame { - ffi_call_int (cif, fn, rvalue, avalue, closure); -} - -/** private members **/ - -void FFI_HIDDEN ffi_closure_i386(void); -void FFI_HIDDEN ffi_closure_STDCALL(void); -void FFI_HIDDEN ffi_closure_REGISTER(void); - -struct closure_frame -{ - unsigned rettemp[4]; /* 0 */ - unsigned regs[3]; /* 16-24 */ - ffi_cif *cif; /* 28 */ - void (*fun)(ffi_cif*,void*,void**,void*); /* 32 */ - void *user_data; /* 36 */ -}; - -int FFI_HIDDEN FFI_DECLARE_FASTCALL -ffi_closure_inner (struct closure_frame *frame, char *stack) + unsigned rettemp[4]; /* 0 */ + unsigned regs[3]; /* 16-24 */ + ffi_cif *cif; /* 28 */ + void (*fun)(ffi_cif*,void*,void**,void*); /* 32 */ + void *user_data; /* 36 */ +}; + +int FFI_HIDDEN FFI_DECLARE_FASTCALL +ffi_closure_inner (struct closure_frame *frame, char *stack) { - ffi_cif *cif = frame->cif; - int cabi, i, n, flags, dir, narg_reg; - const struct abi_params *pabi; - ffi_type **arg_types; + ffi_cif *cif = frame->cif; + int cabi, i, n, flags, dir, narg_reg; + const struct abi_params *pabi; + ffi_type **arg_types; char *argp; - void *rvalue; - void **avalue; - - cabi = cif->abi; - flags = cif->flags; - narg_reg = 0; - rvalue = frame->rettemp; - pabi = &abi_params[cabi]; - dir = pabi->dir; - argp = (dir < 0 ? stack + STACK_ALIGN (cif->bytes) : stack); - - switch (flags) - { - case X86_RET_STRUCTARG: - if (pabi->nregs > 0) - { - rvalue = (void *)frame->regs[pabi->regs[0]]; - narg_reg = 1; - frame->rettemp[0] = (unsigned)rvalue; - break; - } - /* fallthru */ - case X86_RET_STRUCTPOP: - rvalue = *(void **)argp; - argp += sizeof(void *); - frame->rettemp[0] = (unsigned)rvalue; - break; - } - - n = cif->nargs; - avalue = alloca(sizeof(void *) * n); - - arg_types = cif->arg_types; - for (i = 0; i < n; ++i) + void *rvalue; + void **avalue; + + cabi = cif->abi; + flags = cif->flags; + narg_reg = 0; + rvalue = frame->rettemp; + pabi = &abi_params[cabi]; + dir = pabi->dir; + argp = (dir < 0 ? 
stack + STACK_ALIGN (cif->bytes) : stack); + + switch (flags) + { + case X86_RET_STRUCTARG: + if (pabi->nregs > 0) + { + rvalue = (void *)frame->regs[pabi->regs[0]]; + narg_reg = 1; + frame->rettemp[0] = (unsigned)rvalue; + break; + } + /* fallthru */ + case X86_RET_STRUCTPOP: + rvalue = *(void **)argp; + argp += sizeof(void *); + frame->rettemp[0] = (unsigned)rvalue; + break; + } + + n = cif->nargs; + avalue = alloca(sizeof(void *) * n); + + arg_types = cif->arg_types; + for (i = 0; i < n; ++i) { - ffi_type *ty = arg_types[i]; - size_t z = ty->size; - int t = ty->type; - void *valp; - - if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT) - { - if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs) - valp = &frame->regs[pabi->regs[narg_reg++]]; - else if (dir < 0) - { - argp -= 4; - valp = argp; - } - else - { - valp = argp; - argp += 4; - } - } + ffi_type *ty = arg_types[i]; + size_t z = ty->size; + int t = ty->type; + void *valp; + + if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT) + { + if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs) + valp = &frame->regs[pabi->regs[narg_reg++]]; + else if (dir < 0) + { + argp -= 4; + valp = argp; + } + else + { + valp = argp; + argp += 4; + } + } else - { - size_t za = FFI_ALIGN (z, FFI_SIZEOF_ARG); - size_t align = FFI_SIZEOF_ARG; - - /* See the comment in ffi_call_int. */ - if (t == FFI_TYPE_STRUCT && ty->alignment >= 16) - align = 16; - - /* Issue 434: For thiscall and fastcall, if the paramter passed - as 64-bit integer or struct, all following integer paramters - will be passed on stack. */ - if ((cabi == FFI_THISCALL || cabi == FFI_FASTCALL) - && (t == FFI_TYPE_SINT64 - || t == FFI_TYPE_UINT64 - || t == FFI_TYPE_STRUCT)) - narg_reg = 2; - - if (dir < 0) - { - /* ??? These reverse argument ABIs are probably too old - to have cared about alignment. Someone should check. */ - argp -= za; - valp = argp; - } - else - { - argp = (char *)FFI_ALIGN (argp, align); - valp = argp; - argp += za; - } - } - - avalue[i] = valp; - } - - frame->fun (cif, rvalue, avalue, frame->user_data); - - if (cabi == FFI_STDCALL) - return flags + (cif->bytes << X86_RET_POP_SHIFT); - else - return flags; -} + { + size_t za = FFI_ALIGN (z, FFI_SIZEOF_ARG); + size_t align = FFI_SIZEOF_ARG; + + /* See the comment in ffi_call_int. */ + if (t == FFI_TYPE_STRUCT && ty->alignment >= 16) + align = 16; + + /* Issue 434: For thiscall and fastcall, if the paramter passed + as 64-bit integer or struct, all following integer paramters + will be passed on stack. */ + if ((cabi == FFI_THISCALL || cabi == FFI_FASTCALL) + && (t == FFI_TYPE_SINT64 + || t == FFI_TYPE_UINT64 + || t == FFI_TYPE_STRUCT)) + narg_reg = 2; + + if (dir < 0) + { + /* ??? These reverse argument ABIs are probably too old + to have cared about alignment. Someone should check. 
*/ + argp -= za; + valp = argp; + } + else + { + argp = (char *)FFI_ALIGN (argp, align); + valp = argp; + argp += za; + } + } + + avalue[i] = valp; + } + + frame->fun (cif, rvalue, avalue, frame->user_data); + + if (cabi == FFI_STDCALL) + return flags + (cif->bytes << X86_RET_POP_SHIFT); + else + return flags; +} ffi_status ffi_prep_closure_loc (ffi_closure* closure, @@ -533,78 +533,78 @@ ffi_prep_closure_loc (ffi_closure* closure, void *user_data, void *codeloc) { - char *tramp = closure->tramp; - void (*dest)(void); - int op = 0xb8; /* movl imm, %eax */ - - switch (cif->abi) + char *tramp = closure->tramp; + void (*dest)(void); + int op = 0xb8; /* movl imm, %eax */ + + switch (cif->abi) { - case FFI_SYSV: - case FFI_THISCALL: - case FFI_FASTCALL: - case FFI_MS_CDECL: - dest = ffi_closure_i386; - break; - case FFI_STDCALL: - case FFI_PASCAL: - dest = ffi_closure_STDCALL; - break; - case FFI_REGISTER: - dest = ffi_closure_REGISTER; - op = 0x68; /* pushl imm */ - break; - default: - return FFI_BAD_ABI; + case FFI_SYSV: + case FFI_THISCALL: + case FFI_FASTCALL: + case FFI_MS_CDECL: + dest = ffi_closure_i386; + break; + case FFI_STDCALL: + case FFI_PASCAL: + dest = ffi_closure_STDCALL; + break; + case FFI_REGISTER: + dest = ffi_closure_REGISTER; + op = 0x68; /* pushl imm */ + break; + default: + return FFI_BAD_ABI; } - - /* movl or pushl immediate. */ - tramp[0] = op; - *(void **)(tramp + 1) = codeloc; - - /* jmp dest */ - tramp[5] = 0xe9; - *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10); - - closure->cif = cif; - closure->fun = fun; - closure->user_data = user_data; - - return FFI_OK; -} - -void FFI_HIDDEN ffi_go_closure_EAX(void); -void FFI_HIDDEN ffi_go_closure_ECX(void); -void FFI_HIDDEN ffi_go_closure_STDCALL(void); - -ffi_status -ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, - void (*fun)(ffi_cif*,void*,void**,void*)) -{ - void (*dest)(void); - - switch (cif->abi) + + /* movl or pushl immediate. 
*/ + tramp[0] = op; + *(void **)(tramp + 1) = codeloc; + + /* jmp dest */ + tramp[5] = 0xe9; + *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10); + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +void FFI_HIDDEN ffi_go_closure_EAX(void); +void FFI_HIDDEN ffi_go_closure_ECX(void); +void FFI_HIDDEN ffi_go_closure_STDCALL(void); + +ffi_status +ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, + void (*fun)(ffi_cif*,void*,void**,void*)) +{ + void (*dest)(void); + + switch (cif->abi) { - case FFI_SYSV: - case FFI_MS_CDECL: - dest = ffi_go_closure_ECX; - break; - case FFI_THISCALL: - case FFI_FASTCALL: - dest = ffi_go_closure_EAX; - break; - case FFI_STDCALL: - case FFI_PASCAL: - dest = ffi_go_closure_STDCALL; - break; - case FFI_REGISTER: - default: + case FFI_SYSV: + case FFI_MS_CDECL: + dest = ffi_go_closure_ECX; + break; + case FFI_THISCALL: + case FFI_FASTCALL: + dest = ffi_go_closure_EAX; + break; + case FFI_STDCALL: + case FFI_PASCAL: + dest = ffi_go_closure_STDCALL; + break; + case FFI_REGISTER: + default: return FFI_BAD_ABI; } - closure->tramp = dest; - closure->cif = cif; - closure->fun = fun; - + closure->tramp = dest; + closure->cif = cif; + closure->fun = fun; + return FFI_OK; } @@ -612,150 +612,150 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, #if !FFI_NO_RAW_API -void FFI_HIDDEN ffi_closure_raw_SYSV(void); -void FFI_HIDDEN ffi_closure_raw_THISCALL(void); - +void FFI_HIDDEN ffi_closure_raw_SYSV(void); +void FFI_HIDDEN ffi_closure_raw_THISCALL(void); + ffi_status -ffi_prep_raw_closure_loc (ffi_raw_closure *closure, - ffi_cif *cif, +ffi_prep_raw_closure_loc (ffi_raw_closure *closure, + ffi_cif *cif, void (*fun)(ffi_cif*,void*,ffi_raw*,void*), void *user_data, void *codeloc) { - char *tramp = closure->tramp; - void (*dest)(void); + char *tramp = closure->tramp; + void (*dest)(void); int i; - /* We currently don't support certain kinds of arguments for raw + /* We currently don't support certain kinds of arguments for raw closures. This should be implemented by a separate assembly language routine, since it would require argument processing, something we don't do now for performance. */ - for (i = cif->nargs-1; i >= 0; i--) - switch (cif->arg_types[i]->type) - { - case FFI_TYPE_STRUCT: - case FFI_TYPE_LONGDOUBLE: - return FFI_BAD_TYPEDEF; - } - - switch (cif->abi) + for (i = cif->nargs-1; i >= 0; i--) + switch (cif->arg_types[i]->type) + { + case FFI_TYPE_STRUCT: + case FFI_TYPE_LONGDOUBLE: + return FFI_BAD_TYPEDEF; + } + + switch (cif->abi) { - case FFI_THISCALL: - dest = ffi_closure_raw_THISCALL; - break; - case FFI_SYSV: - dest = ffi_closure_raw_SYSV; - break; - default: - return FFI_BAD_ABI; + case FFI_THISCALL: + dest = ffi_closure_raw_THISCALL; + break; + case FFI_SYSV: + dest = ffi_closure_raw_SYSV; + break; + default: + return FFI_BAD_ABI; } - - /* movl imm, %eax. */ - tramp[0] = 0xb8; - *(void **)(tramp + 1) = codeloc; - - /* jmp dest */ - tramp[5] = 0xe9; - *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10); - - closure->cif = cif; - closure->fun = fun; + + /* movl imm, %eax. 
*/ + tramp[0] = 0xb8; + *(void **)(tramp + 1) = codeloc; + + /* jmp dest */ + tramp[5] = 0xe9; + *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10); + + closure->cif = cif; + closure->fun = fun; closure->user_data = user_data; return FFI_OK; } void -ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue) +ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue) { - size_t rsize, bytes; - struct call_frame *frame; - char *stack, *argp; - ffi_type **arg_types; - int flags, cabi, i, n, narg_reg; - const struct abi_params *pabi; - - flags = cif->flags; - cabi = cif->abi; - pabi = &abi_params[cabi]; - - rsize = 0; - if (rvalue == NULL) + size_t rsize, bytes; + struct call_frame *frame; + char *stack, *argp; + ffi_type **arg_types; + int flags, cabi, i, n, narg_reg; + const struct abi_params *pabi; + + flags = cif->flags; + cabi = cif->abi; + pabi = &abi_params[cabi]; + + rsize = 0; + if (rvalue == NULL) { - switch (flags) - { - case X86_RET_FLOAT: - case X86_RET_DOUBLE: - case X86_RET_LDOUBLE: - case X86_RET_STRUCTPOP: - case X86_RET_STRUCTARG: - /* The float cases need to pop the 387 stack. - The struct cases need to pass a valid pointer to the callee. */ - rsize = cif->rtype->size; - break; - default: - /* We can pretend that the callee returns nothing. */ - flags = X86_RET_VOID; - break; - } + switch (flags) + { + case X86_RET_FLOAT: + case X86_RET_DOUBLE: + case X86_RET_LDOUBLE: + case X86_RET_STRUCTPOP: + case X86_RET_STRUCTARG: + /* The float cases need to pop the 387 stack. + The struct cases need to pass a valid pointer to the callee. */ + rsize = cif->rtype->size; + break; + default: + /* We can pretend that the callee returns nothing. */ + flags = X86_RET_VOID; + break; + } } - bytes = STACK_ALIGN (cif->bytes); - argp = stack = - (void *)((uintptr_t)alloca(bytes + sizeof(*frame) + rsize + 15) & ~16); - frame = (struct call_frame *)(stack + bytes); - if (rsize) - rvalue = frame + 1; - - frame->fn = fn; - frame->flags = flags; - frame->rvalue = rvalue; - - narg_reg = 0; - switch (flags) - { - case X86_RET_STRUCTARG: - /* The pointer is passed as the first argument. */ - if (pabi->nregs > 0) - { - frame->regs[pabi->regs[0]] = (unsigned)rvalue; - narg_reg = 1; - break; - } - /* fallthru */ - case X86_RET_STRUCTPOP: - *(void **)argp = rvalue; - argp += sizeof(void *); - bytes -= sizeof(void *); + bytes = STACK_ALIGN (cif->bytes); + argp = stack = + (void *)((uintptr_t)alloca(bytes + sizeof(*frame) + rsize + 15) & ~16); + frame = (struct call_frame *)(stack + bytes); + if (rsize) + rvalue = frame + 1; + + frame->fn = fn; + frame->flags = flags; + frame->rvalue = rvalue; + + narg_reg = 0; + switch (flags) + { + case X86_RET_STRUCTARG: + /* The pointer is passed as the first argument. 
*/ + if (pabi->nregs > 0) + { + frame->regs[pabi->regs[0]] = (unsigned)rvalue; + narg_reg = 1; + break; + } + /* fallthru */ + case X86_RET_STRUCTPOP: + *(void **)argp = rvalue; + argp += sizeof(void *); + bytes -= sizeof(void *); break; } - arg_types = cif->arg_types; - for (i = 0, n = cif->nargs; narg_reg < pabi->nregs && i < n; i++) - { - ffi_type *ty = arg_types[i]; - size_t z = ty->size; - int t = ty->type; - - if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT && t != FFI_TYPE_FLOAT) - { - ffi_arg val = extend_basic_type (avalue, t); - frame->regs[pabi->regs[narg_reg++]] = val; - z = FFI_SIZEOF_ARG; - } - else - { - memcpy (argp, avalue, z); - z = FFI_ALIGN (z, FFI_SIZEOF_ARG); - argp += z; - } - avalue += z; - bytes -= z; - } - if (i < n) - memcpy (argp, avalue, bytes); - - ffi_call_i386 (frame, stack); -} -#endif /* !FFI_NO_RAW_API */ -#endif /* __i386__ */ + arg_types = cif->arg_types; + for (i = 0, n = cif->nargs; narg_reg < pabi->nregs && i < n; i++) + { + ffi_type *ty = arg_types[i]; + size_t z = ty->size; + int t = ty->type; + + if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT && t != FFI_TYPE_FLOAT) + { + ffi_arg val = extend_basic_type (avalue, t); + frame->regs[pabi->regs[narg_reg++]] = val; + z = FFI_SIZEOF_ARG; + } + else + { + memcpy (argp, avalue, z); + z = FFI_ALIGN (z, FFI_SIZEOF_ARG); + argp += z; + } + avalue += z; + bytes -= z; + } + if (i < n) + memcpy (argp, avalue, bytes); + + ffi_call_i386 (frame, stack); +} +#endif /* !FFI_NO_RAW_API */ +#endif /* __i386__ */ diff --git a/contrib/restricted/libffi/src/x86/ffi64.c b/contrib/restricted/libffi/src/x86/ffi64.c index 2d493d0b0a..dec331c958 100644 --- a/contrib/restricted/libffi/src/x86/ffi64.c +++ b/contrib/restricted/libffi/src/x86/ffi64.c @@ -1,11 +1,11 @@ /* ----------------------------------------------------------------------- - ffi64.c - Copyright (c) 2011, 2018 Anthony Green - Copyright (c) 2013 The Written Word, Inc. + ffi64.c - Copyright (c) 2011, 2018 Anthony Green + Copyright (c) 2013 The Written Word, Inc. Copyright (c) 2008, 2010 Red Hat, Inc. Copyright (c) 2002, 2007 Bo Thorsen <bo@suse.de> - x86-64 Foreign Function Interface - + x86-64 Foreign Function Interface + Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the ``Software''), to deal in the Software without restriction, including @@ -32,8 +32,8 @@ #include <stdlib.h> #include <stdarg.h> -#include <stdint.h> -#include "internal64.h" +#include <stdint.h> +#include "internal64.h" #ifdef __x86_64__ @@ -41,7 +41,7 @@ #define MAX_SSE_REGS 8 #if defined(__INTEL_COMPILER) -#include "xmmintrin.h" +#include "xmmintrin.h" #define UINT128 __m128 #else #if defined(__SUNPRO_C) @@ -63,13 +63,13 @@ struct register_args { /* Registers for argument passing. */ UINT64 gpr[MAX_GPR_REGS]; - union big_int_union sse[MAX_SSE_REGS]; - UINT64 rax; /* ssecount */ - UINT64 r10; /* static chain */ + union big_int_union sse[MAX_SSE_REGS]; + UINT64 rax; /* ssecount */ + UINT64 r10; /* static chain */ }; extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags, - void *raddr, void (*fnaddr)(void)) FFI_HIDDEN; + void *raddr, void (*fnaddr)(void)) FFI_HIDDEN; /* All reference to register classes here is identical to the code in gcc/config/i386/i386.c. Do *not* change one without the other. */ @@ -156,7 +156,7 @@ merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) See the x86-64 PS ABI for details. 
*/ -static size_t +static size_t classify_argument (ffi_type *type, enum x86_64_reg_class classes[], size_t byte_offset) { @@ -171,9 +171,9 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], case FFI_TYPE_UINT64: case FFI_TYPE_SINT64: case FFI_TYPE_POINTER: - do_integer: + do_integer: { - size_t size = byte_offset + type->size; + size_t size = byte_offset + type->size; if (size <= 4) { @@ -193,7 +193,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], } else if (size <= 16) { - classes[0] = classes[1] = X86_64_INTEGER_CLASS; + classes[0] = classes[1] = X86_64_INTEGER_CLASS; return 2; } else @@ -208,18 +208,18 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], case FFI_TYPE_DOUBLE: classes[0] = X86_64_SSEDF_CLASS; return 1; -#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE case FFI_TYPE_LONGDOUBLE: classes[0] = X86_64_X87_CLASS; classes[1] = X86_64_X87UP_CLASS; return 2; -#endif +#endif case FFI_TYPE_STRUCT: { - const size_t UNITS_PER_WORD = 8; - size_t words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; - ffi_type **ptr; - unsigned int i; + const size_t UNITS_PER_WORD = 8; + size_t words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + ffi_type **ptr; + unsigned int i; enum x86_64_reg_class subclasses[MAX_CLASSES]; /* If the struct is larger than 32 bytes, pass it on the stack. */ @@ -233,7 +233,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], signalize memory class, so handle it as special case. */ if (!words) { - case FFI_TYPE_VOID: + case FFI_TYPE_VOID: classes[0] = X86_64_NO_CLASS; return 1; } @@ -241,16 +241,16 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], /* Merge the fields of structure. */ for (ptr = type->elements; *ptr != NULL; ptr++) { - size_t num; + size_t num; - byte_offset = FFI_ALIGN (byte_offset, (*ptr)->alignment); + byte_offset = FFI_ALIGN (byte_offset, (*ptr)->alignment); num = classify_argument (*ptr, subclasses, byte_offset % 8); if (num == 0) return 0; for (i = 0; i < num; i++) { - size_t pos = byte_offset / 8; + size_t pos = byte_offset / 8; classes[i + pos] = merge_classes (subclasses[i], classes[i + pos]); } @@ -282,7 +282,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], /* The X86_64_SSEUP_CLASS should be always preceded by X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ - if (i > 1 && classes[i] == X86_64_SSEUP_CLASS + if (i > 1 && classes[i] == X86_64_SSEUP_CLASS && classes[i - 1] != X86_64_SSE_CLASS && classes[i - 1] != X86_64_SSEUP_CLASS) { @@ -293,7 +293,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, everything should be passed in memory. */ - if (i > 1 && classes[i] == X86_64_X87UP_CLASS + if (i > 1 && classes[i] == X86_64_X87UP_CLASS && (classes[i - 1] != X86_64_X87_CLASS)) { /* The first one should never be X86_64_X87UP_CLASS. 
*/ @@ -303,55 +303,55 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[], } return words; } - case FFI_TYPE_COMPLEX: - { - ffi_type *inner = type->elements[0]; - switch (inner->type) - { - case FFI_TYPE_INT: - case FFI_TYPE_UINT8: - case FFI_TYPE_SINT8: - case FFI_TYPE_UINT16: - case FFI_TYPE_SINT16: - case FFI_TYPE_UINT32: - case FFI_TYPE_SINT32: - case FFI_TYPE_UINT64: - case FFI_TYPE_SINT64: - goto do_integer; - - case FFI_TYPE_FLOAT: - classes[0] = X86_64_SSE_CLASS; - if (byte_offset % 8) - { - classes[1] = X86_64_SSESF_CLASS; - return 2; - } - return 1; - case FFI_TYPE_DOUBLE: - classes[0] = classes[1] = X86_64_SSEDF_CLASS; - return 2; -#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE - case FFI_TYPE_LONGDOUBLE: - classes[0] = X86_64_COMPLEX_X87_CLASS; - return 1; -#endif - } - } + case FFI_TYPE_COMPLEX: + { + ffi_type *inner = type->elements[0]; + switch (inner->type) + { + case FFI_TYPE_INT: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + goto do_integer; + + case FFI_TYPE_FLOAT: + classes[0] = X86_64_SSE_CLASS; + if (byte_offset % 8) + { + classes[1] = X86_64_SSESF_CLASS; + return 2; + } + return 1; + case FFI_TYPE_DOUBLE: + classes[0] = classes[1] = X86_64_SSEDF_CLASS; + return 2; +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + classes[0] = X86_64_COMPLEX_X87_CLASS; + return 1; +#endif + } + } } - abort(); + abort(); } /* Examine the argument and return set number of register required in each class. Return zero iff parameter should be passed in memory, otherwise the number of registers. */ -static size_t +static size_t examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES], _Bool in_return, int *pngpr, int *pnsse) { - size_t n; - unsigned int i; - int ngpr, nsse; + size_t n; + unsigned int i; + int ngpr, nsse; n = classify_argument (type, classes, 0); if (n == 0) @@ -389,74 +389,74 @@ examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES], /* Perform machine dependent cif processing. 
*/ -#ifndef __ILP32__ -extern ffi_status -ffi_prep_cif_machdep_efi64(ffi_cif *cif); -#endif - -ffi_status FFI_HIDDEN +#ifndef __ILP32__ +extern ffi_status +ffi_prep_cif_machdep_efi64(ffi_cif *cif); +#endif + +ffi_status FFI_HIDDEN ffi_prep_cif_machdep (ffi_cif *cif) { - int gprcount, ssecount, i, avn, ngpr, nsse; - unsigned flags; + int gprcount, ssecount, i, avn, ngpr, nsse; + unsigned flags; enum x86_64_reg_class classes[MAX_CLASSES]; - size_t bytes, n, rtype_size; - ffi_type *rtype; - -#ifndef __ILP32__ - if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) - return ffi_prep_cif_machdep_efi64(cif); -#endif - if (cif->abi != FFI_UNIX64) - return FFI_BAD_ABI; - + size_t bytes, n, rtype_size; + ffi_type *rtype; + +#ifndef __ILP32__ + if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) + return ffi_prep_cif_machdep_efi64(cif); +#endif + if (cif->abi != FFI_UNIX64) + return FFI_BAD_ABI; + gprcount = ssecount = 0; - rtype = cif->rtype; - rtype_size = rtype->size; - switch (rtype->type) + rtype = cif->rtype; + rtype_size = rtype->size; + switch (rtype->type) { - case FFI_TYPE_VOID: - flags = UNIX64_RET_VOID; - break; - case FFI_TYPE_UINT8: - flags = UNIX64_RET_UINT8; - break; - case FFI_TYPE_SINT8: - flags = UNIX64_RET_SINT8; - break; - case FFI_TYPE_UINT16: - flags = UNIX64_RET_UINT16; - break; - case FFI_TYPE_SINT16: - flags = UNIX64_RET_SINT16; - break; - case FFI_TYPE_UINT32: - flags = UNIX64_RET_UINT32; - break; - case FFI_TYPE_INT: - case FFI_TYPE_SINT32: - flags = UNIX64_RET_SINT32; - break; - case FFI_TYPE_UINT64: - case FFI_TYPE_SINT64: - flags = UNIX64_RET_INT64; - break; - case FFI_TYPE_POINTER: - flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64); - break; - case FFI_TYPE_FLOAT: - flags = UNIX64_RET_XMM32; - break; - case FFI_TYPE_DOUBLE: - flags = UNIX64_RET_XMM64; - break; -#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE - case FFI_TYPE_LONGDOUBLE: - flags = UNIX64_RET_X87; - break; -#endif - case FFI_TYPE_STRUCT: + case FFI_TYPE_VOID: + flags = UNIX64_RET_VOID; + break; + case FFI_TYPE_UINT8: + flags = UNIX64_RET_UINT8; + break; + case FFI_TYPE_SINT8: + flags = UNIX64_RET_SINT8; + break; + case FFI_TYPE_UINT16: + flags = UNIX64_RET_UINT16; + break; + case FFI_TYPE_SINT16: + flags = UNIX64_RET_SINT16; + break; + case FFI_TYPE_UINT32: + flags = UNIX64_RET_UINT32; + break; + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + flags = UNIX64_RET_SINT32; + break; + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + flags = UNIX64_RET_INT64; + break; + case FFI_TYPE_POINTER: + flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64); + break; + case FFI_TYPE_FLOAT: + flags = UNIX64_RET_XMM32; + break; + case FFI_TYPE_DOUBLE: + flags = UNIX64_RET_XMM64; + break; +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + flags = UNIX64_RET_X87; + break; +#endif + case FFI_TYPE_STRUCT: n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse); if (n == 0) { @@ -464,62 +464,62 @@ ffi_prep_cif_machdep (ffi_cif *cif) memory is the first argument. Allocate a register for it. */ gprcount++; /* We don't have to do anything in asm for the return. */ - flags = UNIX64_RET_VOID | UNIX64_FLAG_RET_IN_MEM; + flags = UNIX64_RET_VOID | UNIX64_FLAG_RET_IN_MEM; } - else + else { _Bool sse0 = SSE_CLASS_P (classes[0]); - - if (rtype_size == 4 && sse0) - flags = UNIX64_RET_XMM32; - else if (rtype_size == 8) - flags = sse0 ? 
UNIX64_RET_XMM64 : UNIX64_RET_INT64; - else - { - _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]); - if (sse0 && sse1) - flags = UNIX64_RET_ST_XMM0_XMM1; - else if (sse0) - flags = UNIX64_RET_ST_XMM0_RAX; - else if (sse1) - flags = UNIX64_RET_ST_RAX_XMM0; - else - flags = UNIX64_RET_ST_RAX_RDX; - flags |= rtype_size << UNIX64_SIZE_SHIFT; - } + + if (rtype_size == 4 && sse0) + flags = UNIX64_RET_XMM32; + else if (rtype_size == 8) + flags = sse0 ? UNIX64_RET_XMM64 : UNIX64_RET_INT64; + else + { + _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]); + if (sse0 && sse1) + flags = UNIX64_RET_ST_XMM0_XMM1; + else if (sse0) + flags = UNIX64_RET_ST_XMM0_RAX; + else if (sse1) + flags = UNIX64_RET_ST_RAX_XMM0; + else + flags = UNIX64_RET_ST_RAX_RDX; + flags |= rtype_size << UNIX64_SIZE_SHIFT; + } + } + break; + case FFI_TYPE_COMPLEX: + switch (rtype->elements[0]->type) + { + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT16: + case FFI_TYPE_INT: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + flags = UNIX64_RET_ST_RAX_RDX | ((unsigned) rtype_size << UNIX64_SIZE_SHIFT); + break; + case FFI_TYPE_FLOAT: + flags = UNIX64_RET_XMM64; + break; + case FFI_TYPE_DOUBLE: + flags = UNIX64_RET_ST_XMM0_XMM1 | (16 << UNIX64_SIZE_SHIFT); + break; +#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE + case FFI_TYPE_LONGDOUBLE: + flags = UNIX64_RET_X87_2; + break; +#endif + default: + return FFI_BAD_TYPEDEF; } - break; - case FFI_TYPE_COMPLEX: - switch (rtype->elements[0]->type) - { - case FFI_TYPE_UINT8: - case FFI_TYPE_SINT8: - case FFI_TYPE_UINT16: - case FFI_TYPE_SINT16: - case FFI_TYPE_INT: - case FFI_TYPE_UINT32: - case FFI_TYPE_SINT32: - case FFI_TYPE_UINT64: - case FFI_TYPE_SINT64: - flags = UNIX64_RET_ST_RAX_RDX | ((unsigned) rtype_size << UNIX64_SIZE_SHIFT); - break; - case FFI_TYPE_FLOAT: - flags = UNIX64_RET_XMM64; - break; - case FFI_TYPE_DOUBLE: - flags = UNIX64_RET_ST_XMM0_XMM1 | (16 << UNIX64_SIZE_SHIFT); - break; -#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE - case FFI_TYPE_LONGDOUBLE: - flags = UNIX64_RET_X87_2; - break; -#endif - default: - return FFI_BAD_TYPEDEF; - } - break; - default: - return FFI_BAD_TYPEDEF; + break; + default: + return FFI_BAD_TYPEDEF; } /* Go over all arguments and determine the way they should be passed. @@ -536,7 +536,7 @@ ffi_prep_cif_machdep (ffi_cif *cif) if (align < 8) align = 8; - bytes = FFI_ALIGN (bytes, align); + bytes = FFI_ALIGN (bytes, align); bytes += cif->arg_types[i]->size; } else @@ -546,50 +546,50 @@ ffi_prep_cif_machdep (ffi_cif *cif) } } if (ssecount) - flags |= UNIX64_FLAG_XMM_ARGS; - + flags |= UNIX64_FLAG_XMM_ARGS; + cif->flags = flags; - cif->bytes = (unsigned) FFI_ALIGN (bytes, 8); + cif->bytes = (unsigned) FFI_ALIGN (bytes, 8); return FFI_OK; } -static void -ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, - void **avalue, void *closure) +static void +ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) { enum x86_64_reg_class classes[MAX_CLASSES]; char *stack, *argp; ffi_type **arg_types; - int gprcount, ssecount, ngpr, nsse, i, avn, flags; + int gprcount, ssecount, ngpr, nsse, i, avn, flags; struct register_args *reg_args; /* Can't call 32-bit mode from 64-bit mode. */ FFI_ASSERT (cif->abi == FFI_UNIX64); /* If the return value is a struct and we don't have a return value - address then we need to make one. Otherwise we can ignore it. 
*/ - flags = cif->flags; - if (rvalue == NULL) - { - if (flags & UNIX64_FLAG_RET_IN_MEM) - rvalue = alloca (cif->rtype->size); - else - flags = UNIX64_RET_VOID; - } + address then we need to make one. Otherwise we can ignore it. */ + flags = cif->flags; + if (rvalue == NULL) + { + if (flags & UNIX64_FLAG_RET_IN_MEM) + rvalue = alloca (cif->rtype->size); + else + flags = UNIX64_RET_VOID; + } /* Allocate the space for the arguments, plus 4 words of temp space. */ stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8); reg_args = (struct register_args *) stack; argp = stack + sizeof (struct register_args); - reg_args->r10 = (uintptr_t) closure; - + reg_args->r10 = (uintptr_t) closure; + gprcount = ssecount = 0; /* If the return value is passed in memory, add the pointer as the first integer argument. */ - if (flags & UNIX64_FLAG_RET_IN_MEM) + if (flags & UNIX64_FLAG_RET_IN_MEM) reg_args->gpr[gprcount++] = (unsigned long) rvalue; avn = cif->nargs; @@ -597,7 +597,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, for (i = 0; i < avn; ++i) { - size_t n, size = arg_types[i]->size; + size_t n, size = arg_types[i]->size; n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); if (n == 0 @@ -611,7 +611,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, align = 8; /* Pass this argument in memory. */ - argp = (void *) FFI_ALIGN (argp, align); + argp = (void *) FFI_ALIGN (argp, align); memcpy (argp, avalue[i], size); argp += size; } @@ -619,15 +619,15 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, { /* The argument is passed entirely in registers. */ char *a = (char *) avalue[i]; - unsigned int j; + unsigned int j; for (j = 0; j < n; j++, a += 8, size -= 8) { switch (classes[j]) { - case X86_64_NO_CLASS: - case X86_64_SSEUP_CLASS: - break; + case X86_64_NO_CLASS: + case X86_64_SSEUP_CLASS: + break; case X86_64_INTEGER_CLASS: case X86_64_INTEGERSI_CLASS: /* Sign-extend integer arguments passed in general @@ -637,26 +637,26 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, switch (arg_types[i]->type) { case FFI_TYPE_SINT8: - reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a); + reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a); break; case FFI_TYPE_SINT16: - reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a); + reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a); break; case FFI_TYPE_SINT32: - reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a); + reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a); break; default: reg_args->gpr[gprcount] = 0; - memcpy (®_args->gpr[gprcount], a, size); + memcpy (®_args->gpr[gprcount], a, size); } gprcount++; break; case X86_64_SSE_CLASS: case X86_64_SSEDF_CLASS: - memcpy (®_args->sse[ssecount++].i64, a, sizeof(UINT64)); + memcpy (®_args->sse[ssecount++].i64, a, sizeof(UINT64)); break; case X86_64_SSESF_CLASS: - memcpy (®_args->sse[ssecount++].i32, a, sizeof(UINT32)); + memcpy (®_args->sse[ssecount++].i32, a, sizeof(UINT32)); break; default: abort(); @@ -664,63 +664,63 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, } } } - reg_args->rax = ssecount; + reg_args->rax = ssecount; ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args), - flags, rvalue, fn); + flags, rvalue, fn); +} + +#ifndef __ILP32__ +extern void +ffi_call_efi64(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue); +#endif + +void +ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ +#ifndef __ILP32__ + if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) + { + ffi_call_efi64(cif, 
fn, rvalue, avalue); + return; + } +#endif + ffi_call_int (cif, fn, rvalue, avalue, NULL); } -#ifndef __ILP32__ -extern void -ffi_call_efi64(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue); -#endif - -void -ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) -{ -#ifndef __ILP32__ - if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) - { - ffi_call_efi64(cif, fn, rvalue, avalue); - return; - } -#endif - ffi_call_int (cif, fn, rvalue, avalue, NULL); -} - -#ifndef __ILP32__ -extern void -ffi_call_go_efi64(ffi_cif *cif, void (*fn)(void), void *rvalue, - void **avalue, void *closure); -#endif - -void -ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, - void **avalue, void *closure) -{ -#ifndef __ILP32__ - if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) - { - ffi_call_go_efi64(cif, fn, rvalue, avalue, closure); - return; - } -#endif - ffi_call_int (cif, fn, rvalue, avalue, closure); -} - - -extern void ffi_closure_unix64(void) FFI_HIDDEN; -extern void ffi_closure_unix64_sse(void) FFI_HIDDEN; - -#ifndef __ILP32__ -extern ffi_status -ffi_prep_closure_loc_efi64(ffi_closure* closure, - ffi_cif* cif, - void (*fun)(ffi_cif*, void*, void**, void*), - void *user_data, - void *codeloc); -#endif - +#ifndef __ILP32__ +extern void +ffi_call_go_efi64(ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure); +#endif + +void +ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ +#ifndef __ILP32__ + if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) + { + ffi_call_go_efi64(cif, fn, rvalue, avalue, closure); + return; + } +#endif + ffi_call_int (cif, fn, rvalue, avalue, closure); +} + + +extern void ffi_closure_unix64(void) FFI_HIDDEN; +extern void ffi_closure_unix64_sse(void) FFI_HIDDEN; + +#ifndef __ILP32__ +extern ffi_status +ffi_prep_closure_loc_efi64(ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc); +#endif + ffi_status ffi_prep_closure_loc (ffi_closure* closure, ffi_cif* cif, @@ -728,31 +728,31 @@ ffi_prep_closure_loc (ffi_closure* closure, void *user_data, void *codeloc) { - static const unsigned char trampoline[16] = { - /* leaq -0x7(%rip),%r10 # 0x0 */ - 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff, - /* jmpq *0x3(%rip) # 0x10 */ - 0xff, 0x25, 0x03, 0x00, 0x00, 0x00, - /* nopl (%rax) */ - 0x0f, 0x1f, 0x00 - }; - void (*dest)(void); - char *tramp = closure->tramp; - -#ifndef __ILP32__ - if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) - return ffi_prep_closure_loc_efi64(closure, cif, fun, user_data, codeloc); -#endif - if (cif->abi != FFI_UNIX64) - return FFI_BAD_ABI; - - if (cif->flags & UNIX64_FLAG_XMM_ARGS) - dest = ffi_closure_unix64_sse; - else - dest = ffi_closure_unix64; - - memcpy (tramp, trampoline, sizeof(trampoline)); - *(UINT64 *)(tramp + 16) = (uintptr_t)dest; + static const unsigned char trampoline[16] = { + /* leaq -0x7(%rip),%r10 # 0x0 */ + 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff, + /* jmpq *0x3(%rip) # 0x10 */ + 0xff, 0x25, 0x03, 0x00, 0x00, 0x00, + /* nopl (%rax) */ + 0x0f, 0x1f, 0x00 + }; + void (*dest)(void); + char *tramp = closure->tramp; + +#ifndef __ILP32__ + if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) + return ffi_prep_closure_loc_efi64(closure, cif, fun, user_data, codeloc); +#endif + if (cif->abi != FFI_UNIX64) + return FFI_BAD_ABI; + + if (cif->flags & UNIX64_FLAG_XMM_ARGS) + dest = ffi_closure_unix64_sse; + else + dest = ffi_closure_unix64; + + memcpy (tramp, trampoline, 
sizeof(trampoline)); + *(UINT64 *)(tramp + 16) = (uintptr_t)dest; closure->cif = cif; closure->fun = fun; @@ -761,40 +761,40 @@ ffi_prep_closure_loc (ffi_closure* closure, return FFI_OK; } -int FFI_HIDDEN -ffi_closure_unix64_inner(ffi_cif *cif, - void (*fun)(ffi_cif*, void*, void**, void*), - void *user_data, - void *rvalue, - struct register_args *reg_args, - char *argp) +int FFI_HIDDEN +ffi_closure_unix64_inner(ffi_cif *cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *rvalue, + struct register_args *reg_args, + char *argp) { void **avalue; ffi_type **arg_types; long i, avn; int gprcount, ssecount, ngpr, nsse; - int flags; + int flags; - avn = cif->nargs; - flags = cif->flags; - avalue = alloca(avn * sizeof(void *)); + avn = cif->nargs; + flags = cif->flags; + avalue = alloca(avn * sizeof(void *)); gprcount = ssecount = 0; - if (flags & UNIX64_FLAG_RET_IN_MEM) + if (flags & UNIX64_FLAG_RET_IN_MEM) { - /* On return, %rax will contain the address that was passed - by the caller in %rdi. */ - void *r = (void *)(uintptr_t)reg_args->gpr[gprcount++]; - *(void **)rvalue = r; - rvalue = r; - flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64); + /* On return, %rax will contain the address that was passed + by the caller in %rdi. */ + void *r = (void *)(uintptr_t)reg_args->gpr[gprcount++]; + *(void **)rvalue = r; + rvalue = r; + flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64); } arg_types = cif->arg_types; for (i = 0; i < avn; ++i) { enum x86_64_reg_class classes[MAX_CLASSES]; - size_t n; + size_t n; n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse); if (n == 0 @@ -808,7 +808,7 @@ ffi_closure_unix64_inner(ffi_cif *cif, align = 8; /* Pass this argument in memory. */ - argp = (void *) FFI_ALIGN (argp, align); + argp = (void *) FFI_ALIGN (argp, align); avalue[i] = argp; argp += arg_types[i]->size; } @@ -834,7 +834,7 @@ ffi_closure_unix64_inner(ffi_cif *cif, else { char *a = alloca (16); - unsigned int j; + unsigned int j; avalue[i] = a; for (j = 0; j < n; j++, a += 8) @@ -848,39 +848,39 @@ ffi_closure_unix64_inner(ffi_cif *cif, } /* Invoke the closure. */ - fun (cif, rvalue, avalue, user_data); + fun (cif, rvalue, avalue, user_data); /* Tell assembly how to perform return type promotions. */ - return flags; + return flags; +} + +extern void ffi_go_closure_unix64(void) FFI_HIDDEN; +extern void ffi_go_closure_unix64_sse(void) FFI_HIDDEN; + +#ifndef __ILP32__ +extern ffi_status +ffi_prep_go_closure_efi64(ffi_go_closure* closure, ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*)); +#endif + +ffi_status +ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*)) +{ +#ifndef __ILP32__ + if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) + return ffi_prep_go_closure_efi64(closure, cif, fun); +#endif + if (cif->abi != FFI_UNIX64) + return FFI_BAD_ABI; + + closure->tramp = (cif->flags & UNIX64_FLAG_XMM_ARGS + ? 
ffi_go_closure_unix64_sse + : ffi_go_closure_unix64); + closure->cif = cif; + closure->fun = fun; + + return FFI_OK; } -extern void ffi_go_closure_unix64(void) FFI_HIDDEN; -extern void ffi_go_closure_unix64_sse(void) FFI_HIDDEN; - -#ifndef __ILP32__ -extern ffi_status -ffi_prep_go_closure_efi64(ffi_go_closure* closure, ffi_cif* cif, - void (*fun)(ffi_cif*, void*, void**, void*)); -#endif - -ffi_status -ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, - void (*fun)(ffi_cif*, void*, void**, void*)) -{ -#ifndef __ILP32__ - if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) - return ffi_prep_go_closure_efi64(closure, cif, fun); -#endif - if (cif->abi != FFI_UNIX64) - return FFI_BAD_ABI; - - closure->tramp = (cif->flags & UNIX64_FLAG_XMM_ARGS - ? ffi_go_closure_unix64_sse - : ffi_go_closure_unix64); - closure->cif = cif; - closure->fun = fun; - - return FFI_OK; -} - #endif /* __x86_64__ */ diff --git a/contrib/restricted/libffi/src/x86/ffitarget.h b/contrib/restricted/libffi/src/x86/ffitarget.h index ab04dfa791..85ccedfedc 100644 --- a/contrib/restricted/libffi/src/x86/ffitarget.h +++ b/contrib/restricted/libffi/src/x86/ffitarget.h @@ -1,5 +1,5 @@ /* -----------------------------------------------------------------*-C-*- - ffitarget.h - Copyright (c) 2012, 2014, 2018 Anthony Green + ffitarget.h - Copyright (c) 2012, 2014, 2018 Anthony Green Copyright (c) 1996-2003, 2010 Red Hat, Inc. Copyright (C) 2008 Free Software Foundation, Inc. @@ -49,11 +49,11 @@ #define USE_BUILTIN_FFS 0 /* not yet implemented in mingw-64 */ #endif -#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION -#ifndef _MSC_VER -#define FFI_TARGET_HAS_COMPLEX_TYPE -#endif - +#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION +#ifndef _MSC_VER +#define FFI_TARGET_HAS_COMPLEX_TYPE +#endif + /* ---- Generic type definitions ----------------------------------------- */ #ifndef LIBFFI_ASM @@ -78,46 +78,46 @@ typedef signed long ffi_sarg; #endif typedef enum ffi_abi { -#if defined(X86_WIN64) +#if defined(X86_WIN64) FFI_FIRST_ABI = 0, - FFI_WIN64, /* sizeof(long double) == 8 - microsoft compilers */ - FFI_GNUW64, /* sizeof(long double) == 16 - GNU compilers */ + FFI_WIN64, /* sizeof(long double) == 8 - microsoft compilers */ + FFI_GNUW64, /* sizeof(long double) == 16 - GNU compilers */ FFI_LAST_ABI, -#ifdef __GNUC__ - FFI_DEFAULT_ABI = FFI_GNUW64 -#else - FFI_DEFAULT_ABI = FFI_WIN64 -#endif - -#elif defined(X86_64) || (defined (__x86_64__) && defined (X86_DARWIN)) - FFI_FIRST_ABI = 1, - FFI_UNIX64, +#ifdef __GNUC__ + FFI_DEFAULT_ABI = FFI_GNUW64 +#else + FFI_DEFAULT_ABI = FFI_WIN64 +#endif + +#elif defined(X86_64) || (defined (__x86_64__) && defined (X86_DARWIN)) + FFI_FIRST_ABI = 1, + FFI_UNIX64, FFI_WIN64, - FFI_EFI64 = FFI_WIN64, - FFI_GNUW64, + FFI_EFI64 = FFI_WIN64, + FFI_GNUW64, FFI_LAST_ABI, - FFI_DEFAULT_ABI = FFI_UNIX64 - -#elif defined(X86_WIN32) - FFI_FIRST_ABI = 0, - FFI_SYSV = 1, - FFI_STDCALL = 2, - FFI_THISCALL = 3, - FFI_FASTCALL = 4, - FFI_MS_CDECL = 5, - FFI_PASCAL = 6, - FFI_REGISTER = 7, - FFI_LAST_ABI, - FFI_DEFAULT_ABI = FFI_MS_CDECL + FFI_DEFAULT_ABI = FFI_UNIX64 + +#elif defined(X86_WIN32) + FFI_FIRST_ABI = 0, + FFI_SYSV = 1, + FFI_STDCALL = 2, + FFI_THISCALL = 3, + FFI_FASTCALL = 4, + FFI_MS_CDECL = 5, + FFI_PASCAL = 6, + FFI_REGISTER = 7, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_MS_CDECL #else - FFI_FIRST_ABI = 0, - FFI_SYSV = 1, - FFI_THISCALL = 3, - FFI_FASTCALL = 4, - FFI_STDCALL = 5, - FFI_PASCAL = 6, - FFI_REGISTER = 7, - FFI_MS_CDECL = 8, + FFI_FIRST_ABI = 0, + FFI_SYSV = 1, + FFI_THISCALL = 3, 
+ FFI_FASTCALL = 4, + FFI_STDCALL = 5, + FFI_PASCAL = 6, + FFI_REGISTER = 7, + FFI_MS_CDECL = 8, FFI_LAST_ABI, FFI_DEFAULT_ABI = FFI_SYSV #endif @@ -127,20 +127,20 @@ typedef enum ffi_abi { /* ---- Definitions for closures ----------------------------------------- */ #define FFI_CLOSURES 1 -#define FFI_GO_CLOSURES 1 - +#define FFI_GO_CLOSURES 1 + #define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1) #define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2) #define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3) #define FFI_TYPE_MS_STRUCT (FFI_TYPE_LAST + 4) -#if defined (X86_64) || defined(X86_WIN64) \ - || (defined (__x86_64__) && defined (X86_DARWIN)) -# define FFI_TRAMPOLINE_SIZE 24 -# define FFI_NATIVE_RAW_API 0 +#if defined (X86_64) || defined(X86_WIN64) \ + || (defined (__x86_64__) && defined (X86_DARWIN)) +# define FFI_TRAMPOLINE_SIZE 24 +# define FFI_NATIVE_RAW_API 0 #else -# define FFI_TRAMPOLINE_SIZE 12 -# define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */ +# define FFI_TRAMPOLINE_SIZE 12 +# define FFI_NATIVE_RAW_API 1 /* x86 has native raw api support */ #endif #endif diff --git a/contrib/restricted/libffi/src/x86/ffiw64.c b/contrib/restricted/libffi/src/x86/ffiw64.c index 3eafc9d3e9..b68f69ccf6 100644 --- a/contrib/restricted/libffi/src/x86/ffiw64.c +++ b/contrib/restricted/libffi/src/x86/ffiw64.c @@ -1,311 +1,311 @@ -/* ----------------------------------------------------------------------- - ffiw64.c - Copyright (c) 2018 Anthony Green - Copyright (c) 2014 Red Hat, Inc. - - x86 win64 Foreign Function Interface - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - ``Software''), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - DEALINGS IN THE SOFTWARE. 
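Aside, grounded in the FFI_TRAMPOLINE_SIZE definition above and the trampoline template in the ffi64.c hunk earlier in this diff (a reconstruction, not patch text): the size is 24 on the 64-bit targets because a closure trampoline is the 16-byte code template plus an 8-byte branch target stored immediately behind it.

    // Sketch of the 24-byte x86-64 closure trampoline.
    struct tramp64_sketch {
      unsigned char code[16];    // leaq -0x7(%rip),%r10 ; jmpq *0x3(%rip) ; nopl
      unsigned long long dest;   // ffi_closure_unix64 or ffi_closure_unix64_sse
    };
    // The leaq is 7 bytes, so %r10 receives (code+7)-7 == &closure (tramp is
    // the first field of ffi_closure); the jmpq loads dest at (code+13)+3.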
- ----------------------------------------------------------------------- */ - -#if defined(__x86_64__) || defined(_M_AMD64) -#include <ffi.h> -#include <ffi_common.h> -#include <stdlib.h> -#include <stdint.h> - -#ifdef X86_WIN64 -#define EFI64(name) name -#else -#define EFI64(name) FFI_HIDDEN name##_efi64 -#endif - -struct win64_call_frame -{ - UINT64 rbp; /* 0 */ - UINT64 retaddr; /* 8 */ - UINT64 fn; /* 16 */ - UINT64 flags; /* 24 */ - UINT64 rvalue; /* 32 */ -}; - -extern void ffi_call_win64 (void *stack, struct win64_call_frame *, - void *closure) FFI_HIDDEN; - -ffi_status FFI_HIDDEN -EFI64(ffi_prep_cif_machdep)(ffi_cif *cif) -{ - int flags, n; - - switch (cif->abi) - { - case FFI_WIN64: - case FFI_GNUW64: - break; - default: - return FFI_BAD_ABI; - } - - flags = cif->rtype->type; - switch (flags) - { - default: - break; - case FFI_TYPE_LONGDOUBLE: - /* GCC returns long double values by reference, like a struct */ - if (cif->abi == FFI_GNUW64) - flags = FFI_TYPE_STRUCT; - break; - case FFI_TYPE_COMPLEX: - flags = FFI_TYPE_STRUCT; - /* FALLTHRU */ - case FFI_TYPE_STRUCT: - switch (cif->rtype->size) - { - case 8: - flags = FFI_TYPE_UINT64; - break; - case 4: - flags = FFI_TYPE_SMALL_STRUCT_4B; - break; - case 2: - flags = FFI_TYPE_SMALL_STRUCT_2B; - break; - case 1: - flags = FFI_TYPE_SMALL_STRUCT_1B; - break; - } - break; - } - cif->flags = flags; - - /* Each argument either fits in a register, an 8 byte slot, or is - passed by reference with the pointer in the 8 byte slot. */ - n = cif->nargs; - n += (flags == FFI_TYPE_STRUCT); - if (n < 4) - n = 4; - cif->bytes = n * 8; - - return FFI_OK; -} - -static void -ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, - void **avalue, void *closure) -{ - int i, j, n, flags; - UINT64 *stack; - size_t rsize; - struct win64_call_frame *frame; - - FFI_ASSERT(cif->abi == FFI_GNUW64 || cif->abi == FFI_WIN64); - - flags = cif->flags; - rsize = 0; - - /* If we have no return value for a structure, we need to create one. - Otherwise we can ignore the return type entirely. 
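The slot accounting at the end of ffi_prep_cif_machdep above boils down to the following (a minimal sketch, not patch text):

    // One 8-byte slot per argument (the value itself, or a pointer to it),
    // one extra slot when a struct return needs its hidden pointer, and
    // never fewer than four slots -- the Win64 register home area.
    static size_t win64_arg_bytes(unsigned nargs, int struct_return)
    {
      size_t n = nargs + (struct_return ? 1 : 0);
      if (n < 4)
        n = 4;
      return n * 8;    // what the code stores in cif->bytes
    }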
*/ - if (rvalue == NULL) - { - if (flags == FFI_TYPE_STRUCT) - rsize = cif->rtype->size; - else - flags = FFI_TYPE_VOID; - } - - stack = alloca(cif->bytes + sizeof(struct win64_call_frame) + rsize); - frame = (struct win64_call_frame *)((char *)stack + cif->bytes); - if (rsize) - rvalue = frame + 1; - - frame->fn = (uintptr_t)fn; - frame->flags = flags; - frame->rvalue = (uintptr_t)rvalue; - - j = 0; - if (flags == FFI_TYPE_STRUCT) - { - stack[0] = (uintptr_t)rvalue; - j = 1; - } - - for (i = 0, n = cif->nargs; i < n; ++i, ++j) - { - switch (cif->arg_types[i]->size) - { - case 8: - stack[j] = *(UINT64 *)avalue[i]; - break; - case 4: - stack[j] = *(UINT32 *)avalue[i]; - break; - case 2: - stack[j] = *(UINT16 *)avalue[i]; - break; - case 1: - stack[j] = *(UINT8 *)avalue[i]; - break; - default: - stack[j] = (uintptr_t)avalue[i]; - break; - } - } - - ffi_call_win64 (stack, frame, closure); -} - -void -EFI64(ffi_call)(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) -{ - ffi_call_int (cif, fn, rvalue, avalue, NULL); -} - -void -EFI64(ffi_call_go)(ffi_cif *cif, void (*fn)(void), void *rvalue, - void **avalue, void *closure) -{ - ffi_call_int (cif, fn, rvalue, avalue, closure); -} - - -extern void ffi_closure_win64(void) FFI_HIDDEN; -extern void ffi_go_closure_win64(void) FFI_HIDDEN; - -ffi_status -EFI64(ffi_prep_closure_loc)(ffi_closure* closure, - ffi_cif* cif, - void (*fun)(ffi_cif*, void*, void**, void*), - void *user_data, - void *codeloc) -{ - static const unsigned char trampoline[16] = { - /* leaq -0x7(%rip),%r10 # 0x0 */ - 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff, - /* jmpq *0x3(%rip) # 0x10 */ - 0xff, 0x25, 0x03, 0x00, 0x00, 0x00, - /* nopl (%rax) */ - 0x0f, 0x1f, 0x00 - }; - char *tramp = closure->tramp; - - switch (cif->abi) - { - case FFI_WIN64: - case FFI_GNUW64: - break; - default: - return FFI_BAD_ABI; - } - - memcpy (tramp, trampoline, sizeof(trampoline)); - *(UINT64 *)(tramp + 16) = (uintptr_t)ffi_closure_win64; - - closure->cif = cif; - closure->fun = fun; - closure->user_data = user_data; - - return FFI_OK; -} - -ffi_status -EFI64(ffi_prep_go_closure)(ffi_go_closure* closure, ffi_cif* cif, - void (*fun)(ffi_cif*, void*, void**, void*)) -{ - switch (cif->abi) - { - case FFI_WIN64: - case FFI_GNUW64: - break; - default: - return FFI_BAD_ABI; - } - - closure->tramp = ffi_go_closure_win64; - closure->cif = cif; - closure->fun = fun; - - return FFI_OK; -} - -struct win64_closure_frame -{ - UINT64 rvalue[2]; - UINT64 fargs[4]; - UINT64 retaddr; - UINT64 args[]; -}; - -/* Force the inner function to use the MS ABI. When compiling on win64 - this is a nop. When compiling on unix, this simplifies the assembly, - and places the burden of saving the extra call-saved registers on - the compiler. */ -int FFI_HIDDEN __attribute__((ms_abi)) -ffi_closure_win64_inner(ffi_cif *cif, - void (*fun)(ffi_cif*, void*, void**, void*), - void *user_data, - struct win64_closure_frame *frame) -{ - void **avalue; - void *rvalue; - int i, n, nreg, flags; - - avalue = alloca(cif->nargs * sizeof(void *)); - rvalue = frame->rvalue; - nreg = 0; - - /* When returning a structure, the address is in the first argument. - We must also be prepared to return the same address in eax, so - install that address in the frame and pretend we return a pointer. 
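For readers new to the convention described above: Win64 returns large aggregates through a hidden pointer passed in the first argument slot, and the callee hands that same pointer back in %rax. A hypothetical illustration (names invented):

    typedef struct { double a, b, c; } big;   // 24 bytes, not register-sized
    // What `big f(int x)` lowers to at the ABI level:
    //   big *f(big *hidden, int x);          // `hidden` is also returned in %rax
    // The closure inner function mirrors this by copying frame->args[0] into
    // frame->rvalue[0] so the assembly stub can return it in %rax.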
*/ - flags = cif->flags; - if (flags == FFI_TYPE_STRUCT) - { - rvalue = (void *)(uintptr_t)frame->args[0]; - frame->rvalue[0] = frame->args[0]; - nreg = 1; - } - - for (i = 0, n = cif->nargs; i < n; ++i, ++nreg) - { - size_t size = cif->arg_types[i]->size; - size_t type = cif->arg_types[i]->type; - void *a; - - if (type == FFI_TYPE_DOUBLE || type == FFI_TYPE_FLOAT) - { - if (nreg < 4) - a = &frame->fargs[nreg]; - else - a = &frame->args[nreg]; - } - else if (size == 1 || size == 2 || size == 4 || size == 8) - a = &frame->args[nreg]; - else - a = (void *)(uintptr_t)frame->args[nreg]; - - avalue[i] = a; - } - - /* Invoke the closure. */ - fun (cif, rvalue, avalue, user_data); - return flags; -} - -#endif /* __x86_64__ */ +/* ----------------------------------------------------------------------- + ffiw64.c - Copyright (c) 2018 Anthony Green + Copyright (c) 2014 Red Hat, Inc. + + x86 win64 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#if defined(__x86_64__) || defined(_M_AMD64) +#include <ffi.h> +#include <ffi_common.h> +#include <stdlib.h> +#include <stdint.h> + +#ifdef X86_WIN64 +#define EFI64(name) name +#else +#define EFI64(name) FFI_HIDDEN name##_efi64 +#endif + +struct win64_call_frame +{ + UINT64 rbp; /* 0 */ + UINT64 retaddr; /* 8 */ + UINT64 fn; /* 16 */ + UINT64 flags; /* 24 */ + UINT64 rvalue; /* 32 */ +}; + +extern void ffi_call_win64 (void *stack, struct win64_call_frame *, + void *closure) FFI_HIDDEN; + +ffi_status FFI_HIDDEN +EFI64(ffi_prep_cif_machdep)(ffi_cif *cif) +{ + int flags, n; + + switch (cif->abi) + { + case FFI_WIN64: + case FFI_GNUW64: + break; + default: + return FFI_BAD_ABI; + } + + flags = cif->rtype->type; + switch (flags) + { + default: + break; + case FFI_TYPE_LONGDOUBLE: + /* GCC returns long double values by reference, like a struct */ + if (cif->abi == FFI_GNUW64) + flags = FFI_TYPE_STRUCT; + break; + case FFI_TYPE_COMPLEX: + flags = FFI_TYPE_STRUCT; + /* FALLTHRU */ + case FFI_TYPE_STRUCT: + switch (cif->rtype->size) + { + case 8: + flags = FFI_TYPE_UINT64; + break; + case 4: + flags = FFI_TYPE_SMALL_STRUCT_4B; + break; + case 2: + flags = FFI_TYPE_SMALL_STRUCT_2B; + break; + case 1: + flags = FFI_TYPE_SMALL_STRUCT_1B; + break; + } + break; + } + cif->flags = flags; + + /* Each argument either fits in a register, an 8 byte slot, or is + passed by reference with the pointer in the 8 byte slot. 
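Seen from the public API, that 8-byte-slot rule means an oversized argument always travels by reference; a hedged usage sketch (invented names, standard libffi calls):

    #include <ffi.h>

    typedef struct { double a, b, c; } triple;   // 24 bytes: slot gets a pointer

    static void call_with_triple(void (*fn)(void), triple *t)
    {
      ffi_cif cif;
      ffi_type *elems[] = { &ffi_type_double, &ffi_type_double,
                            &ffi_type_double, NULL };
      ffi_type ttriple = { 0, 0, FFI_TYPE_STRUCT, elems };  // size filled by prep
      ffi_type *argt[] = { &ttriple };
      void *argv[] = { t };     // ffi_call stores this pointer in the slot
      if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 1, &ffi_type_void, argt) == FFI_OK)
        ffi_call(&cif, fn, NULL, argv);
    }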
*/ + n = cif->nargs; + n += (flags == FFI_TYPE_STRUCT); + if (n < 4) + n = 4; + cif->bytes = n * 8; + + return FFI_OK; +} + +static void +ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ + int i, j, n, flags; + UINT64 *stack; + size_t rsize; + struct win64_call_frame *frame; + + FFI_ASSERT(cif->abi == FFI_GNUW64 || cif->abi == FFI_WIN64); + + flags = cif->flags; + rsize = 0; + + /* If we have no return value for a structure, we need to create one. + Otherwise we can ignore the return type entirely. */ + if (rvalue == NULL) + { + if (flags == FFI_TYPE_STRUCT) + rsize = cif->rtype->size; + else + flags = FFI_TYPE_VOID; + } + + stack = alloca(cif->bytes + sizeof(struct win64_call_frame) + rsize); + frame = (struct win64_call_frame *)((char *)stack + cif->bytes); + if (rsize) + rvalue = frame + 1; + + frame->fn = (uintptr_t)fn; + frame->flags = flags; + frame->rvalue = (uintptr_t)rvalue; + + j = 0; + if (flags == FFI_TYPE_STRUCT) + { + stack[0] = (uintptr_t)rvalue; + j = 1; + } + + for (i = 0, n = cif->nargs; i < n; ++i, ++j) + { + switch (cif->arg_types[i]->size) + { + case 8: + stack[j] = *(UINT64 *)avalue[i]; + break; + case 4: + stack[j] = *(UINT32 *)avalue[i]; + break; + case 2: + stack[j] = *(UINT16 *)avalue[i]; + break; + case 1: + stack[j] = *(UINT8 *)avalue[i]; + break; + default: + stack[j] = (uintptr_t)avalue[i]; + break; + } + } + + ffi_call_win64 (stack, frame, closure); +} + +void +EFI64(ffi_call)(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) +{ + ffi_call_int (cif, fn, rvalue, avalue, NULL); +} + +void +EFI64(ffi_call_go)(ffi_cif *cif, void (*fn)(void), void *rvalue, + void **avalue, void *closure) +{ + ffi_call_int (cif, fn, rvalue, avalue, closure); +} + + +extern void ffi_closure_win64(void) FFI_HIDDEN; +extern void ffi_go_closure_win64(void) FFI_HIDDEN; + +ffi_status +EFI64(ffi_prep_closure_loc)(ffi_closure* closure, + ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + void *codeloc) +{ + static const unsigned char trampoline[16] = { + /* leaq -0x7(%rip),%r10 # 0x0 */ + 0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff, + /* jmpq *0x3(%rip) # 0x10 */ + 0xff, 0x25, 0x03, 0x00, 0x00, 0x00, + /* nopl (%rax) */ + 0x0f, 0x1f, 0x00 + }; + char *tramp = closure->tramp; + + switch (cif->abi) + { + case FFI_WIN64: + case FFI_GNUW64: + break; + default: + return FFI_BAD_ABI; + } + + memcpy (tramp, trampoline, sizeof(trampoline)); + *(UINT64 *)(tramp + 16) = (uintptr_t)ffi_closure_win64; + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + return FFI_OK; +} + +ffi_status +EFI64(ffi_prep_go_closure)(ffi_go_closure* closure, ffi_cif* cif, + void (*fun)(ffi_cif*, void*, void**, void*)) +{ + switch (cif->abi) + { + case FFI_WIN64: + case FFI_GNUW64: + break; + default: + return FFI_BAD_ABI; + } + + closure->tramp = ffi_go_closure_win64; + closure->cif = cif; + closure->fun = fun; + + return FFI_OK; +} + +struct win64_closure_frame +{ + UINT64 rvalue[2]; + UINT64 fargs[4]; + UINT64 retaddr; + UINT64 args[]; +}; + +/* Force the inner function to use the MS ABI. When compiling on win64 + this is a nop. When compiling on unix, this simplifies the assembly, + and places the burden of saving the extra call-saved registers on + the compiler. 
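Background on the attribute used just below, stated as a sketch rather than a toolchain guarantee: GCC and clang accept __attribute__((ms_abi)) on x86-64 to compile a single function with the Microsoft convention, whose call-saved set (rsi, rdi, xmm6-xmm15) is larger than SysV's.

    // A SysV caller can invoke an ms_abi function directly; the compiler
    // emits the convention bridge, so the assembly need not save the extra
    // MS call-saved registers by hand.
    int __attribute__((ms_abi)) inner(int a, int b);   // a in %rcx, b in %rdx
    int caller(void) { return inner(1, 2); }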
*/ +int FFI_HIDDEN __attribute__((ms_abi)) +ffi_closure_win64_inner(ffi_cif *cif, + void (*fun)(ffi_cif*, void*, void**, void*), + void *user_data, + struct win64_closure_frame *frame) +{ + void **avalue; + void *rvalue; + int i, n, nreg, flags; + + avalue = alloca(cif->nargs * sizeof(void *)); + rvalue = frame->rvalue; + nreg = 0; + + /* When returning a structure, the address is in the first argument. + We must also be prepared to return the same address in eax, so + install that address in the frame and pretend we return a pointer. */ + flags = cif->flags; + if (flags == FFI_TYPE_STRUCT) + { + rvalue = (void *)(uintptr_t)frame->args[0]; + frame->rvalue[0] = frame->args[0]; + nreg = 1; + } + + for (i = 0, n = cif->nargs; i < n; ++i, ++nreg) + { + size_t size = cif->arg_types[i]->size; + size_t type = cif->arg_types[i]->type; + void *a; + + if (type == FFI_TYPE_DOUBLE || type == FFI_TYPE_FLOAT) + { + if (nreg < 4) + a = &frame->fargs[nreg]; + else + a = &frame->args[nreg]; + } + else if (size == 1 || size == 2 || size == 4 || size == 8) + a = &frame->args[nreg]; + else + a = (void *)(uintptr_t)frame->args[nreg]; + + avalue[i] = a; + } + + /* Invoke the closure. */ + fun (cif, rvalue, avalue, user_data); + return flags; +} + +#endif /* __x86_64__ */ diff --git a/contrib/restricted/libffi/src/x86/internal.h b/contrib/restricted/libffi/src/x86/internal.h index 7cfca13a30..09771ba8cf 100644 --- a/contrib/restricted/libffi/src/x86/internal.h +++ b/contrib/restricted/libffi/src/x86/internal.h @@ -1,29 +1,29 @@ -#define X86_RET_FLOAT 0 -#define X86_RET_DOUBLE 1 -#define X86_RET_LDOUBLE 2 -#define X86_RET_SINT8 3 -#define X86_RET_SINT16 4 -#define X86_RET_UINT8 5 -#define X86_RET_UINT16 6 -#define X86_RET_INT64 7 -#define X86_RET_INT32 8 -#define X86_RET_VOID 9 -#define X86_RET_STRUCTPOP 10 -#define X86_RET_STRUCTARG 11 -#define X86_RET_STRUCT_1B 12 -#define X86_RET_STRUCT_2B 13 -#define X86_RET_UNUSED14 14 -#define X86_RET_UNUSED15 15 - -#define X86_RET_TYPE_MASK 15 -#define X86_RET_POP_SHIFT 4 - -#define R_EAX 0 -#define R_EDX 1 -#define R_ECX 2 - -#ifdef __PCC__ -# define HAVE_FASTCALL 0 -#else -# define HAVE_FASTCALL 1 -#endif +#define X86_RET_FLOAT 0 +#define X86_RET_DOUBLE 1 +#define X86_RET_LDOUBLE 2 +#define X86_RET_SINT8 3 +#define X86_RET_SINT16 4 +#define X86_RET_UINT8 5 +#define X86_RET_UINT16 6 +#define X86_RET_INT64 7 +#define X86_RET_INT32 8 +#define X86_RET_VOID 9 +#define X86_RET_STRUCTPOP 10 +#define X86_RET_STRUCTARG 11 +#define X86_RET_STRUCT_1B 12 +#define X86_RET_STRUCT_2B 13 +#define X86_RET_UNUSED14 14 +#define X86_RET_UNUSED15 15 + +#define X86_RET_TYPE_MASK 15 +#define X86_RET_POP_SHIFT 4 + +#define R_EAX 0 +#define R_EDX 1 +#define R_ECX 2 + +#ifdef __PCC__ +# define HAVE_FASTCALL 0 +#else +# define HAVE_FASTCALL 1 +#endif diff --git a/contrib/restricted/libffi/src/x86/internal64.h b/contrib/restricted/libffi/src/x86/internal64.h index 62afd4a7e4..512e95523e 100644 --- a/contrib/restricted/libffi/src/x86/internal64.h +++ b/contrib/restricted/libffi/src/x86/internal64.h @@ -1,22 +1,22 @@ -#define UNIX64_RET_VOID 0 -#define UNIX64_RET_UINT8 1 -#define UNIX64_RET_UINT16 2 -#define UNIX64_RET_UINT32 3 -#define UNIX64_RET_SINT8 4 -#define UNIX64_RET_SINT16 5 -#define UNIX64_RET_SINT32 6 -#define UNIX64_RET_INT64 7 -#define UNIX64_RET_XMM32 8 -#define UNIX64_RET_XMM64 9 -#define UNIX64_RET_X87 10 -#define UNIX64_RET_X87_2 11 -#define UNIX64_RET_ST_XMM0_RAX 12 -#define UNIX64_RET_ST_RAX_XMM0 13 -#define UNIX64_RET_ST_XMM0_XMM1 14 -#define UNIX64_RET_ST_RAX_RDX 15 - -#define 
UNIX64_RET_LAST 15 - -#define UNIX64_FLAG_RET_IN_MEM (1 << 10) -#define UNIX64_FLAG_XMM_ARGS (1 << 11) -#define UNIX64_SIZE_SHIFT 12 +#define UNIX64_RET_VOID 0 +#define UNIX64_RET_UINT8 1 +#define UNIX64_RET_UINT16 2 +#define UNIX64_RET_UINT32 3 +#define UNIX64_RET_SINT8 4 +#define UNIX64_RET_SINT16 5 +#define UNIX64_RET_SINT32 6 +#define UNIX64_RET_INT64 7 +#define UNIX64_RET_XMM32 8 +#define UNIX64_RET_XMM64 9 +#define UNIX64_RET_X87 10 +#define UNIX64_RET_X87_2 11 +#define UNIX64_RET_ST_XMM0_RAX 12 +#define UNIX64_RET_ST_RAX_XMM0 13 +#define UNIX64_RET_ST_XMM0_XMM1 14 +#define UNIX64_RET_ST_RAX_RDX 15 + +#define UNIX64_RET_LAST 15 + +#define UNIX64_FLAG_RET_IN_MEM (1 << 10) +#define UNIX64_FLAG_XMM_ARGS (1 << 11) +#define UNIX64_SIZE_SHIFT 12 diff --git a/contrib/restricted/libffi/src/x86/sysv.S b/contrib/restricted/libffi/src/x86/sysv.S index 5cf58668fe..7c9598c93c 100644 --- a/contrib/restricted/libffi/src/x86/sysv.S +++ b/contrib/restricted/libffi/src/x86/sysv.S @@ -1,7 +1,7 @@ /* ----------------------------------------------------------------------- - sysv.S - Copyright (c) 2017 Anthony Green - - Copyright (c) 2013 The Written Word, Inc. - - Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc. + sysv.S - Copyright (c) 2017 Anthony Green + - Copyright (c) 2013 The Written Word, Inc. + - Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc. X86 Foreign Function Interface @@ -26,1104 +26,1104 @@ DEALINGS IN THE SOFTWARE. ----------------------------------------------------------------------- */ -#ifdef __i386__ -#ifndef _MSC_VER +#ifdef __i386__ +#ifndef _MSC_VER #define LIBFFI_ASM #include <fficonfig.h> #include <ffi.h> -#include "internal.h" - -#define C2(X, Y) X ## Y -#define C1(X, Y) C2(X, Y) -#ifdef __USER_LABEL_PREFIX__ -# define C(X) C1(__USER_LABEL_PREFIX__, X) -#else -# define C(X) X -#endif - -#ifdef X86_DARWIN -# define L(X) C1(L, X) -#else -# define L(X) C1(.L, X) -#endif - -#ifdef __ELF__ -# define ENDF(X) .type X,@function; .size X, . - X -#else -# define ENDF(X) -#endif - -/* Handle win32 fastcall name mangling. */ -#ifdef X86_WIN32 -# define ffi_call_i386 @ffi_call_i386@8 -# define ffi_closure_inner @ffi_closure_inner@8 -#else -# define ffi_call_i386 C(ffi_call_i386) -# define ffi_closure_inner C(ffi_closure_inner) -#endif - -/* This macro allows the safe creation of jump tables without an - actual table. The entry points into the table are all 8 bytes. - The use of ORG asserts that we're at the correct location. */ -/* ??? The clang assembler doesn't handle .org with symbolic expressions. */ -#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) -# define E(BASE, X) .balign 8 -#else -# define E(BASE, X) .balign 8; .org BASE + X * 8 -#endif - - .text - .balign 16 - .globl ffi_call_i386 - FFI_HIDDEN(ffi_call_i386) - -/* This is declared as - - void ffi_call_i386(struct call_frame *frame, char *argp) - __attribute__((fastcall)); - - Thus the arguments are present in - - ecx: frame - edx: argp -*/ - -ffi_call_i386: -L(UW0): - # cfi_startproc -#if !HAVE_FASTCALL - movl 4(%esp), %ecx - movl 8(%esp), %edx -#endif - movl (%esp), %eax /* move the return address */ - movl %ebp, (%ecx) /* store %ebp into local frame */ - movl %eax, 4(%ecx) /* store retaddr into local frame */ - - /* New stack frame based off ebp. This is a itty bit of unwind - trickery in that the CFA *has* changed. There is no easy way - to describe it correctly on entry to the function. 
Fortunately, - it doesn't matter too much since at all points we can correctly - unwind back to ffi_call. Note that the location to which we - moved the return address is (the new) CFA-4, so from the - perspective of the unwind info, it hasn't moved. */ - movl %ecx, %ebp -L(UW1): - # cfi_def_cfa(%ebp, 8) - # cfi_rel_offset(%ebp, 0) - - movl %edx, %esp /* set outgoing argument stack */ - movl 20+R_EAX*4(%ebp), %eax /* set register arguments */ - movl 20+R_EDX*4(%ebp), %edx - movl 20+R_ECX*4(%ebp), %ecx - - call *8(%ebp) - - movl 12(%ebp), %ecx /* load return type code */ - movl %ebx, 8(%ebp) /* preserve %ebx */ -L(UW2): - # cfi_rel_offset(%ebx, 8) - - andl $X86_RET_TYPE_MASK, %ecx -#ifdef __PIC__ - call C(__x86.get_pc_thunk.bx) -L(pc1): - leal L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx -#else - leal L(store_table)(,%ecx, 8), %ebx -#endif - movl 16(%ebp), %ecx /* load result address */ - jmp *%ebx - - .balign 8 -L(store_table): -E(L(store_table), X86_RET_FLOAT) - fstps (%ecx) - jmp L(e1) -E(L(store_table), X86_RET_DOUBLE) - fstpl (%ecx) - jmp L(e1) -E(L(store_table), X86_RET_LDOUBLE) - fstpt (%ecx) - jmp L(e1) -E(L(store_table), X86_RET_SINT8) - movsbl %al, %eax - mov %eax, (%ecx) - jmp L(e1) -E(L(store_table), X86_RET_SINT16) - movswl %ax, %eax - mov %eax, (%ecx) - jmp L(e1) -E(L(store_table), X86_RET_UINT8) - movzbl %al, %eax - mov %eax, (%ecx) - jmp L(e1) -E(L(store_table), X86_RET_UINT16) - movzwl %ax, %eax - mov %eax, (%ecx) - jmp L(e1) -E(L(store_table), X86_RET_INT64) - movl %edx, 4(%ecx) - /* fallthru */ -E(L(store_table), X86_RET_INT32) - movl %eax, (%ecx) - /* fallthru */ -E(L(store_table), X86_RET_VOID) -L(e1): - movl 8(%ebp), %ebx - movl %ebp, %esp - popl %ebp -L(UW3): - # cfi_remember_state - # cfi_def_cfa(%esp, 4) - # cfi_restore(%ebx) - # cfi_restore(%ebp) - ret -L(UW4): - # cfi_restore_state - -E(L(store_table), X86_RET_STRUCTPOP) - jmp L(e1) -E(L(store_table), X86_RET_STRUCTARG) - jmp L(e1) -E(L(store_table), X86_RET_STRUCT_1B) - movb %al, (%ecx) - jmp L(e1) -E(L(store_table), X86_RET_STRUCT_2B) - movw %ax, (%ecx) - jmp L(e1) - - /* Fill out the table so that bad values are predictable. */ -E(L(store_table), X86_RET_UNUSED14) - ud2 -E(L(store_table), X86_RET_UNUSED15) - ud2 - -L(UW5): - # cfi_endproc -ENDF(ffi_call_i386) - -/* The inner helper is declared as - - void ffi_closure_inner(struct closure_frame *frame, char *argp) - __attribute_((fastcall)) - - Thus the arguments are placed in - - ecx: frame - edx: argp -*/ - -/* Macros to help setting up the closure_data structure. 
*/ - -#if HAVE_FASTCALL -# define closure_FS (40 + 4) -# define closure_CF 0 -#else -# define closure_FS (8 + 40 + 12) -# define closure_CF 8 -#endif - -#define FFI_CLOSURE_SAVE_REGS \ - movl %eax, closure_CF+16+R_EAX*4(%esp); \ - movl %edx, closure_CF+16+R_EDX*4(%esp); \ - movl %ecx, closure_CF+16+R_ECX*4(%esp) - -#define FFI_CLOSURE_COPY_TRAMP_DATA \ - movl FFI_TRAMPOLINE_SIZE(%eax), %edx; /* copy cif */ \ - movl FFI_TRAMPOLINE_SIZE+4(%eax), %ecx; /* copy fun */ \ - movl FFI_TRAMPOLINE_SIZE+8(%eax), %eax; /* copy user_data */ \ - movl %edx, closure_CF+28(%esp); \ - movl %ecx, closure_CF+32(%esp); \ - movl %eax, closure_CF+36(%esp) - -#if HAVE_FASTCALL -# define FFI_CLOSURE_PREP_CALL \ - movl %esp, %ecx; /* load closure_data */ \ - leal closure_FS+4(%esp), %edx; /* load incoming stack */ +#include "internal.h" + +#define C2(X, Y) X ## Y +#define C1(X, Y) C2(X, Y) +#ifdef __USER_LABEL_PREFIX__ +# define C(X) C1(__USER_LABEL_PREFIX__, X) +#else +# define C(X) X +#endif + +#ifdef X86_DARWIN +# define L(X) C1(L, X) +#else +# define L(X) C1(.L, X) +#endif + +#ifdef __ELF__ +# define ENDF(X) .type X,@function; .size X, . - X +#else +# define ENDF(X) +#endif + +/* Handle win32 fastcall name mangling. */ +#ifdef X86_WIN32 +# define ffi_call_i386 @ffi_call_i386@8 +# define ffi_closure_inner @ffi_closure_inner@8 +#else +# define ffi_call_i386 C(ffi_call_i386) +# define ffi_closure_inner C(ffi_closure_inner) +#endif + +/* This macro allows the safe creation of jump tables without an + actual table. The entry points into the table are all 8 bytes. + The use of ORG asserts that we're at the correct location. */ +/* ??? The clang assembler doesn't handle .org with symbolic expressions. */ +#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) +# define E(BASE, X) .balign 8 +#else +# define E(BASE, X) .balign 8; .org BASE + X * 8 +#endif + + .text + .balign 16 + .globl ffi_call_i386 + FFI_HIDDEN(ffi_call_i386) + +/* This is declared as + + void ffi_call_i386(struct call_frame *frame, char *argp) + __attribute__((fastcall)); + + Thus the arguments are present in + + ecx: frame + edx: argp +*/ + +ffi_call_i386: +L(UW0): + # cfi_startproc +#if !HAVE_FASTCALL + movl 4(%esp), %ecx + movl 8(%esp), %edx +#endif + movl (%esp), %eax /* move the return address */ + movl %ebp, (%ecx) /* store %ebp into local frame */ + movl %eax, 4(%ecx) /* store retaddr into local frame */ + + /* New stack frame based off ebp. This is a itty bit of unwind + trickery in that the CFA *has* changed. There is no easy way + to describe it correctly on entry to the function. Fortunately, + it doesn't matter too much since at all points we can correctly + unwind back to ffi_call. Note that the location to which we + moved the return address is (the new) CFA-4, so from the + perspective of the unwind info, it hasn't moved. 
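As a reading aid, the call_frame layout implied by the fixed offsets this routine uses (a reconstruction; the authoritative definition lives in the C source, not in this patch):

    struct call_frame_sketch {
      void *saved_ebp;     //  0: caller %ebp, stored on entry
      void *retaddr;       //  4: relocated return address
      void (*fn)(void);    //  8: target, invoked via call *8(%ebp)
      unsigned flags;      // 12: X86_RET_* selector for the store_table
      void *rvalue;        // 16: where the result is written
      unsigned regs[3];    // 20: register args, indexed by R_EAX/R_EDX/R_ECX
    };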
*/ + movl %ecx, %ebp +L(UW1): + # cfi_def_cfa(%ebp, 8) + # cfi_rel_offset(%ebp, 0) + + movl %edx, %esp /* set outgoing argument stack */ + movl 20+R_EAX*4(%ebp), %eax /* set register arguments */ + movl 20+R_EDX*4(%ebp), %edx + movl 20+R_ECX*4(%ebp), %ecx + + call *8(%ebp) + + movl 12(%ebp), %ecx /* load return type code */ + movl %ebx, 8(%ebp) /* preserve %ebx */ +L(UW2): + # cfi_rel_offset(%ebx, 8) + + andl $X86_RET_TYPE_MASK, %ecx +#ifdef __PIC__ + call C(__x86.get_pc_thunk.bx) +L(pc1): + leal L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx +#else + leal L(store_table)(,%ecx, 8), %ebx +#endif + movl 16(%ebp), %ecx /* load result address */ + jmp *%ebx + + .balign 8 +L(store_table): +E(L(store_table), X86_RET_FLOAT) + fstps (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_DOUBLE) + fstpl (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_LDOUBLE) + fstpt (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_SINT8) + movsbl %al, %eax + mov %eax, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_SINT16) + movswl %ax, %eax + mov %eax, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_UINT8) + movzbl %al, %eax + mov %eax, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_UINT16) + movzwl %ax, %eax + mov %eax, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_INT64) + movl %edx, 4(%ecx) + /* fallthru */ +E(L(store_table), X86_RET_INT32) + movl %eax, (%ecx) + /* fallthru */ +E(L(store_table), X86_RET_VOID) +L(e1): + movl 8(%ebp), %ebx + movl %ebp, %esp + popl %ebp +L(UW3): + # cfi_remember_state + # cfi_def_cfa(%esp, 4) + # cfi_restore(%ebx) + # cfi_restore(%ebp) + ret +L(UW4): + # cfi_restore_state + +E(L(store_table), X86_RET_STRUCTPOP) + jmp L(e1) +E(L(store_table), X86_RET_STRUCTARG) + jmp L(e1) +E(L(store_table), X86_RET_STRUCT_1B) + movb %al, (%ecx) + jmp L(e1) +E(L(store_table), X86_RET_STRUCT_2B) + movw %ax, (%ecx) + jmp L(e1) + + /* Fill out the table so that bad values are predictable. */ +E(L(store_table), X86_RET_UNUSED14) + ud2 +E(L(store_table), X86_RET_UNUSED15) + ud2 + +L(UW5): + # cfi_endproc +ENDF(ffi_call_i386) + +/* The inner helper is declared as + + void ffi_closure_inner(struct closure_frame *frame, char *argp) + __attribute_((fastcall)) + + Thus the arguments are placed in + + ecx: frame + edx: argp +*/ + +/* Macros to help setting up the closure_data structure. 
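The closure_data block those macros fill in can be pictured like this (inferred from the closure_CF offsets; FASTCALL layout, where closure_CF is 0):

    struct closure_data_sketch {
      char ret[16];        //  0: return-value scratch, read back "optimistically"
      unsigned regs[3];    // 16: incoming %eax/%edx/%ecx from SAVE_REGS
      void *cif;           // 28: copied from the trampoline block
      void *fun;           // 32: the user handler
      void *user_data;     // 36: opaque pointer handed back to the handler
    };                     // closure_FS = 40 + 4 restores 16-byte %esp alignment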
*/ + +#if HAVE_FASTCALL +# define closure_FS (40 + 4) +# define closure_CF 0 #else -# define FFI_CLOSURE_PREP_CALL \ - leal closure_CF(%esp), %ecx; /* load closure_data */ \ - leal closure_FS+4(%esp), %edx; /* load incoming stack */ \ - movl %ecx, (%esp); \ - movl %edx, 4(%esp) +# define closure_FS (8 + 40 + 12) +# define closure_CF 8 #endif -#define FFI_CLOSURE_CALL_INNER(UWN) \ - call ffi_closure_inner - -#define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \ - andl $X86_RET_TYPE_MASK, %eax; \ - leal L(C1(load_table,N))(, %eax, 8), %edx; \ - movl closure_CF(%esp), %eax; /* optimiztic load */ \ - jmp *%edx - -#ifdef __PIC__ -# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE -# undef FFI_CLOSURE_MASK_AND_JUMP -# define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \ - andl $X86_RET_TYPE_MASK, %eax; \ - call C(__x86.get_pc_thunk.dx); \ -L(C1(pc,N)): \ - leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx; \ - movl closure_CF(%esp), %eax; /* optimiztic load */ \ - jmp *%edx -# else -# define FFI_CLOSURE_CALL_INNER_SAVE_EBX -# undef FFI_CLOSURE_CALL_INNER -# define FFI_CLOSURE_CALL_INNER(UWN) \ - movl %ebx, 40(%esp); /* save ebx */ \ -L(C1(UW,UWN)): \ - /* cfi_rel_offset(%ebx, 40); */ \ - call C(__x86.get_pc_thunk.bx); /* load got register */ \ - addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \ - call ffi_closure_inner@PLT -# undef FFI_CLOSURE_MASK_AND_JUMP -# define FFI_CLOSURE_MASK_AND_JUMP(N, UWN) \ - andl $X86_RET_TYPE_MASK, %eax; \ - leal L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), %edx; \ - movl 40(%esp), %ebx; /* restore ebx */ \ -L(C1(UW,UWN)): \ - /* cfi_restore(%ebx); */ \ - movl closure_CF(%esp), %eax; /* optimiztic load */ \ - jmp *%edx -# endif /* DARWIN || HIDDEN */ -#endif /* __PIC__ */ - - .balign 16 - .globl C(ffi_go_closure_EAX) - FFI_HIDDEN(C(ffi_go_closure_EAX)) -C(ffi_go_closure_EAX): -L(UW6): - # cfi_startproc - subl $closure_FS, %esp -L(UW7): - # cfi_def_cfa_offset(closure_FS + 4) - FFI_CLOSURE_SAVE_REGS - movl 4(%eax), %edx /* copy cif */ - movl 8(%eax), %ecx /* copy fun */ - movl %edx, closure_CF+28(%esp) - movl %ecx, closure_CF+32(%esp) - movl %eax, closure_CF+36(%esp) /* closure is user_data */ - jmp L(do_closure_i386) -L(UW8): - # cfi_endproc -ENDF(C(ffi_go_closure_EAX)) - - .balign 16 - .globl C(ffi_go_closure_ECX) - FFI_HIDDEN(C(ffi_go_closure_ECX)) -C(ffi_go_closure_ECX): -L(UW9): - # cfi_startproc - subl $closure_FS, %esp -L(UW10): - # cfi_def_cfa_offset(closure_FS + 4) - FFI_CLOSURE_SAVE_REGS - movl 4(%ecx), %edx /* copy cif */ - movl 8(%ecx), %eax /* copy fun */ - movl %edx, closure_CF+28(%esp) - movl %eax, closure_CF+32(%esp) - movl %ecx, closure_CF+36(%esp) /* closure is user_data */ - jmp L(do_closure_i386) -L(UW11): - # cfi_endproc -ENDF(C(ffi_go_closure_ECX)) - -/* The closure entry points are reached from the ffi_closure trampoline. - On entry, %eax contains the address of the ffi_closure. */ - - .balign 16 - .globl C(ffi_closure_i386) - FFI_HIDDEN(C(ffi_closure_i386)) - -C(ffi_closure_i386): -L(UW12): - # cfi_startproc - subl $closure_FS, %esp -L(UW13): - # cfi_def_cfa_offset(closure_FS + 4) - - FFI_CLOSURE_SAVE_REGS - FFI_CLOSURE_COPY_TRAMP_DATA - - /* Entry point from preceeding Go closures. 
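One piece of context for the Go entry points around here (hedged; the Go runtime side is outside this diff): a Go closure needs no runtime-generated executable code at all. The caller passes a static descriptor in a register (%eax or %ecx, hence the two entry points), and the descriptor leads with the entry-point address:

    // Field order matches what the assembly copies out: 4(reg) and 8(reg).
    typedef struct {
      void *tramp;                                     // ffi_go_closure_* entry
      ffi_cif *cif;                                    // offset 4
      void (*fun)(ffi_cif *, void *, void **, void *); // offset 8
    } go_closure_sketch;                               // cf. ffi_go_closure in ffi.h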
*/ -L(do_closure_i386): - - FFI_CLOSURE_PREP_CALL - FFI_CLOSURE_CALL_INNER(14) - FFI_CLOSURE_MASK_AND_JUMP(2, 15) - - .balign 8 -L(load_table2): -E(L(load_table2), X86_RET_FLOAT) - flds closure_CF(%esp) - jmp L(e2) -E(L(load_table2), X86_RET_DOUBLE) - fldl closure_CF(%esp) - jmp L(e2) -E(L(load_table2), X86_RET_LDOUBLE) - fldt closure_CF(%esp) - jmp L(e2) -E(L(load_table2), X86_RET_SINT8) - movsbl %al, %eax - jmp L(e2) -E(L(load_table2), X86_RET_SINT16) - movswl %ax, %eax - jmp L(e2) -E(L(load_table2), X86_RET_UINT8) - movzbl %al, %eax - jmp L(e2) -E(L(load_table2), X86_RET_UINT16) - movzwl %ax, %eax - jmp L(e2) -E(L(load_table2), X86_RET_INT64) - movl closure_CF+4(%esp), %edx - jmp L(e2) -E(L(load_table2), X86_RET_INT32) - nop - /* fallthru */ -E(L(load_table2), X86_RET_VOID) -L(e2): - addl $closure_FS, %esp -L(UW16): - # cfi_adjust_cfa_offset(-closure_FS) +#define FFI_CLOSURE_SAVE_REGS \ + movl %eax, closure_CF+16+R_EAX*4(%esp); \ + movl %edx, closure_CF+16+R_EDX*4(%esp); \ + movl %ecx, closure_CF+16+R_ECX*4(%esp) + +#define FFI_CLOSURE_COPY_TRAMP_DATA \ + movl FFI_TRAMPOLINE_SIZE(%eax), %edx; /* copy cif */ \ + movl FFI_TRAMPOLINE_SIZE+4(%eax), %ecx; /* copy fun */ \ + movl FFI_TRAMPOLINE_SIZE+8(%eax), %eax; /* copy user_data */ \ + movl %edx, closure_CF+28(%esp); \ + movl %ecx, closure_CF+32(%esp); \ + movl %eax, closure_CF+36(%esp) + +#if HAVE_FASTCALL +# define FFI_CLOSURE_PREP_CALL \ + movl %esp, %ecx; /* load closure_data */ \ + leal closure_FS+4(%esp), %edx; /* load incoming stack */ +#else +# define FFI_CLOSURE_PREP_CALL \ + leal closure_CF(%esp), %ecx; /* load closure_data */ \ + leal closure_FS+4(%esp), %edx; /* load incoming stack */ \ + movl %ecx, (%esp); \ + movl %edx, 4(%esp) +#endif + +#define FFI_CLOSURE_CALL_INNER(UWN) \ + call ffi_closure_inner + +#define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \ + andl $X86_RET_TYPE_MASK, %eax; \ + leal L(C1(load_table,N))(, %eax, 8), %edx; \ + movl closure_CF(%esp), %eax; /* optimiztic load */ \ + jmp *%edx + +#ifdef __PIC__ +# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE +# undef FFI_CLOSURE_MASK_AND_JUMP +# define FFI_CLOSURE_MASK_AND_JUMP(N, UW) \ + andl $X86_RET_TYPE_MASK, %eax; \ + call C(__x86.get_pc_thunk.dx); \ +L(C1(pc,N)): \ + leal L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx; \ + movl closure_CF(%esp), %eax; /* optimiztic load */ \ + jmp *%edx +# else +# define FFI_CLOSURE_CALL_INNER_SAVE_EBX +# undef FFI_CLOSURE_CALL_INNER +# define FFI_CLOSURE_CALL_INNER(UWN) \ + movl %ebx, 40(%esp); /* save ebx */ \ +L(C1(UW,UWN)): \ + /* cfi_rel_offset(%ebx, 40); */ \ + call C(__x86.get_pc_thunk.bx); /* load got register */ \ + addl $C(_GLOBAL_OFFSET_TABLE_), %ebx; \ + call ffi_closure_inner@PLT +# undef FFI_CLOSURE_MASK_AND_JUMP +# define FFI_CLOSURE_MASK_AND_JUMP(N, UWN) \ + andl $X86_RET_TYPE_MASK, %eax; \ + leal L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), %edx; \ + movl 40(%esp), %ebx; /* restore ebx */ \ +L(C1(UW,UWN)): \ + /* cfi_restore(%ebx); */ \ + movl closure_CF(%esp), %eax; /* optimiztic load */ \ + jmp *%edx +# endif /* DARWIN || HIDDEN */ +#endif /* __PIC__ */ + + .balign 16 + .globl C(ffi_go_closure_EAX) + FFI_HIDDEN(C(ffi_go_closure_EAX)) +C(ffi_go_closure_EAX): +L(UW6): + # cfi_startproc + subl $closure_FS, %esp +L(UW7): + # cfi_def_cfa_offset(closure_FS + 4) + FFI_CLOSURE_SAVE_REGS + movl 4(%eax), %edx /* copy cif */ + movl 8(%eax), %ecx /* copy fun */ + movl %edx, closure_CF+28(%esp) + movl %ecx, closure_CF+32(%esp) + movl %eax, closure_CF+36(%esp) /* closure is user_data */ + jmp 
L(do_closure_i386) +L(UW8): + # cfi_endproc +ENDF(C(ffi_go_closure_EAX)) + + .balign 16 + .globl C(ffi_go_closure_ECX) + FFI_HIDDEN(C(ffi_go_closure_ECX)) +C(ffi_go_closure_ECX): +L(UW9): + # cfi_startproc + subl $closure_FS, %esp +L(UW10): + # cfi_def_cfa_offset(closure_FS + 4) + FFI_CLOSURE_SAVE_REGS + movl 4(%ecx), %edx /* copy cif */ + movl 8(%ecx), %eax /* copy fun */ + movl %edx, closure_CF+28(%esp) + movl %eax, closure_CF+32(%esp) + movl %ecx, closure_CF+36(%esp) /* closure is user_data */ + jmp L(do_closure_i386) +L(UW11): + # cfi_endproc +ENDF(C(ffi_go_closure_ECX)) + +/* The closure entry points are reached from the ffi_closure trampoline. + On entry, %eax contains the address of the ffi_closure. */ + + .balign 16 + .globl C(ffi_closure_i386) + FFI_HIDDEN(C(ffi_closure_i386)) + +C(ffi_closure_i386): +L(UW12): + # cfi_startproc + subl $closure_FS, %esp +L(UW13): + # cfi_def_cfa_offset(closure_FS + 4) + + FFI_CLOSURE_SAVE_REGS + FFI_CLOSURE_COPY_TRAMP_DATA + + /* Entry point from preceeding Go closures. */ +L(do_closure_i386): + + FFI_CLOSURE_PREP_CALL + FFI_CLOSURE_CALL_INNER(14) + FFI_CLOSURE_MASK_AND_JUMP(2, 15) + + .balign 8 +L(load_table2): +E(L(load_table2), X86_RET_FLOAT) + flds closure_CF(%esp) + jmp L(e2) +E(L(load_table2), X86_RET_DOUBLE) + fldl closure_CF(%esp) + jmp L(e2) +E(L(load_table2), X86_RET_LDOUBLE) + fldt closure_CF(%esp) + jmp L(e2) +E(L(load_table2), X86_RET_SINT8) + movsbl %al, %eax + jmp L(e2) +E(L(load_table2), X86_RET_SINT16) + movswl %ax, %eax + jmp L(e2) +E(L(load_table2), X86_RET_UINT8) + movzbl %al, %eax + jmp L(e2) +E(L(load_table2), X86_RET_UINT16) + movzwl %ax, %eax + jmp L(e2) +E(L(load_table2), X86_RET_INT64) + movl closure_CF+4(%esp), %edx + jmp L(e2) +E(L(load_table2), X86_RET_INT32) + nop + /* fallthru */ +E(L(load_table2), X86_RET_VOID) +L(e2): + addl $closure_FS, %esp +L(UW16): + # cfi_adjust_cfa_offset(-closure_FS) ret -L(UW17): - # cfi_adjust_cfa_offset(closure_FS) -E(L(load_table2), X86_RET_STRUCTPOP) - addl $closure_FS, %esp -L(UW18): - # cfi_adjust_cfa_offset(-closure_FS) +L(UW17): + # cfi_adjust_cfa_offset(closure_FS) +E(L(load_table2), X86_RET_STRUCTPOP) + addl $closure_FS, %esp +L(UW18): + # cfi_adjust_cfa_offset(-closure_FS) ret $4 -L(UW19): - # cfi_adjust_cfa_offset(closure_FS) -E(L(load_table2), X86_RET_STRUCTARG) - jmp L(e2) -E(L(load_table2), X86_RET_STRUCT_1B) - movzbl %al, %eax - jmp L(e2) -E(L(load_table2), X86_RET_STRUCT_2B) - movzwl %ax, %eax - jmp L(e2) - - /* Fill out the table so that bad values are predictable. */ -E(L(load_table2), X86_RET_UNUSED14) - ud2 -E(L(load_table2), X86_RET_UNUSED15) - ud2 - -L(UW20): - # cfi_endproc -ENDF(C(ffi_closure_i386)) - - .balign 16 - .globl C(ffi_go_closure_STDCALL) - FFI_HIDDEN(C(ffi_go_closure_STDCALL)) -C(ffi_go_closure_STDCALL): -L(UW21): - # cfi_startproc - subl $closure_FS, %esp -L(UW22): - # cfi_def_cfa_offset(closure_FS + 4) - FFI_CLOSURE_SAVE_REGS - movl 4(%ecx), %edx /* copy cif */ - movl 8(%ecx), %eax /* copy fun */ - movl %edx, closure_CF+28(%esp) - movl %eax, closure_CF+32(%esp) - movl %ecx, closure_CF+36(%esp) /* closure is user_data */ - jmp L(do_closure_STDCALL) -L(UW23): - # cfi_endproc -ENDF(C(ffi_go_closure_STDCALL)) - -/* For REGISTER, we have no available parameter registers, and so we - enter here having pushed the closure onto the stack. 
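A note before the next several entry points, which differ mainly in how many bytes the final ret discards: THISCALL, FASTCALL and STDCALL are callee-pops conventions on 32-bit Windows, so a closure emulating them must end in `ret $N` rather than a plain `ret`. Concretely, for a hypothetical callback:

    // A two-int stdcall callback must be exited with `ret $8`; a cdecl one
    // with a plain `ret`.  ffi_closure_inner reports N in the bits above
    // X86_RET_POP_SHIFT so one epilogue can serve every convention.
    int __attribute__((stdcall)) on_event(int code, int flags);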
*/ - - .balign 16 - .globl C(ffi_closure_REGISTER) - FFI_HIDDEN(C(ffi_closure_REGISTER)) -C(ffi_closure_REGISTER): -L(UW24): - # cfi_startproc - # cfi_def_cfa(%esp, 8) - # cfi_offset(%eip, -8) - subl $closure_FS-4, %esp -L(UW25): - # cfi_def_cfa_offset(closure_FS + 4) - FFI_CLOSURE_SAVE_REGS - movl closure_FS-4(%esp), %ecx /* load retaddr */ - movl closure_FS(%esp), %eax /* load closure */ - movl %ecx, closure_FS(%esp) /* move retaddr */ - jmp L(do_closure_REGISTER) -L(UW26): - # cfi_endproc -ENDF(C(ffi_closure_REGISTER)) - -/* For STDCALL (and others), we need to pop N bytes of arguments off - the stack following the closure. The amount needing to be popped - is returned to us from ffi_closure_inner. */ - - .balign 16 - .globl C(ffi_closure_STDCALL) - FFI_HIDDEN(C(ffi_closure_STDCALL)) -C(ffi_closure_STDCALL): -L(UW27): - # cfi_startproc - subl $closure_FS, %esp -L(UW28): - # cfi_def_cfa_offset(closure_FS + 4) - - FFI_CLOSURE_SAVE_REGS - - /* Entry point from ffi_closure_REGISTER. */ -L(do_closure_REGISTER): - - FFI_CLOSURE_COPY_TRAMP_DATA - - /* Entry point from preceeding Go closure. */ -L(do_closure_STDCALL): - - FFI_CLOSURE_PREP_CALL - FFI_CLOSURE_CALL_INNER(29) - - movl %eax, %ecx - shrl $X86_RET_POP_SHIFT, %ecx /* isolate pop count */ - leal closure_FS(%esp, %ecx), %ecx /* compute popped esp */ - movl closure_FS(%esp), %edx /* move return address */ - movl %edx, (%ecx) - - /* From this point on, the value of %esp upon return is %ecx+4, - and we've copied the return address to %ecx to make return easy. - There's no point in representing this in the unwind info, as - there is always a window between the mov and the ret which - will be wrong from one point of view or another. */ - - FFI_CLOSURE_MASK_AND_JUMP(3, 30) - - .balign 8 -L(load_table3): -E(L(load_table3), X86_RET_FLOAT) - flds closure_CF(%esp) - movl %ecx, %esp - ret -E(L(load_table3), X86_RET_DOUBLE) - fldl closure_CF(%esp) - movl %ecx, %esp - ret -E(L(load_table3), X86_RET_LDOUBLE) - fldt closure_CF(%esp) - movl %ecx, %esp - ret -E(L(load_table3), X86_RET_SINT8) - movsbl %al, %eax - movl %ecx, %esp - ret -E(L(load_table3), X86_RET_SINT16) - movswl %ax, %eax - movl %ecx, %esp - ret -E(L(load_table3), X86_RET_UINT8) - movzbl %al, %eax - movl %ecx, %esp - ret -E(L(load_table3), X86_RET_UINT16) - movzwl %ax, %eax - movl %ecx, %esp - ret -E(L(load_table3), X86_RET_INT64) - movl closure_CF+4(%esp), %edx - movl %ecx, %esp - ret -E(L(load_table3), X86_RET_INT32) - movl %ecx, %esp - ret -E(L(load_table3), X86_RET_VOID) - movl %ecx, %esp - ret -E(L(load_table3), X86_RET_STRUCTPOP) - movl %ecx, %esp - ret -E(L(load_table3), X86_RET_STRUCTARG) - movl %ecx, %esp - ret -E(L(load_table3), X86_RET_STRUCT_1B) - movzbl %al, %eax - movl %ecx, %esp - ret -E(L(load_table3), X86_RET_STRUCT_2B) - movzwl %ax, %eax - movl %ecx, %esp - ret - - /* Fill out the table so that bad values are predictable. */ -E(L(load_table3), X86_RET_UNUSED14) - ud2 -E(L(load_table3), X86_RET_UNUSED15) - ud2 - -L(UW31): - # cfi_endproc -ENDF(C(ffi_closure_STDCALL)) - +L(UW19): + # cfi_adjust_cfa_offset(closure_FS) +E(L(load_table2), X86_RET_STRUCTARG) + jmp L(e2) +E(L(load_table2), X86_RET_STRUCT_1B) + movzbl %al, %eax + jmp L(e2) +E(L(load_table2), X86_RET_STRUCT_2B) + movzwl %ax, %eax + jmp L(e2) + + /* Fill out the table so that bad values are predictable. 
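Stepping back from the individual entries: E() pads every handler to exactly 8 bytes, so dispatch is a branch to base + type*8 with no table load. The nearest C analogue (GNU computed goto, purely illustrative, with only two of the sixteen entries shown):

    #define X86_RET_TYPE_MASK 15   // from internal.h above

    static void store_result_sketch(unsigned flags)
    {
      // Real code has one label per X86_RET_* value; two shown here.
      static void *tbl[] = { &&r_float, &&r_double };
      unsigned type = flags & X86_RET_TYPE_MASK;
      if (type > 1) return;                  // sketch covers two entries only
      goto *tbl[type];                       // asm: jmp *(base + type*8)
    r_float:  return;                        // the fstps entry
    r_double: return;                        // the fstpl entry
    }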
*/ +E(L(load_table2), X86_RET_UNUSED14) + ud2 +E(L(load_table2), X86_RET_UNUSED15) + ud2 + +L(UW20): + # cfi_endproc +ENDF(C(ffi_closure_i386)) + + .balign 16 + .globl C(ffi_go_closure_STDCALL) + FFI_HIDDEN(C(ffi_go_closure_STDCALL)) +C(ffi_go_closure_STDCALL): +L(UW21): + # cfi_startproc + subl $closure_FS, %esp +L(UW22): + # cfi_def_cfa_offset(closure_FS + 4) + FFI_CLOSURE_SAVE_REGS + movl 4(%ecx), %edx /* copy cif */ + movl 8(%ecx), %eax /* copy fun */ + movl %edx, closure_CF+28(%esp) + movl %eax, closure_CF+32(%esp) + movl %ecx, closure_CF+36(%esp) /* closure is user_data */ + jmp L(do_closure_STDCALL) +L(UW23): + # cfi_endproc +ENDF(C(ffi_go_closure_STDCALL)) + +/* For REGISTER, we have no available parameter registers, and so we + enter here having pushed the closure onto the stack. */ + + .balign 16 + .globl C(ffi_closure_REGISTER) + FFI_HIDDEN(C(ffi_closure_REGISTER)) +C(ffi_closure_REGISTER): +L(UW24): + # cfi_startproc + # cfi_def_cfa(%esp, 8) + # cfi_offset(%eip, -8) + subl $closure_FS-4, %esp +L(UW25): + # cfi_def_cfa_offset(closure_FS + 4) + FFI_CLOSURE_SAVE_REGS + movl closure_FS-4(%esp), %ecx /* load retaddr */ + movl closure_FS(%esp), %eax /* load closure */ + movl %ecx, closure_FS(%esp) /* move retaddr */ + jmp L(do_closure_REGISTER) +L(UW26): + # cfi_endproc +ENDF(C(ffi_closure_REGISTER)) + +/* For STDCALL (and others), we need to pop N bytes of arguments off + the stack following the closure. The amount needing to be popped + is returned to us from ffi_closure_inner. */ + + .balign 16 + .globl C(ffi_closure_STDCALL) + FFI_HIDDEN(C(ffi_closure_STDCALL)) +C(ffi_closure_STDCALL): +L(UW27): + # cfi_startproc + subl $closure_FS, %esp +L(UW28): + # cfi_def_cfa_offset(closure_FS + 4) + + FFI_CLOSURE_SAVE_REGS + + /* Entry point from ffi_closure_REGISTER. */ +L(do_closure_REGISTER): + + FFI_CLOSURE_COPY_TRAMP_DATA + + /* Entry point from preceeding Go closure. */ +L(do_closure_STDCALL): + + FFI_CLOSURE_PREP_CALL + FFI_CLOSURE_CALL_INNER(29) + + movl %eax, %ecx + shrl $X86_RET_POP_SHIFT, %ecx /* isolate pop count */ + leal closure_FS(%esp, %ecx), %ecx /* compute popped esp */ + movl closure_FS(%esp), %edx /* move return address */ + movl %edx, (%ecx) + + /* From this point on, the value of %esp upon return is %ecx+4, + and we've copied the return address to %ecx to make return easy. + There's no point in representing this in the unwind info, as + there is always a window between the mov and the ret which + will be wrong from one point of view or another. 
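To restate the packing this depends on (from internal.h earlier in the diff; no new behavior): ffi_closure_inner returns the X86_RET_* selector in the low four bits of %eax and the stack-adjustment byte count above them.

    #define X86_RET_TYPE_MASK 15   // from internal.h above
    #define X86_RET_POP_SHIFT 4

    static void unpack_inner_result(unsigned eax, unsigned *type, unsigned *pop)
    {
      *type = eax & X86_RET_TYPE_MASK;    // indexes load_table3
      *pop  = eax >> X86_RET_POP_SHIFT;   // bytes discarded by the shrl/leal pair
    }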
*/ + + FFI_CLOSURE_MASK_AND_JUMP(3, 30) + + .balign 8 +L(load_table3): +E(L(load_table3), X86_RET_FLOAT) + flds closure_CF(%esp) + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_DOUBLE) + fldl closure_CF(%esp) + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_LDOUBLE) + fldt closure_CF(%esp) + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_SINT8) + movsbl %al, %eax + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_SINT16) + movswl %ax, %eax + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_UINT8) + movzbl %al, %eax + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_UINT16) + movzwl %ax, %eax + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_INT64) + movl closure_CF+4(%esp), %edx + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_INT32) + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_VOID) + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_STRUCTPOP) + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_STRUCTARG) + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_STRUCT_1B) + movzbl %al, %eax + movl %ecx, %esp + ret +E(L(load_table3), X86_RET_STRUCT_2B) + movzwl %ax, %eax + movl %ecx, %esp + ret + + /* Fill out the table so that bad values are predictable. */ +E(L(load_table3), X86_RET_UNUSED14) + ud2 +E(L(load_table3), X86_RET_UNUSED15) + ud2 + +L(UW31): + # cfi_endproc +ENDF(C(ffi_closure_STDCALL)) + #if !FFI_NO_RAW_API -#define raw_closure_S_FS (16+16+12) - - .balign 16 - .globl C(ffi_closure_raw_SYSV) - FFI_HIDDEN(C(ffi_closure_raw_SYSV)) -C(ffi_closure_raw_SYSV): -L(UW32): - # cfi_startproc - subl $raw_closure_S_FS, %esp -L(UW33): - # cfi_def_cfa_offset(raw_closure_S_FS + 4) - movl %ebx, raw_closure_S_FS-4(%esp) -L(UW34): - # cfi_rel_offset(%ebx, raw_closure_S_FS-4) - - movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */ - movl %edx, 12(%esp) - leal raw_closure_S_FS+4(%esp), %edx /* load raw_args */ - movl %edx, 8(%esp) - leal 16(%esp), %edx /* load &res */ - movl %edx, 4(%esp) - movl FFI_TRAMPOLINE_SIZE(%eax), %ebx /* load cl->cif */ - movl %ebx, (%esp) - call *FFI_TRAMPOLINE_SIZE+4(%eax) /* call cl->fun */ - - movl 20(%ebx), %eax /* load cif->flags */ - andl $X86_RET_TYPE_MASK, %eax -#ifdef __PIC__ - call C(__x86.get_pc_thunk.bx) -L(pc4): - leal L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx +#define raw_closure_S_FS (16+16+12) + + .balign 16 + .globl C(ffi_closure_raw_SYSV) + FFI_HIDDEN(C(ffi_closure_raw_SYSV)) +C(ffi_closure_raw_SYSV): +L(UW32): + # cfi_startproc + subl $raw_closure_S_FS, %esp +L(UW33): + # cfi_def_cfa_offset(raw_closure_S_FS + 4) + movl %ebx, raw_closure_S_FS-4(%esp) +L(UW34): + # cfi_rel_offset(%ebx, raw_closure_S_FS-4) + + movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */ + movl %edx, 12(%esp) + leal raw_closure_S_FS+4(%esp), %edx /* load raw_args */ + movl %edx, 8(%esp) + leal 16(%esp), %edx /* load &res */ + movl %edx, 4(%esp) + movl FFI_TRAMPOLINE_SIZE(%eax), %ebx /* load cl->cif */ + movl %ebx, (%esp) + call *FFI_TRAMPOLINE_SIZE+4(%eax) /* call cl->fun */ + + movl 20(%ebx), %eax /* load cif->flags */ + andl $X86_RET_TYPE_MASK, %eax +#ifdef __PIC__ + call C(__x86.get_pc_thunk.bx) +L(pc4): + leal L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx #else - leal L(load_table4)(,%eax, 8), %ecx + leal L(load_table4)(,%eax, 8), %ecx #endif - movl raw_closure_S_FS-4(%esp), %ebx -L(UW35): - # cfi_restore(%ebx) - movl 16(%esp), %eax /* Optimistic load */ - jmp *%ecx - - .balign 8 -L(load_table4): -E(L(load_table4), X86_RET_FLOAT) - flds 16(%esp) - jmp L(e4) -E(L(load_table4), X86_RET_DOUBLE) - fldl 16(%esp) - jmp L(e4) -E(L(load_table4), 
X86_RET_LDOUBLE) - fldt 16(%esp) - jmp L(e4) -E(L(load_table4), X86_RET_SINT8) - movsbl %al, %eax - jmp L(e4) -E(L(load_table4), X86_RET_SINT16) - movswl %ax, %eax - jmp L(e4) -E(L(load_table4), X86_RET_UINT8) - movzbl %al, %eax - jmp L(e4) -E(L(load_table4), X86_RET_UINT16) - movzwl %ax, %eax - jmp L(e4) -E(L(load_table4), X86_RET_INT64) - movl 16+4(%esp), %edx - jmp L(e4) -E(L(load_table4), X86_RET_INT32) - nop - /* fallthru */ -E(L(load_table4), X86_RET_VOID) -L(e4): - addl $raw_closure_S_FS, %esp -L(UW36): - # cfi_adjust_cfa_offset(-raw_closure_S_FS) - ret -L(UW37): - # cfi_adjust_cfa_offset(raw_closure_S_FS) -E(L(load_table4), X86_RET_STRUCTPOP) - addl $raw_closure_S_FS, %esp -L(UW38): - # cfi_adjust_cfa_offset(-raw_closure_S_FS) - ret $4 -L(UW39): - # cfi_adjust_cfa_offset(raw_closure_S_FS) -E(L(load_table4), X86_RET_STRUCTARG) - jmp L(e4) -E(L(load_table4), X86_RET_STRUCT_1B) - movzbl %al, %eax - jmp L(e4) -E(L(load_table4), X86_RET_STRUCT_2B) - movzwl %ax, %eax - jmp L(e4) - - /* Fill out the table so that bad values are predictable. */ -E(L(load_table4), X86_RET_UNUSED14) - ud2 -E(L(load_table4), X86_RET_UNUSED15) - ud2 - -L(UW40): - # cfi_endproc -ENDF(C(ffi_closure_raw_SYSV)) - -#define raw_closure_T_FS (16+16+8) - - .balign 16 - .globl C(ffi_closure_raw_THISCALL) - FFI_HIDDEN(C(ffi_closure_raw_THISCALL)) -C(ffi_closure_raw_THISCALL): -L(UW41): - # cfi_startproc - /* Rearrange the stack such that %ecx is the first argument. - This means moving the return address. */ - popl %edx -L(UW42): - # cfi_def_cfa_offset(0) - # cfi_register(%eip, %edx) - pushl %ecx -L(UW43): - # cfi_adjust_cfa_offset(4) - pushl %edx -L(UW44): - # cfi_adjust_cfa_offset(4) - # cfi_rel_offset(%eip, 0) - subl $raw_closure_T_FS, %esp -L(UW45): - # cfi_adjust_cfa_offset(raw_closure_T_FS) - movl %ebx, raw_closure_T_FS-4(%esp) -L(UW46): - # cfi_rel_offset(%ebx, raw_closure_T_FS-4) - - movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */ - movl %edx, 12(%esp) - leal raw_closure_T_FS+4(%esp), %edx /* load raw_args */ - movl %edx, 8(%esp) - leal 16(%esp), %edx /* load &res */ - movl %edx, 4(%esp) - movl FFI_TRAMPOLINE_SIZE(%eax), %ebx /* load cl->cif */ - movl %ebx, (%esp) - call *FFI_TRAMPOLINE_SIZE+4(%eax) /* call cl->fun */ - - movl 20(%ebx), %eax /* load cif->flags */ - andl $X86_RET_TYPE_MASK, %eax -#ifdef __PIC__ - call C(__x86.get_pc_thunk.bx) -L(pc5): - leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx -#else - leal L(load_table5)(,%eax, 8), %ecx + movl raw_closure_S_FS-4(%esp), %ebx +L(UW35): + # cfi_restore(%ebx) + movl 16(%esp), %eax /* Optimistic load */ + jmp *%ecx + + .balign 8 +L(load_table4): +E(L(load_table4), X86_RET_FLOAT) + flds 16(%esp) + jmp L(e4) +E(L(load_table4), X86_RET_DOUBLE) + fldl 16(%esp) + jmp L(e4) +E(L(load_table4), X86_RET_LDOUBLE) + fldt 16(%esp) + jmp L(e4) +E(L(load_table4), X86_RET_SINT8) + movsbl %al, %eax + jmp L(e4) +E(L(load_table4), X86_RET_SINT16) + movswl %ax, %eax + jmp L(e4) +E(L(load_table4), X86_RET_UINT8) + movzbl %al, %eax + jmp L(e4) +E(L(load_table4), X86_RET_UINT16) + movzwl %ax, %eax + jmp L(e4) +E(L(load_table4), X86_RET_INT64) + movl 16+4(%esp), %edx + jmp L(e4) +E(L(load_table4), X86_RET_INT32) + nop + /* fallthru */ +E(L(load_table4), X86_RET_VOID) +L(e4): + addl $raw_closure_S_FS, %esp +L(UW36): + # cfi_adjust_cfa_offset(-raw_closure_S_FS) + ret +L(UW37): + # cfi_adjust_cfa_offset(raw_closure_S_FS) +E(L(load_table4), X86_RET_STRUCTPOP) + addl $raw_closure_S_FS, %esp +L(UW38): + # cfi_adjust_cfa_offset(-raw_closure_S_FS) + ret $4 +L(UW39): + # 
cfi_adjust_cfa_offset(raw_closure_S_FS) +E(L(load_table4), X86_RET_STRUCTARG) + jmp L(e4) +E(L(load_table4), X86_RET_STRUCT_1B) + movzbl %al, %eax + jmp L(e4) +E(L(load_table4), X86_RET_STRUCT_2B) + movzwl %ax, %eax + jmp L(e4) + + /* Fill out the table so that bad values are predictable. */ +E(L(load_table4), X86_RET_UNUSED14) + ud2 +E(L(load_table4), X86_RET_UNUSED15) + ud2 + +L(UW40): + # cfi_endproc +ENDF(C(ffi_closure_raw_SYSV)) + +#define raw_closure_T_FS (16+16+8) + + .balign 16 + .globl C(ffi_closure_raw_THISCALL) + FFI_HIDDEN(C(ffi_closure_raw_THISCALL)) +C(ffi_closure_raw_THISCALL): +L(UW41): + # cfi_startproc + /* Rearrange the stack such that %ecx is the first argument. + This means moving the return address. */ + popl %edx +L(UW42): + # cfi_def_cfa_offset(0) + # cfi_register(%eip, %edx) + pushl %ecx +L(UW43): + # cfi_adjust_cfa_offset(4) + pushl %edx +L(UW44): + # cfi_adjust_cfa_offset(4) + # cfi_rel_offset(%eip, 0) + subl $raw_closure_T_FS, %esp +L(UW45): + # cfi_adjust_cfa_offset(raw_closure_T_FS) + movl %ebx, raw_closure_T_FS-4(%esp) +L(UW46): + # cfi_rel_offset(%ebx, raw_closure_T_FS-4) + + movl FFI_TRAMPOLINE_SIZE+8(%eax), %edx /* load cl->user_data */ + movl %edx, 12(%esp) + leal raw_closure_T_FS+4(%esp), %edx /* load raw_args */ + movl %edx, 8(%esp) + leal 16(%esp), %edx /* load &res */ + movl %edx, 4(%esp) + movl FFI_TRAMPOLINE_SIZE(%eax), %ebx /* load cl->cif */ + movl %ebx, (%esp) + call *FFI_TRAMPOLINE_SIZE+4(%eax) /* call cl->fun */ + + movl 20(%ebx), %eax /* load cif->flags */ + andl $X86_RET_TYPE_MASK, %eax +#ifdef __PIC__ + call C(__x86.get_pc_thunk.bx) +L(pc5): + leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx +#else + leal L(load_table5)(,%eax, 8), %ecx #endif - movl raw_closure_T_FS-4(%esp), %ebx -L(UW47): - # cfi_restore(%ebx) - movl 16(%esp), %eax /* Optimistic load */ - jmp *%ecx - - .balign 8 -L(load_table5): -E(L(load_table5), X86_RET_FLOAT) - flds 16(%esp) - jmp L(e5) -E(L(load_table5), X86_RET_DOUBLE) - fldl 16(%esp) - jmp L(e5) -E(L(load_table5), X86_RET_LDOUBLE) - fldt 16(%esp) - jmp L(e5) -E(L(load_table5), X86_RET_SINT8) - movsbl %al, %eax - jmp L(e5) -E(L(load_table5), X86_RET_SINT16) - movswl %ax, %eax - jmp L(e5) -E(L(load_table5), X86_RET_UINT8) - movzbl %al, %eax - jmp L(e5) -E(L(load_table5), X86_RET_UINT16) - movzwl %ax, %eax - jmp L(e5) -E(L(load_table5), X86_RET_INT64) - movl 16+4(%esp), %edx - jmp L(e5) -E(L(load_table5), X86_RET_INT32) - nop - /* fallthru */ -E(L(load_table5), X86_RET_VOID) -L(e5): - addl $raw_closure_T_FS, %esp -L(UW48): - # cfi_adjust_cfa_offset(-raw_closure_T_FS) - /* Remove the extra %ecx argument we pushed. */ - ret $4 -L(UW49): - # cfi_adjust_cfa_offset(raw_closure_T_FS) -E(L(load_table5), X86_RET_STRUCTPOP) - addl $raw_closure_T_FS, %esp -L(UW50): - # cfi_adjust_cfa_offset(-raw_closure_T_FS) - ret $8 -L(UW51): - # cfi_adjust_cfa_offset(raw_closure_T_FS) -E(L(load_table5), X86_RET_STRUCTARG) - jmp L(e5) -E(L(load_table5), X86_RET_STRUCT_1B) - movzbl %al, %eax - jmp L(e5) -E(L(load_table5), X86_RET_STRUCT_2B) - movzwl %ax, %eax - jmp L(e5) - - /* Fill out the table so that bad values are predictable. 
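A hedged note on the two raw entry points in this region: the raw API hands the handler the packed on-stack argument bytes (an ffi_raw array) instead of a cooked void** vector, which is why these stubs contain no per-argument copy loop. Handler shape, with invented names:

    #include <ffi.h>

    // args points at the raw stack image; slot 0 is the first argument word.
    static void on_call_raw(ffi_cif *cif, void *ret, ffi_raw *args, void *user_data)
    {
      *(ffi_sarg *)ret = args[0].sint;   // reinterpret the first argument slot
    }
    // Installed via ffi_prep_raw_closure_loc(closure, &cif, on_call_raw,
    // user_data, codeloc), mirroring the non-raw closure API.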
*/ -E(L(load_table5), X86_RET_UNUSED14) - ud2 -E(L(load_table5), X86_RET_UNUSED15) - ud2 - -L(UW52): - # cfi_endproc -ENDF(C(ffi_closure_raw_THISCALL)) - -#endif /* !FFI_NO_RAW_API */ - -#ifdef X86_DARWIN -# define COMDAT(X) \ - .section __TEXT,__text,coalesced,pure_instructions; \ - .weak_definition X; \ - FFI_HIDDEN(X) -#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__)) -# define COMDAT(X) \ - .section .text.X,"axG",@progbits,X,comdat; \ - .globl X; \ - FFI_HIDDEN(X) + movl raw_closure_T_FS-4(%esp), %ebx +L(UW47): + # cfi_restore(%ebx) + movl 16(%esp), %eax /* Optimistic load */ + jmp *%ecx + + .balign 8 +L(load_table5): +E(L(load_table5), X86_RET_FLOAT) + flds 16(%esp) + jmp L(e5) +E(L(load_table5), X86_RET_DOUBLE) + fldl 16(%esp) + jmp L(e5) +E(L(load_table5), X86_RET_LDOUBLE) + fldt 16(%esp) + jmp L(e5) +E(L(load_table5), X86_RET_SINT8) + movsbl %al, %eax + jmp L(e5) +E(L(load_table5), X86_RET_SINT16) + movswl %ax, %eax + jmp L(e5) +E(L(load_table5), X86_RET_UINT8) + movzbl %al, %eax + jmp L(e5) +E(L(load_table5), X86_RET_UINT16) + movzwl %ax, %eax + jmp L(e5) +E(L(load_table5), X86_RET_INT64) + movl 16+4(%esp), %edx + jmp L(e5) +E(L(load_table5), X86_RET_INT32) + nop + /* fallthru */ +E(L(load_table5), X86_RET_VOID) +L(e5): + addl $raw_closure_T_FS, %esp +L(UW48): + # cfi_adjust_cfa_offset(-raw_closure_T_FS) + /* Remove the extra %ecx argument we pushed. */ + ret $4 +L(UW49): + # cfi_adjust_cfa_offset(raw_closure_T_FS) +E(L(load_table5), X86_RET_STRUCTPOP) + addl $raw_closure_T_FS, %esp +L(UW50): + # cfi_adjust_cfa_offset(-raw_closure_T_FS) + ret $8 +L(UW51): + # cfi_adjust_cfa_offset(raw_closure_T_FS) +E(L(load_table5), X86_RET_STRUCTARG) + jmp L(e5) +E(L(load_table5), X86_RET_STRUCT_1B) + movzbl %al, %eax + jmp L(e5) +E(L(load_table5), X86_RET_STRUCT_2B) + movzwl %ax, %eax + jmp L(e5) + + /* Fill out the table so that bad values are predictable. */ +E(L(load_table5), X86_RET_UNUSED14) + ud2 +E(L(load_table5), X86_RET_UNUSED15) + ud2 + +L(UW52): + # cfi_endproc +ENDF(C(ffi_closure_raw_THISCALL)) + +#endif /* !FFI_NO_RAW_API */ + +#ifdef X86_DARWIN +# define COMDAT(X) \ + .section __TEXT,__text,coalesced,pure_instructions; \ + .weak_definition X; \ + FFI_HIDDEN(X) +#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__)) +# define COMDAT(X) \ + .section .text.X,"axG",@progbits,X,comdat; \ + .globl X; \ + FFI_HIDDEN(X) #else -# define COMDAT(X) +# define COMDAT(X) #endif -#if defined(__PIC__) - COMDAT(C(__x86.get_pc_thunk.bx)) -C(__x86.get_pc_thunk.bx): - movl (%esp), %ebx - ret -ENDF(C(__x86.get_pc_thunk.bx)) -# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE - COMDAT(C(__x86.get_pc_thunk.dx)) -C(__x86.get_pc_thunk.dx): - movl (%esp), %edx - ret -ENDF(C(__x86.get_pc_thunk.dx)) -#endif /* DARWIN || HIDDEN */ -#endif /* __PIC__ */ - -/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. 
*/ - -#ifdef __APPLE__ -.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support -EHFrame0: -#elif defined(X86_WIN32) -.section .eh_frame,"r" -#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) -.section .eh_frame,EH_FRAME_FLAGS,@unwind +#if defined(__PIC__) + COMDAT(C(__x86.get_pc_thunk.bx)) +C(__x86.get_pc_thunk.bx): + movl (%esp), %ebx + ret +ENDF(C(__x86.get_pc_thunk.bx)) +# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE + COMDAT(C(__x86.get_pc_thunk.dx)) +C(__x86.get_pc_thunk.dx): + movl (%esp), %edx + ret +ENDF(C(__x86.get_pc_thunk.dx)) +#endif /* DARWIN || HIDDEN */ +#endif /* __PIC__ */ + +/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */ + +#ifdef __APPLE__ +.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EHFrame0: +#elif defined(X86_WIN32) +.section .eh_frame,"r" +#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) +.section .eh_frame,EH_FRAME_FLAGS,@unwind #else -.section .eh_frame,EH_FRAME_FLAGS,@progbits +.section .eh_frame,EH_FRAME_FLAGS,@progbits #endif - -#ifdef HAVE_AS_X86_PCREL -# define PCREL(X) X - . + +#ifdef HAVE_AS_X86_PCREL +# define PCREL(X) X - . #else -# define PCREL(X) X@rel +# define PCREL(X) X@rel #endif - -/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */ -#define ADV(N, P) .byte 2, L(N)-L(P) - - .balign 4 -L(CIE): - .set L(set0),L(ECIE)-L(SCIE) - .long L(set0) /* CIE Length */ -L(SCIE): - .long 0 /* CIE Identifier Tag */ - .byte 1 /* CIE Version */ - .ascii "zR\0" /* CIE Augmentation */ - .byte 1 /* CIE Code Alignment Factor */ - .byte 0x7c /* CIE Data Alignment Factor */ - .byte 0x8 /* CIE RA Column */ - .byte 1 /* Augmentation size */ - .byte 0x1b /* FDE Encoding (pcrel sdata4) */ - .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp offset 4 */ - .byte 0x80+8, 1 /* DW_CFA_offset, %eip offset 1*-4 */ - .balign 4 -L(ECIE): - - .set L(set1),L(EFDE1)-L(SFDE1) - .long L(set1) /* FDE Length */ -L(SFDE1): - .long L(SFDE1)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW0)) /* Initial location */ - .long L(UW5)-L(UW0) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW1, UW0) - .byte 0xc, 5, 8 /* DW_CFA_def_cfa, %ebp 8 */ - .byte 0x80+5, 2 /* DW_CFA_offset, %ebp 2*-4 */ - ADV(UW2, UW1) - .byte 0x80+3, 0 /* DW_CFA_offset, %ebx 0*-4 */ - ADV(UW3, UW2) - .byte 0xa /* DW_CFA_remember_state */ - .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp 4 */ - .byte 0xc0+3 /* DW_CFA_restore, %ebx */ - .byte 0xc0+5 /* DW_CFA_restore, %ebp */ - ADV(UW4, UW3) - .byte 0xb /* DW_CFA_restore_state */ - .balign 4 -L(EFDE1): - - .set L(set2),L(EFDE2)-L(SFDE2) - .long L(set2) /* FDE Length */ -L(SFDE2): - .long L(SFDE2)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW6)) /* Initial location */ - .long L(UW8)-L(UW6) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW7, UW6) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ - .balign 4 -L(EFDE2): - - .set L(set3),L(EFDE3)-L(SFDE3) - .long L(set3) /* FDE Length */ -L(SFDE3): - .long L(SFDE3)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW9)) /* Initial location */ - .long L(UW11)-L(UW9) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW10, UW9) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ - .balign 4 -L(EFDE3): - - .set L(set4),L(EFDE4)-L(SFDE4) - .long L(set4) /* FDE Length */ -L(SFDE4): - .long L(SFDE4)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW12)) /* Initial location */ - .long L(UW20)-L(UW12) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW13, UW12) - .byte 0xe, closure_FS+4 /* 
DW_CFA_def_cfa_offset */ -#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX - ADV(UW14, UW13) - .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */ - ADV(UW15, UW14) - .byte 0xc0+3 /* DW_CFA_restore %ebx */ - ADV(UW16, UW15) + +/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */ +#define ADV(N, P) .byte 2, L(N)-L(P) + + .balign 4 +L(CIE): + .set L(set0),L(ECIE)-L(SCIE) + .long L(set0) /* CIE Length */ +L(SCIE): + .long 0 /* CIE Identifier Tag */ + .byte 1 /* CIE Version */ + .ascii "zR\0" /* CIE Augmentation */ + .byte 1 /* CIE Code Alignment Factor */ + .byte 0x7c /* CIE Data Alignment Factor */ + .byte 0x8 /* CIE RA Column */ + .byte 1 /* Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ + .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp offset 4 */ + .byte 0x80+8, 1 /* DW_CFA_offset, %eip offset 1*-4 */ + .balign 4 +L(ECIE): + + .set L(set1),L(EFDE1)-L(SFDE1) + .long L(set1) /* FDE Length */ +L(SFDE1): + .long L(SFDE1)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW0)) /* Initial location */ + .long L(UW5)-L(UW0) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW1, UW0) + .byte 0xc, 5, 8 /* DW_CFA_def_cfa, %ebp 8 */ + .byte 0x80+5, 2 /* DW_CFA_offset, %ebp 2*-4 */ + ADV(UW2, UW1) + .byte 0x80+3, 0 /* DW_CFA_offset, %ebx 0*-4 */ + ADV(UW3, UW2) + .byte 0xa /* DW_CFA_remember_state */ + .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp 4 */ + .byte 0xc0+3 /* DW_CFA_restore, %ebx */ + .byte 0xc0+5 /* DW_CFA_restore, %ebp */ + ADV(UW4, UW3) + .byte 0xb /* DW_CFA_restore_state */ + .balign 4 +L(EFDE1): + + .set L(set2),L(EFDE2)-L(SFDE2) + .long L(set2) /* FDE Length */ +L(SFDE2): + .long L(SFDE2)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW6)) /* Initial location */ + .long L(UW8)-L(UW6) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW7, UW6) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE2): + + .set L(set3),L(EFDE3)-L(SFDE3) + .long L(set3) /* FDE Length */ +L(SFDE3): + .long L(SFDE3)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW9)) /* Initial location */ + .long L(UW11)-L(UW9) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW10, UW9) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE3): + + .set L(set4),L(EFDE4)-L(SFDE4) + .long L(set4) /* FDE Length */ +L(SFDE4): + .long L(SFDE4)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW12)) /* Initial location */ + .long L(UW20)-L(UW12) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW13, UW12) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ +#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX + ADV(UW14, UW13) + .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */ + ADV(UW15, UW14) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW16, UW15) #else - ADV(UW16, UW13) + ADV(UW16, UW13) #endif - .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ - ADV(UW17, UW16) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ - ADV(UW18, UW17) - .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ - ADV(UW19, UW18) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ - .balign 4 -L(EFDE4): - - .set L(set5),L(EFDE5)-L(SFDE5) - .long L(set5) /* FDE Length */ -L(SFDE5): - .long L(SFDE5)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW21)) /* Initial location */ - .long L(UW23)-L(UW21) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW22, UW21) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ - .balign 4 -L(EFDE5): - - .set L(set6),L(EFDE6)-L(SFDE6) - .long L(set6) /* FDE Length */ -L(SFDE6): - .long L(SFDE6)-L(CIE) /* FDE CIE offset */ - .long 
PCREL(L(UW24)) /* Initial location */ - .long L(UW26)-L(UW24) /* Address range */ - .byte 0 /* Augmentation size */ - .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ - .byte 0x80+8, 2 /* DW_CFA_offset %eip, 2*-4 */ - ADV(UW25, UW24) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ - .balign 4 -L(EFDE6): - - .set L(set7),L(EFDE7)-L(SFDE7) - .long L(set7) /* FDE Length */ -L(SFDE7): - .long L(SFDE7)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW27)) /* Initial location */ - .long L(UW31)-L(UW27) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW28, UW27) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ -#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX - ADV(UW29, UW28) - .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */ - ADV(UW30, UW29) - .byte 0xc0+3 /* DW_CFA_restore %ebx */ + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW17, UW16) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW18, UW17) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW19, UW18) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE4): + + .set L(set5),L(EFDE5)-L(SFDE5) + .long L(set5) /* FDE Length */ +L(SFDE5): + .long L(SFDE5)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW21)) /* Initial location */ + .long L(UW23)-L(UW21) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW22, UW21) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE5): + + .set L(set6),L(EFDE6)-L(SFDE6) + .long L(set6) /* FDE Length */ +L(SFDE6): + .long L(SFDE6)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW24)) /* Initial location */ + .long L(UW26)-L(UW24) /* Address range */ + .byte 0 /* Augmentation size */ + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + .byte 0x80+8, 2 /* DW_CFA_offset %eip, 2*-4 */ + ADV(UW25, UW24) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE6): + + .set L(set7),L(EFDE7)-L(SFDE7) + .long L(set7) /* FDE Length */ +L(SFDE7): + .long L(SFDE7)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW27)) /* Initial location */ + .long L(UW31)-L(UW27) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW28, UW27) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ +#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX + ADV(UW29, UW28) + .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */ + ADV(UW30, UW29) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ #endif - .balign 4 -L(EFDE7): + .balign 4 +L(EFDE7): #if !FFI_NO_RAW_API - .set L(set8),L(EFDE8)-L(SFDE8) - .long L(set8) /* FDE Length */ -L(SFDE8): - .long L(SFDE8)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW32)) /* Initial location */ - .long L(UW40)-L(UW32) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW33, UW32) - .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ - ADV(UW34, UW33) - .byte 0x80+3, 2 /* DW_CFA_offset %ebx 2*-4 */ - ADV(UW35, UW34) - .byte 0xc0+3 /* DW_CFA_restore %ebx */ - ADV(UW36, UW35) - .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ - ADV(UW37, UW36) - .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ - ADV(UW38, UW37) - .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ - ADV(UW39, UW38) - .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ - .balign 4 -L(EFDE8): - - .set L(set9),L(EFDE9)-L(SFDE9) - .long L(set9) /* FDE Length */ -L(SFDE9): - .long L(SFDE9)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW41)) /* Initial location */ - .long L(UW52)-L(UW41) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW42, UW41) - .byte 0xe, 0 /* DW_CFA_def_cfa_offset */ - .byte 0x9, 8, 2 /* DW_CFA_register %eip, %edx */ - ADV(UW43, UW42) 
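The FDE bodies in this hand-rolled .eh_frame section are raw DWARF call-frame instructions emitted with .byte (the ADV macro wraps DW_CFA_advance_loc1). As a cross-checking aid only (this is not part of libffi), here is a C sketch of a decoder for just the opcode subset these tables use; it assumes every operand fits in a single ULEB128 byte, which holds for all entries here:

    #include <stdio.h>

    /* Sketch: decode the DWARF CFA opcode subset used by the FDEs above.
       Register numbers follow the i386 DWARF convention (3 = %ebx,
       4 = %esp, 5 = %ebp, 8 = %eip); the data alignment factor is -4,
       so DW_CFA_offset operands are multiples of 4 bytes below the CFA. */
    static void decode_cfa (const unsigned char *p, const unsigned char *end)
    {
      while (p < end)
        {
          unsigned char op = *p++;
          if ((op & 0xc0) == 0x80)          /* DW_CFA_offset */
            printf ("  reg %u saved at CFA-%u\n", op & 0x3f, 4u * *p++);
          else if ((op & 0xc0) == 0xc0)     /* DW_CFA_restore */
            printf ("  reg %u restored\n", op & 0x3f);
          else switch (op)
            {
            case 0x02:  /* DW_CFA_advance_loc1, as emitted by ADV */
              printf ("  advance %u code bytes\n", *p++); break;
            case 0x09:  /* DW_CFA_register */
              printf ("  reg %u now lives in reg %u\n", p[0], p[1]); p += 2; break;
            case 0x0c:  /* DW_CFA_def_cfa */
              printf ("  CFA = reg %u + %u\n", p[0], p[1]); p += 2; break;
            case 0x0e:  /* DW_CFA_def_cfa_offset */
              printf ("  CFA = same reg + %u\n", *p++); break;
            case 0x0a:  printf ("  remember state\n"); break;
            case 0x0b:  printf ("  restore state\n"); break;
            default:    printf ("  unexpected op %#x\n", op); return;
            }
        }
    }

Running it over the body of FDE1, for instance, replays the %ebp frame setup and the remember/restore pair that ffi_call_i386 needs around its inner return.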
- .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ - ADV(UW44, UW43) - .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ - .byte 0x80+8, 2 /* DW_CFA_offset %eip 2*-4 */ - ADV(UW45, UW44) - .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ - ADV(UW46, UW45) - .byte 0x80+3, 3 /* DW_CFA_offset %ebx 3*-4 */ - ADV(UW47, UW46) - .byte 0xc0+3 /* DW_CFA_restore %ebx */ - ADV(UW48, UW47) - .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ - ADV(UW49, UW48) - .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ - ADV(UW50, UW49) - .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ - ADV(UW51, UW50) - .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ - .balign 4 -L(EFDE9): -#endif /* !FFI_NO_RAW_API */ - -#ifdef _WIN32 - .def @feat.00; - .scl 3; - .type 0; - .endef - .globl @feat.00 -@feat.00 = 1 + .set L(set8),L(EFDE8)-L(SFDE8) + .long L(set8) /* FDE Length */ +L(SFDE8): + .long L(SFDE8)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW32)) /* Initial location */ + .long L(UW40)-L(UW32) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW33, UW32) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW34, UW33) + .byte 0x80+3, 2 /* DW_CFA_offset %ebx 2*-4 */ + ADV(UW35, UW34) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW36, UW35) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW37, UW36) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW38, UW37) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW39, UW38) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE8): + + .set L(set9),L(EFDE9)-L(SFDE9) + .long L(set9) /* FDE Length */ +L(SFDE9): + .long L(SFDE9)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW41)) /* Initial location */ + .long L(UW52)-L(UW41) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW42, UW41) + .byte 0xe, 0 /* DW_CFA_def_cfa_offset */ + .byte 0x9, 8, 2 /* DW_CFA_register %eip, %edx */ + ADV(UW43, UW42) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW44, UW43) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + .byte 0x80+8, 2 /* DW_CFA_offset %eip 2*-4 */ + ADV(UW45, UW44) + .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + ADV(UW46, UW45) + .byte 0x80+3, 3 /* DW_CFA_offset %ebx 3*-4 */ + ADV(UW47, UW46) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW48, UW47) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + ADV(UW49, UW48) + .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + ADV(UW50, UW49) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + ADV(UW51, UW50) + .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE9): +#endif /* !FFI_NO_RAW_API */ + +#ifdef _WIN32 + .def @feat.00; + .scl 3; + .type 0; + .endef + .globl @feat.00 +@feat.00 = 1 #endif -#ifdef __APPLE__ - .subsections_via_symbols - .section __LD,__compact_unwind,regular,debug - - /* compact unwind for ffi_call_i386 */ - .long C(ffi_call_i386) - .set L1,L(UW5)-L(UW0) - .long L1 - .long 0x04000000 /* use dwarf unwind info */ - .long 0 - .long 0 - - /* compact unwind for ffi_go_closure_EAX */ - .long C(ffi_go_closure_EAX) - .set L2,L(UW8)-L(UW6) - .long L2 - .long 0x04000000 /* use dwarf unwind info */ - .long 0 - .long 0 - - /* compact unwind for ffi_go_closure_ECX */ - .long C(ffi_go_closure_ECX) - .set L3,L(UW11)-L(UW9) - .long L3 - .long 0x04000000 /* use dwarf unwind info */ - .long 0 - .long 0 - - /* compact unwind for ffi_closure_i386 */ - .long C(ffi_closure_i386) - .set L4,L(UW20)-L(UW12) - .long L4 - .long 0x04000000 /* use dwarf unwind info */ - .long 0 - .long 0 - - /* compact unwind for ffi_go_closure_STDCALL */ - 
.long C(ffi_go_closure_STDCALL) - .set L5,L(UW23)-L(UW21) - .long L5 - .long 0x04000000 /* use dwarf unwind info */ - .long 0 - .long 0 - - /* compact unwind for ffi_closure_REGISTER */ - .long C(ffi_closure_REGISTER) - .set L6,L(UW26)-L(UW24) - .long L6 - .long 0x04000000 /* use dwarf unwind info */ - .long 0 - .long 0 - - /* compact unwind for ffi_closure_STDCALL */ - .long C(ffi_closure_STDCALL) - .set L7,L(UW31)-L(UW27) - .long L7 - .long 0x04000000 /* use dwarf unwind info */ - .long 0 - .long 0 - - /* compact unwind for ffi_closure_raw_SYSV */ - .long C(ffi_closure_raw_SYSV) - .set L8,L(UW40)-L(UW32) - .long L8 - .long 0x04000000 /* use dwarf unwind info */ - .long 0 - .long 0 - - /* compact unwind for ffi_closure_raw_THISCALL */ - .long C(ffi_closure_raw_THISCALL) - .set L9,L(UW52)-L(UW41) - .long L9 - .long 0x04000000 /* use dwarf unwind info */ - .long 0 - .long 0 -#endif /* __APPLE__ */ - -#endif /* ifndef _MSC_VER */ -#endif /* ifdef __i386__ */ - +#ifdef __APPLE__ + .subsections_via_symbols + .section __LD,__compact_unwind,regular,debug + + /* compact unwind for ffi_call_i386 */ + .long C(ffi_call_i386) + .set L1,L(UW5)-L(UW0) + .long L1 + .long 0x04000000 /* use dwarf unwind info */ + .long 0 + .long 0 + + /* compact unwind for ffi_go_closure_EAX */ + .long C(ffi_go_closure_EAX) + .set L2,L(UW8)-L(UW6) + .long L2 + .long 0x04000000 /* use dwarf unwind info */ + .long 0 + .long 0 + + /* compact unwind for ffi_go_closure_ECX */ + .long C(ffi_go_closure_ECX) + .set L3,L(UW11)-L(UW9) + .long L3 + .long 0x04000000 /* use dwarf unwind info */ + .long 0 + .long 0 + + /* compact unwind for ffi_closure_i386 */ + .long C(ffi_closure_i386) + .set L4,L(UW20)-L(UW12) + .long L4 + .long 0x04000000 /* use dwarf unwind info */ + .long 0 + .long 0 + + /* compact unwind for ffi_go_closure_STDCALL */ + .long C(ffi_go_closure_STDCALL) + .set L5,L(UW23)-L(UW21) + .long L5 + .long 0x04000000 /* use dwarf unwind info */ + .long 0 + .long 0 + + /* compact unwind for ffi_closure_REGISTER */ + .long C(ffi_closure_REGISTER) + .set L6,L(UW26)-L(UW24) + .long L6 + .long 0x04000000 /* use dwarf unwind info */ + .long 0 + .long 0 + + /* compact unwind for ffi_closure_STDCALL */ + .long C(ffi_closure_STDCALL) + .set L7,L(UW31)-L(UW27) + .long L7 + .long 0x04000000 /* use dwarf unwind info */ + .long 0 + .long 0 + + /* compact unwind for ffi_closure_raw_SYSV */ + .long C(ffi_closure_raw_SYSV) + .set L8,L(UW40)-L(UW32) + .long L8 + .long 0x04000000 /* use dwarf unwind info */ + .long 0 + .long 0 + + /* compact unwind for ffi_closure_raw_THISCALL */ + .long C(ffi_closure_raw_THISCALL) + .set L9,L(UW52)-L(UW41) + .long L9 + .long 0x04000000 /* use dwarf unwind info */ + .long 0 + .long 0 +#endif /* __APPLE__ */ + +#endif /* ifndef _MSC_VER */ +#endif /* ifdef __i386__ */ + #if defined __ELF__ && defined __linux__ .section .note.GNU-stack,"",@progbits #endif diff --git a/contrib/restricted/libffi/src/x86/sysv_intel.S b/contrib/restricted/libffi/src/x86/sysv_intel.S index 5fc9ff21fa..3cafd71ce1 100644 --- a/contrib/restricted/libffi/src/x86/sysv_intel.S +++ b/contrib/restricted/libffi/src/x86/sysv_intel.S @@ -1,995 +1,995 @@ -/* ----------------------------------------------------------------------- - sysv.S - Copyright (c) 2017 Anthony Green - - Copyright (c) 2013 The Written Word, Inc. - - Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc. 
- - X86 Foreign Function Interface - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - ``Software''), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice shall be included - in all copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT - HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, - WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - DEALINGS IN THE SOFTWARE. - ----------------------------------------------------------------------- */ - -#ifndef __x86_64__ -#ifdef _MSC_VER - -#define LIBFFI_ASM -#include <fficonfig.h> -#include <ffi.h> -#include <ffi_cfi.h> -#include "internal.h" - -#define C2(X, Y) X ## Y -#define C1(X, Y) C2(X, Y) -#define L(X) C1(L, X) -# define ENDF(X) X ENDP - -/* This macro allows the safe creation of jump tables without an - actual table. The entry points into the table are all 8 bytes. - The use of ORG asserts that we're at the correct location. */ -/* ??? The clang assembler doesn't handle .org with symbolic expressions. */ -#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) -# define E(BASE, X) ALIGN 8 -#else -# define E(BASE, X) ALIGN 8; ORG BASE + X * 8 -#endif - - .686P - .MODEL FLAT - -EXTRN @ffi_closure_inner@8:PROC -_TEXT SEGMENT - -/* This is declared as - - void ffi_call_i386(struct call_frame *frame, char *argp) - __attribute__((fastcall)); - - Thus the arguments are present in - - ecx: frame - edx: argp -*/ - -ALIGN 16 -PUBLIC @ffi_call_i386@8 -@ffi_call_i386@8 PROC -L(UW0): - cfi_startproc - #if !HAVE_FASTCALL - mov ecx, [esp+4] - mov edx, [esp+8] - #endif - mov eax, [esp] /* move the return address */ - mov [ecx], ebp /* store ebp into local frame */ - mov [ecx+4], eax /* store retaddr into local frame */ - - /* New stack frame based off ebp. This is a itty bit of unwind - trickery in that the CFA *has* changed. There is no easy way - to describe it correctly on entry to the function. Fortunately, - it doesn't matter too much since at all points we can correctly - unwind back to ffi_call. Note that the location to which we - moved the return address is (the new) CFA-4, so from the - perspective of the unwind info, it hasn't moved. 
*/
-	mov ebp, ecx
-L(UW1):
-	// cfi_def_cfa(%ebp, 8)
-	// cfi_rel_offset(%ebp, 0)
-
-	mov esp, edx /* set outgoing argument stack */
-	mov eax, [20+R_EAX*4+ebp] /* set register arguments */
-	mov edx, [20+R_EDX*4+ebp]
-	mov ecx, [20+R_ECX*4+ebp]
-
-	call dword ptr [ebp+8]
-
-	mov ecx, [12+ebp] /* load return type code */
-	mov [ebp+8], ebx /* preserve %ebx */
-L(UW2):
-	// cfi_rel_offset(%ebx, 8)
-
-	and ecx, X86_RET_TYPE_MASK
-	lea ebx, [L(store_table) + ecx * 8]
-	mov ecx, [ebp+16] /* load result address */
-	jmp ebx
-
-	ALIGN 8
-L(store_table):
-E(L(store_table), X86_RET_FLOAT)
-	fstp DWORD PTR [ecx]
-	jmp L(e1)
-E(L(store_table), X86_RET_DOUBLE)
-	fstp QWORD PTR [ecx]
-	jmp L(e1)
-E(L(store_table), X86_RET_LDOUBLE)
-	fstp QWORD PTR [ecx]
-	jmp L(e1)
-E(L(store_table), X86_RET_SINT8)
-	movsx eax, al
-	mov [ecx], eax
-	jmp L(e1)
-E(L(store_table), X86_RET_SINT16)
-	movsx eax, ax
-	mov [ecx], eax
-	jmp L(e1)
-E(L(store_table), X86_RET_UINT8)
-	movzx eax, al
-	mov [ecx], eax
-	jmp L(e1)
-E(L(store_table), X86_RET_UINT16)
-	movzx eax, ax
-	mov [ecx], eax
-	jmp L(e1)
-E(L(store_table), X86_RET_INT64)
-	mov [ecx+4], edx
-	/* fallthru */
-E(L(store_table), X86_RET_INT32)
-	mov [ecx], eax
-	/* fallthru */
-E(L(store_table), X86_RET_VOID)
-L(e1):
-	mov ebx, [ebp+8]
-	mov esp, ebp
-	pop ebp
-L(UW3):
-	// cfi_remember_state
-	// cfi_def_cfa(%esp, 4)
-	// cfi_restore(%ebx)
-	// cfi_restore(%ebp)
-	ret
-L(UW4):
-	// cfi_restore_state
-
-E(L(store_table), X86_RET_STRUCTPOP)
-	jmp L(e1)
-E(L(store_table), X86_RET_STRUCTARG)
-	jmp L(e1)
-E(L(store_table), X86_RET_STRUCT_1B)
-	mov [ecx], al
-	jmp L(e1)
-E(L(store_table), X86_RET_STRUCT_2B)
-	mov [ecx], ax
-	jmp L(e1)
-
-	/* Fill out the table so that bad values are predictable. */
-E(L(store_table), X86_RET_UNUSED14)
-	int 3
-E(L(store_table), X86_RET_UNUSED15)
-	int 3
-
-L(UW5):
-	// cfi_endproc
-ENDF(@ffi_call_i386@8)
-
-/* The inner helper is declared as
-
-     void ffi_closure_inner(struct closure_frame *frame, char *argp)
-	__attribute__((fastcall))
-
-   Thus the arguments are placed in
-
-	ecx:	frame
-	edx:	argp
-*/
-
-/* Macros to help setting up the closure_data structure. */
-
-#if HAVE_FASTCALL
-# define closure_FS (40 + 4)
-# define closure_CF 0
-#else
-# define closure_FS (8 + 40 + 12)
-# define closure_CF 8
-#endif
-
-FFI_CLOSURE_SAVE_REGS MACRO
-	mov [esp + closure_CF+16+R_EAX*4], eax
-	mov [esp + closure_CF+16+R_EDX*4], edx
-	mov [esp + closure_CF+16+R_ECX*4], ecx
-ENDM
-
-FFI_CLOSURE_COPY_TRAMP_DATA MACRO
-	mov edx, [eax+FFI_TRAMPOLINE_SIZE] /* copy cif */
-	mov ecx, [eax+FFI_TRAMPOLINE_SIZE+4] /* copy fun */
-	mov eax, [eax+FFI_TRAMPOLINE_SIZE+8]; /* copy user_data */
-	mov [esp+closure_CF+28], edx
-	mov [esp+closure_CF+32], ecx
-	mov [esp+closure_CF+36], eax
-ENDM
-
-#if HAVE_FASTCALL
-FFI_CLOSURE_PREP_CALL MACRO
-	mov ecx, esp /* load closure_data */
-	lea edx, [esp+closure_FS+4] /* load incoming stack */
-ENDM
-#else
-FFI_CLOSURE_PREP_CALL MACRO
-	lea ecx, [esp+closure_CF] /* load closure_data */
-	lea edx, [esp+closure_FS+4] /* load incoming stack */
-	mov [esp], ecx
-	mov [esp+4], edx
-ENDM
-#endif
-
-FFI_CLOSURE_CALL_INNER MACRO UWN
-	call @ffi_closure_inner@8
-ENDM
-
-FFI_CLOSURE_MASK_AND_JUMP MACRO LABEL
-	and eax, X86_RET_TYPE_MASK
-	lea edx, [LABEL+eax*8]
-	mov eax, [esp+closure_CF] /* optimistic load */
-	jmp edx
-ENDM
-
-ALIGN 16
-PUBLIC ffi_go_closure_EAX
-ffi_go_closure_EAX PROC C
-L(UW6):
-	// cfi_startproc
-	sub esp, closure_FS
-L(UW7):
-	// cfi_def_cfa_offset(closure_FS + 4)
-	FFI_CLOSURE_SAVE_REGS
-	mov edx, [eax+4] /* copy cif */
-	mov ecx, [eax +8] /* copy fun */
-	mov [esp+closure_CF+28], edx
-	mov [esp+closure_CF+32], ecx
-	mov [esp+closure_CF+36], eax /* closure is user_data */
-	jmp L(do_closure_i386)
-L(UW8):
-	// cfi_endproc
-ENDF(ffi_go_closure_EAX)
-
-ALIGN 16
-PUBLIC ffi_go_closure_ECX
-ffi_go_closure_ECX PROC C
-L(UW9):
-	// cfi_startproc
-	sub esp, closure_FS
-L(UW10):
-	// cfi_def_cfa_offset(closure_FS + 4)
-	FFI_CLOSURE_SAVE_REGS
-	mov edx, [ecx+4] /* copy cif */
-	mov eax, [ecx+8] /* copy fun */
-	mov [esp+closure_CF+28], edx
-	mov [esp+closure_CF+32], eax
-	mov [esp+closure_CF+36], ecx /* closure is user_data */
-	jmp L(do_closure_i386)
-L(UW11):
-	// cfi_endproc
-ENDF(ffi_go_closure_ECX)
-
-/* The closure entry points are reached from the ffi_closure trampoline.
-   On entry, %eax contains the address of the ffi_closure. */
-
-ALIGN 16
-PUBLIC ffi_closure_i386
-ffi_closure_i386 PROC C
-L(UW12):
-	// cfi_startproc
-	sub esp, closure_FS
-L(UW13):
-	// cfi_def_cfa_offset(closure_FS + 4)
-
-	FFI_CLOSURE_SAVE_REGS
-	FFI_CLOSURE_COPY_TRAMP_DATA
-
-	/* Entry point from preceding Go closures.
*/ -L(do_closure_i386):: - - FFI_CLOSURE_PREP_CALL - FFI_CLOSURE_CALL_INNER(14) - FFI_CLOSURE_MASK_AND_JUMP L(C1(load_table,2)) - - ALIGN 8 -L(load_table2): -E(L(load_table2), X86_RET_FLOAT) - fld dword ptr [esp+closure_CF] - jmp L(e2) -E(L(load_table2), X86_RET_DOUBLE) - fld qword ptr [esp+closure_CF] - jmp L(e2) -E(L(load_table2), X86_RET_LDOUBLE) - fld qword ptr [esp+closure_CF] - jmp L(e2) -E(L(load_table2), X86_RET_SINT8) - movsx eax, al - jmp L(e2) -E(L(load_table2), X86_RET_SINT16) - movsx eax, ax - jmp L(e2) -E(L(load_table2), X86_RET_UINT8) - movzx eax, al - jmp L(e2) -E(L(load_table2), X86_RET_UINT16) - movzx eax, ax - jmp L(e2) -E(L(load_table2), X86_RET_INT64) - mov edx, [esp+closure_CF+4] - jmp L(e2) -E(L(load_table2), X86_RET_INT32) - nop - /* fallthru */ -E(L(load_table2), X86_RET_VOID) -L(e2): - add esp, closure_FS -L(UW16): - // cfi_adjust_cfa_offset(-closure_FS) - ret -L(UW17): - // cfi_adjust_cfa_offset(closure_FS) -E(L(load_table2), X86_RET_STRUCTPOP) - add esp, closure_FS -L(UW18): - // cfi_adjust_cfa_offset(-closure_FS) - ret 4 -L(UW19): - // cfi_adjust_cfa_offset(closure_FS) -E(L(load_table2), X86_RET_STRUCTARG) - jmp L(e2) -E(L(load_table2), X86_RET_STRUCT_1B) - movzx eax, al - jmp L(e2) -E(L(load_table2), X86_RET_STRUCT_2B) - movzx eax, ax - jmp L(e2) - - /* Fill out the table so that bad values are predictable. */ -E(L(load_table2), X86_RET_UNUSED14) - int 3 -E(L(load_table2), X86_RET_UNUSED15) - int 3 - -L(UW20): - // cfi_endproc -ENDF(ffi_closure_i386) - -ALIGN 16 -PUBLIC ffi_go_closure_STDCALL -ffi_go_closure_STDCALL PROC C -L(UW21): - // cfi_startproc - sub esp, closure_FS -L(UW22): - // cfi_def_cfa_offset(closure_FS + 4) - FFI_CLOSURE_SAVE_REGS - mov edx, [ecx+4] /* copy cif */ - mov eax, [ecx+8] /* copy fun */ - mov [esp+closure_CF+28], edx - mov [esp+closure_CF+32], eax - mov [esp+closure_CF+36], ecx /* closure is user_data */ - jmp L(do_closure_STDCALL) -L(UW23): - // cfi_endproc -ENDF(ffi_go_closure_STDCALL) - -/* For REGISTER, we have no available parameter registers, and so we - enter here having pushed the closure onto the stack. */ - -ALIGN 16 -PUBLIC ffi_closure_REGISTER -ffi_closure_REGISTER PROC C -L(UW24): - // cfi_startproc - // cfi_def_cfa(%esp, 8) - // cfi_offset(%eip, -8) - sub esp, closure_FS-4 -L(UW25): - // cfi_def_cfa_offset(closure_FS + 4) - FFI_CLOSURE_SAVE_REGS - mov ecx, [esp+closure_FS-4] /* load retaddr */ - mov eax, [esp+closure_FS] /* load closure */ - mov [esp+closure_FS], ecx /* move retaddr */ - jmp L(do_closure_REGISTER) -L(UW26): - // cfi_endproc -ENDF(ffi_closure_REGISTER) - -/* For STDCALL (and others), we need to pop N bytes of arguments off - the stack following the closure. The amount needing to be popped - is returned to us from ffi_closure_inner. */ - -ALIGN 16 -PUBLIC ffi_closure_STDCALL -ffi_closure_STDCALL PROC C -L(UW27): - // cfi_startproc - sub esp, closure_FS -L(UW28): - // cfi_def_cfa_offset(closure_FS + 4) - - FFI_CLOSURE_SAVE_REGS - - /* Entry point from ffi_closure_REGISTER. */ -L(do_closure_REGISTER):: - - FFI_CLOSURE_COPY_TRAMP_DATA - - /* Entry point from preceeding Go closure. */ -L(do_closure_STDCALL):: - - FFI_CLOSURE_PREP_CALL - FFI_CLOSURE_CALL_INNER(29) - - mov ecx, eax - shr ecx, X86_RET_POP_SHIFT /* isolate pop count */ - lea ecx, [esp+closure_FS+ecx] /* compute popped esp */ - mov edx, [esp+closure_FS] /* move return address */ - mov [ecx], edx - - /* From this point on, the value of %esp upon return is %ecx+4, - and we've copied the return address to %ecx to make return easy. 
- There's no point in representing this in the unwind info, as - there is always a window between the mov and the ret which - will be wrong from one point of view or another. */ - - FFI_CLOSURE_MASK_AND_JUMP L(C1(load_table,3)) - - ALIGN 8 -L(load_table3): -E(L(load_table3), X86_RET_FLOAT) - fld DWORD PTR [esp+closure_CF] - mov esp, ecx - ret -E(L(load_table3), X86_RET_DOUBLE) - fld QWORD PTR [esp+closure_CF] - mov esp, ecx - ret -E(L(load_table3), X86_RET_LDOUBLE) - fld QWORD PTR [esp+closure_CF] - mov esp, ecx - ret -E(L(load_table3), X86_RET_SINT8) - movsx eax, al - mov esp, ecx - ret -E(L(load_table3), X86_RET_SINT16) - movsx eax, ax - mov esp, ecx - ret -E(L(load_table3), X86_RET_UINT8) - movzx eax, al - mov esp, ecx - ret -E(L(load_table3), X86_RET_UINT16) - movzx eax, ax - mov esp, ecx - ret -E(L(load_table3), X86_RET_INT64) - mov edx, [esp+closure_CF+4] - mov esp, ecx - ret -E(L(load_table3), X86_RET_int 32) - mov esp, ecx - ret -E(L(load_table3), X86_RET_VOID) - mov esp, ecx - ret -E(L(load_table3), X86_RET_STRUCTPOP) - mov esp, ecx - ret -E(L(load_table3), X86_RET_STRUCTARG) - mov esp, ecx - ret -E(L(load_table3), X86_RET_STRUCT_1B) - movzx eax, al - mov esp, ecx - ret -E(L(load_table3), X86_RET_STRUCT_2B) - movzx eax, ax - mov esp, ecx - ret - - /* Fill out the table so that bad values are predictable. */ -E(L(load_table3), X86_RET_UNUSED14) - int 3 -E(L(load_table3), X86_RET_UNUSED15) - int 3 - -L(UW31): - // cfi_endproc -ENDF(ffi_closure_STDCALL) - -#if !FFI_NO_RAW_API - -#define raw_closure_S_FS (16+16+12) - -ALIGN 16 -PUBLIC ffi_closure_raw_SYSV -ffi_closure_raw_SYSV PROC C -L(UW32): - // cfi_startproc - sub esp, raw_closure_S_FS -L(UW33): - // cfi_def_cfa_offset(raw_closure_S_FS + 4) - mov [esp+raw_closure_S_FS-4], ebx -L(UW34): - // cfi_rel_offset(%ebx, raw_closure_S_FS-4) - - mov edx, [eax+FFI_TRAMPOLINE_SIZE+8] /* load cl->user_data */ - mov [esp+12], edx - lea edx, [esp+raw_closure_S_FS+4] /* load raw_args */ - mov [esp+8], edx - lea edx, [esp+16] /* load &res */ - mov [esp+4], edx - mov ebx, [eax+FFI_TRAMPOLINE_SIZE] /* load cl->cif */ - mov [esp], ebx - call DWORD PTR [eax+FFI_TRAMPOLINE_SIZE+4] /* call cl->fun */ - - mov eax, [ebx+20] /* load cif->flags */ - and eax, X86_RET_TYPE_MASK -// #ifdef __PIC__ -// call __x86.get_pc_thunk.bx -// L(pc4): -// lea ecx, L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx -// #else - lea ecx, [L(load_table4)+eax+8] -// #endif - mov ebx, [esp+raw_closure_S_FS-4] -L(UW35): - // cfi_restore(%ebx) - mov eax, [esp+16] /* Optimistic load */ - jmp dword ptr [ecx] - - ALIGN 8 -L(load_table4): -E(L(load_table4), X86_RET_FLOAT) - fld DWORD PTR [esp +16] - jmp L(e4) -E(L(load_table4), X86_RET_DOUBLE) - fld QWORD PTR [esp +16] - jmp L(e4) -E(L(load_table4), X86_RET_LDOUBLE) - fld QWORD PTR [esp +16] - jmp L(e4) -E(L(load_table4), X86_RET_SINT8) - movsx eax, al - jmp L(e4) -E(L(load_table4), X86_RET_SINT16) - movsx eax, ax - jmp L(e4) -E(L(load_table4), X86_RET_UINT8) - movzx eax, al - jmp L(e4) -E(L(load_table4), X86_RET_UINT16) - movzx eax, ax - jmp L(e4) -E(L(load_table4), X86_RET_INT64) - mov edx, [esp+16+4] - jmp L(e4) -E(L(load_table4), X86_RET_int 32) - nop - /* fallthru */ -E(L(load_table4), X86_RET_VOID) -L(e4): - add esp, raw_closure_S_FS -L(UW36): - // cfi_adjust_cfa_offset(-raw_closure_S_FS) - ret -L(UW37): - // cfi_adjust_cfa_offset(raw_closure_S_FS) -E(L(load_table4), X86_RET_STRUCTPOP) - add esp, raw_closure_S_FS -L(UW38): - // cfi_adjust_cfa_offset(-raw_closure_S_FS) - ret 4 -L(UW39): - // cfi_adjust_cfa_offset(raw_closure_S_FS) 
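For orientation, the four argument stores above ([esp] = cif, [esp+4] = &res, [esp+8] = raw_args, [esp+12] = user_data) are simply the cdecl argument area for the indirect call through cl->fun. A minimal C sketch of the equivalent call, assuming the raw API is enabled (x86 defines FFI_NATIVE_RAW_API, so ffi_raw_closure carries cif, fun and user_data back to back, matching the FFI_TRAMPOLINE_SIZE displacements used here):

    #include <ffi.h>

    /* Sketch: the call that ffi_closure_raw_SYSV assembles by hand.
       res points at the result slot the frame reserves at [esp+16];
       the "optimistic load" after the call picks up its first four
       bytes, which is the right thing for every integer-class return. */
    static void invoke_raw (ffi_raw_closure *cl, void *res, ffi_raw *raw_args)
    {
      cl->fun (cl->cif, res, raw_args, cl->user_data);
    }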
-E(L(load_table4), X86_RET_STRUCTARG) - jmp L(e4) -E(L(load_table4), X86_RET_STRUCT_1B) - movzx eax, al - jmp L(e4) -E(L(load_table4), X86_RET_STRUCT_2B) - movzx eax, ax - jmp L(e4) - - /* Fill out the table so that bad values are predictable. */ -E(L(load_table4), X86_RET_UNUSED14) - int 3 -E(L(load_table4), X86_RET_UNUSED15) - int 3 - -L(UW40): - // cfi_endproc -ENDF(ffi_closure_raw_SYSV) - -#define raw_closure_T_FS (16+16+8) - -ALIGN 16 -PUBLIC ffi_closure_raw_THISCALL -ffi_closure_raw_THISCALL PROC C -L(UW41): - // cfi_startproc - /* Rearrange the stack such that %ecx is the first argument. - This means moving the return address. */ - pop edx -L(UW42): - // cfi_def_cfa_offset(0) - // cfi_register(%eip, %edx) - push ecx -L(UW43): - // cfi_adjust_cfa_offset(4) - push edx -L(UW44): - // cfi_adjust_cfa_offset(4) - // cfi_rel_offset(%eip, 0) - sub esp, raw_closure_T_FS -L(UW45): - // cfi_adjust_cfa_offset(raw_closure_T_FS) - mov [esp+raw_closure_T_FS-4], ebx -L(UW46): - // cfi_rel_offset(%ebx, raw_closure_T_FS-4) - - mov edx, [eax+FFI_TRAMPOLINE_SIZE+8] /* load cl->user_data */ - mov [esp+12], edx - lea edx, [esp+raw_closure_T_FS+4] /* load raw_args */ - mov [esp+8], edx - lea edx, [esp+16] /* load &res */ - mov [esp+4], edx - mov ebx, [eax+FFI_TRAMPOLINE_SIZE] /* load cl->cif */ - mov [esp], ebx - call DWORD PTR [eax+FFI_TRAMPOLINE_SIZE+4] /* call cl->fun */ - - mov eax, [ebx+20] /* load cif->flags */ - and eax, X86_RET_TYPE_MASK -// #ifdef __PIC__ -// call __x86.get_pc_thunk.bx -// L(pc5): -// leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx -// #else - lea ecx, [L(load_table5)+eax*8] -//#endif - mov ebx, [esp+raw_closure_T_FS-4] -L(UW47): - // cfi_restore(%ebx) - mov eax, [esp+16] /* Optimistic load */ - jmp DWORD PTR [ecx] - - AlIGN 4 -L(load_table5): -E(L(load_table5), X86_RET_FLOAT) - fld DWORD PTR [esp +16] - jmp L(e5) -E(L(load_table5), X86_RET_DOUBLE) - fld QWORD PTR [esp +16] - jmp L(e5) -E(L(load_table5), X86_RET_LDOUBLE) - fld QWORD PTR [esp+16] - jmp L(e5) -E(L(load_table5), X86_RET_SINT8) - movsx eax, al - jmp L(e5) -E(L(load_table5), X86_RET_SINT16) - movsx eax, ax - jmp L(e5) -E(L(load_table5), X86_RET_UINT8) - movzx eax, al - jmp L(e5) -E(L(load_table5), X86_RET_UINT16) - movzx eax, ax - jmp L(e5) -E(L(load_table5), X86_RET_INT64) - mov edx, [esp+16+4] - jmp L(e5) -E(L(load_table5), X86_RET_int 32) - nop - /* fallthru */ -E(L(load_table5), X86_RET_VOID) -L(e5): - add esp, raw_closure_T_FS -L(UW48): - // cfi_adjust_cfa_offset(-raw_closure_T_FS) - /* Remove the extra %ecx argument we pushed. */ - ret 4 -L(UW49): - // cfi_adjust_cfa_offset(raw_closure_T_FS) -E(L(load_table5), X86_RET_STRUCTPOP) - add esp, raw_closure_T_FS -L(UW50): - // cfi_adjust_cfa_offset(-raw_closure_T_FS) - ret 8 -L(UW51): - // cfi_adjust_cfa_offset(raw_closure_T_FS) -E(L(load_table5), X86_RET_STRUCTARG) - jmp L(e5) -E(L(load_table5), X86_RET_STRUCT_1B) - movzx eax, al - jmp L(e5) -E(L(load_table5), X86_RET_STRUCT_2B) - movzx eax, ax - jmp L(e5) - - /* Fill out the table so that bad values are predictable. 
*/ -E(L(load_table5), X86_RET_UNUSED14) - int 3 -E(L(load_table5), X86_RET_UNUSED15) - int 3 - -L(UW52): - // cfi_endproc -ENDF(ffi_closure_raw_THISCALL) - -#endif /* !FFI_NO_RAW_API */ - -#ifdef X86_DARWIN -# define COMDAT(X) \ - .section __TEXT,__text,coalesced,pure_instructions; \ - .weak_definition X; \ - FFI_HIDDEN(X) -#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__)) -# define COMDAT(X) \ - .section .text.X,"axG",@progbits,X,comdat; \ - PUBLIC X; \ - FFI_HIDDEN(X) -#else -# define COMDAT(X) -#endif - -// #if defined(__PIC__) -// COMDAT(C(__x86.get_pc_thunk.bx)) -// C(__x86.get_pc_thunk.bx): -// movl (%esp), %ebx -// ret -// ENDF(C(__x86.get_pc_thunk.bx)) -// # if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE -// COMDAT(C(__x86.get_pc_thunk.dx)) -// C(__x86.get_pc_thunk.dx): -// movl (%esp), %edx -// ret -// ENDF(C(__x86.get_pc_thunk.dx)) -// #endif /* DARWIN || HIDDEN */ -// #endif /* __PIC__ */ - -#if 0 -/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */ - -#ifdef __APPLE__ -.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support -EHFrame0: -#elif defined(X86_WIN32) -.section .eh_frame,"r" -#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) -.section .eh_frame,EH_FRAME_FLAGS,@unwind -#else -.section .eh_frame,EH_FRAME_FLAGS,@progbits -#endif - -#ifdef HAVE_AS_X86_PCREL -# define PCREL(X) X - . -#else -# define PCREL(X) X@rel -#endif - -/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */ -#define ADV(N, P) .byte 2, L(N)-L(P) - - .balign 4 -L(CIE): - .set L(set0),L(ECIE)-L(SCIE) - .long L(set0) /* CIE Length */ -L(SCIE): - .long 0 /* CIE Identifier Tag */ - .byte 1 /* CIE Version */ - .ascii "zR\0" /* CIE Augmentation */ - .byte 1 /* CIE Code Alignment Factor */ - .byte 0x7c /* CIE Data Alignment Factor */ - .byte 0x8 /* CIE RA Column */ - .byte 1 /* Augmentation size */ - .byte 0x1b /* FDE Encoding (pcrel sdata4) */ - .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp offset 4 */ - .byte 0x80+8, 1 /* DW_CFA_offset, %eip offset 1*-4 */ - .balign 4 -L(ECIE): - - .set L(set1),L(EFDE1)-L(SFDE1) - .long L(set1) /* FDE Length */ -L(SFDE1): - .long L(SFDE1)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW0)) /* Initial location */ - .long L(UW5)-L(UW0) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW1, UW0) - .byte 0xc, 5, 8 /* DW_CFA_def_cfa, %ebp 8 */ - .byte 0x80+5, 2 /* DW_CFA_offset, %ebp 2*-4 */ - ADV(UW2, UW1) - .byte 0x80+3, 0 /* DW_CFA_offset, %ebx 0*-4 */ - ADV(UW3, UW2) - .byte 0xa /* DW_CFA_remember_state */ - .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp 4 */ - .byte 0xc0+3 /* DW_CFA_restore, %ebx */ - .byte 0xc0+5 /* DW_CFA_restore, %ebp */ - ADV(UW4, UW3) - .byte 0xb /* DW_CFA_restore_state */ - .balign 4 -L(EFDE1): - - .set L(set2),L(EFDE2)-L(SFDE2) - .long L(set2) /* FDE Length */ -L(SFDE2): - .long L(SFDE2)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW6)) /* Initial location */ - .long L(UW8)-L(UW6) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW7, UW6) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ - .balign 4 -L(EFDE2): - - .set L(set3),L(EFDE3)-L(SFDE3) - .long L(set3) /* FDE Length */ -L(SFDE3): - .long L(SFDE3)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW9)) /* Initial location */ - .long L(UW11)-L(UW9) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW10, UW9) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ - .balign 4 -L(EFDE3): - - .set L(set4),L(EFDE4)-L(SFDE4) - .long L(set4) /* FDE Length */ -L(SFDE4): - .long L(SFDE4)-L(CIE) /* FDE 
CIE offset */ - .long PCREL(L(UW12)) /* Initial location */ - .long L(UW20)-L(UW12) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW13, UW12) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ -#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX - ADV(UW14, UW13) - .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */ - ADV(UW15, UW14) - .byte 0xc0+3 /* DW_CFA_restore %ebx */ - ADV(UW16, UW15) -#else - ADV(UW16, UW13) -#endif - .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ - ADV(UW17, UW16) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ - ADV(UW18, UW17) - .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ - ADV(UW19, UW18) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ - .balign 4 -L(EFDE4): - - .set L(set5),L(EFDE5)-L(SFDE5) - .long L(set5) /* FDE Length */ -L(SFDE5): - .long L(SFDE5)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW21)) /* Initial location */ - .long L(UW23)-L(UW21) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW22, UW21) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ - .balign 4 -L(EFDE5): - - .set L(set6),L(EFDE6)-L(SFDE6) - .long L(set6) /* FDE Length */ -L(SFDE6): - .long L(SFDE6)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW24)) /* Initial location */ - .long L(UW26)-L(UW24) /* Address range */ - .byte 0 /* Augmentation size */ - .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ - .byte 0x80+8, 2 /* DW_CFA_offset %eip, 2*-4 */ - ADV(UW25, UW24) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ - .balign 4 -L(EFDE6): - - .set L(set7),L(EFDE7)-L(SFDE7) - .long L(set7) /* FDE Length */ -L(SFDE7): - .long L(SFDE7)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW27)) /* Initial location */ - .long L(UW31)-L(UW27) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW28, UW27) - .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ -#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX - ADV(UW29, UW28) - .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */ - ADV(UW30, UW29) - .byte 0xc0+3 /* DW_CFA_restore %ebx */ -#endif - .balign 4 -L(EFDE7): - -#if !FFI_NO_RAW_API - .set L(set8),L(EFDE8)-L(SFDE8) - .long L(set8) /* FDE Length */ -L(SFDE8): - .long L(SFDE8)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW32)) /* Initial location */ - .long L(UW40)-L(UW32) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW33, UW32) - .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ - ADV(UW34, UW33) - .byte 0x80+3, 2 /* DW_CFA_offset %ebx 2*-4 */ - ADV(UW35, UW34) - .byte 0xc0+3 /* DW_CFA_restore %ebx */ - ADV(UW36, UW35) - .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ - ADV(UW37, UW36) - .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ - ADV(UW38, UW37) - .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ - ADV(UW39, UW38) - .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ - .balign 4 -L(EFDE8): - - .set L(set9),L(EFDE9)-L(SFDE9) - .long L(set9) /* FDE Length */ -L(SFDE9): - .long L(SFDE9)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW41)) /* Initial location */ - .long L(UW52)-L(UW41) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW42, UW41) - .byte 0xe, 0 /* DW_CFA_def_cfa_offset */ - .byte 0x9, 8, 2 /* DW_CFA_register %eip, %edx */ - ADV(UW43, UW42) - .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ - ADV(UW44, UW43) - .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ - .byte 0x80+8, 2 /* DW_CFA_offset %eip 2*-4 */ - ADV(UW45, UW44) - .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ - ADV(UW46, UW45) - .byte 0x80+3, 3 /* DW_CFA_offset %ebx 3*-4 */ - ADV(UW47, UW46) - .byte 0xc0+3 /* DW_CFA_restore %ebx */ - ADV(UW48, UW47) - .byte 
0xe, 8 /* DW_CFA_def_cfa_offset */ - ADV(UW49, UW48) - .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ - ADV(UW50, UW49) - .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ - ADV(UW51, UW50) - .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ - .balign 4 -L(EFDE9): -#endif /* !FFI_NO_RAW_API */ - -#ifdef _WIN32 - .def @feat.00; - .scl 3; - .type 0; - .endef - PUBLIC @feat.00 -@feat.00 = 1 -#endif - -#endif /* ifndef _MSC_VER */ -#endif /* ifndef __x86_64__ */ - -#if defined __ELF__ && defined __linux__ - .section .note.GNU-stack,"",@progbits -#endif -#endif - -END
\ No newline at end of file +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 2017 Anthony Green + - Copyright (c) 2013 The Written Word, Inc. + - Copyright (c) 1996,1998,2001-2003,2005,2008,2010 Red Hat, Inc. + + X86 Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#ifndef __x86_64__ +#ifdef _MSC_VER + +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#include <ffi_cfi.h> +#include "internal.h" + +#define C2(X, Y) X ## Y +#define C1(X, Y) C2(X, Y) +#define L(X) C1(L, X) +# define ENDF(X) X ENDP + +/* This macro allows the safe creation of jump tables without an + actual table. The entry points into the table are all 8 bytes. + The use of ORG asserts that we're at the correct location. */ +/* ??? The clang assembler doesn't handle .org with symbolic expressions. */ +#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) +# define E(BASE, X) ALIGN 8 +#else +# define E(BASE, X) ALIGN 8; ORG BASE + X * 8 +#endif + + .686P + .MODEL FLAT + +EXTRN @ffi_closure_inner@8:PROC +_TEXT SEGMENT + +/* This is declared as + + void ffi_call_i386(struct call_frame *frame, char *argp) + __attribute__((fastcall)); + + Thus the arguments are present in + + ecx: frame + edx: argp +*/ + +ALIGN 16 +PUBLIC @ffi_call_i386@8 +@ffi_call_i386@8 PROC +L(UW0): + cfi_startproc + #if !HAVE_FASTCALL + mov ecx, [esp+4] + mov edx, [esp+8] + #endif + mov eax, [esp] /* move the return address */ + mov [ecx], ebp /* store ebp into local frame */ + mov [ecx+4], eax /* store retaddr into local frame */ + + /* New stack frame based off ebp. This is a itty bit of unwind + trickery in that the CFA *has* changed. There is no easy way + to describe it correctly on entry to the function. Fortunately, + it doesn't matter too much since at all points we can correctly + unwind back to ffi_call. Note that the location to which we + moved the return address is (the new) CFA-4, so from the + perspective of the unwind info, it hasn't moved. 
*/
+	mov ebp, ecx
+L(UW1):
+	// cfi_def_cfa(%ebp, 8)
+	// cfi_rel_offset(%ebp, 0)
+
+	mov esp, edx /* set outgoing argument stack */
+	mov eax, [20+R_EAX*4+ebp] /* set register arguments */
+	mov edx, [20+R_EDX*4+ebp]
+	mov ecx, [20+R_ECX*4+ebp]
+
+	call dword ptr [ebp+8]
+
+	mov ecx, [12+ebp] /* load return type code */
+	mov [ebp+8], ebx /* preserve %ebx */
+L(UW2):
+	// cfi_rel_offset(%ebx, 8)
+
+	and ecx, X86_RET_TYPE_MASK
+	lea ebx, [L(store_table) + ecx * 8]
+	mov ecx, [ebp+16] /* load result address */
+	jmp ebx
+
+	ALIGN 8
+L(store_table):
+E(L(store_table), X86_RET_FLOAT)
+	fstp DWORD PTR [ecx]
+	jmp L(e1)
+E(L(store_table), X86_RET_DOUBLE)
+	fstp QWORD PTR [ecx]
+	jmp L(e1)
+E(L(store_table), X86_RET_LDOUBLE)
+	fstp QWORD PTR [ecx]
+	jmp L(e1)
+E(L(store_table), X86_RET_SINT8)
+	movsx eax, al
+	mov [ecx], eax
+	jmp L(e1)
+E(L(store_table), X86_RET_SINT16)
+	movsx eax, ax
+	mov [ecx], eax
+	jmp L(e1)
+E(L(store_table), X86_RET_UINT8)
+	movzx eax, al
+	mov [ecx], eax
+	jmp L(e1)
+E(L(store_table), X86_RET_UINT16)
+	movzx eax, ax
+	mov [ecx], eax
+	jmp L(e1)
+E(L(store_table), X86_RET_INT64)
+	mov [ecx+4], edx
+	/* fallthru */
+E(L(store_table), X86_RET_INT32)
+	mov [ecx], eax
+	/* fallthru */
+E(L(store_table), X86_RET_VOID)
+L(e1):
+	mov ebx, [ebp+8]
+	mov esp, ebp
+	pop ebp
+L(UW3):
+	// cfi_remember_state
+	// cfi_def_cfa(%esp, 4)
+	// cfi_restore(%ebx)
+	// cfi_restore(%ebp)
+	ret
+L(UW4):
+	// cfi_restore_state
+
+E(L(store_table), X86_RET_STRUCTPOP)
+	jmp L(e1)
+E(L(store_table), X86_RET_STRUCTARG)
+	jmp L(e1)
+E(L(store_table), X86_RET_STRUCT_1B)
+	mov [ecx], al
+	jmp L(e1)
+E(L(store_table), X86_RET_STRUCT_2B)
+	mov [ecx], ax
+	jmp L(e1)
+
+	/* Fill out the table so that bad values are predictable. */
+E(L(store_table), X86_RET_UNUSED14)
+	int 3
+E(L(store_table), X86_RET_UNUSED15)
+	int 3
+
+L(UW5):
+	// cfi_endproc
+ENDF(@ffi_call_i386@8)
+
+/* The inner helper is declared as
+
+     void ffi_closure_inner(struct closure_frame *frame, char *argp)
+	__attribute__((fastcall))
+
+   Thus the arguments are placed in
+
+	ecx:	frame
+	edx:	argp
+*/
+
+/* Macros to help setting up the closure_data structure. */
+
+#if HAVE_FASTCALL
+# define closure_FS (40 + 4)
+# define closure_CF 0
+#else
+# define closure_FS (8 + 40 + 12)
+# define closure_CF 8
+#endif
+
+FFI_CLOSURE_SAVE_REGS MACRO
+	mov [esp + closure_CF+16+R_EAX*4], eax
+	mov [esp + closure_CF+16+R_EDX*4], edx
+	mov [esp + closure_CF+16+R_ECX*4], ecx
+ENDM
+
+FFI_CLOSURE_COPY_TRAMP_DATA MACRO
+	mov edx, [eax+FFI_TRAMPOLINE_SIZE] /* copy cif */
+	mov ecx, [eax+FFI_TRAMPOLINE_SIZE+4] /* copy fun */
+	mov eax, [eax+FFI_TRAMPOLINE_SIZE+8]; /* copy user_data */
+	mov [esp+closure_CF+28], edx
+	mov [esp+closure_CF+32], ecx
+	mov [esp+closure_CF+36], eax
+ENDM
+
+#if HAVE_FASTCALL
+FFI_CLOSURE_PREP_CALL MACRO
+	mov ecx, esp /* load closure_data */
+	lea edx, [esp+closure_FS+4] /* load incoming stack */
+ENDM
+#else
+FFI_CLOSURE_PREP_CALL MACRO
+	lea ecx, [esp+closure_CF] /* load closure_data */
+	lea edx, [esp+closure_FS+4] /* load incoming stack */
+	mov [esp], ecx
+	mov [esp+4], edx
+ENDM
+#endif
+
+FFI_CLOSURE_CALL_INNER MACRO UWN
+	call @ffi_closure_inner@8
+ENDM
+
+FFI_CLOSURE_MASK_AND_JUMP MACRO LABEL
+	and eax, X86_RET_TYPE_MASK
+	lea edx, [LABEL+eax*8]
+	mov eax, [esp+closure_CF] /* optimistic load */
+	jmp edx
+ENDM
+
+ALIGN 16
+PUBLIC ffi_go_closure_EAX
+ffi_go_closure_EAX PROC C
+L(UW6):
+	// cfi_startproc
+	sub esp, closure_FS
+L(UW7):
+	// cfi_def_cfa_offset(closure_FS + 4)
+	FFI_CLOSURE_SAVE_REGS
+	mov edx, [eax+4] /* copy cif */
+	mov ecx, [eax +8] /* copy fun */
+	mov [esp+closure_CF+28], edx
+	mov [esp+closure_CF+32], ecx
+	mov [esp+closure_CF+36], eax /* closure is user_data */
+	jmp L(do_closure_i386)
+L(UW8):
+	// cfi_endproc
+ENDF(ffi_go_closure_EAX)
+
+ALIGN 16
+PUBLIC ffi_go_closure_ECX
+ffi_go_closure_ECX PROC C
+L(UW9):
+	// cfi_startproc
+	sub esp, closure_FS
+L(UW10):
+	// cfi_def_cfa_offset(closure_FS + 4)
+	FFI_CLOSURE_SAVE_REGS
+	mov edx, [ecx+4] /* copy cif */
+	mov eax, [ecx+8] /* copy fun */
+	mov [esp+closure_CF+28], edx
+	mov [esp+closure_CF+32], eax
+	mov [esp+closure_CF+36], ecx /* closure is user_data */
+	jmp L(do_closure_i386)
+L(UW11):
+	// cfi_endproc
+ENDF(ffi_go_closure_ECX)
+
+/* The closure entry points are reached from the ffi_closure trampoline.
+   On entry, %eax contains the address of the ffi_closure. */
+
+ALIGN 16
+PUBLIC ffi_closure_i386
+ffi_closure_i386 PROC C
+L(UW12):
+	// cfi_startproc
+	sub esp, closure_FS
+L(UW13):
+	// cfi_def_cfa_offset(closure_FS + 4)
+
+	FFI_CLOSURE_SAVE_REGS
+	FFI_CLOSURE_COPY_TRAMP_DATA
+
+	/* Entry point from preceding Go closures.
*/ +L(do_closure_i386):: + + FFI_CLOSURE_PREP_CALL + FFI_CLOSURE_CALL_INNER(14) + FFI_CLOSURE_MASK_AND_JUMP L(C1(load_table,2)) + + ALIGN 8 +L(load_table2): +E(L(load_table2), X86_RET_FLOAT) + fld dword ptr [esp+closure_CF] + jmp L(e2) +E(L(load_table2), X86_RET_DOUBLE) + fld qword ptr [esp+closure_CF] + jmp L(e2) +E(L(load_table2), X86_RET_LDOUBLE) + fld qword ptr [esp+closure_CF] + jmp L(e2) +E(L(load_table2), X86_RET_SINT8) + movsx eax, al + jmp L(e2) +E(L(load_table2), X86_RET_SINT16) + movsx eax, ax + jmp L(e2) +E(L(load_table2), X86_RET_UINT8) + movzx eax, al + jmp L(e2) +E(L(load_table2), X86_RET_UINT16) + movzx eax, ax + jmp L(e2) +E(L(load_table2), X86_RET_INT64) + mov edx, [esp+closure_CF+4] + jmp L(e2) +E(L(load_table2), X86_RET_INT32) + nop + /* fallthru */ +E(L(load_table2), X86_RET_VOID) +L(e2): + add esp, closure_FS +L(UW16): + // cfi_adjust_cfa_offset(-closure_FS) + ret +L(UW17): + // cfi_adjust_cfa_offset(closure_FS) +E(L(load_table2), X86_RET_STRUCTPOP) + add esp, closure_FS +L(UW18): + // cfi_adjust_cfa_offset(-closure_FS) + ret 4 +L(UW19): + // cfi_adjust_cfa_offset(closure_FS) +E(L(load_table2), X86_RET_STRUCTARG) + jmp L(e2) +E(L(load_table2), X86_RET_STRUCT_1B) + movzx eax, al + jmp L(e2) +E(L(load_table2), X86_RET_STRUCT_2B) + movzx eax, ax + jmp L(e2) + + /* Fill out the table so that bad values are predictable. */ +E(L(load_table2), X86_RET_UNUSED14) + int 3 +E(L(load_table2), X86_RET_UNUSED15) + int 3 + +L(UW20): + // cfi_endproc +ENDF(ffi_closure_i386) + +ALIGN 16 +PUBLIC ffi_go_closure_STDCALL +ffi_go_closure_STDCALL PROC C +L(UW21): + // cfi_startproc + sub esp, closure_FS +L(UW22): + // cfi_def_cfa_offset(closure_FS + 4) + FFI_CLOSURE_SAVE_REGS + mov edx, [ecx+4] /* copy cif */ + mov eax, [ecx+8] /* copy fun */ + mov [esp+closure_CF+28], edx + mov [esp+closure_CF+32], eax + mov [esp+closure_CF+36], ecx /* closure is user_data */ + jmp L(do_closure_STDCALL) +L(UW23): + // cfi_endproc +ENDF(ffi_go_closure_STDCALL) + +/* For REGISTER, we have no available parameter registers, and so we + enter here having pushed the closure onto the stack. */ + +ALIGN 16 +PUBLIC ffi_closure_REGISTER +ffi_closure_REGISTER PROC C +L(UW24): + // cfi_startproc + // cfi_def_cfa(%esp, 8) + // cfi_offset(%eip, -8) + sub esp, closure_FS-4 +L(UW25): + // cfi_def_cfa_offset(closure_FS + 4) + FFI_CLOSURE_SAVE_REGS + mov ecx, [esp+closure_FS-4] /* load retaddr */ + mov eax, [esp+closure_FS] /* load closure */ + mov [esp+closure_FS], ecx /* move retaddr */ + jmp L(do_closure_REGISTER) +L(UW26): + // cfi_endproc +ENDF(ffi_closure_REGISTER) + +/* For STDCALL (and others), we need to pop N bytes of arguments off + the stack following the closure. The amount needing to be popped + is returned to us from ffi_closure_inner. */ + +ALIGN 16 +PUBLIC ffi_closure_STDCALL +ffi_closure_STDCALL PROC C +L(UW27): + // cfi_startproc + sub esp, closure_FS +L(UW28): + // cfi_def_cfa_offset(closure_FS + 4) + + FFI_CLOSURE_SAVE_REGS + + /* Entry point from ffi_closure_REGISTER. */ +L(do_closure_REGISTER):: + + FFI_CLOSURE_COPY_TRAMP_DATA + + /* Entry point from preceeding Go closure. */ +L(do_closure_STDCALL):: + + FFI_CLOSURE_PREP_CALL + FFI_CLOSURE_CALL_INNER(29) + + mov ecx, eax + shr ecx, X86_RET_POP_SHIFT /* isolate pop count */ + lea ecx, [esp+closure_FS+ecx] /* compute popped esp */ + mov edx, [esp+closure_FS] /* move return address */ + mov [ecx], edx + + /* From this point on, the value of %esp upon return is %ecx+4, + and we've copied the return address to %ecx to make return easy. 
+    There's no point in representing this in the unwind info, as
+    there is always a window between the mov and the ret which
+    will be wrong from one point of view or another.  */
+
+ FFI_CLOSURE_MASK_AND_JUMP L(C1(load_table,3))
+
+ ALIGN 8
+L(load_table3):
+E(L(load_table3), X86_RET_FLOAT)
+ fld DWORD PTR [esp+closure_CF]
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_DOUBLE)
+ fld QWORD PTR [esp+closure_CF]
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_LDOUBLE)
+ fld QWORD PTR [esp+closure_CF]
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_SINT8)
+ movsx eax, al
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_SINT16)
+ movsx eax, ax
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_UINT8)
+ movzx eax, al
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_UINT16)
+ movzx eax, ax
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_INT64)
+ mov edx, [esp+closure_CF+4]
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_INT32)
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_VOID)
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_STRUCTPOP)
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_STRUCTARG)
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_STRUCT_1B)
+ movzx eax, al
+ mov esp, ecx
+ ret
+E(L(load_table3), X86_RET_STRUCT_2B)
+ movzx eax, ax
+ mov esp, ecx
+ ret
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table3), X86_RET_UNUSED14)
+ int 3
+E(L(load_table3), X86_RET_UNUSED15)
+ int 3
+
+L(UW31):
+ // cfi_endproc
+ENDF(ffi_closure_STDCALL)
+
+#if !FFI_NO_RAW_API
+
+#define raw_closure_S_FS (16+16+12)
+
+ALIGN 16
+PUBLIC ffi_closure_raw_SYSV
+ffi_closure_raw_SYSV PROC C
+L(UW32):
+ // cfi_startproc
+ sub esp, raw_closure_S_FS
+L(UW33):
+ // cfi_def_cfa_offset(raw_closure_S_FS + 4)
+ mov [esp+raw_closure_S_FS-4], ebx
+L(UW34):
+ // cfi_rel_offset(%ebx, raw_closure_S_FS-4)
+
+ mov edx, [eax+FFI_TRAMPOLINE_SIZE+8] /* load cl->user_data */
+ mov [esp+12], edx
+ lea edx, [esp+raw_closure_S_FS+4] /* load raw_args */
+ mov [esp+8], edx
+ lea edx, [esp+16] /* load &res */
+ mov [esp+4], edx
+ mov ebx, [eax+FFI_TRAMPOLINE_SIZE] /* load cl->cif */
+ mov [esp], ebx
+ call DWORD PTR [eax+FFI_TRAMPOLINE_SIZE+4] /* call cl->fun */
+
+ mov eax, [ebx+20] /* load cif->flags */
+ and eax, X86_RET_TYPE_MASK
+// #ifdef __PIC__
+// call __x86.get_pc_thunk.bx
+// L(pc4):
+// leal L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx
+// #else
+ lea ecx, [L(load_table4)+eax*8]
+// #endif
+ mov ebx, [esp+raw_closure_S_FS-4]
+L(UW35):
+ // cfi_restore(%ebx)
+ mov eax, [esp+16] /* Optimistic load */
+ jmp ecx
+
+ ALIGN 8
+L(load_table4):
+E(L(load_table4), X86_RET_FLOAT)
+ fld DWORD PTR [esp+16]
+ jmp L(e4)
+E(L(load_table4), X86_RET_DOUBLE)
+ fld QWORD PTR [esp+16]
+ jmp L(e4)
+E(L(load_table4), X86_RET_LDOUBLE)
+ fld QWORD PTR [esp+16]
+ jmp L(e4)
+E(L(load_table4), X86_RET_SINT8)
+ movsx eax, al
+ jmp L(e4)
+E(L(load_table4), X86_RET_SINT16)
+ movsx eax, ax
+ jmp L(e4)
+E(L(load_table4), X86_RET_UINT8)
+ movzx eax, al
+ jmp L(e4)
+E(L(load_table4), X86_RET_UINT16)
+ movzx eax, ax
+ jmp L(e4)
+E(L(load_table4), X86_RET_INT64)
+ mov edx, [esp+16+4]
+ jmp L(e4)
+E(L(load_table4), X86_RET_INT32)
+ nop
+ /* fallthru */
+E(L(load_table4), X86_RET_VOID)
+L(e4):
+ add esp, raw_closure_S_FS
+L(UW36):
+ // cfi_adjust_cfa_offset(-raw_closure_S_FS)
+ ret
+L(UW37):
+ // cfi_adjust_cfa_offset(raw_closure_S_FS)
+E(L(load_table4), X86_RET_STRUCTPOP)
+ add esp, raw_closure_S_FS
+L(UW38):
+ // cfi_adjust_cfa_offset(-raw_closure_S_FS)
+ ret 4
+L(UW39):
+ // cfi_adjust_cfa_offset(raw_closure_S_FS) 
+E(L(load_table4), X86_RET_STRUCTARG)
+ jmp L(e4)
+E(L(load_table4), X86_RET_STRUCT_1B)
+ movzx eax, al
+ jmp L(e4)
+E(L(load_table4), X86_RET_STRUCT_2B)
+ movzx eax, ax
+ jmp L(e4)
+
+ /* Fill out the table so that bad values are predictable. */
+E(L(load_table4), X86_RET_UNUSED14)
+ int 3
+E(L(load_table4), X86_RET_UNUSED15)
+ int 3
+
+L(UW40):
+ // cfi_endproc
+ENDF(ffi_closure_raw_SYSV)
+
+#define raw_closure_T_FS (16+16+8)
+
+ALIGN 16
+PUBLIC ffi_closure_raw_THISCALL
+ffi_closure_raw_THISCALL PROC C
+L(UW41):
+ // cfi_startproc
+ /* Rearrange the stack such that %ecx is the first argument.
+    This means moving the return address.  */
+ pop edx
+L(UW42):
+ // cfi_def_cfa_offset(0)
+ // cfi_register(%eip, %edx)
+ push ecx
+L(UW43):
+ // cfi_adjust_cfa_offset(4)
+ push edx
+L(UW44):
+ // cfi_adjust_cfa_offset(4)
+ // cfi_rel_offset(%eip, 0)
+ sub esp, raw_closure_T_FS
+L(UW45):
+ // cfi_adjust_cfa_offset(raw_closure_T_FS)
+ mov [esp+raw_closure_T_FS-4], ebx
+L(UW46):
+ // cfi_rel_offset(%ebx, raw_closure_T_FS-4)
+
+ mov edx, [eax+FFI_TRAMPOLINE_SIZE+8] /* load cl->user_data */
+ mov [esp+12], edx
+ lea edx, [esp+raw_closure_T_FS+4] /* load raw_args */
+ mov [esp+8], edx
+ lea edx, [esp+16] /* load &res */
+ mov [esp+4], edx
+ mov ebx, [eax+FFI_TRAMPOLINE_SIZE] /* load cl->cif */
+ mov [esp], ebx
+ call DWORD PTR [eax+FFI_TRAMPOLINE_SIZE+4] /* call cl->fun */
+
+ mov eax, [ebx+20] /* load cif->flags */
+ and eax, X86_RET_TYPE_MASK
+// #ifdef __PIC__
+// call __x86.get_pc_thunk.bx
+// L(pc5):
+// leal L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx
+// #else
+ lea ecx, [L(load_table5)+eax*8]
+//#endif
+ mov ebx, [esp+raw_closure_T_FS-4]
+L(UW47):
+ // cfi_restore(%ebx)
+ mov eax, [esp+16] /* Optimistic load */
+ jmp ecx
+
+ ALIGN 4
+L(load_table5):
+E(L(load_table5), X86_RET_FLOAT)
+ fld DWORD PTR [esp+16]
+ jmp L(e5)
+E(L(load_table5), X86_RET_DOUBLE)
+ fld QWORD PTR [esp+16]
+ jmp L(e5)
+E(L(load_table5), X86_RET_LDOUBLE)
+ fld QWORD PTR [esp+16]
+ jmp L(e5)
+E(L(load_table5), X86_RET_SINT8)
+ movsx eax, al
+ jmp L(e5)
+E(L(load_table5), X86_RET_SINT16)
+ movsx eax, ax
+ jmp L(e5)
+E(L(load_table5), X86_RET_UINT8)
+ movzx eax, al
+ jmp L(e5)
+E(L(load_table5), X86_RET_UINT16)
+ movzx eax, ax
+ jmp L(e5)
+E(L(load_table5), X86_RET_INT64)
+ mov edx, [esp+16+4]
+ jmp L(e5)
+E(L(load_table5), X86_RET_INT32)
+ nop
+ /* fallthru */
+E(L(load_table5), X86_RET_VOID)
+L(e5):
+ add esp, raw_closure_T_FS
+L(UW48):
+ // cfi_adjust_cfa_offset(-raw_closure_T_FS)
+ /* Remove the extra %ecx argument we pushed.  */
+ ret 4
+L(UW49):
+ // cfi_adjust_cfa_offset(raw_closure_T_FS)
+E(L(load_table5), X86_RET_STRUCTPOP)
+ add esp, raw_closure_T_FS
+L(UW50):
+ // cfi_adjust_cfa_offset(-raw_closure_T_FS)
+ ret 8
+L(UW51):
+ // cfi_adjust_cfa_offset(raw_closure_T_FS)
+E(L(load_table5), X86_RET_STRUCTARG)
+ jmp L(e5)
+E(L(load_table5), X86_RET_STRUCT_1B)
+ movzx eax, al
+ jmp L(e5)
+E(L(load_table5), X86_RET_STRUCT_2B)
+ movzx eax, ax
+ jmp L(e5)
+
+ /* Fill out the table so that bad values are predictable. 
*/ +E(L(load_table5), X86_RET_UNUSED14) + int 3 +E(L(load_table5), X86_RET_UNUSED15) + int 3 + +L(UW52): + // cfi_endproc +ENDF(ffi_closure_raw_THISCALL) + +#endif /* !FFI_NO_RAW_API */ + +#ifdef X86_DARWIN +# define COMDAT(X) \ + .section __TEXT,__text,coalesced,pure_instructions; \ + .weak_definition X; \ + FFI_HIDDEN(X) +#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__)) +# define COMDAT(X) \ + .section .text.X,"axG",@progbits,X,comdat; \ + PUBLIC X; \ + FFI_HIDDEN(X) +#else +# define COMDAT(X) +#endif + +// #if defined(__PIC__) +// COMDAT(C(__x86.get_pc_thunk.bx)) +// C(__x86.get_pc_thunk.bx): +// movl (%esp), %ebx +// ret +// ENDF(C(__x86.get_pc_thunk.bx)) +// # if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE +// COMDAT(C(__x86.get_pc_thunk.dx)) +// C(__x86.get_pc_thunk.dx): +// movl (%esp), %edx +// ret +// ENDF(C(__x86.get_pc_thunk.dx)) +// #endif /* DARWIN || HIDDEN */ +// #endif /* __PIC__ */ + +#if 0 +/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */ + +#ifdef __APPLE__ +.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EHFrame0: +#elif defined(X86_WIN32) +.section .eh_frame,"r" +#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) +.section .eh_frame,EH_FRAME_FLAGS,@unwind +#else +.section .eh_frame,EH_FRAME_FLAGS,@progbits +#endif + +#ifdef HAVE_AS_X86_PCREL +# define PCREL(X) X - . +#else +# define PCREL(X) X@rel +#endif + +/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */ +#define ADV(N, P) .byte 2, L(N)-L(P) + + .balign 4 +L(CIE): + .set L(set0),L(ECIE)-L(SCIE) + .long L(set0) /* CIE Length */ +L(SCIE): + .long 0 /* CIE Identifier Tag */ + .byte 1 /* CIE Version */ + .ascii "zR\0" /* CIE Augmentation */ + .byte 1 /* CIE Code Alignment Factor */ + .byte 0x7c /* CIE Data Alignment Factor */ + .byte 0x8 /* CIE RA Column */ + .byte 1 /* Augmentation size */ + .byte 0x1b /* FDE Encoding (pcrel sdata4) */ + .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp offset 4 */ + .byte 0x80+8, 1 /* DW_CFA_offset, %eip offset 1*-4 */ + .balign 4 +L(ECIE): + + .set L(set1),L(EFDE1)-L(SFDE1) + .long L(set1) /* FDE Length */ +L(SFDE1): + .long L(SFDE1)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW0)) /* Initial location */ + .long L(UW5)-L(UW0) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW1, UW0) + .byte 0xc, 5, 8 /* DW_CFA_def_cfa, %ebp 8 */ + .byte 0x80+5, 2 /* DW_CFA_offset, %ebp 2*-4 */ + ADV(UW2, UW1) + .byte 0x80+3, 0 /* DW_CFA_offset, %ebx 0*-4 */ + ADV(UW3, UW2) + .byte 0xa /* DW_CFA_remember_state */ + .byte 0xc, 4, 4 /* DW_CFA_def_cfa, %esp 4 */ + .byte 0xc0+3 /* DW_CFA_restore, %ebx */ + .byte 0xc0+5 /* DW_CFA_restore, %ebp */ + ADV(UW4, UW3) + .byte 0xb /* DW_CFA_restore_state */ + .balign 4 +L(EFDE1): + + .set L(set2),L(EFDE2)-L(SFDE2) + .long L(set2) /* FDE Length */ +L(SFDE2): + .long L(SFDE2)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW6)) /* Initial location */ + .long L(UW8)-L(UW6) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW7, UW6) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE2): + + .set L(set3),L(EFDE3)-L(SFDE3) + .long L(set3) /* FDE Length */ +L(SFDE3): + .long L(SFDE3)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW9)) /* Initial location */ + .long L(UW11)-L(UW9) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW10, UW9) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE3): + + .set L(set4),L(EFDE4)-L(SFDE4) + .long L(set4) /* FDE Length */ +L(SFDE4): + .long L(SFDE4)-L(CIE) /* FDE 
CIE offset */ + .long PCREL(L(UW12)) /* Initial location */ + .long L(UW20)-L(UW12) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW13, UW12) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ +#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX + ADV(UW14, UW13) + .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */ + ADV(UW15, UW14) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW16, UW15) +#else + ADV(UW16, UW13) +#endif + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW17, UW16) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW18, UW17) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW19, UW18) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE4): + + .set L(set5),L(EFDE5)-L(SFDE5) + .long L(set5) /* FDE Length */ +L(SFDE5): + .long L(SFDE5)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW21)) /* Initial location */ + .long L(UW23)-L(UW21) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW22, UW21) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE5): + + .set L(set6),L(EFDE6)-L(SFDE6) + .long L(set6) /* FDE Length */ +L(SFDE6): + .long L(SFDE6)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW24)) /* Initial location */ + .long L(UW26)-L(UW24) /* Address range */ + .byte 0 /* Augmentation size */ + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + .byte 0x80+8, 2 /* DW_CFA_offset %eip, 2*-4 */ + ADV(UW25, UW24) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE6): + + .set L(set7),L(EFDE7)-L(SFDE7) + .long L(set7) /* FDE Length */ +L(SFDE7): + .long L(SFDE7)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW27)) /* Initial location */ + .long L(UW31)-L(UW27) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW28, UW27) + .byte 0xe, closure_FS+4 /* DW_CFA_def_cfa_offset */ +#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX + ADV(UW29, UW28) + .byte 0x80+3, (40-(closure_FS+4))/-4 /* DW_CFA_offset %ebx */ + ADV(UW30, UW29) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ +#endif + .balign 4 +L(EFDE7): + +#if !FFI_NO_RAW_API + .set L(set8),L(EFDE8)-L(SFDE8) + .long L(set8) /* FDE Length */ +L(SFDE8): + .long L(SFDE8)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW32)) /* Initial location */ + .long L(UW40)-L(UW32) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW33, UW32) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW34, UW33) + .byte 0x80+3, 2 /* DW_CFA_offset %ebx 2*-4 */ + ADV(UW35, UW34) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW36, UW35) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW37, UW36) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + ADV(UW38, UW37) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW39, UW38) + .byte 0xe, raw_closure_S_FS+4 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE8): + + .set L(set9),L(EFDE9)-L(SFDE9) + .long L(set9) /* FDE Length */ +L(SFDE9): + .long L(SFDE9)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW41)) /* Initial location */ + .long L(UW52)-L(UW41) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW42, UW41) + .byte 0xe, 0 /* DW_CFA_def_cfa_offset */ + .byte 0x9, 8, 2 /* DW_CFA_register %eip, %edx */ + ADV(UW43, UW42) + .byte 0xe, 4 /* DW_CFA_def_cfa_offset */ + ADV(UW44, UW43) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + .byte 0x80+8, 2 /* DW_CFA_offset %eip 2*-4 */ + ADV(UW45, UW44) + .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + ADV(UW46, UW45) + .byte 0x80+3, 3 /* DW_CFA_offset %ebx 3*-4 */ + ADV(UW47, UW46) + .byte 0xc0+3 /* DW_CFA_restore %ebx */ + ADV(UW48, UW47) + .byte 
0xe, 8 /* DW_CFA_def_cfa_offset */ + ADV(UW49, UW48) + .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + ADV(UW50, UW49) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset */ + ADV(UW51, UW50) + .byte 0xe, raw_closure_T_FS+8 /* DW_CFA_def_cfa_offset */ + .balign 4 +L(EFDE9): +#endif /* !FFI_NO_RAW_API */ + +#ifdef _WIN32 + .def @feat.00; + .scl 3; + .type 0; + .endef + PUBLIC @feat.00 +@feat.00 = 1 +#endif + +#endif /* ifndef _MSC_VER */ +#endif /* ifndef __x86_64__ */ + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif +#endif + +END
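Every closure return path in this file ends the same way: ffi_closure_inner hands back a flags word, FFI_CLOSURE_MASK_AND_JUMP masks out the low type bits and indexes a table of fixed 8-byte code stubs, and each stub widens %eax (or loads the x87 value) as the declared return type requires, with the stdcall pop count carried in the bits above the mask. A minimal C sketch of that dispatch, assuming the type codes follow the ordering the load_table2..load_table5 tables lay out; the names below are illustrative placeholders, not the actual internal.h spellings:

    #include <stdint.h>

    /* Type codes in the order the tables above lay them out,
       one 8-byte stub per code; values inferred from that
       ordering, for illustration only. */
    enum {
        RET_FLOAT, RET_DOUBLE, RET_LDOUBLE, RET_SINT8, RET_SINT16,
        RET_UINT8, RET_UINT16, RET_INT64, RET_INT32, RET_VOID,
        RET_STRUCTPOP, RET_STRUCTARG, RET_STRUCT_1B, RET_STRUCT_2B
    };
    #define RET_TYPE_MASK 15  /* low bits select the stub */
    #define RET_POP_SHIFT 4   /* upper bits carry the stdcall pop count */

    /* What FFI_CLOSURE_MASK_AND_JUMP plus one table stub accomplish:
       split the flags word, then widen the raw register value to
       match the return type. */
    static uint32_t widen_return(uint32_t eax, uint32_t flags, uint32_t *pop)
    {
        *pop = flags >> RET_POP_SHIFT;           /* bytes the callee must pop */
        switch (flags & RET_TYPE_MASK) {
        case RET_SINT8:     return (uint32_t)(int32_t)(int8_t)eax;   /* movsx */
        case RET_SINT16:    return (uint32_t)(int32_t)(int16_t)eax;  /* movsx */
        case RET_UINT8:
        case RET_STRUCT_1B: return (uint8_t)eax;                     /* movzx */
        case RET_UINT16:
        case RET_STRUCT_2B: return (uint16_t)eax;                    /* movzx */
        default:            return eax;  /* INT32, INT64 low half, VOID: as-is */
        }
    }

The assembly reaches the same result with no compare chain: lea edx, [LABEL+eax*8] selects the stub directly, every stub is padded to exactly 8 bytes, and the otherwise-unreachable UNUSED14/UNUSED15 slots hold int 3 so a corrupted type code traps instead of falling through.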
\ No newline at end of file
diff --git a/contrib/restricted/libffi/src/x86/unix64.S b/contrib/restricted/libffi/src/x86/unix64.S
index ae81f77d09..41563f5c60 100644
--- a/contrib/restricted/libffi/src/x86/unix64.S
+++ b/contrib/restricted/libffi/src/x86/unix64.S
@@ -30,21 +30,21 @@
 #define LIBFFI_ASM
 #include <fficonfig.h>
 #include <ffi.h>
-#include "internal64.h"
-#include "asmnames.h"
-
- .text
-
-/* This macro allows the safe creation of jump tables without an
-   actual table.  The entry points into the table are all 8 bytes.
-   The use of ORG asserts that we're at the correct location. */
-/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
-#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
-# define E(BASE, X) .balign 8
-#else
-# define E(BASE, X) .balign 8; .org BASE + X * 8
-#endif
-
+#include "internal64.h"
+#include "asmnames.h"
+
+ .text
+
+/* This macro allows the safe creation of jump tables without an
+   actual table.  The entry points into the table are all 8 bytes.
+   The use of ORG asserts that we're at the correct location. */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions. */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X) .balign 8
+#else
+# define E(BASE, X) .balign 8; .org BASE + X * 8
+#endif
+
 /* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
 	    void *raddr, void (*fnaddr)(void));
@@ -52,12 +52,12 @@
   for this function.  This has been allocated by ffi_call.  We also
   deallocate some of the stack that has been alloca'd.  */
- .balign 8
- .globl C(ffi_call_unix64)
- FFI_HIDDEN(C(ffi_call_unix64))
+ .balign 8
+ .globl C(ffi_call_unix64)
+ FFI_HIDDEN(C(ffi_call_unix64))
-C(ffi_call_unix64):
-L(UW0):
+C(ffi_call_unix64):
+L(UW0):
 movq (%rsp), %r10 /* Load return address. */
 leaq (%rdi, %rsi), %rax /* Find local stack base. */
 movq %rdx, (%rax) /* Save flags. */
@@ -65,37 +65,37 @@ L(UW0):
 movq %rbp, 16(%rax) /* Save old frame pointer. */
 movq %r10, 24(%rax) /* Relocate return address. */
 movq %rax, %rbp /* Finalize local stack frame. */
-
- /* New stack frame based off rbp.  This is an itty bit of unwind
-    trickery in that the CFA *has* changed.  There is no easy way
-    to describe it correctly on entry to the function.  Fortunately,
-    it doesn't matter too much since at all points we can correctly
-    unwind back to ffi_call.  Note that the location to which we
-    moved the return address is (the new) CFA-8, so from the
-    perspective of the unwind info, it hasn't moved.  */
-L(UW1):
- /* cfi_def_cfa(%rbp, 32) */
- /* cfi_rel_offset(%rbp, 16) */
-
+
+ /* New stack frame based off rbp.  This is an itty bit of unwind
+    trickery in that the CFA *has* changed.  There is no easy way
+    to describe it correctly on entry to the function.  Fortunately,
+    it doesn't matter too much since at all points we can correctly
+    unwind back to ffi_call.  Note that the location to which we
+    moved the return address is (the new) CFA-8, so from the
+    perspective of the unwind info, it hasn't moved.  */
+L(UW1):
+ /* cfi_def_cfa(%rbp, 32) */
+ /* cfi_rel_offset(%rbp, 16) */
+
 movq %rdi, %r10 /* Save a copy of the register area. */
 movq %r8, %r11 /* Save a copy of the target fn. */
 movl %r9d, %eax /* Set number of SSE registers. */

 /* Load up all argument registers. 
*/ movq (%r10), %rdi - movq 0x08(%r10), %rsi - movq 0x10(%r10), %rdx - movq 0x18(%r10), %rcx - movq 0x20(%r10), %r8 - movq 0x28(%r10), %r9 - movl 0xb0(%r10), %eax + movq 0x08(%r10), %rsi + movq 0x10(%r10), %rdx + movq 0x18(%r10), %rcx + movq 0x20(%r10), %r8 + movq 0x28(%r10), %r9 + movl 0xb0(%r10), %eax testl %eax, %eax - jnz L(load_sse) -L(ret_from_load_sse): + jnz L(load_sse) +L(ret_from_load_sse): - /* Deallocate the reg arg area, except for r10, then load via pop. */ - leaq 0xb8(%r10), %rsp - popq %r10 + /* Deallocate the reg arg area, except for r10, then load via pop. */ + leaq 0xb8(%r10), %rsp + popq %r10 /* Call the user function. */ call *%r11 @@ -106,460 +106,460 @@ L(ret_from_load_sse): movq 0(%rbp), %rcx /* Reload flags. */ movq 8(%rbp), %rdi /* Reload raddr. */ movq 16(%rbp), %rbp /* Reload old frame pointer. */ -L(UW2): - /* cfi_remember_state */ - /* cfi_def_cfa(%rsp, 8) */ - /* cfi_restore(%rbp) */ +L(UW2): + /* cfi_remember_state */ + /* cfi_def_cfa(%rsp, 8) */ + /* cfi_restore(%rbp) */ /* The first byte of the flags contains the FFI_TYPE. */ - cmpb $UNIX64_RET_LAST, %cl + cmpb $UNIX64_RET_LAST, %cl movzbl %cl, %r10d - leaq L(store_table)(%rip), %r11 - ja L(sa) - leaq (%r11, %r10, 8), %r10 - - /* Prep for the structure cases: scratch area in redzone. */ - leaq -20(%rsp), %rsi + leaq L(store_table)(%rip), %r11 + ja L(sa) + leaq (%r11, %r10, 8), %r10 + + /* Prep for the structure cases: scratch area in redzone. */ + leaq -20(%rsp), %rsi jmp *%r10 - .balign 8 -L(store_table): -E(L(store_table), UNIX64_RET_VOID) + .balign 8 +L(store_table): +E(L(store_table), UNIX64_RET_VOID) ret -E(L(store_table), UNIX64_RET_UINT8) - movzbl %al, %eax +E(L(store_table), UNIX64_RET_UINT8) + movzbl %al, %eax movq %rax, (%rdi) ret -E(L(store_table), UNIX64_RET_UINT16) - movzwl %ax, %eax - movq %rax, (%rdi) - ret -E(L(store_table), UNIX64_RET_UINT32) - movl %eax, %eax - movq %rax, (%rdi) - ret -E(L(store_table), UNIX64_RET_SINT8) +E(L(store_table), UNIX64_RET_UINT16) + movzwl %ax, %eax + movq %rax, (%rdi) + ret +E(L(store_table), UNIX64_RET_UINT32) + movl %eax, %eax + movq %rax, (%rdi) + ret +E(L(store_table), UNIX64_RET_SINT8) movsbq %al, %rax movq %rax, (%rdi) ret -E(L(store_table), UNIX64_RET_SINT16) +E(L(store_table), UNIX64_RET_SINT16) movswq %ax, %rax movq %rax, (%rdi) ret -E(L(store_table), UNIX64_RET_SINT32) +E(L(store_table), UNIX64_RET_SINT32) cltq movq %rax, (%rdi) ret -E(L(store_table), UNIX64_RET_INT64) +E(L(store_table), UNIX64_RET_INT64) movq %rax, (%rdi) ret -E(L(store_table), UNIX64_RET_XMM32) - movd %xmm0, (%rdi) +E(L(store_table), UNIX64_RET_XMM32) + movd %xmm0, (%rdi) + ret +E(L(store_table), UNIX64_RET_XMM64) + movq %xmm0, (%rdi) ret -E(L(store_table), UNIX64_RET_XMM64) - movq %xmm0, (%rdi) +E(L(store_table), UNIX64_RET_X87) + fstpt (%rdi) ret -E(L(store_table), UNIX64_RET_X87) +E(L(store_table), UNIX64_RET_X87_2) fstpt (%rdi) + fstpt 16(%rdi) ret -E(L(store_table), UNIX64_RET_X87_2) - fstpt (%rdi) - fstpt 16(%rdi) - ret -E(L(store_table), UNIX64_RET_ST_XMM0_RAX) - movq %rax, 8(%rsi) - jmp L(s3) -E(L(store_table), UNIX64_RET_ST_RAX_XMM0) - movq %xmm0, 8(%rsi) - jmp L(s2) -E(L(store_table), UNIX64_RET_ST_XMM0_XMM1) - movq %xmm1, 8(%rsi) - jmp L(s3) -E(L(store_table), UNIX64_RET_ST_RAX_RDX) - movq %rdx, 8(%rsi) -L(s2): +E(L(store_table), UNIX64_RET_ST_XMM0_RAX) + movq %rax, 8(%rsi) + jmp L(s3) +E(L(store_table), UNIX64_RET_ST_RAX_XMM0) + movq %xmm0, 8(%rsi) + jmp L(s2) +E(L(store_table), UNIX64_RET_ST_XMM0_XMM1) + movq %xmm1, 8(%rsi) + jmp L(s3) +E(L(store_table), 
UNIX64_RET_ST_RAX_RDX) + movq %rdx, 8(%rsi) +L(s2): movq %rax, (%rsi) - shrl $UNIX64_SIZE_SHIFT, %ecx + shrl $UNIX64_SIZE_SHIFT, %ecx + rep movsb + ret + .balign 8 +L(s3): + movq %xmm0, (%rsi) + shrl $UNIX64_SIZE_SHIFT, %ecx rep movsb ret - .balign 8 -L(s3): - movq %xmm0, (%rsi) - shrl $UNIX64_SIZE_SHIFT, %ecx - rep movsb - ret - -L(sa): call PLT(C(abort)) - + +L(sa): call PLT(C(abort)) + /* Many times we can avoid loading any SSE registers at all. It's not worth an indirect jump to load the exact set of SSE registers needed; zero or all is a good compromise. */ - .balign 2 -L(UW3): - /* cfi_restore_state */ -L(load_sse): - movdqa 0x30(%r10), %xmm0 - movdqa 0x40(%r10), %xmm1 - movdqa 0x50(%r10), %xmm2 - movdqa 0x60(%r10), %xmm3 - movdqa 0x70(%r10), %xmm4 - movdqa 0x80(%r10), %xmm5 - movdqa 0x90(%r10), %xmm6 - movdqa 0xa0(%r10), %xmm7 - jmp L(ret_from_load_sse) - -L(UW4): -ENDF(C(ffi_call_unix64)) - -/* 6 general registers, 8 vector registers, - 32 bytes of rvalue, 8 bytes of alignment. */ -#define ffi_closure_OFS_G 0 -#define ffi_closure_OFS_V (6*8) -#define ffi_closure_OFS_RVALUE (ffi_closure_OFS_V + 8*16) -#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 32 + 8) - -/* The location of rvalue within the red zone after deallocating the frame. */ -#define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS) - - .balign 2 - .globl C(ffi_closure_unix64_sse) - FFI_HIDDEN(C(ffi_closure_unix64_sse)) - -C(ffi_closure_unix64_sse): -L(UW5): - subq $ffi_closure_FS, %rsp -L(UW6): - /* cfi_adjust_cfa_offset(ffi_closure_FS) */ - - movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp) - movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp) - movdqa %xmm2, ffi_closure_OFS_V+0x20(%rsp) - movdqa %xmm3, ffi_closure_OFS_V+0x30(%rsp) - movdqa %xmm4, ffi_closure_OFS_V+0x40(%rsp) - movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp) - movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp) - movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp) - jmp L(sse_entry1) - -L(UW7): -ENDF(C(ffi_closure_unix64_sse)) - - .balign 2 - .globl C(ffi_closure_unix64) - FFI_HIDDEN(C(ffi_closure_unix64)) - -C(ffi_closure_unix64): -L(UW8): - subq $ffi_closure_FS, %rsp -L(UW9): - /* cfi_adjust_cfa_offset(ffi_closure_FS) */ -L(sse_entry1): - movq %rdi, ffi_closure_OFS_G+0x00(%rsp) - movq %rsi, ffi_closure_OFS_G+0x08(%rsp) - movq %rdx, ffi_closure_OFS_G+0x10(%rsp) - movq %rcx, ffi_closure_OFS_G+0x18(%rsp) - movq %r8, ffi_closure_OFS_G+0x20(%rsp) - movq %r9, ffi_closure_OFS_G+0x28(%rsp) - -#ifdef __ILP32__ - movl FFI_TRAMPOLINE_SIZE(%r10), %edi /* Load cif */ - movl FFI_TRAMPOLINE_SIZE+4(%r10), %esi /* Load fun */ - movl FFI_TRAMPOLINE_SIZE+8(%r10), %edx /* Load user_data */ -#else - movq FFI_TRAMPOLINE_SIZE(%r10), %rdi /* Load cif */ - movq FFI_TRAMPOLINE_SIZE+8(%r10), %rsi /* Load fun */ - movq FFI_TRAMPOLINE_SIZE+16(%r10), %rdx /* Load user_data */ -#endif -L(do_closure): - leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */ - movq %rsp, %r8 /* Load reg_args */ - leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */ - call PLT(C(ffi_closure_unix64_inner)) - + .balign 2 +L(UW3): + /* cfi_restore_state */ +L(load_sse): + movdqa 0x30(%r10), %xmm0 + movdqa 0x40(%r10), %xmm1 + movdqa 0x50(%r10), %xmm2 + movdqa 0x60(%r10), %xmm3 + movdqa 0x70(%r10), %xmm4 + movdqa 0x80(%r10), %xmm5 + movdqa 0x90(%r10), %xmm6 + movdqa 0xa0(%r10), %xmm7 + jmp L(ret_from_load_sse) + +L(UW4): +ENDF(C(ffi_call_unix64)) + +/* 6 general registers, 8 vector registers, + 32 bytes of rvalue, 8 bytes of alignment. 
*/ +#define ffi_closure_OFS_G 0 +#define ffi_closure_OFS_V (6*8) +#define ffi_closure_OFS_RVALUE (ffi_closure_OFS_V + 8*16) +#define ffi_closure_FS (ffi_closure_OFS_RVALUE + 32 + 8) + +/* The location of rvalue within the red zone after deallocating the frame. */ +#define ffi_closure_RED_RVALUE (ffi_closure_OFS_RVALUE - ffi_closure_FS) + + .balign 2 + .globl C(ffi_closure_unix64_sse) + FFI_HIDDEN(C(ffi_closure_unix64_sse)) + +C(ffi_closure_unix64_sse): +L(UW5): + subq $ffi_closure_FS, %rsp +L(UW6): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ + + movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp) + movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp) + movdqa %xmm2, ffi_closure_OFS_V+0x20(%rsp) + movdqa %xmm3, ffi_closure_OFS_V+0x30(%rsp) + movdqa %xmm4, ffi_closure_OFS_V+0x40(%rsp) + movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp) + movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp) + movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp) + jmp L(sse_entry1) + +L(UW7): +ENDF(C(ffi_closure_unix64_sse)) + + .balign 2 + .globl C(ffi_closure_unix64) + FFI_HIDDEN(C(ffi_closure_unix64)) + +C(ffi_closure_unix64): +L(UW8): + subq $ffi_closure_FS, %rsp +L(UW9): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ +L(sse_entry1): + movq %rdi, ffi_closure_OFS_G+0x00(%rsp) + movq %rsi, ffi_closure_OFS_G+0x08(%rsp) + movq %rdx, ffi_closure_OFS_G+0x10(%rsp) + movq %rcx, ffi_closure_OFS_G+0x18(%rsp) + movq %r8, ffi_closure_OFS_G+0x20(%rsp) + movq %r9, ffi_closure_OFS_G+0x28(%rsp) + +#ifdef __ILP32__ + movl FFI_TRAMPOLINE_SIZE(%r10), %edi /* Load cif */ + movl FFI_TRAMPOLINE_SIZE+4(%r10), %esi /* Load fun */ + movl FFI_TRAMPOLINE_SIZE+8(%r10), %edx /* Load user_data */ +#else + movq FFI_TRAMPOLINE_SIZE(%r10), %rdi /* Load cif */ + movq FFI_TRAMPOLINE_SIZE+8(%r10), %rsi /* Load fun */ + movq FFI_TRAMPOLINE_SIZE+16(%r10), %rdx /* Load user_data */ +#endif +L(do_closure): + leaq ffi_closure_OFS_RVALUE(%rsp), %rcx /* Load rvalue */ + movq %rsp, %r8 /* Load reg_args */ + leaq ffi_closure_FS+8(%rsp), %r9 /* Load argp */ + call PLT(C(ffi_closure_unix64_inner)) + /* Deallocate stack frame early; return value is now in redzone. */ - addq $ffi_closure_FS, %rsp -L(UW10): - /* cfi_adjust_cfa_offset(-ffi_closure_FS) */ + addq $ffi_closure_FS, %rsp +L(UW10): + /* cfi_adjust_cfa_offset(-ffi_closure_FS) */ /* The first byte of the return value contains the FFI_TYPE. 
*/ - cmpb $UNIX64_RET_LAST, %al + cmpb $UNIX64_RET_LAST, %al movzbl %al, %r10d - leaq L(load_table)(%rip), %r11 - ja L(la) - leaq (%r11, %r10, 8), %r10 - leaq ffi_closure_RED_RVALUE(%rsp), %rsi + leaq L(load_table)(%rip), %r11 + ja L(la) + leaq (%r11, %r10, 8), %r10 + leaq ffi_closure_RED_RVALUE(%rsp), %rsi jmp *%r10 - .balign 8 -L(load_table): -E(L(load_table), UNIX64_RET_VOID) + .balign 8 +L(load_table): +E(L(load_table), UNIX64_RET_VOID) ret -E(L(load_table), UNIX64_RET_UINT8) - movzbl (%rsi), %eax +E(L(load_table), UNIX64_RET_UINT8) + movzbl (%rsi), %eax ret -E(L(load_table), UNIX64_RET_UINT16) - movzwl (%rsi), %eax +E(L(load_table), UNIX64_RET_UINT16) + movzwl (%rsi), %eax ret -E(L(load_table), UNIX64_RET_UINT32) - movl (%rsi), %eax +E(L(load_table), UNIX64_RET_UINT32) + movl (%rsi), %eax ret -E(L(load_table), UNIX64_RET_SINT8) - movsbl (%rsi), %eax +E(L(load_table), UNIX64_RET_SINT8) + movsbl (%rsi), %eax ret -E(L(load_table), UNIX64_RET_SINT16) - movswl (%rsi), %eax +E(L(load_table), UNIX64_RET_SINT16) + movswl (%rsi), %eax ret -E(L(load_table), UNIX64_RET_SINT32) - movl (%rsi), %eax +E(L(load_table), UNIX64_RET_SINT32) + movl (%rsi), %eax ret -E(L(load_table), UNIX64_RET_INT64) - movq (%rsi), %rax +E(L(load_table), UNIX64_RET_INT64) + movq (%rsi), %rax ret -E(L(load_table), UNIX64_RET_XMM32) - movd (%rsi), %xmm0 +E(L(load_table), UNIX64_RET_XMM32) + movd (%rsi), %xmm0 ret -E(L(load_table), UNIX64_RET_XMM64) - movq (%rsi), %xmm0 - ret -E(L(load_table), UNIX64_RET_X87) - fldt (%rsi) - ret -E(L(load_table), UNIX64_RET_X87_2) - fldt 16(%rsi) - fldt (%rsi) - ret -E(L(load_table), UNIX64_RET_ST_XMM0_RAX) - movq 8(%rsi), %rax - jmp L(l3) -E(L(load_table), UNIX64_RET_ST_RAX_XMM0) - movq 8(%rsi), %xmm0 - jmp L(l2) -E(L(load_table), UNIX64_RET_ST_XMM0_XMM1) - movq 8(%rsi), %xmm1 - jmp L(l3) -E(L(load_table), UNIX64_RET_ST_RAX_RDX) - movq 8(%rsi), %rdx -L(l2): - movq (%rsi), %rax - ret - .balign 8 -L(l3): - movq (%rsi), %xmm0 - ret - -L(la): call PLT(C(abort)) - -L(UW11): -ENDF(C(ffi_closure_unix64)) - - .balign 2 - .globl C(ffi_go_closure_unix64_sse) - FFI_HIDDEN(C(ffi_go_closure_unix64_sse)) - -C(ffi_go_closure_unix64_sse): -L(UW12): - subq $ffi_closure_FS, %rsp -L(UW13): - /* cfi_adjust_cfa_offset(ffi_closure_FS) */ - - movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp) - movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp) - movdqa %xmm2, ffi_closure_OFS_V+0x20(%rsp) - movdqa %xmm3, ffi_closure_OFS_V+0x30(%rsp) - movdqa %xmm4, ffi_closure_OFS_V+0x40(%rsp) - movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp) - movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp) - movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp) - jmp L(sse_entry2) - -L(UW14): -ENDF(C(ffi_go_closure_unix64_sse)) - - .balign 2 - .globl C(ffi_go_closure_unix64) - FFI_HIDDEN(C(ffi_go_closure_unix64)) - -C(ffi_go_closure_unix64): -L(UW15): - subq $ffi_closure_FS, %rsp -L(UW16): - /* cfi_adjust_cfa_offset(ffi_closure_FS) */ -L(sse_entry2): - movq %rdi, ffi_closure_OFS_G+0x00(%rsp) - movq %rsi, ffi_closure_OFS_G+0x08(%rsp) - movq %rdx, ffi_closure_OFS_G+0x10(%rsp) - movq %rcx, ffi_closure_OFS_G+0x18(%rsp) - movq %r8, ffi_closure_OFS_G+0x20(%rsp) - movq %r9, ffi_closure_OFS_G+0x28(%rsp) - -#ifdef __ILP32__ - movl 4(%r10), %edi /* Load cif */ - movl 8(%r10), %esi /* Load fun */ - movl %r10d, %edx /* Load closure (user_data) */ +E(L(load_table), UNIX64_RET_XMM64) + movq (%rsi), %xmm0 + ret +E(L(load_table), UNIX64_RET_X87) + fldt (%rsi) + ret +E(L(load_table), UNIX64_RET_X87_2) + fldt 16(%rsi) + fldt (%rsi) + ret +E(L(load_table), UNIX64_RET_ST_XMM0_RAX) + movq 8(%rsi), 
%rax + jmp L(l3) +E(L(load_table), UNIX64_RET_ST_RAX_XMM0) + movq 8(%rsi), %xmm0 + jmp L(l2) +E(L(load_table), UNIX64_RET_ST_XMM0_XMM1) + movq 8(%rsi), %xmm1 + jmp L(l3) +E(L(load_table), UNIX64_RET_ST_RAX_RDX) + movq 8(%rsi), %rdx +L(l2): + movq (%rsi), %rax + ret + .balign 8 +L(l3): + movq (%rsi), %xmm0 + ret + +L(la): call PLT(C(abort)) + +L(UW11): +ENDF(C(ffi_closure_unix64)) + + .balign 2 + .globl C(ffi_go_closure_unix64_sse) + FFI_HIDDEN(C(ffi_go_closure_unix64_sse)) + +C(ffi_go_closure_unix64_sse): +L(UW12): + subq $ffi_closure_FS, %rsp +L(UW13): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ + + movdqa %xmm0, ffi_closure_OFS_V+0x00(%rsp) + movdqa %xmm1, ffi_closure_OFS_V+0x10(%rsp) + movdqa %xmm2, ffi_closure_OFS_V+0x20(%rsp) + movdqa %xmm3, ffi_closure_OFS_V+0x30(%rsp) + movdqa %xmm4, ffi_closure_OFS_V+0x40(%rsp) + movdqa %xmm5, ffi_closure_OFS_V+0x50(%rsp) + movdqa %xmm6, ffi_closure_OFS_V+0x60(%rsp) + movdqa %xmm7, ffi_closure_OFS_V+0x70(%rsp) + jmp L(sse_entry2) + +L(UW14): +ENDF(C(ffi_go_closure_unix64_sse)) + + .balign 2 + .globl C(ffi_go_closure_unix64) + FFI_HIDDEN(C(ffi_go_closure_unix64)) + +C(ffi_go_closure_unix64): +L(UW15): + subq $ffi_closure_FS, %rsp +L(UW16): + /* cfi_adjust_cfa_offset(ffi_closure_FS) */ +L(sse_entry2): + movq %rdi, ffi_closure_OFS_G+0x00(%rsp) + movq %rsi, ffi_closure_OFS_G+0x08(%rsp) + movq %rdx, ffi_closure_OFS_G+0x10(%rsp) + movq %rcx, ffi_closure_OFS_G+0x18(%rsp) + movq %r8, ffi_closure_OFS_G+0x20(%rsp) + movq %r9, ffi_closure_OFS_G+0x28(%rsp) + +#ifdef __ILP32__ + movl 4(%r10), %edi /* Load cif */ + movl 8(%r10), %esi /* Load fun */ + movl %r10d, %edx /* Load closure (user_data) */ +#else + movq 8(%r10), %rdi /* Load cif */ + movq 16(%r10), %rsi /* Load fun */ + movq %r10, %rdx /* Load closure (user_data) */ +#endif + jmp L(do_closure) + +L(UW17): +ENDF(C(ffi_go_closure_unix64)) + +/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */ + +#ifdef __APPLE__ +.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support +EHFrame0: +#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) +.section .eh_frame,"a",@unwind #else - movq 8(%r10), %rdi /* Load cif */ - movq 16(%r10), %rsi /* Load fun */ - movq %r10, %rdx /* Load closure (user_data) */ +.section .eh_frame,"a",@progbits #endif - jmp L(do_closure) - -L(UW17): -ENDF(C(ffi_go_closure_unix64)) - -/* Sadly, OSX cctools-as doesn't understand .cfi directives at all. */ - -#ifdef __APPLE__ -.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support -EHFrame0: -#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) -.section .eh_frame,"a",@unwind -#else -.section .eh_frame,"a",@progbits -#endif - -#ifdef HAVE_AS_X86_PCREL -# define PCREL(X) X - . -#else -# define PCREL(X) X@rel -#endif - -/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. */ -#define ADV(N, P) .byte 2, L(N)-L(P) - - .balign 8 -L(CIE): - .set L(set0),L(ECIE)-L(SCIE) - .long L(set0) /* CIE Length */ -L(SCIE): + +#ifdef HAVE_AS_X86_PCREL +# define PCREL(X) X - . +#else +# define PCREL(X) X@rel +#endif + +/* Simplify advancing between labels. Assume DW_CFA_advance_loc1 fits. 
*/ +#define ADV(N, P) .byte 2, L(N)-L(P) + + .balign 8 +L(CIE): + .set L(set0),L(ECIE)-L(SCIE) + .long L(set0) /* CIE Length */ +L(SCIE): .long 0 /* CIE Identifier Tag */ .byte 1 /* CIE Version */ - .ascii "zR\0" /* CIE Augmentation */ - .byte 1 /* CIE Code Alignment Factor */ - .byte 0x78 /* CIE Data Alignment Factor */ + .ascii "zR\0" /* CIE Augmentation */ + .byte 1 /* CIE Code Alignment Factor */ + .byte 0x78 /* CIE Data Alignment Factor */ .byte 0x10 /* CIE RA Column */ - .byte 1 /* Augmentation size */ + .byte 1 /* Augmentation size */ .byte 0x1b /* FDE Encoding (pcrel sdata4) */ - .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp offset 8 */ - .byte 0x80+16, 1 /* DW_CFA_offset, %rip offset 1*-8 */ - .balign 8 -L(ECIE): - - .set L(set1),L(EFDE1)-L(SFDE1) - .long L(set1) /* FDE Length */ -L(SFDE1): - .long L(SFDE1)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW0)) /* Initial location */ - .long L(UW4)-L(UW0) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW1, UW0) - .byte 0xc, 6, 32 /* DW_CFA_def_cfa, %rbp 32 */ - .byte 0x80+6, 2 /* DW_CFA_offset, %rbp 2*-8 */ - ADV(UW2, UW1) + .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp offset 8 */ + .byte 0x80+16, 1 /* DW_CFA_offset, %rip offset 1*-8 */ + .balign 8 +L(ECIE): + + .set L(set1),L(EFDE1)-L(SFDE1) + .long L(set1) /* FDE Length */ +L(SFDE1): + .long L(SFDE1)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW0)) /* Initial location */ + .long L(UW4)-L(UW0) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW1, UW0) + .byte 0xc, 6, 32 /* DW_CFA_def_cfa, %rbp 32 */ + .byte 0x80+6, 2 /* DW_CFA_offset, %rbp 2*-8 */ + ADV(UW2, UW1) .byte 0xa /* DW_CFA_remember_state */ - .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp 8 */ + .byte 0xc, 7, 8 /* DW_CFA_def_cfa, %rsp 8 */ .byte 0xc0+6 /* DW_CFA_restore, %rbp */ - ADV(UW3, UW2) + ADV(UW3, UW2) .byte 0xb /* DW_CFA_restore_state */ - .balign 8 -L(EFDE1): - - .set L(set2),L(EFDE2)-L(SFDE2) - .long L(set2) /* FDE Length */ -L(SFDE2): - .long L(SFDE2)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW5)) /* Initial location */ - .long L(UW7)-L(UW5) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW6, UW5) - .byte 0xe /* DW_CFA_def_cfa_offset */ - .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ - .balign 8 -L(EFDE2): - - .set L(set3),L(EFDE3)-L(SFDE3) - .long L(set3) /* FDE Length */ -L(SFDE3): - .long L(SFDE3)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW8)) /* Initial location */ - .long L(UW11)-L(UW8) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW9, UW8) + .balign 8 +L(EFDE1): + + .set L(set2),L(EFDE2)-L(SFDE2) + .long L(set2) /* FDE Length */ +L(SFDE2): + .long L(SFDE2)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW5)) /* Initial location */ + .long L(UW7)-L(UW5) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW6, UW5) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + .balign 8 +L(EFDE2): + + .set L(set3),L(EFDE3)-L(SFDE3) + .long L(set3) /* FDE Length */ +L(SFDE3): + .long L(SFDE3)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW8)) /* Initial location */ + .long L(UW11)-L(UW8) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW9, UW8) + .byte 0xe /* DW_CFA_def_cfa_offset */ + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + ADV(UW10, UW9) + .byte 0xe, 8 /* DW_CFA_def_cfa_offset 8 */ +L(EFDE3): + + .set L(set4),L(EFDE4)-L(SFDE4) + .long L(set4) /* FDE Length */ +L(SFDE4): + .long L(SFDE4)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW12)) /* Initial 
location */ + .long L(UW14)-L(UW12) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW13, UW12) .byte 0xe /* DW_CFA_def_cfa_offset */ - .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ - ADV(UW10, UW9) - .byte 0xe, 8 /* DW_CFA_def_cfa_offset 8 */ -L(EFDE3): - - .set L(set4),L(EFDE4)-L(SFDE4) - .long L(set4) /* FDE Length */ -L(SFDE4): - .long L(SFDE4)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW12)) /* Initial location */ - .long L(UW14)-L(UW12) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW13, UW12) + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + .balign 8 +L(EFDE4): + + .set L(set5),L(EFDE5)-L(SFDE5) + .long L(set5) /* FDE Length */ +L(SFDE5): + .long L(SFDE5)-L(CIE) /* FDE CIE offset */ + .long PCREL(L(UW15)) /* Initial location */ + .long L(UW17)-L(UW15) /* Address range */ + .byte 0 /* Augmentation size */ + ADV(UW16, UW15) .byte 0xe /* DW_CFA_def_cfa_offset */ - .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ - .balign 8 -L(EFDE4): - - .set L(set5),L(EFDE5)-L(SFDE5) - .long L(set5) /* FDE Length */ -L(SFDE5): - .long L(SFDE5)-L(CIE) /* FDE CIE offset */ - .long PCREL(L(UW15)) /* Initial location */ - .long L(UW17)-L(UW15) /* Address range */ - .byte 0 /* Augmentation size */ - ADV(UW16, UW15) - .byte 0xe /* DW_CFA_def_cfa_offset */ - .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ - .balign 8 -L(EFDE5): -#ifdef __APPLE__ - .subsections_via_symbols - .section __LD,__compact_unwind,regular,debug - - /* compact unwind for ffi_call_unix64 */ - .quad C(ffi_call_unix64) - .set L1,L(UW4)-L(UW0) - .long L1 - .long 0x04000000 /* use dwarf unwind info */ - .quad 0 - .quad 0 - - /* compact unwind for ffi_closure_unix64_sse */ - .quad C(ffi_closure_unix64_sse) - .set L2,L(UW7)-L(UW5) - .long L2 - .long 0x04000000 /* use dwarf unwind info */ - .quad 0 - .quad 0 - - /* compact unwind for ffi_closure_unix64 */ - .quad C(ffi_closure_unix64) - .set L3,L(UW11)-L(UW8) - .long L3 - .long 0x04000000 /* use dwarf unwind info */ - .quad 0 - .quad 0 - - /* compact unwind for ffi_go_closure_unix64_sse */ - .quad C(ffi_go_closure_unix64_sse) - .set L4,L(UW14)-L(UW12) - .long L4 - .long 0x04000000 /* use dwarf unwind info */ - .quad 0 - .quad 0 - - /* compact unwind for ffi_go_closure_unix64 */ - .quad C(ffi_go_closure_unix64) - .set L5,L(UW17)-L(UW15) - .long L5 - .long 0x04000000 /* use dwarf unwind info */ - .quad 0 - .quad 0 -#endif - + .byte ffi_closure_FS + 8, 1 /* uleb128, assuming 128 <= FS < 255 */ + .balign 8 +L(EFDE5): +#ifdef __APPLE__ + .subsections_via_symbols + .section __LD,__compact_unwind,regular,debug + + /* compact unwind for ffi_call_unix64 */ + .quad C(ffi_call_unix64) + .set L1,L(UW4)-L(UW0) + .long L1 + .long 0x04000000 /* use dwarf unwind info */ + .quad 0 + .quad 0 + + /* compact unwind for ffi_closure_unix64_sse */ + .quad C(ffi_closure_unix64_sse) + .set L2,L(UW7)-L(UW5) + .long L2 + .long 0x04000000 /* use dwarf unwind info */ + .quad 0 + .quad 0 + + /* compact unwind for ffi_closure_unix64 */ + .quad C(ffi_closure_unix64) + .set L3,L(UW11)-L(UW8) + .long L3 + .long 0x04000000 /* use dwarf unwind info */ + .quad 0 + .quad 0 + + /* compact unwind for ffi_go_closure_unix64_sse */ + .quad C(ffi_go_closure_unix64_sse) + .set L4,L(UW14)-L(UW12) + .long L4 + .long 0x04000000 /* use dwarf unwind info */ + .quad 0 + .quad 0 + + /* compact unwind for ffi_go_closure_unix64 */ + .quad C(ffi_go_closure_unix64) + .set L5,L(UW17)-L(UW15) + .long L5 + .long 0x04000000 /* use dwarf 
unwind info */ + .quad 0 + .quad 0 +#endif + #endif /* __x86_64__ */ #if defined __ELF__ && defined __linux__ .section .note.GNU-stack,"",@progbits diff --git a/contrib/restricted/libffi/src/x86/win64.S b/contrib/restricted/libffi/src/x86/win64.S index 13b89acbdd..2c334c82f9 100644 --- a/contrib/restricted/libffi/src/x86/win64.S +++ b/contrib/restricted/libffi/src/x86/win64.S @@ -1,237 +1,237 @@ -#ifdef __x86_64__ +#ifdef __x86_64__ #define LIBFFI_ASM #include <fficonfig.h> #include <ffi.h> -#include <ffi_cfi.h> -#include "asmnames.h" - -#if defined(HAVE_AS_CFI_PSEUDO_OP) - .cfi_sections .debug_frame -#endif - -#ifdef X86_WIN64 -#define SEH(...) __VA_ARGS__ -#define arg0 %rcx -#define arg1 %rdx -#define arg2 %r8 -#define arg3 %r9 +#include <ffi_cfi.h> +#include "asmnames.h" + +#if defined(HAVE_AS_CFI_PSEUDO_OP) + .cfi_sections .debug_frame +#endif + +#ifdef X86_WIN64 +#define SEH(...) __VA_ARGS__ +#define arg0 %rcx +#define arg1 %rdx +#define arg2 %r8 +#define arg3 %r9 #else -#define SEH(...) -#define arg0 %rdi -#define arg1 %rsi -#define arg2 %rdx -#define arg3 %rcx -#endif - -/* This macro allows the safe creation of jump tables without an - actual table. The entry points into the table are all 8 bytes. - The use of ORG asserts that we're at the correct location. */ -/* ??? The clang assembler doesn't handle .org with symbolic expressions. */ -#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) -# define E(BASE, X) .balign 8 +#define SEH(...) +#define arg0 %rdi +#define arg1 %rsi +#define arg2 %rdx +#define arg3 %rcx +#endif + +/* This macro allows the safe creation of jump tables without an + actual table. The entry points into the table are all 8 bytes. + The use of ORG asserts that we're at the correct location. */ +/* ??? The clang assembler doesn't handle .org with symbolic expressions. */ +#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) +# define E(BASE, X) .balign 8 #else -# define E(BASE, X) .balign 8; .org BASE + X * 8 +# define E(BASE, X) .balign 8; .org BASE + X * 8 #endif - .text - -/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10) - - Bit o trickiness here -- FRAME is the base of the stack frame - for this function. This has been allocated by ffi_call. We also - deallocate some of the stack that has been alloca'd. */ - - .align 8 - .globl C(ffi_call_win64) - FFI_HIDDEN(C(ffi_call_win64)) - - SEH(.seh_proc ffi_call_win64) -C(ffi_call_win64): - cfi_startproc - /* Set up the local stack frame and install it in rbp/rsp. */ - movq (%rsp), %rax - movq %rbp, (arg1) - movq %rax, 8(arg1) - movq arg1, %rbp - cfi_def_cfa(%rbp, 16) - cfi_rel_offset(%rbp, 0) - SEH(.seh_pushreg %rbp) - SEH(.seh_setframe %rbp, 0) - SEH(.seh_endprologue) - movq arg0, %rsp - - movq arg2, %r10 - - /* Load all slots into both general and xmm registers. */ - movq (%rsp), %rcx - movsd (%rsp), %xmm0 - movq 8(%rsp), %rdx - movsd 8(%rsp), %xmm1 - movq 16(%rsp), %r8 - movsd 16(%rsp), %xmm2 - movq 24(%rsp), %r9 - movsd 24(%rsp), %xmm3 - - call *16(%rbp) - - movl 24(%rbp), %ecx - movq 32(%rbp), %r8 - leaq 0f(%rip), %r10 - cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx - leaq (%r10, %rcx, 8), %r10 - ja 99f - jmp *%r10 - -/* Below, we're space constrained most of the time. Thus we eschew the - modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). 
*/ -.macro epilogue - leaveq - cfi_remember_state - cfi_def_cfa(%rsp, 8) - cfi_restore(%rbp) - ret - cfi_restore_state -.endm - - .align 8 -0: -E(0b, FFI_TYPE_VOID) - epilogue -E(0b, FFI_TYPE_INT) - movslq %eax, %rax - movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_FLOAT) - movss %xmm0, (%r8) - epilogue -E(0b, FFI_TYPE_DOUBLE) - movsd %xmm0, (%r8) - epilogue -E(0b, FFI_TYPE_LONGDOUBLE) - call PLT(C(abort)) -E(0b, FFI_TYPE_UINT8) - movzbl %al, %eax - movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_SINT8) - movsbq %al, %rax - jmp 98f -E(0b, FFI_TYPE_UINT16) - movzwl %ax, %eax - movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_SINT16) - movswq %ax, %rax - jmp 98f -E(0b, FFI_TYPE_UINT32) - movl %eax, %eax - movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_SINT32) - movslq %eax, %rax - movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_UINT64) -98: movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_SINT64) - movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_STRUCT) - epilogue -E(0b, FFI_TYPE_POINTER) - movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_COMPLEX) - call PLT(C(abort)) -E(0b, FFI_TYPE_SMALL_STRUCT_1B) - movb %al, (%r8) - epilogue -E(0b, FFI_TYPE_SMALL_STRUCT_2B) - movw %ax, (%r8) - epilogue -E(0b, FFI_TYPE_SMALL_STRUCT_4B) - movl %eax, (%r8) - epilogue - - .align 8 -99: call PLT(C(abort)) - - epilogue - - cfi_endproc - SEH(.seh_endproc) - - -/* 32 bytes of outgoing register stack space, 8 bytes of alignment, - 16 bytes of result, 32 bytes of xmm registers. */ -#define ffi_clo_FS (32+8+16+32) -#define ffi_clo_OFF_R (32+8) -#define ffi_clo_OFF_X (32+8+16) - - .align 8 - .globl C(ffi_go_closure_win64) - FFI_HIDDEN(C(ffi_go_closure_win64)) - - SEH(.seh_proc ffi_go_closure_win64) -C(ffi_go_closure_win64): - cfi_startproc - /* Save all integer arguments into the incoming reg stack space. */ - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %r8, 24(%rsp) - movq %r9, 32(%rsp) - - movq 8(%r10), %rcx /* load cif */ - movq 16(%r10), %rdx /* load fun */ - movq %r10, %r8 /* closure is user_data */ - jmp 0f - cfi_endproc - SEH(.seh_endproc) - - .align 8 - .globl C(ffi_closure_win64) - FFI_HIDDEN(C(ffi_closure_win64)) - - SEH(.seh_proc ffi_closure_win64) -C(ffi_closure_win64): - cfi_startproc - /* Save all integer arguments into the incoming reg stack space. */ - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %r8, 24(%rsp) - movq %r9, 32(%rsp) - - movq FFI_TRAMPOLINE_SIZE(%r10), %rcx /* load cif */ - movq FFI_TRAMPOLINE_SIZE+8(%r10), %rdx /* load fun */ - movq FFI_TRAMPOLINE_SIZE+16(%r10), %r8 /* load user_data */ -0: - subq $ffi_clo_FS, %rsp - cfi_adjust_cfa_offset(ffi_clo_FS) - SEH(.seh_stackalloc ffi_clo_FS) - SEH(.seh_endprologue) - - /* Save all sse arguments into the stack frame. */ - movsd %xmm0, ffi_clo_OFF_X(%rsp) - movsd %xmm1, ffi_clo_OFF_X+8(%rsp) - movsd %xmm2, ffi_clo_OFF_X+16(%rsp) - movsd %xmm3, ffi_clo_OFF_X+24(%rsp) - - leaq ffi_clo_OFF_R(%rsp), %r9 - call PLT(C(ffi_closure_win64_inner)) - - /* Load the result into both possible result registers. */ - movq ffi_clo_OFF_R(%rsp), %rax - movsd ffi_clo_OFF_R(%rsp), %xmm0 - - addq $ffi_clo_FS, %rsp - cfi_adjust_cfa_offset(-ffi_clo_FS) - ret - - cfi_endproc - SEH(.seh_endproc) -#endif /* __x86_64__ */ - -#if defined __ELF__ && defined __linux__ - .section .note.GNU-stack,"",@progbits -#endif + .text + +/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10) + + Bit o trickiness here -- FRAME is the base of the stack frame + for this function. This has been allocated by ffi_call. We also + deallocate some of the stack that has been alloca'd. 
*/ + + .align 8 + .globl C(ffi_call_win64) + FFI_HIDDEN(C(ffi_call_win64)) + + SEH(.seh_proc ffi_call_win64) +C(ffi_call_win64): + cfi_startproc + /* Set up the local stack frame and install it in rbp/rsp. */ + movq (%rsp), %rax + movq %rbp, (arg1) + movq %rax, 8(arg1) + movq arg1, %rbp + cfi_def_cfa(%rbp, 16) + cfi_rel_offset(%rbp, 0) + SEH(.seh_pushreg %rbp) + SEH(.seh_setframe %rbp, 0) + SEH(.seh_endprologue) + movq arg0, %rsp + + movq arg2, %r10 + + /* Load all slots into both general and xmm registers. */ + movq (%rsp), %rcx + movsd (%rsp), %xmm0 + movq 8(%rsp), %rdx + movsd 8(%rsp), %xmm1 + movq 16(%rsp), %r8 + movsd 16(%rsp), %xmm2 + movq 24(%rsp), %r9 + movsd 24(%rsp), %xmm3 + + call *16(%rbp) + + movl 24(%rbp), %ecx + movq 32(%rbp), %r8 + leaq 0f(%rip), %r10 + cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx + leaq (%r10, %rcx, 8), %r10 + ja 99f + jmp *%r10 + +/* Below, we're space constrained most of the time. Thus we eschew the + modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */ +.macro epilogue + leaveq + cfi_remember_state + cfi_def_cfa(%rsp, 8) + cfi_restore(%rbp) + ret + cfi_restore_state +.endm + + .align 8 +0: +E(0b, FFI_TYPE_VOID) + epilogue +E(0b, FFI_TYPE_INT) + movslq %eax, %rax + movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_FLOAT) + movss %xmm0, (%r8) + epilogue +E(0b, FFI_TYPE_DOUBLE) + movsd %xmm0, (%r8) + epilogue +E(0b, FFI_TYPE_LONGDOUBLE) + call PLT(C(abort)) +E(0b, FFI_TYPE_UINT8) + movzbl %al, %eax + movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_SINT8) + movsbq %al, %rax + jmp 98f +E(0b, FFI_TYPE_UINT16) + movzwl %ax, %eax + movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_SINT16) + movswq %ax, %rax + jmp 98f +E(0b, FFI_TYPE_UINT32) + movl %eax, %eax + movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_SINT32) + movslq %eax, %rax + movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_UINT64) +98: movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_SINT64) + movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_STRUCT) + epilogue +E(0b, FFI_TYPE_POINTER) + movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_COMPLEX) + call PLT(C(abort)) +E(0b, FFI_TYPE_SMALL_STRUCT_1B) + movb %al, (%r8) + epilogue +E(0b, FFI_TYPE_SMALL_STRUCT_2B) + movw %ax, (%r8) + epilogue +E(0b, FFI_TYPE_SMALL_STRUCT_4B) + movl %eax, (%r8) + epilogue + + .align 8 +99: call PLT(C(abort)) + + epilogue + + cfi_endproc + SEH(.seh_endproc) + + +/* 32 bytes of outgoing register stack space, 8 bytes of alignment, + 16 bytes of result, 32 bytes of xmm registers. */ +#define ffi_clo_FS (32+8+16+32) +#define ffi_clo_OFF_R (32+8) +#define ffi_clo_OFF_X (32+8+16) + + .align 8 + .globl C(ffi_go_closure_win64) + FFI_HIDDEN(C(ffi_go_closure_win64)) + + SEH(.seh_proc ffi_go_closure_win64) +C(ffi_go_closure_win64): + cfi_startproc + /* Save all integer arguments into the incoming reg stack space. */ + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %r8, 24(%rsp) + movq %r9, 32(%rsp) + + movq 8(%r10), %rcx /* load cif */ + movq 16(%r10), %rdx /* load fun */ + movq %r10, %r8 /* closure is user_data */ + jmp 0f + cfi_endproc + SEH(.seh_endproc) + + .align 8 + .globl C(ffi_closure_win64) + FFI_HIDDEN(C(ffi_closure_win64)) + + SEH(.seh_proc ffi_closure_win64) +C(ffi_closure_win64): + cfi_startproc + /* Save all integer arguments into the incoming reg stack space. 
*/ + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %r8, 24(%rsp) + movq %r9, 32(%rsp) + + movq FFI_TRAMPOLINE_SIZE(%r10), %rcx /* load cif */ + movq FFI_TRAMPOLINE_SIZE+8(%r10), %rdx /* load fun */ + movq FFI_TRAMPOLINE_SIZE+16(%r10), %r8 /* load user_data */ +0: + subq $ffi_clo_FS, %rsp + cfi_adjust_cfa_offset(ffi_clo_FS) + SEH(.seh_stackalloc ffi_clo_FS) + SEH(.seh_endprologue) + + /* Save all sse arguments into the stack frame. */ + movsd %xmm0, ffi_clo_OFF_X(%rsp) + movsd %xmm1, ffi_clo_OFF_X+8(%rsp) + movsd %xmm2, ffi_clo_OFF_X+16(%rsp) + movsd %xmm3, ffi_clo_OFF_X+24(%rsp) + + leaq ffi_clo_OFF_R(%rsp), %r9 + call PLT(C(ffi_closure_win64_inner)) + + /* Load the result into both possible result registers. */ + movq ffi_clo_OFF_R(%rsp), %rax + movsd ffi_clo_OFF_R(%rsp), %xmm0 + + addq $ffi_clo_FS, %rsp + cfi_adjust_cfa_offset(-ffi_clo_FS) + ret + + cfi_endproc + SEH(.seh_endproc) +#endif /* __x86_64__ */ + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/contrib/restricted/libffi/src/x86/win64_intel.S b/contrib/restricted/libffi/src/x86/win64_intel.S index fdf3e4aa74..7df78b30e4 100644 --- a/contrib/restricted/libffi/src/x86/win64_intel.S +++ b/contrib/restricted/libffi/src/x86/win64_intel.S @@ -1,237 +1,237 @@ -#define LIBFFI_ASM -#include <fficonfig.h> -#include <ffi.h> -#include <ffi_cfi.h> -#include "asmnames.h" - -#if defined(HAVE_AS_CFI_PSEUDO_OP) - .cfi_sections .debug_frame -#endif - -#ifdef X86_WIN64 -#define SEH(...) __VA_ARGS__ -#define arg0 rcx -#define arg1 rdx -#define arg2 r8 -#define arg3 r9 -#else -#define SEH(...) -#define arg0 rdi -#define arg1 rsi -#define arg2 rdx -#define arg3 rcx -#endif - -/* This macro allows the safe creation of jump tables without an - actual table. The entry points into the table are all 8 bytes. - The use of ORG asserts that we're at the correct location. */ -/* ??? The clang assembler doesn't handle .org with symbolic expressions. */ -#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) -# define E(BASE, X) ALIGN 8 -#else -# define E(BASE, X) ALIGN 8; ORG BASE + X * 8 -#endif - - .CODE - extern PLT(C(abort)):near - extern C(ffi_closure_win64_inner):near - -/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10) - - Bit o trickiness here -- FRAME is the base of the stack frame - for this function. This has been allocated by ffi_call. We also - deallocate some of the stack that has been alloca'd. */ - - ALIGN 8 - PUBLIC C(ffi_call_win64) - - ; SEH(.safesh ffi_call_win64) -C(ffi_call_win64) proc SEH(frame) - cfi_startproc - /* Set up the local stack frame and install it in rbp/rsp. */ - mov RAX, [RSP] ; movq (%rsp), %rax - mov [arg1], RBP ; movq %rbp, (arg1) - mov [arg1 + 8], RAX; movq %rax, 8(arg1) - mov RBP, arg1; movq arg1, %rbp - cfi_def_cfa(rbp, 16) - cfi_rel_offset(rbp, 0) - SEH(.pushreg rbp) - SEH(.setframe rbp, 0) - SEH(.endprolog) - mov RSP, arg0 ; movq arg0, %rsp - - mov R10, arg2 ; movq arg2, %r10 - - /* Load all slots into both general and xmm registers. 
*/ - mov RCX, [RSP] ; movq (%rsp), %rcx - movsd XMM0, qword ptr [RSP] ; movsd (%rsp), %xmm0 - mov RDX, [RSP + 8] ;movq 8(%rsp), %rdx - movsd XMM1, qword ptr [RSP + 8]; movsd 8(%rsp), %xmm1 - mov R8, [RSP + 16] ; movq 16(%rsp), %r8 - movsd XMM2, qword ptr [RSP + 16] ; movsd 16(%rsp), %xmm2 - mov R9, [RSP + 24] ; movq 24(%rsp), %r9 - movsd XMM3, qword ptr [RSP + 24] ;movsd 24(%rsp), %xmm3 - - CALL qword ptr [RBP + 16] ; call *16(%rbp) - - mov ECX, [RBP + 24] ; movl 24(%rbp), %ecx - mov R8, [RBP + 32] ; movq 32(%rbp), %r8 - LEA R10, ffi_call_win64_tab ; leaq 0f(%rip), %r10 - CMP ECX, FFI_TYPE_SMALL_STRUCT_4B ; cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx - LEA R10, [R10 + RCX*8] ; leaq (%r10, %rcx, 8), %r10 - JA L99 ; ja 99f - JMP R10 ; jmp *%r10 - -/* Below, we're space constrained most of the time. Thus we eschew the - modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */ -epilogue macro - LEAVE - cfi_remember_state - cfi_def_cfa(rsp, 8) - cfi_restore(rbp) - RET - cfi_restore_state -endm - - ALIGN 8 -ffi_call_win64_tab LABEL NEAR -E(0b, FFI_TYPE_VOID) - epilogue -E(0b, FFI_TYPE_INT) - movsxd rax, eax ; movslq %eax, %rax - mov qword ptr [r8], rax; movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_FLOAT) - movss dword ptr [r8], xmm0 ; movss %xmm0, (%r8) - epilogue -E(0b, FFI_TYPE_DOUBLE) - movsd qword ptr[r8], xmm0; movsd %xmm0, (%r8) - epilogue -E(0b, FFI_TYPE_LONGDOUBLE) - call PLT(C(abort)) -E(0b, FFI_TYPE_UINT8) - movzx eax, al ;movzbl %al, %eax - mov qword ptr[r8], rax; movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_SINT8) - movsx rax, al ; movsbq %al, %rax - jmp L98 -E(0b, FFI_TYPE_UINT16) - movzx eax, ax ; movzwl %ax, %eax - mov qword ptr[r8], rax; movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_SINT16) - movsx rax, ax; movswq %ax, %rax - jmp L98 -E(0b, FFI_TYPE_UINT32) - mov eax, eax; movl %eax, %eax - mov qword ptr[r8], rax ; movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_SINT32) - movsxd rax, eax; movslq %eax, %rax - mov qword ptr [r8], rax; movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_UINT64) -L98 LABEL near - mov qword ptr [r8], rax ; movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_SINT64) - mov qword ptr [r8], rax;movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_STRUCT) - epilogue -E(0b, FFI_TYPE_POINTER) - mov qword ptr [r8], rax ;movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_COMPLEX) - call PLT(C(abort)) -E(0b, FFI_TYPE_SMALL_STRUCT_1B) - mov byte ptr [r8], al ; movb %al, (%r8) - epilogue -E(0b, FFI_TYPE_SMALL_STRUCT_2B) - mov word ptr [r8], ax ; movw %ax, (%r8) - epilogue -E(0b, FFI_TYPE_SMALL_STRUCT_4B) - mov dword ptr [r8], eax ; movl %eax, (%r8) - epilogue - - align 8 -L99 LABEL near - call PLT(C(abort)) - - epilogue - - cfi_endproc - C(ffi_call_win64) endp - - -/* 32 bytes of outgoing register stack space, 8 bytes of alignment, - 16 bytes of result, 32 bytes of xmm registers. */ -#define ffi_clo_FS (32+8+16+32) -#define ffi_clo_OFF_R (32+8) -#define ffi_clo_OFF_X (32+8+16) - - align 8 - PUBLIC C(ffi_go_closure_win64) - -C(ffi_go_closure_win64) proc - cfi_startproc - /* Save all integer arguments into the incoming reg stack space. 
*/ - mov qword ptr [rsp + 8], rcx; movq %rcx, 8(%rsp) - mov qword ptr [rsp + 16], rdx; movq %rdx, 16(%rsp) - mov qword ptr [rsp + 24], r8; movq %r8, 24(%rsp) - mov qword ptr [rsp + 32], r9 ;movq %r9, 32(%rsp) - - mov rcx, qword ptr [r10 + 8]; movq 8(%r10), %rcx /* load cif */ - mov rdx, qword ptr [r10 + 16]; movq 16(%r10), %rdx /* load fun */ - mov r8, r10 ; movq %r10, %r8 /* closure is user_data */ - jmp ffi_closure_win64_2 - cfi_endproc - C(ffi_go_closure_win64) endp - - align 8 - -PUBLIC C(ffi_closure_win64) -C(ffi_closure_win64) PROC FRAME - cfi_startproc - /* Save all integer arguments into the incoming reg stack space. */ - mov qword ptr [rsp + 8], rcx; movq %rcx, 8(%rsp) - mov qword ptr [rsp + 16], rdx; movq %rdx, 16(%rsp) - mov qword ptr [rsp + 24], r8; movq %r8, 24(%rsp) - mov qword ptr [rsp + 32], r9; movq %r9, 32(%rsp) - - mov rcx, qword ptr [FFI_TRAMPOLINE_SIZE + r10] ;movq FFI_TRAMPOLINE_SIZE(%r10), %rcx /* load cif */ - mov rdx, qword ptr [FFI_TRAMPOLINE_SIZE + 8 + r10] ; movq FFI_TRAMPOLINE_SIZE+8(%r10), %rdx /* load fun */ - mov r8, qword ptr [FFI_TRAMPOLINE_SIZE+16+r10] ;movq FFI_TRAMPOLINE_SIZE+16(%r10), %r8 /* load user_data */ -ffi_closure_win64_2 LABEL near - sub rsp, ffi_clo_FS ;subq $ffi_clo_FS, %rsp - cfi_adjust_cfa_offset(ffi_clo_FS) - SEH(.allocstack ffi_clo_FS) - SEH(.endprolog) - - /* Save all sse arguments into the stack frame. */ - movsd qword ptr [ffi_clo_OFF_X + rsp], xmm0 ; movsd %xmm0, ffi_clo_OFF_X(%rsp) - movsd qword ptr [ffi_clo_OFF_X+8+rsp], xmm1 ; movsd %xmm1, ffi_clo_OFF_X+8(%rsp) - movsd qword ptr [ffi_clo_OFF_X+16+rsp], xmm2 ; movsd %xmm2, ffi_clo_OFF_X+16(%rsp) - movsd qword ptr [ffi_clo_OFF_X+24+rsp], xmm3 ; movsd %xmm3, ffi_clo_OFF_X+24(%rsp) - - lea r9, [ffi_clo_OFF_R + rsp] ; leaq ffi_clo_OFF_R(%rsp), %r9 - call C(ffi_closure_win64_inner) - - /* Load the result into both possible result registers. */ - - mov rax, qword ptr [ffi_clo_OFF_R + rsp] ;movq ffi_clo_OFF_R(%rsp), %rax - movsd xmm0, qword ptr [rsp + ffi_clo_OFF_R] ;movsd ffi_clo_OFF_R(%rsp), %xmm0 - - add rsp, ffi_clo_FS ;addq $ffi_clo_FS, %rsp - cfi_adjust_cfa_offset(-ffi_clo_FS) - ret - - cfi_endproc - C(ffi_closure_win64) endp - -#if defined __ELF__ && defined __linux__ - .section .note.GNU-stack,"",@progbits -#endif -_text ends -end
\ No newline at end of file +#define LIBFFI_ASM +#include <fficonfig.h> +#include <ffi.h> +#include <ffi_cfi.h> +#include "asmnames.h" + +#if defined(HAVE_AS_CFI_PSEUDO_OP) + .cfi_sections .debug_frame +#endif + +#ifdef X86_WIN64 +#define SEH(...) __VA_ARGS__ +#define arg0 rcx +#define arg1 rdx +#define arg2 r8 +#define arg3 r9 +#else +#define SEH(...) +#define arg0 rdi +#define arg1 rsi +#define arg2 rdx +#define arg3 rcx +#endif + +/* This macro allows the safe creation of jump tables without an + actual table. The entry points into the table are all 8 bytes. + The use of ORG asserts that we're at the correct location. */ +/* ??? The clang assembler doesn't handle .org with symbolic expressions. */ +#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) +# define E(BASE, X) ALIGN 8 +#else +# define E(BASE, X) ALIGN 8; ORG BASE + X * 8 +#endif + + .CODE + extern PLT(C(abort)):near + extern C(ffi_closure_win64_inner):near + +/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10) + + Bit o trickiness here -- FRAME is the base of the stack frame + for this function. This has been allocated by ffi_call. We also + deallocate some of the stack that has been alloca'd. */ + + ALIGN 8 + PUBLIC C(ffi_call_win64) + + ; SEH(.safesh ffi_call_win64) +C(ffi_call_win64) proc SEH(frame) + cfi_startproc + /* Set up the local stack frame and install it in rbp/rsp. */ + mov RAX, [RSP] ; movq (%rsp), %rax + mov [arg1], RBP ; movq %rbp, (arg1) + mov [arg1 + 8], RAX; movq %rax, 8(arg1) + mov RBP, arg1; movq arg1, %rbp + cfi_def_cfa(rbp, 16) + cfi_rel_offset(rbp, 0) + SEH(.pushreg rbp) + SEH(.setframe rbp, 0) + SEH(.endprolog) + mov RSP, arg0 ; movq arg0, %rsp + + mov R10, arg2 ; movq arg2, %r10 + + /* Load all slots into both general and xmm registers. */ + mov RCX, [RSP] ; movq (%rsp), %rcx + movsd XMM0, qword ptr [RSP] ; movsd (%rsp), %xmm0 + mov RDX, [RSP + 8] ;movq 8(%rsp), %rdx + movsd XMM1, qword ptr [RSP + 8]; movsd 8(%rsp), %xmm1 + mov R8, [RSP + 16] ; movq 16(%rsp), %r8 + movsd XMM2, qword ptr [RSP + 16] ; movsd 16(%rsp), %xmm2 + mov R9, [RSP + 24] ; movq 24(%rsp), %r9 + movsd XMM3, qword ptr [RSP + 24] ;movsd 24(%rsp), %xmm3 + + CALL qword ptr [RBP + 16] ; call *16(%rbp) + + mov ECX, [RBP + 24] ; movl 24(%rbp), %ecx + mov R8, [RBP + 32] ; movq 32(%rbp), %r8 + LEA R10, ffi_call_win64_tab ; leaq 0f(%rip), %r10 + CMP ECX, FFI_TYPE_SMALL_STRUCT_4B ; cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx + LEA R10, [R10 + RCX*8] ; leaq (%r10, %rcx, 8), %r10 + JA L99 ; ja 99f + JMP R10 ; jmp *%r10 + +/* Below, we're space constrained most of the time. Thus we eschew the + modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). 
*/ +epilogue macro + LEAVE + cfi_remember_state + cfi_def_cfa(rsp, 8) + cfi_restore(rbp) + RET + cfi_restore_state +endm + + ALIGN 8 +ffi_call_win64_tab LABEL NEAR +E(0b, FFI_TYPE_VOID) + epilogue +E(0b, FFI_TYPE_INT) + movsxd rax, eax ; movslq %eax, %rax + mov qword ptr [r8], rax; movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_FLOAT) + movss dword ptr [r8], xmm0 ; movss %xmm0, (%r8) + epilogue +E(0b, FFI_TYPE_DOUBLE) + movsd qword ptr[r8], xmm0; movsd %xmm0, (%r8) + epilogue +E(0b, FFI_TYPE_LONGDOUBLE) + call PLT(C(abort)) +E(0b, FFI_TYPE_UINT8) + movzx eax, al ;movzbl %al, %eax + mov qword ptr[r8], rax; movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_SINT8) + movsx rax, al ; movsbq %al, %rax + jmp L98 +E(0b, FFI_TYPE_UINT16) + movzx eax, ax ; movzwl %ax, %eax + mov qword ptr[r8], rax; movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_SINT16) + movsx rax, ax; movswq %ax, %rax + jmp L98 +E(0b, FFI_TYPE_UINT32) + mov eax, eax; movl %eax, %eax + mov qword ptr[r8], rax ; movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_SINT32) + movsxd rax, eax; movslq %eax, %rax + mov qword ptr [r8], rax; movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_UINT64) +L98 LABEL near + mov qword ptr [r8], rax ; movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_SINT64) + mov qword ptr [r8], rax;movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_STRUCT) + epilogue +E(0b, FFI_TYPE_POINTER) + mov qword ptr [r8], rax ;movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_COMPLEX) + call PLT(C(abort)) +E(0b, FFI_TYPE_SMALL_STRUCT_1B) + mov byte ptr [r8], al ; movb %al, (%r8) + epilogue +E(0b, FFI_TYPE_SMALL_STRUCT_2B) + mov word ptr [r8], ax ; movw %ax, (%r8) + epilogue +E(0b, FFI_TYPE_SMALL_STRUCT_4B) + mov dword ptr [r8], eax ; movl %eax, (%r8) + epilogue + + align 8 +L99 LABEL near + call PLT(C(abort)) + + epilogue + + cfi_endproc + C(ffi_call_win64) endp + + +/* 32 bytes of outgoing register stack space, 8 bytes of alignment, + 16 bytes of result, 32 bytes of xmm registers. */ +#define ffi_clo_FS (32+8+16+32) +#define ffi_clo_OFF_R (32+8) +#define ffi_clo_OFF_X (32+8+16) + + align 8 + PUBLIC C(ffi_go_closure_win64) + +C(ffi_go_closure_win64) proc + cfi_startproc + /* Save all integer arguments into the incoming reg stack space. */ + mov qword ptr [rsp + 8], rcx; movq %rcx, 8(%rsp) + mov qword ptr [rsp + 16], rdx; movq %rdx, 16(%rsp) + mov qword ptr [rsp + 24], r8; movq %r8, 24(%rsp) + mov qword ptr [rsp + 32], r9 ;movq %r9, 32(%rsp) + + mov rcx, qword ptr [r10 + 8]; movq 8(%r10), %rcx /* load cif */ + mov rdx, qword ptr [r10 + 16]; movq 16(%r10), %rdx /* load fun */ + mov r8, r10 ; movq %r10, %r8 /* closure is user_data */ + jmp ffi_closure_win64_2 + cfi_endproc + C(ffi_go_closure_win64) endp + + align 8 + +PUBLIC C(ffi_closure_win64) +C(ffi_closure_win64) PROC FRAME + cfi_startproc + /* Save all integer arguments into the incoming reg stack space. 
*/ + mov qword ptr [rsp + 8], rcx; movq %rcx, 8(%rsp) + mov qword ptr [rsp + 16], rdx; movq %rdx, 16(%rsp) + mov qword ptr [rsp + 24], r8; movq %r8, 24(%rsp) + mov qword ptr [rsp + 32], r9; movq %r9, 32(%rsp) + + mov rcx, qword ptr [FFI_TRAMPOLINE_SIZE + r10] ;movq FFI_TRAMPOLINE_SIZE(%r10), %rcx /* load cif */ + mov rdx, qword ptr [FFI_TRAMPOLINE_SIZE + 8 + r10] ; movq FFI_TRAMPOLINE_SIZE+8(%r10), %rdx /* load fun */ + mov r8, qword ptr [FFI_TRAMPOLINE_SIZE+16+r10] ;movq FFI_TRAMPOLINE_SIZE+16(%r10), %r8 /* load user_data */ +ffi_closure_win64_2 LABEL near + sub rsp, ffi_clo_FS ;subq $ffi_clo_FS, %rsp + cfi_adjust_cfa_offset(ffi_clo_FS) + SEH(.allocstack ffi_clo_FS) + SEH(.endprolog) + + /* Save all sse arguments into the stack frame. */ + movsd qword ptr [ffi_clo_OFF_X + rsp], xmm0 ; movsd %xmm0, ffi_clo_OFF_X(%rsp) + movsd qword ptr [ffi_clo_OFF_X+8+rsp], xmm1 ; movsd %xmm1, ffi_clo_OFF_X+8(%rsp) + movsd qword ptr [ffi_clo_OFF_X+16+rsp], xmm2 ; movsd %xmm2, ffi_clo_OFF_X+16(%rsp) + movsd qword ptr [ffi_clo_OFF_X+24+rsp], xmm3 ; movsd %xmm3, ffi_clo_OFF_X+24(%rsp) + + lea r9, [ffi_clo_OFF_R + rsp] ; leaq ffi_clo_OFF_R(%rsp), %r9 + call C(ffi_closure_win64_inner) + + /* Load the result into both possible result registers. */ + + mov rax, qword ptr [ffi_clo_OFF_R + rsp] ;movq ffi_clo_OFF_R(%rsp), %rax + movsd xmm0, qword ptr [rsp + ffi_clo_OFF_R] ;movsd ffi_clo_OFF_R(%rsp), %xmm0 + + add rsp, ffi_clo_FS ;addq $ffi_clo_FS, %rsp + cfi_adjust_cfa_offset(-ffi_clo_FS) + ret + + cfi_endproc + C(ffi_closure_win64) endp + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif +_text ends +end
\ No newline at end of file
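
Editor's note, for readers tracing these trampolines from the C side: the sketch below is illustrative and is not part of this commit. It shows how the `ffi_closure_win64` entry restored above is normally reached through libffi's public closure API: `ffi_prep_closure_loc` installs a trampoline that (on this port) loads the closure address into %r10 and jumps into the assembly, which spills rcx/rdx/r8/r9 and xmm0-xmm3 into the frame and calls `ffi_closure_win64_inner` to dispatch to the user handler. The handler name `add_handler` and its arithmetic are hypothetical; everything else is the standard libffi API.

    /* Minimal sketch (not part of this diff): a libffi closure whose calls
       land in the ffi_closure_win64 trampoline above on x86_64 Windows.
       Compile and link against libffi. */
    #include <ffi.h>
    #include <stdio.h>

    /* Hypothetical user handler.  ffi_closure_win64_inner calls this after
       the assembly has spilled the four integer and four sse argument
       registers into the stack frame.  Integral results narrower than a
       word must be widened to ffi_sarg/ffi_arg per libffi's contract. */
    static void add_handler(ffi_cif *cif, void *ret, void **args, void *user_data)
    {
        (void)cif; (void)user_data;
        *(ffi_sarg *)ret = *(int *)args[0] + *(int *)args[1];
    }

    int main(void)
    {
        ffi_cif cif;
        ffi_type *argtypes[2] = { &ffi_type_sint, &ffi_type_sint };
        void *code;
        ffi_closure *closure = ffi_closure_alloc(sizeof(ffi_closure), &code);

        if (closure
            && ffi_prep_cif(&cif, FFI_DEFAULT_ABI, 2,
                            &ffi_type_sint, argtypes) == FFI_OK
            && ffi_prep_closure_loc(closure, &cif, add_handler,
                                    NULL, code) == FFI_OK)
        {
            /* Calling through 'code' enters the trampoline, which jumps to
               ffi_closure_win64 with the closure address in %r10. */
            int (*fn)(int, int) = (int (*)(int, int))code;
            printf("%d\n", fn(2, 3));   /* prints 5 */
        }
        ffi_closure_free(closure);
        return 0;
    }

This also explains the "load the result into both possible result registers" step at the tail of the closure path above: the handler writes its result once into the 16-byte result slot, and the assembly then copies that slot into both %rax and %xmm0 so the caller can consume whichever register matches the declared return type.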