Restoring authorship annotation for Mikhail Borisov <borisov.mikhail@gmail.com>. Commit 2 of 2.

author: Mikhail Borisov <borisov.mikhail@gmail.com> 2022-02-10 16:45:40 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:45:40 +0300
commit: 5d50718e66d9c037dc587a0211110b7d25a66185 (patch)
tree: e98df59de24d2ef7c77baed9f41e4875a2fef972 /contrib/restricted/libffi/src
parent: a6a92afe03e02795227d2641b49819b687f088f8 (diff)
download: ydb-5d50718e66d9c037dc587a0211110b7d25a66185.tar.gz
43 files changed, 16458 insertions, 16458 deletions
diff --git a/contrib/restricted/libffi/src/aarch64/ffi.c b/contrib/restricted/libffi/src/aarch64/ffi.c
index 84d44ab74a..1ebf43c192 100644
--- a/contrib/restricted/libffi/src/aarch64/ffi.c
+++ b/contrib/restricted/libffi/src/aarch64/ffi.c
@@ -1,1009 +1,1009 @@
-/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. 
- 
-Permission is hereby granted, free of charge, to any person obtaining 
-a copy of this software and associated documentation files (the 
-``Software''), to deal in the Software without restriction, including 
-without limitation the rights to use, copy, modify, merge, publish, 
-distribute, sublicense, and/or sell copies of the Software, and to 
-permit persons to whom the Software is furnished to do so, subject to 
-the following conditions: 
- 
-The above copyright notice and this permission notice shall be 
-included in all copies or substantial portions of the Software. 
- 
-THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */ 
- 
-#if defined(__aarch64__) || defined(__arm64__)|| defined (_M_ARM64) 
-#include <stdio.h> 
-#include <stdlib.h> 
-#include <stdint.h> 
-#include <fficonfig.h> 
-#include <ffi.h> 
-#include <ffi_common.h> 
-#include "internal.h" 
-#ifdef _M_ARM64 
-#include <windows.h> /* FlushInstructionCache */ 
-#endif 
- 
-/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; 
-   all further uses in this file will refer to the 128-bit type.  */ 
-#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE 
-# if FFI_TYPE_LONGDOUBLE != 4 
-#  error FFI_TYPE_LONGDOUBLE out of date 
-# endif 
-#else 
-# undef FFI_TYPE_LONGDOUBLE 
-# define FFI_TYPE_LONGDOUBLE 4 
-#endif 
- 
-union _d 
-{ 
-  UINT64 d; 
-  UINT32 s[2]; 
-}; 
- 
-struct _v 
-{ 
-  union _d d[2] __attribute__((aligned(16))); 
-}; 
- 
-struct call_context 
-{ 
-  struct _v v[N_V_ARG_REG]; 
-  UINT64 x[N_X_ARG_REG]; 
-}; 
- 
-#if FFI_EXEC_TRAMPOLINE_TABLE 
- 
-#ifdef __MACH__ 
-#include <mach/vm_param.h> 
-#endif 
- 
-#else 
- 
-#if defined (__clang__) && defined (__APPLE__) 
-extern void sys_icache_invalidate (void *start, size_t len); 
-#endif 
- 
-static inline void 
-ffi_clear_cache (void *start, void *end) 
-{ 
-#if defined (__clang__) && defined (__APPLE__) 
-  sys_icache_invalidate (start, (char *)end - (char *)start); 
-#elif defined (__GNUC__) 
-  __builtin___clear_cache (start, end); 
-#elif defined (_M_ARM64) 
-  FlushInstructionCache(GetCurrentProcess(), start, (char*)end - (char*)start); 
-#else 
-#error "Missing builtin to flush instruction cache" 
-#endif 
-} 
- 
-#endif 
- 
-/* A subroutine of is_vfp_type.  Given a structure type, return the type code 
-   of the first non-structure element.  Recurse for structure elements. 
-   Return -1 if the structure is in fact empty, i.e. no nested elements.  */ 
- 
-static int 
-is_hfa0 (const ffi_type *ty) 
-{ 
-  ffi_type **elements = ty->elements; 
-  int i, ret = -1; 
- 
-  if (elements != NULL) 
-    for (i = 0; elements[i]; ++i) 
-      { 
-        ret = elements[i]->type; 
-        if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX) 
-          { 
-            ret = is_hfa0 (elements[i]); 
-            if (ret < 0) 
-              continue; 
-          } 
-        break; 
-      } 
- 
-  return ret; 
-} 
- 
-/* A subroutine of is_vfp_type.  Given a structure type, return true if all 
-   of the non-structure elements are the same as CANDIDATE.  */ 
- 
-static int 
-is_hfa1 (const ffi_type *ty, int candidate) 
-{ 
-  ffi_type **elements = ty->elements; 
-  int i; 
- 
-  if (elements != NULL) 
-    for (i = 0; elements[i]; ++i) 
-      { 
-        int t = elements[i]->type; 
-        if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX) 
-          { 
-            if (!is_hfa1 (elements[i], candidate)) 
-              return 0; 
-          } 
-        else if (t != candidate) 
-          return 0; 
-      } 
- 
-  return 1; 
-} 
- 
-/* Determine if TY may be allocated to the FP registers.  This is both an 
-   fp scalar type as well as an homogenous floating point aggregate (HFA). 
-   That is, a structure consisting of 1 to 4 members of all the same type, 
-   where that type is an fp scalar. 
- 
-   Returns non-zero iff TY is an HFA.  The result is the AARCH64_RET_* 
-   constant for the type.  */ 
- 
-static int 
-is_vfp_type (const ffi_type *ty) 
-{ 
-  ffi_type **elements; 
-  int candidate, i; 
-  size_t size, ele_count; 
- 
-  /* Quickest tests first.  */ 
-  candidate = ty->type; 
-  switch (candidate) 
-    { 
-    default: 
-      return 0; 
-    case FFI_TYPE_FLOAT: 
-    case FFI_TYPE_DOUBLE: 
-    case FFI_TYPE_LONGDOUBLE: 
-      ele_count = 1; 
-      goto done; 
-    case FFI_TYPE_COMPLEX: 
-      candidate = ty->elements[0]->type; 
-      switch (candidate) 
-	{ 
-	case FFI_TYPE_FLOAT: 
-	case FFI_TYPE_DOUBLE: 
-	case FFI_TYPE_LONGDOUBLE: 
-	  ele_count = 2; 
-	  goto done; 
-	} 
-      return 0; 
-    case FFI_TYPE_STRUCT: 
-      break; 
-    } 
- 
-  /* No HFA types are smaller than 4 bytes, or larger than 64 bytes.  */ 
-  size = ty->size; 
-  if (size < 4 || size > 64) 
-    return 0; 
- 
-  /* Find the type of the first non-structure member.  */ 
-  elements = ty->elements; 
-  candidate = elements[0]->type; 
-  if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX) 
-    { 
-      for (i = 0; ; ++i) 
-        { 
-          candidate = is_hfa0 (elements[i]); 
-          if (candidate >= 0) 
-            break; 
-        } 
-    } 
- 
-  /* If the first member is not a floating point type, it's not an HFA. 
-     Also quickly re-check the size of the structure.  */ 
-  switch (candidate) 
-    { 
-    case FFI_TYPE_FLOAT: 
-      ele_count = size / sizeof(float); 
-      if (size != ele_count * sizeof(float)) 
-        return 0; 
-      break; 
-    case FFI_TYPE_DOUBLE: 
-      ele_count = size / sizeof(double); 
-      if (size != ele_count * sizeof(double)) 
-        return 0; 
-      break; 
-    case FFI_TYPE_LONGDOUBLE: 
-      ele_count = size / sizeof(long double); 
-      if (size != ele_count * sizeof(long double)) 
-        return 0; 
-      break; 
-    default: 
-      return 0; 
-    } 
-  if (ele_count > 4) 
-    return 0; 
- 
-  /* Finally, make sure that all scalar elements are the same type.  */ 
-  for (i = 0; elements[i]; ++i) 
-    { 
-      int t = elements[i]->type; 
-      if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX) 
-        { 
-          if (!is_hfa1 (elements[i], candidate)) 
-            return 0; 
-        } 
-      else if (t != candidate) 
-        return 0; 
-    } 
- 
-  /* All tests succeeded.  Encode the result.  */ 
- done: 
-  return candidate * 4 + (4 - (int)ele_count); 
-} 
- 
-/* Representation of the procedure call argument marshalling 
-   state. 
- 
-   The terse state variable names match the names used in the AARCH64 
-   PCS. */ 
- 
-struct arg_state 
-{ 
-  unsigned ngrn;                /* Next general-purpose register number. */ 
-  unsigned nsrn;                /* Next vector register number. */ 
-  size_t nsaa;                  /* Next stack offset. */ 
- 
-#if defined (__APPLE__) 
-  unsigned allocating_variadic; 
-#endif 
-}; 
- 
-/* Initialize a procedure call argument marshalling state.  */ 
-static void 
-arg_init (struct arg_state *state) 
-{ 
-  state->ngrn = 0; 
-  state->nsrn = 0; 
-  state->nsaa = 0; 
-#if defined (__APPLE__) 
-  state->allocating_variadic = 0; 
-#endif 
-} 
- 
-/* Allocate an aligned slot on the stack and return a pointer to it.  */ 
-static void * 
-allocate_to_stack (struct arg_state *state, void *stack, 
-		   size_t alignment, size_t size) 
-{ 
-  size_t nsaa = state->nsaa; 
- 
-  /* Round up the NSAA to the larger of 8 or the natural 
-     alignment of the argument's type.  */ 
-#if defined (__APPLE__) 
-  if (state->allocating_variadic && alignment < 8) 
-    alignment = 8; 
-#else 
-  if (alignment < 8) 
-    alignment = 8; 
-#endif 
-     
-  nsaa = FFI_ALIGN (nsaa, alignment); 
-  state->nsaa = nsaa + size; 
- 
-  return (char *)stack + nsaa; 
-} 
- 
-static ffi_arg 
-extend_integer_type (void *source, int type) 
-{ 
-  switch (type) 
-    { 
-    case FFI_TYPE_UINT8: 
-      return *(UINT8 *) source; 
-    case FFI_TYPE_SINT8: 
-      return *(SINT8 *) source; 
-    case FFI_TYPE_UINT16: 
-      return *(UINT16 *) source; 
-    case FFI_TYPE_SINT16: 
-      return *(SINT16 *) source; 
-    case FFI_TYPE_UINT32: 
-      return *(UINT32 *) source; 
-    case FFI_TYPE_INT: 
-    case FFI_TYPE_SINT32: 
-      return *(SINT32 *) source; 
-    case FFI_TYPE_UINT64: 
-    case FFI_TYPE_SINT64: 
-      return *(UINT64 *) source; 
-      break; 
-    case FFI_TYPE_POINTER: 
-      return *(uintptr_t *) source; 
-    default: 
-      abort(); 
-    } 
-} 
- 
-#if defined(_MSC_VER) 
-void extend_hfa_type (void *dest, void *src, int h); 
-#else 
-static void 
-extend_hfa_type (void *dest, void *src, int h) 
-{ 
-  ssize_t f = h - AARCH64_RET_S4; 
-  void *x0; 
- 
-  asm volatile ( 
-	"adr	%0, 0f\n" 
-"	add	%0, %0, %1\n" 
-"	br	%0\n" 
-"0:	ldp	s16, s17, [%3]\n"	/* S4 */ 
-"	ldp	s18, s19, [%3, #8]\n" 
-"	b	4f\n" 
-"	ldp	s16, s17, [%3]\n"	/* S3 */ 
-"	ldr	s18, [%3, #8]\n" 
-"	b	3f\n" 
-"	ldp	s16, s17, [%3]\n"	/* S2 */ 
-"	b	2f\n" 
-"	nop\n" 
-"	ldr	s16, [%3]\n"		/* S1 */ 
-"	b	1f\n" 
-"	nop\n" 
-"	ldp	d16, d17, [%3]\n"	/* D4 */ 
-"	ldp	d18, d19, [%3, #16]\n" 
-"	b	4f\n" 
-"	ldp	d16, d17, [%3]\n"	/* D3 */ 
-"	ldr	d18, [%3, #16]\n" 
-"	b	3f\n" 
-"	ldp	d16, d17, [%3]\n"	/* D2 */ 
-"	b	2f\n" 
-"	nop\n" 
-"	ldr	d16, [%3]\n"		/* D1 */ 
-"	b	1f\n" 
-"	nop\n" 
-"	ldp	q16, q17, [%3]\n"	/* Q4 */ 
-"	ldp	q18, q19, [%3, #32]\n" 
-"	b	4f\n" 
-"	ldp	q16, q17, [%3]\n"	/* Q3 */ 
-"	ldr	q18, [%3, #32]\n" 
-"	b	3f\n" 
-"	ldp	q16, q17, [%3]\n"	/* Q2 */ 
-"	b	2f\n" 
-"	nop\n" 
-"	ldr	q16, [%3]\n"		/* Q1 */ 
-"	b	1f\n" 
-"4:	str	q19, [%2, #48]\n" 
-"3:	str	q18, [%2, #32]\n" 
-"2:	str	q17, [%2, #16]\n" 
-"1:	str	q16, [%2]" 
-    : "=&r"(x0) 
-    : "r"(f * 12), "r"(dest), "r"(src) 
-    : "memory", "v16", "v17", "v18", "v19"); 
-} 
-#endif 
- 
-#if defined(_MSC_VER) 
-void* compress_hfa_type (void *dest, void *src, int h); 
-#else 
-static void * 
-compress_hfa_type (void *dest, void *reg, int h) 
-{ 
-  switch (h) 
-    { 
-    case AARCH64_RET_S1: 
-      if (dest == reg) 
-	{ 
-#ifdef __AARCH64EB__ 
-	  dest += 12; 
-#endif 
-	} 
-      else 
-	*(float *)dest = *(float *)reg; 
-      break; 
-    case AARCH64_RET_S2: 
-      asm ("ldp q16, q17, [%1]\n\t" 
-	   "st2 { v16.s, v17.s }[0], [%0]" 
-	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17"); 
-      break; 
-    case AARCH64_RET_S3: 
-      asm ("ldp q16, q17, [%1]\n\t" 
-	   "ldr q18, [%1, #32]\n\t" 
-	   "st3 { v16.s, v17.s, v18.s }[0], [%0]" 
-	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18"); 
-      break; 
-    case AARCH64_RET_S4: 
-      asm ("ldp q16, q17, [%1]\n\t" 
-	   "ldp q18, q19, [%1, #32]\n\t" 
-	   "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]" 
-	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19"); 
-      break; 
- 
-    case AARCH64_RET_D1: 
-      if (dest == reg) 
-	{ 
-#ifdef __AARCH64EB__ 
-	  dest += 8; 
-#endif 
-	} 
-      else 
-	*(double *)dest = *(double *)reg; 
-      break; 
-    case AARCH64_RET_D2: 
-      asm ("ldp q16, q17, [%1]\n\t" 
-	   "st2 { v16.d, v17.d }[0], [%0]" 
-	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17"); 
-      break; 
-    case AARCH64_RET_D3: 
-      asm ("ldp q16, q17, [%1]\n\t" 
-	   "ldr q18, [%1, #32]\n\t" 
-	   "st3 { v16.d, v17.d, v18.d }[0], [%0]" 
-	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18"); 
-      break; 
-    case AARCH64_RET_D4: 
-      asm ("ldp q16, q17, [%1]\n\t" 
-	   "ldp q18, q19, [%1, #32]\n\t" 
-	   "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]" 
-	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19"); 
-      break; 
- 
-    default: 
-      if (dest != reg) 
-	return memcpy (dest, reg, 16 * (4 - (h & 3))); 
-      break; 
-    } 
-  return dest; 
-} 
-#endif 
- 
-/* Either allocate an appropriate register for the argument type, or if 
-   none are available, allocate a stack slot and return a pointer 
-   to the allocated space.  */ 
- 
-static void * 
-allocate_int_to_reg_or_stack (struct call_context *context, 
-			      struct arg_state *state, 
-			      void *stack, size_t size) 
-{ 
-  if (state->ngrn < N_X_ARG_REG) 
-    return &context->x[state->ngrn++]; 
- 
-  state->ngrn = N_X_ARG_REG; 
-  return allocate_to_stack (state, stack, size, size); 
-} 
- 
-ffi_status FFI_HIDDEN 
-ffi_prep_cif_machdep (ffi_cif *cif) 
-{ 
-  ffi_type *rtype = cif->rtype; 
-  size_t bytes = cif->bytes; 
-  int flags, i, n; 
- 
-  switch (rtype->type) 
-    { 
-    case FFI_TYPE_VOID: 
-      flags = AARCH64_RET_VOID; 
-      break; 
-    case FFI_TYPE_UINT8: 
-      flags = AARCH64_RET_UINT8; 
-      break; 
-    case FFI_TYPE_UINT16: 
-      flags = AARCH64_RET_UINT16; 
-      break; 
-    case FFI_TYPE_UINT32: 
-      flags = AARCH64_RET_UINT32; 
-      break; 
-    case FFI_TYPE_SINT8: 
-      flags = AARCH64_RET_SINT8; 
-      break; 
-    case FFI_TYPE_SINT16: 
-      flags = AARCH64_RET_SINT16; 
-      break; 
-    case FFI_TYPE_INT: 
-    case FFI_TYPE_SINT32: 
-      flags = AARCH64_RET_SINT32; 
-      break; 
-    case FFI_TYPE_SINT64: 
-    case FFI_TYPE_UINT64: 
-      flags = AARCH64_RET_INT64; 
-      break; 
-    case FFI_TYPE_POINTER: 
-      flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64); 
-      break; 
- 
-    case FFI_TYPE_FLOAT: 
-    case FFI_TYPE_DOUBLE: 
-    case FFI_TYPE_LONGDOUBLE: 
-    case FFI_TYPE_STRUCT: 
-    case FFI_TYPE_COMPLEX: 
-      flags = is_vfp_type (rtype); 
-      if (flags == 0) 
-	{ 
-	  size_t s = rtype->size; 
-	  if (s > 16) 
-	    { 
-	      flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM; 
-	      bytes += 8; 
-	    } 
-	  else if (s == 16) 
-	    flags = AARCH64_RET_INT128; 
-	  else if (s == 8) 
-	    flags = AARCH64_RET_INT64; 
-	  else 
-	    flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY; 
-	} 
-      break; 
- 
-    default: 
-      abort(); 
-    } 
- 
-  for (i = 0, n = cif->nargs; i < n; i++) 
-    if (is_vfp_type (cif->arg_types[i])) 
-      { 
-	flags |= AARCH64_FLAG_ARG_V; 
-	break; 
-      } 
- 
-  /* Round the stack up to a multiple of the stack alignment requirement. */ 
-  cif->bytes = (unsigned) FFI_ALIGN(bytes, 16); 
-  cif->flags = flags; 
-#if defined (__APPLE__) 
-  cif->aarch64_nfixedargs = 0; 
-#endif 
- 
-  return FFI_OK; 
-} 
- 
-#if defined (__APPLE__) 
-/* Perform Apple-specific cif processing for variadic calls */ 
-ffi_status FFI_HIDDEN 
-ffi_prep_cif_machdep_var(ffi_cif *cif, unsigned int nfixedargs, 
-			 unsigned int ntotalargs) 
-{ 
-  ffi_status status = ffi_prep_cif_machdep (cif); 
-  cif->aarch64_nfixedargs = nfixedargs; 
-  return status; 
-} 
-#endif /* __APPLE__ */ 
- 
-extern void ffi_call_SYSV (struct call_context *context, void *frame, 
-			   void (*fn)(void), void *rvalue, int flags, 
-			   void *closure) FFI_HIDDEN; 
- 
-/* Call a function with the provided arguments and capture the return 
-   value.  */ 
-static void 
-ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue, 
-	      void **avalue, void *closure) 
-{ 
-  struct call_context *context; 
-  void *stack, *frame, *rvalue; 
-  struct arg_state state; 
-  size_t stack_bytes, rtype_size, rsize; 
-  int i, nargs, flags; 
-  ffi_type *rtype; 
- 
-  flags = cif->flags; 
-  rtype = cif->rtype; 
-  rtype_size = rtype->size; 
-  stack_bytes = cif->bytes; 
- 
-  /* If the target function returns a structure via hidden pointer, 
-     then we cannot allow a null rvalue.  Otherwise, mash a null 
-     rvalue to void return type.  */ 
-  rsize = 0; 
-  if (flags & AARCH64_RET_IN_MEM) 
-    { 
-      if (orig_rvalue == NULL) 
-	rsize = rtype_size; 
-    } 
-  else if (orig_rvalue == NULL) 
-    flags &= AARCH64_FLAG_ARG_V; 
-  else if (flags & AARCH64_RET_NEED_COPY) 
-    rsize = 16; 
- 
-  /* Allocate consectutive stack for everything we'll need.  */ 
-  context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize); 
-  stack = context + 1; 
-  frame = (void*)((uintptr_t)stack + (uintptr_t)stack_bytes); 
-  rvalue = (rsize ? (void*)((uintptr_t)frame + 32) : orig_rvalue); 
- 
-  arg_init (&state); 
-  for (i = 0, nargs = cif->nargs; i < nargs; i++) 
-    { 
-      ffi_type *ty = cif->arg_types[i]; 
-      size_t s = ty->size; 
-      void *a = avalue[i]; 
-      int h, t; 
- 
-      t = ty->type; 
-      switch (t) 
-	{ 
-	case FFI_TYPE_VOID: 
-	  FFI_ASSERT (0); 
-	  break; 
- 
-	/* If the argument is a basic type the argument is allocated to an 
-	   appropriate register, or if none are available, to the stack.  */ 
-	case FFI_TYPE_INT: 
-	case FFI_TYPE_UINT8: 
-	case FFI_TYPE_SINT8: 
-	case FFI_TYPE_UINT16: 
-	case FFI_TYPE_SINT16: 
-	case FFI_TYPE_UINT32: 
-	case FFI_TYPE_SINT32: 
-	case FFI_TYPE_UINT64: 
-	case FFI_TYPE_SINT64: 
-	case FFI_TYPE_POINTER: 
-	do_pointer: 
-	  { 
-	    ffi_arg ext = extend_integer_type (a, t); 
-	    if (state.ngrn < N_X_ARG_REG) 
-	      context->x[state.ngrn++] = ext; 
-	    else 
-	      { 
-		void *d = allocate_to_stack (&state, stack, ty->alignment, s); 
-		state.ngrn = N_X_ARG_REG; 
-		/* Note that the default abi extends each argument 
-		   to a full 64-bit slot, while the iOS abi allocates 
-		   only enough space. */ 
-#ifdef __APPLE__ 
-		memcpy(d, a, s); 
-#else 
-		*(ffi_arg *)d = ext; 
-#endif 
-	      } 
-	  } 
-	  break; 
- 
-	case FFI_TYPE_FLOAT: 
-	case FFI_TYPE_DOUBLE: 
-	case FFI_TYPE_LONGDOUBLE: 
-	case FFI_TYPE_STRUCT: 
-	case FFI_TYPE_COMPLEX: 
-	  { 
-	    void *dest; 
- 
-	    h = is_vfp_type (ty); 
-	    if (h) 
-	      { 
-		int elems = 4 - (h & 3); 
-#ifdef _M_ARM64 /* for handling armasm calling convention */ 
-                if (cif->is_variadic) 
-                  { 
-                    if (state.ngrn + elems <= N_X_ARG_REG) 
-                      { 
-                        dest = &context->x[state.ngrn]; 
-                        state.ngrn += elems; 
-                        extend_hfa_type(dest, a, h); 
-                        break; 
-                      } 
-                    state.nsrn = N_X_ARG_REG; 
-                    dest = allocate_to_stack(&state, stack, ty->alignment, s); 
-                  } 
-                else 
-                  { 
-#endif /* for handling armasm calling convention */ 
-	        if (state.nsrn + elems <= N_V_ARG_REG) 
-		  { 
-		    dest = &context->v[state.nsrn]; 
-		    state.nsrn += elems; 
-		    extend_hfa_type (dest, a, h); 
-		    break; 
-		  } 
-		state.nsrn = N_V_ARG_REG; 
-		dest = allocate_to_stack (&state, stack, ty->alignment, s); 
-#ifdef _M_ARM64 /* for handling armasm calling convention */ 
-	      } 
-#endif /* for handling armasm calling convention */ 
-	      } 
-	    else if (s > 16) 
-	      { 
-		/* If the argument is a composite type that is larger than 16 
-		   bytes, then the argument has been copied to memory, and 
-		   the argument is replaced by a pointer to the copy.  */ 
-		a = &avalue[i]; 
-		t = FFI_TYPE_POINTER; 
-		s = sizeof (void *); 
-		goto do_pointer; 
-	      } 
-	    else 
-	      { 
-		size_t n = (s + 7) / 8; 
-		if (state.ngrn + n <= N_X_ARG_REG) 
-		  { 
-		    /* If the argument is a composite type and the size in 
-		       double-words is not more than the number of available 
-		       X registers, then the argument is copied into 
-		       consecutive X registers.  */ 
-		    dest = &context->x[state.ngrn]; 
-                    state.ngrn += (unsigned int)n; 
-		  } 
-		else 
-		  { 
-		    /* Otherwise, there are insufficient X registers. Further 
-		       X register allocations are prevented, the NSAA is 
-		       adjusted and the argument is copied to memory at the 
-		       adjusted NSAA.  */ 
-		    state.ngrn = N_X_ARG_REG; 
-		    dest = allocate_to_stack (&state, stack, ty->alignment, s); 
-		  } 
-		} 
-	      memcpy (dest, a, s); 
-	    } 
-	  break; 
- 
-	default: 
-	  abort(); 
-	} 
- 
-#if defined (__APPLE__) 
-      if (i + 1 == cif->aarch64_nfixedargs) 
-	{ 
-	  state.ngrn = N_X_ARG_REG; 
-	  state.nsrn = N_V_ARG_REG; 
-	  state.allocating_variadic = 1; 
-	} 
-#endif 
-    } 
- 
-  ffi_call_SYSV (context, frame, fn, rvalue, flags, closure); 
- 
-  if (flags & AARCH64_RET_NEED_COPY) 
-    memcpy (orig_rvalue, rvalue, rtype_size); 
-} 
- 
-void 
-ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue) 
-{ 
-  ffi_call_int (cif, fn, rvalue, avalue, NULL); 
-} 
- 
-#ifdef FFI_GO_CLOSURES 
-void 
-ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, 
-	     void **avalue, void *closure) 
-{ 
-  ffi_call_int (cif, fn, rvalue, avalue, closure); 
-} 
-#endif /* FFI_GO_CLOSURES */ 
- 
-/* Build a trampoline.  */ 
- 
-extern void ffi_closure_SYSV (void) FFI_HIDDEN; 
-extern void ffi_closure_SYSV_V (void) FFI_HIDDEN; 
- 
-ffi_status 
-ffi_prep_closure_loc (ffi_closure *closure, 
-                      ffi_cif* cif, 
-                      void (*fun)(ffi_cif*,void*,void**,void*), 
-                      void *user_data, 
-                      void *codeloc) 
-{ 
-  if (cif->abi != FFI_SYSV) 
-    return FFI_BAD_ABI; 
- 
-  void (*start)(void); 
-   
-  if (cif->flags & AARCH64_FLAG_ARG_V) 
-    start = ffi_closure_SYSV_V; 
-  else 
-    start = ffi_closure_SYSV; 
- 
-#if FFI_EXEC_TRAMPOLINE_TABLE 
-#ifdef __MACH__ 
-  void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE); 
-  config[0] = closure; 
-  config[1] = start; 
-#endif 
-#else 
-  static const unsigned char trampoline[16] = { 
-    0x90, 0x00, 0x00, 0x58,	/* ldr	x16, tramp+16	*/ 
-    0xf1, 0xff, 0xff, 0x10,	/* adr	x17, tramp+0	*/ 
-    0x00, 0x02, 0x1f, 0xd6	/* br	x16		*/ 
-  }; 
-  char *tramp = closure->tramp; 
-   
-  memcpy (tramp, trampoline, sizeof(trampoline)); 
-   
-  *(UINT64 *)(tramp + 16) = (uintptr_t)start; 
- 
-  ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE); 
- 
-  /* Also flush the cache for code mapping.  */ 
-#ifdef _M_ARM64 
-  // Not using dlmalloc.c for Windows ARM64 builds 
-  // so calling ffi_data_to_code_pointer() isn't necessary 
-  unsigned char *tramp_code = tramp; 
-  #else 
-  unsigned char *tramp_code = ffi_data_to_code_pointer (tramp); 
-  #endif 
-  ffi_clear_cache (tramp_code, tramp_code + FFI_TRAMPOLINE_SIZE); 
-#endif 
- 
-  closure->cif = cif; 
-  closure->fun = fun; 
-  closure->user_data = user_data; 
- 
-  return FFI_OK; 
-} 
- 
-#ifdef FFI_GO_CLOSURES 
-extern void ffi_go_closure_SYSV (void) FFI_HIDDEN; 
-extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN; 
- 
-ffi_status 
-ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif, 
-                     void (*fun)(ffi_cif*,void*,void**,void*)) 
-{ 
-  void (*start)(void); 
- 
-  if (cif->abi != FFI_SYSV) 
-    return FFI_BAD_ABI; 
- 
-  if (cif->flags & AARCH64_FLAG_ARG_V) 
-    start = ffi_go_closure_SYSV_V; 
-  else 
-    start = ffi_go_closure_SYSV; 
- 
-  closure->tramp = start; 
-  closure->cif = cif; 
-  closure->fun = fun; 
- 
-  return FFI_OK; 
-} 
-#endif /* FFI_GO_CLOSURES */ 
- 
-/* Primary handler to setup and invoke a function within a closure. 
- 
-   A closure when invoked enters via the assembler wrapper 
-   ffi_closure_SYSV(). The wrapper allocates a call context on the 
-   stack, saves the interesting registers (from the perspective of 
-   the calling convention) into the context then passes control to 
-   ffi_closure_SYSV_inner() passing the saved context and a pointer to 
-   the stack at the point ffi_closure_SYSV() was invoked. 
- 
-   On the return path the assembler wrapper will reload call context 
-   registers. 
- 
-   ffi_closure_SYSV_inner() marshalls the call context into ffi value 
-   descriptors, invokes the wrapped function, then marshalls the return 
-   value back into the call context.  */ 
- 
-int FFI_HIDDEN 
-ffi_closure_SYSV_inner (ffi_cif *cif, 
-			void (*fun)(ffi_cif*,void*,void**,void*), 
-			void *user_data, 
-			struct call_context *context, 
-			void *stack, void *rvalue, void *struct_rvalue) 
-{ 
-  void **avalue = (void**) alloca (cif->nargs * sizeof (void*)); 
-  int i, h, nargs, flags; 
-  struct arg_state state; 
- 
-  arg_init (&state); 
- 
-  for (i = 0, nargs = cif->nargs; i < nargs; i++) 
-    { 
-      ffi_type *ty = cif->arg_types[i]; 
-      int t = ty->type; 
-      size_t n, s = ty->size; 
- 
-      switch (t) 
-	{ 
-	case FFI_TYPE_VOID: 
-	  FFI_ASSERT (0); 
-	  break; 
- 
-	case FFI_TYPE_INT: 
-	case FFI_TYPE_UINT8: 
-	case FFI_TYPE_SINT8: 
-	case FFI_TYPE_UINT16: 
-	case FFI_TYPE_SINT16: 
-	case FFI_TYPE_UINT32: 
-	case FFI_TYPE_SINT32: 
-	case FFI_TYPE_UINT64: 
-	case FFI_TYPE_SINT64: 
-	case FFI_TYPE_POINTER: 
-	  avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s); 
-	  break; 
- 
-	case FFI_TYPE_FLOAT: 
-	case FFI_TYPE_DOUBLE: 
-	case FFI_TYPE_LONGDOUBLE: 
-	case FFI_TYPE_STRUCT: 
-	case FFI_TYPE_COMPLEX: 
-	  h = is_vfp_type (ty); 
-	  if (h) 
-	    { 
-	      n = 4 - (h & 3); 
-#ifdef _M_ARM64  /* for handling armasm calling convention */ 
-              if (cif->is_variadic) 
-                { 
-                  if (state.ngrn + n <= N_X_ARG_REG) 
-                    { 
-                      void *reg = &context->x[state.ngrn]; 
-                      state.ngrn += (unsigned int)n; 
-     
-                      /* Eeek! We need a pointer to the structure, however the 
-                       homogeneous float elements are being passed in individual 
-                       registers, therefore for float and double the structure 
-                       is not represented as a contiguous sequence of bytes in 
-                       our saved register context.  We don't need the original 
-                       contents of the register storage, so we reformat the 
-                       structure into the same memory.  */ 
-                      avalue[i] = compress_hfa_type(reg, reg, h); 
-                    } 
-                  else 
-                    { 
-                      state.ngrn = N_X_ARG_REG; 
-                      state.nsrn = N_V_ARG_REG; 
-                      avalue[i] = allocate_to_stack(&state, stack, 
-                             ty->alignment, s); 
-                    } 
-                } 
-              else 
-                { 
-#endif  /* for handling armasm calling convention */ 
-                  if (state.nsrn + n <= N_V_ARG_REG) 
-                    { 
-                      void *reg = &context->v[state.nsrn]; 
-                      state.nsrn += (unsigned int)n; 
-                      avalue[i] = compress_hfa_type(reg, reg, h); 
-                    } 
-                  else 
-                    { 
-                      state.nsrn = N_V_ARG_REG; 
-                      avalue[i] = allocate_to_stack(&state, stack, 
-                                                   ty->alignment, s); 
-                    } 
-#ifdef _M_ARM64  /* for handling armasm calling convention */ 
-                } 
-#endif  /* for handling armasm calling convention */ 
-            } 
-          else if (s > 16) 
-            { 
-              /* Replace Composite type of size greater than 16 with a 
-                  pointer.  */ 
-              avalue[i] = *(void **) 
-              allocate_int_to_reg_or_stack (context, &state, stack, 
-                                         sizeof (void *)); 
-            } 
-          else 
-            { 
-              n = (s + 7) / 8; 
-              if (state.ngrn + n <= N_X_ARG_REG) 
-                { 
-                  avalue[i] = &context->x[state.ngrn]; 
-                  state.ngrn += (unsigned int)n; 
-                } 
-              else 
-                { 
-                  state.ngrn = N_X_ARG_REG; 
-                  avalue[i] = allocate_to_stack(&state, stack, 
-                                           ty->alignment, s); 
-                } 
-            } 
-          break; 
- 
-        default: 
-          abort(); 
-      } 
- 
-#if defined (__APPLE__) 
-      if (i + 1 == cif->aarch64_nfixedargs) 
-	{ 
-	  state.ngrn = N_X_ARG_REG; 
-	  state.nsrn = N_V_ARG_REG; 
-	  state.allocating_variadic = 1; 
-	} 
-#endif 
-    } 
- 
-  flags = cif->flags; 
-  if (flags & AARCH64_RET_IN_MEM) 
-    rvalue = struct_rvalue; 
- 
-  fun (cif, rvalue, avalue, user_data); 
- 
-  return flags; 
-} 
- 
-#endif /* (__aarch64__) || defined(__arm64__)|| defined (_M_ARM64)*/ 
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#if defined(__aarch64__) || defined(__arm64__)|| defined (_M_ARM64)
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_common.h>
+#include "internal.h"
+#ifdef _M_ARM64
+#include <windows.h> /* FlushInstructionCache */
+#endif
+
+/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
+   all further uses in this file will refer to the 128-bit type.  */
+#if FFI_TYPE_DOUBLE != FFI_TYPE_LONGDOUBLE
+# if FFI_TYPE_LONGDOUBLE != 4
+#  error FFI_TYPE_LONGDOUBLE out of date
+# endif
+#else
+# undef FFI_TYPE_LONGDOUBLE
+# define FFI_TYPE_LONGDOUBLE 4
+#endif
+
+union _d
+{
+  UINT64 d;
+  UINT32 s[2];
+};
+
+struct _v
+{
+  union _d d[2] __attribute__((aligned(16)));
+};
+
+struct call_context
+{
+  struct _v v[N_V_ARG_REG];
+  UINT64 x[N_X_ARG_REG];
+};
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+#include <mach/vm_param.h>
+#endif
+
+#else
+
+#if defined (__clang__) && defined (__APPLE__)
+extern void sys_icache_invalidate (void *start, size_t len);
+#endif
+
+static inline void
+ffi_clear_cache (void *start, void *end)
+{
+#if defined (__clang__) && defined (__APPLE__)
+  sys_icache_invalidate (start, (char *)end - (char *)start);
+#elif defined (__GNUC__)
+  __builtin___clear_cache (start, end);
+#elif defined (_M_ARM64)
+  FlushInstructionCache(GetCurrentProcess(), start, (char*)end - (char*)start);
+#else
+#error "Missing builtin to flush instruction cache"
+#endif
+}
+
+#endif
+
+/* A subroutine of is_vfp_type.  Given a structure type, return the type code
+   of the first non-structure element.  Recurse for structure elements.
+   Return -1 if the structure is in fact empty, i.e. no nested elements.  */
+
+static int
+is_hfa0 (const ffi_type *ty)
+{
+  ffi_type **elements = ty->elements;
+  int i, ret = -1;
+
+  if (elements != NULL)
+    for (i = 0; elements[i]; ++i)
+      {
+        ret = elements[i]->type;
+        if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX)
+          {
+            ret = is_hfa0 (elements[i]);
+            if (ret < 0)
+              continue;
+          }
+        break;
+      }
+
+  return ret;
+}
+
+/* A subroutine of is_vfp_type.  Given a structure type, return true if all
+   of the non-structure elements are the same as CANDIDATE.  */
+
+static int
+is_hfa1 (const ffi_type *ty, int candidate)
+{
+  ffi_type **elements = ty->elements;
+  int i;
+
+  if (elements != NULL)
+    for (i = 0; elements[i]; ++i)
+      {
+        int t = elements[i]->type;
+        if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
+          {
+            if (!is_hfa1 (elements[i], candidate))
+              return 0;
+          }
+        else if (t != candidate)
+          return 0;
+      }
+
+  return 1;
+}
+
+/* Determine if TY may be allocated to the FP registers.  This covers both
+   fp scalar types and homogeneous floating point aggregates (HFAs),
+   that is, structures consisting of 1 to 4 members of all the same type,
+   where that type is an fp scalar.
+
+   Returns non-zero iff TY is an HFA.  The result is the AARCH64_RET_*
+   constant for the type.  */
+
+static int
+is_vfp_type (const ffi_type *ty)
+{
+  ffi_type **elements;
+  int candidate, i;
+  size_t size, ele_count;
+
+  /* Quickest tests first.  */
+  candidate = ty->type;
+  switch (candidate)
+    {
+    default:
+      return 0;
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_DOUBLE:
+    case FFI_TYPE_LONGDOUBLE:
+      ele_count = 1;
+      goto done;
+    case FFI_TYPE_COMPLEX:
+      candidate = ty->elements[0]->type;
+      switch (candidate)
+	{
+	case FFI_TYPE_FLOAT:
+	case FFI_TYPE_DOUBLE:
+	case FFI_TYPE_LONGDOUBLE:
+	  ele_count = 2;
+	  goto done;
+	}
+      return 0;
+    case FFI_TYPE_STRUCT:
+      break;
+    }
+
+  /* No HFA types are smaller than 4 bytes, or larger than 64 bytes.  */
+  size = ty->size;
+  if (size < 4 || size > 64)
+    return 0;
+
+  /* Find the type of the first non-structure member.  */
+  elements = ty->elements;
+  candidate = elements[0]->type;
+  if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX)
+    {
+      for (i = 0; ; ++i)
+        {
+          candidate = is_hfa0 (elements[i]);
+          if (candidate >= 0)
+            break;
+        }
+    }
+
+  /* If the first member is not a floating point type, it's not an HFA.
+     Also quickly re-check the size of the structure.  */
+  switch (candidate)
+    {
+    case FFI_TYPE_FLOAT:
+      ele_count = size / sizeof(float);
+      if (size != ele_count * sizeof(float))
+        return 0;
+      break;
+    case FFI_TYPE_DOUBLE:
+      ele_count = size / sizeof(double);
+      if (size != ele_count * sizeof(double))
+        return 0;
+      break;
+    case FFI_TYPE_LONGDOUBLE:
+      ele_count = size / sizeof(long double);
+      if (size != ele_count * sizeof(long double))
+        return 0;
+      break;
+    default:
+      return 0;
+    }
+  if (ele_count > 4)
+    return 0;
+
+  /* Finally, make sure that all scalar elements are the same type.  */
+  for (i = 0; elements[i]; ++i)
+    {
+      int t = elements[i]->type;
+      if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
+        {
+          if (!is_hfa1 (elements[i], candidate))
+            return 0;
+        }
+      else if (t != candidate)
+        return 0;
+    }
+
+  /* All tests succeeded.  Encode the result.  */
+ done:
+  return candidate * 4 + (4 - (int)ele_count);
+}
+
+/* Representation of the procedure call argument marshalling
+   state.
+
+   The terse state variable names match the names used in the AARCH64
+   PCS. */
+
+struct arg_state
+{
+  unsigned ngrn;                /* Next general-purpose register number. */
+  unsigned nsrn;                /* Next vector register number. */
+  size_t nsaa;                  /* Next stack offset. */
+
+#if defined (__APPLE__)
+  unsigned allocating_variadic;
+#endif
+};
+
+/* Initialize a procedure call argument marshalling state.  */
+static void
+arg_init (struct arg_state *state)
+{
+  state->ngrn = 0;
+  state->nsrn = 0;
+  state->nsaa = 0;
+#if defined (__APPLE__)
+  state->allocating_variadic = 0;
+#endif
+}
+
+/* Allocate an aligned slot on the stack and return a pointer to it.  */
+static void *
+allocate_to_stack (struct arg_state *state, void *stack,
+		   size_t alignment, size_t size)
+{
+  size_t nsaa = state->nsaa;
+
+  /* Round up the NSAA to the larger of 8 or the natural
+     alignment of the argument's type.  */
+#if defined (__APPLE__)
+  if (state->allocating_variadic && alignment < 8)
+    alignment = 8;
+#else
+  if (alignment < 8)
+    alignment = 8;
+#endif
+    
+  nsaa = FFI_ALIGN (nsaa, alignment);
+  state->nsaa = nsaa + size;
+
+  return (char *)stack + nsaa;
+}
+
+static ffi_arg
+extend_integer_type (void *source, int type)
+{
+  switch (type)
+    {
+    case FFI_TYPE_UINT8:
+      return *(UINT8 *) source;
+    case FFI_TYPE_SINT8:
+      return *(SINT8 *) source;
+    case FFI_TYPE_UINT16:
+      return *(UINT16 *) source;
+    case FFI_TYPE_SINT16:
+      return *(SINT16 *) source;
+    case FFI_TYPE_UINT32:
+      return *(UINT32 *) source;
+    case FFI_TYPE_INT:
+    case FFI_TYPE_SINT32:
+      return *(SINT32 *) source;
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+      return *(UINT64 *) source;
+      break;
+    case FFI_TYPE_POINTER:
+      return *(uintptr_t *) source;
+    default:
+      abort();
+    }
+}
+
+#if defined(_MSC_VER)
+void extend_hfa_type (void *dest, void *src, int h);
+#else
+static void
+extend_hfa_type (void *dest, void *src, int h)
+{
+  ssize_t f = h - AARCH64_RET_S4;
+  void *x0;
+
+  asm volatile (
+	"adr	%0, 0f\n"
+"	add	%0, %0, %1\n"
+"	br	%0\n"
+"0:	ldp	s16, s17, [%3]\n"	/* S4 */
+"	ldp	s18, s19, [%3, #8]\n"
+"	b	4f\n"
+"	ldp	s16, s17, [%3]\n"	/* S3 */
+"	ldr	s18, [%3, #8]\n"
+"	b	3f\n"
+"	ldp	s16, s17, [%3]\n"	/* S2 */
+"	b	2f\n"
+"	nop\n"
+"	ldr	s16, [%3]\n"		/* S1 */
+"	b	1f\n"
+"	nop\n"
+"	ldp	d16, d17, [%3]\n"	/* D4 */
+"	ldp	d18, d19, [%3, #16]\n"
+"	b	4f\n"
+"	ldp	d16, d17, [%3]\n"	/* D3 */
+"	ldr	d18, [%3, #16]\n"
+"	b	3f\n"
+"	ldp	d16, d17, [%3]\n"	/* D2 */
+"	b	2f\n"
+"	nop\n"
+"	ldr	d16, [%3]\n"		/* D1 */
+"	b	1f\n"
+"	nop\n"
+"	ldp	q16, q17, [%3]\n"	/* Q4 */
+"	ldp	q18, q19, [%3, #32]\n"
+"	b	4f\n"
+"	ldp	q16, q17, [%3]\n"	/* Q3 */
+"	ldr	q18, [%3, #32]\n"
+"	b	3f\n"
+"	ldp	q16, q17, [%3]\n"	/* Q2 */
+"	b	2f\n"
+"	nop\n"
+"	ldr	q16, [%3]\n"		/* Q1 */
+"	b	1f\n"
+"4:	str	q19, [%2, #48]\n"
+"3:	str	q18, [%2, #32]\n"
+"2:	str	q17, [%2, #16]\n"
+"1:	str	q16, [%2]"
+    : "=&r"(x0)
+    : "r"(f * 12), "r"(dest), "r"(src)
+    : "memory", "v16", "v17", "v18", "v19");
+}
+#endif
+
+#if defined(_MSC_VER)
+void* compress_hfa_type (void *dest, void *src, int h);
+#else
+static void *
+compress_hfa_type (void *dest, void *reg, int h)
+{
+  switch (h)
+    {
+    case AARCH64_RET_S1:
+      if (dest == reg)
+	{
+#ifdef __AARCH64EB__
+	  dest += 12;
+#endif
+	}
+      else
+	*(float *)dest = *(float *)reg;
+      break;
+    case AARCH64_RET_S2:
+      asm ("ldp q16, q17, [%1]\n\t"
+	   "st2 { v16.s, v17.s }[0], [%0]"
+	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
+      break;
+    case AARCH64_RET_S3:
+      asm ("ldp q16, q17, [%1]\n\t"
+	   "ldr q18, [%1, #32]\n\t"
+	   "st3 { v16.s, v17.s, v18.s }[0], [%0]"
+	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
+      break;
+    case AARCH64_RET_S4:
+      asm ("ldp q16, q17, [%1]\n\t"
+	   "ldp q18, q19, [%1, #32]\n\t"
+	   "st4 { v16.s, v17.s, v18.s, v19.s }[0], [%0]"
+	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
+      break;
+
+    case AARCH64_RET_D1:
+      if (dest == reg)
+	{
+#ifdef __AARCH64EB__
+	  dest += 8;
+#endif
+	}
+      else
+	*(double *)dest = *(double *)reg;
+      break;
+    case AARCH64_RET_D2:
+      asm ("ldp q16, q17, [%1]\n\t"
+	   "st2 { v16.d, v17.d }[0], [%0]"
+	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17");
+      break;
+    case AARCH64_RET_D3:
+      asm ("ldp q16, q17, [%1]\n\t"
+	   "ldr q18, [%1, #32]\n\t"
+	   "st3 { v16.d, v17.d, v18.d }[0], [%0]"
+	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18");
+      break;
+    case AARCH64_RET_D4:
+      asm ("ldp q16, q17, [%1]\n\t"
+	   "ldp q18, q19, [%1, #32]\n\t"
+	   "st4 { v16.d, v17.d, v18.d, v19.d }[0], [%0]"
+	   : : "r"(dest), "r"(reg) : "memory", "v16", "v17", "v18", "v19");
+      break;
+
+    default:
+      if (dest != reg)
+	return memcpy (dest, reg, 16 * (4 - (h & 3)));
+      break;
+    }
+  return dest;
+}
+#endif
+
+/* Either allocate an appropriate register for the argument type, or if
+   none are available, allocate a stack slot and return a pointer
+   to the allocated space.  */
+
+static void *
+allocate_int_to_reg_or_stack (struct call_context *context,
+			      struct arg_state *state,
+			      void *stack, size_t size)
+{
+  if (state->ngrn < N_X_ARG_REG)
+    return &context->x[state->ngrn++];
+
+  state->ngrn = N_X_ARG_REG;
+  return allocate_to_stack (state, stack, size, size);
+}
+
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+  ffi_type *rtype = cif->rtype;
+  size_t bytes = cif->bytes;
+  int flags, i, n;
+
+  switch (rtype->type)
+    {
+    case FFI_TYPE_VOID:
+      flags = AARCH64_RET_VOID;
+      break;
+    case FFI_TYPE_UINT8:
+      flags = AARCH64_RET_UINT8;
+      break;
+    case FFI_TYPE_UINT16:
+      flags = AARCH64_RET_UINT16;
+      break;
+    case FFI_TYPE_UINT32:
+      flags = AARCH64_RET_UINT32;
+      break;
+    case FFI_TYPE_SINT8:
+      flags = AARCH64_RET_SINT8;
+      break;
+    case FFI_TYPE_SINT16:
+      flags = AARCH64_RET_SINT16;
+      break;
+    case FFI_TYPE_INT:
+    case FFI_TYPE_SINT32:
+      flags = AARCH64_RET_SINT32;
+      break;
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_UINT64:
+      flags = AARCH64_RET_INT64;
+      break;
+    case FFI_TYPE_POINTER:
+      flags = (sizeof(void *) == 4 ? AARCH64_RET_UINT32 : AARCH64_RET_INT64);
+      break;
+
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_DOUBLE:
+    case FFI_TYPE_LONGDOUBLE:
+    case FFI_TYPE_STRUCT:
+    case FFI_TYPE_COMPLEX:
+      flags = is_vfp_type (rtype);
+      if (flags == 0)
+	{
+	  size_t s = rtype->size;
+	  if (s > 16)
+	    {
+	      flags = AARCH64_RET_VOID | AARCH64_RET_IN_MEM;
+	      bytes += 8;
+	    }
+	  else if (s == 16)
+	    flags = AARCH64_RET_INT128;
+	  else if (s == 8)
+	    flags = AARCH64_RET_INT64;
+	  else
+	    flags = AARCH64_RET_INT128 | AARCH64_RET_NEED_COPY;
+	}
+      break;
+
+    default:
+      abort();
+    }
+
+  for (i = 0, n = cif->nargs; i < n; i++)
+    if (is_vfp_type (cif->arg_types[i]))
+      {
+	flags |= AARCH64_FLAG_ARG_V;
+	break;
+      }
+
+  /* Round the stack up to a multiple of the stack alignment requirement. */
+  cif->bytes = (unsigned) FFI_ALIGN(bytes, 16);
+  cif->flags = flags;
+#if defined (__APPLE__)
+  cif->aarch64_nfixedargs = 0;
+#endif
+
+  return FFI_OK;
+}
+
+#if defined (__APPLE__)
+/* Perform Apple-specific cif processing for variadic calls */
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep_var(ffi_cif *cif, unsigned int nfixedargs,
+			 unsigned int ntotalargs)
+{
+  ffi_status status = ffi_prep_cif_machdep (cif);
+  cif->aarch64_nfixedargs = nfixedargs;
+  return status;
+}
+#endif /* __APPLE__ */
+
+extern void ffi_call_SYSV (struct call_context *context, void *frame,
+			   void (*fn)(void), void *rvalue, int flags,
+			   void *closure) FFI_HIDDEN;
+
+/* Call a function with the provided arguments and capture the return
+   value.  */
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *orig_rvalue,
+	      void **avalue, void *closure)
+{
+  struct call_context *context;
+  void *stack, *frame, *rvalue;
+  struct arg_state state;
+  size_t stack_bytes, rtype_size, rsize;
+  int i, nargs, flags;
+  ffi_type *rtype;
+
+  flags = cif->flags;
+  rtype = cif->rtype;
+  rtype_size = rtype->size;
+  stack_bytes = cif->bytes;
+
+  /* If the target function returns a structure via hidden pointer,
+     then we cannot allow a null rvalue.  Otherwise, mash a null
+     rvalue to void return type.  */
+  rsize = 0;
+  if (flags & AARCH64_RET_IN_MEM)
+    {
+      if (orig_rvalue == NULL)
+	rsize = rtype_size;
+    }
+  else if (orig_rvalue == NULL)
+    flags &= AARCH64_FLAG_ARG_V;
+  else if (flags & AARCH64_RET_NEED_COPY)
+    rsize = 16;
+
+  /* Allocate consecutive stack for everything we'll need.  */
+  context = alloca (sizeof(struct call_context) + stack_bytes + 32 + rsize);
+  stack = context + 1;
+  frame = (void*)((uintptr_t)stack + (uintptr_t)stack_bytes);
+  rvalue = (rsize ? (void*)((uintptr_t)frame + 32) : orig_rvalue);
+
+  arg_init (&state);
+  for (i = 0, nargs = cif->nargs; i < nargs; i++)
+    {
+      ffi_type *ty = cif->arg_types[i];
+      size_t s = ty->size;
+      void *a = avalue[i];
+      int h, t;
+
+      t = ty->type;
+      switch (t)
+	{
+	case FFI_TYPE_VOID:
+	  FFI_ASSERT (0);
+	  break;
+
+	/* If the argument is a basic type the argument is allocated to an
+	   appropriate register, or if none are available, to the stack.  */
+	case FFI_TYPE_INT:
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT8:
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_POINTER:
+	do_pointer:
+	  {
+	    ffi_arg ext = extend_integer_type (a, t);
+	    if (state.ngrn < N_X_ARG_REG)
+	      context->x[state.ngrn++] = ext;
+	    else
+	      {
+		void *d = allocate_to_stack (&state, stack, ty->alignment, s);
+		state.ngrn = N_X_ARG_REG;
+		/* Note that the default abi extends each argument
+		   to a full 64-bit slot, while the iOS abi allocates
+		   only enough space. */
+#ifdef __APPLE__
+		memcpy(d, a, s);
+#else
+		*(ffi_arg *)d = ext;
+#endif
+	      }
+	  }
+	  break;
+
+	case FFI_TYPE_FLOAT:
+	case FFI_TYPE_DOUBLE:
+	case FFI_TYPE_LONGDOUBLE:
+	case FFI_TYPE_STRUCT:
+	case FFI_TYPE_COMPLEX:
+	  {
+	    void *dest;
+
+	    h = is_vfp_type (ty);
+	    if (h)
+	      {
+		int elems = 4 - (h & 3);
+#ifdef _M_ARM64 /* for handling armasm calling convention */
+                if (cif->is_variadic)
+                  {
+                    if (state.ngrn + elems <= N_X_ARG_REG)
+                      {
+                        dest = &context->x[state.ngrn];
+                        state.ngrn += elems;
+                        extend_hfa_type(dest, a, h);
+                        break;
+                      }
+                    state.nsrn = N_X_ARG_REG;
+                    dest = allocate_to_stack(&state, stack, ty->alignment, s);
+                  }
+                else
+                  {
+#endif /* for handling armasm calling convention */
+	        if (state.nsrn + elems <= N_V_ARG_REG)
+		  {
+		    dest = &context->v[state.nsrn];
+		    state.nsrn += elems;
+		    extend_hfa_type (dest, a, h);
+		    break;
+		  }
+		state.nsrn = N_V_ARG_REG;
+		dest = allocate_to_stack (&state, stack, ty->alignment, s);
+#ifdef _M_ARM64 /* for handling armasm calling convention */
+	      }
+#endif /* for handling armasm calling convention */
+	      }
+	    else if (s > 16)
+	      {
+		/* If the argument is a composite type that is larger than 16
+		   bytes, then the argument has been copied to memory, and
+		   the argument is replaced by a pointer to the copy.  */
+		a = &avalue[i];
+		t = FFI_TYPE_POINTER;
+		s = sizeof (void *);
+		goto do_pointer;
+	      }
+	    else
+	      {
+		size_t n = (s + 7) / 8;
+		if (state.ngrn + n <= N_X_ARG_REG)
+		  {
+		    /* If the argument is a composite type and the size in
+		       double-words is not more than the number of available
+		       X registers, then the argument is copied into
+		       consecutive X registers.  */
+		    dest = &context->x[state.ngrn];
+                    state.ngrn += (unsigned int)n;
+		  }
+		else
+		  {
+		    /* Otherwise, there are insufficient X registers. Further
+		       X register allocations are prevented, the NSAA is
+		       adjusted and the argument is copied to memory at the
+		       adjusted NSAA.  */
+		    state.ngrn = N_X_ARG_REG;
+		    dest = allocate_to_stack (&state, stack, ty->alignment, s);
+		  }
+		}
+	      memcpy (dest, a, s);
+	    }
+	  break;
+
+	default:
+	  abort();
+	}
+
+#if defined (__APPLE__)
+      if (i + 1 == cif->aarch64_nfixedargs)
+	{
+	  state.ngrn = N_X_ARG_REG;
+	  state.nsrn = N_V_ARG_REG;
+	  state.allocating_variadic = 1;
+	}
+#endif
+    }
+
+  ffi_call_SYSV (context, frame, fn, rvalue, flags, closure);
+
+  if (flags & AARCH64_RET_NEED_COPY)
+    memcpy (orig_rvalue, rvalue, rtype_size);
+}
+
+void
+ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+#ifdef FFI_GO_CLOSURES
+void
+ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
+	     void **avalue, void *closure)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
+#endif /* FFI_GO_CLOSURES */
+
+/* Build a trampoline.  */
+
+extern void ffi_closure_SYSV (void) FFI_HIDDEN;
+extern void ffi_closure_SYSV_V (void) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure *closure,
+                      ffi_cif* cif,
+                      void (*fun)(ffi_cif*,void*,void**,void*),
+                      void *user_data,
+                      void *codeloc)
+{
+  if (cif->abi != FFI_SYSV)
+    return FFI_BAD_ABI;
+
+  void (*start)(void);
+  
+  if (cif->flags & AARCH64_FLAG_ARG_V)
+    start = ffi_closure_SYSV_V;
+  else
+    start = ffi_closure_SYSV;
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+#ifdef __MACH__
+  void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE);
+  config[0] = closure;
+  config[1] = start;
+#endif
+#else
+  static const unsigned char trampoline[16] = {
+    0x90, 0x00, 0x00, 0x58,	/* ldr	x16, tramp+16	*/
+    0xf1, 0xff, 0xff, 0x10,	/* adr	x17, tramp+0	*/
+    0x00, 0x02, 0x1f, 0xd6	/* br	x16		*/
+  };
+  char *tramp = closure->tramp;
+  
+  memcpy (tramp, trampoline, sizeof(trampoline));
+  
+  *(UINT64 *)(tramp + 16) = (uintptr_t)start;
+
+  ffi_clear_cache(tramp, tramp + FFI_TRAMPOLINE_SIZE);
+
+  /* Also flush the cache for code mapping.  */
+#ifdef _M_ARM64
+  // Not using dlmalloc.c for Windows ARM64 builds
+  // so calling ffi_data_to_code_pointer() isn't necessary
+  unsigned char *tramp_code = tramp;
+  #else
+  unsigned char *tramp_code = ffi_data_to_code_pointer (tramp);
+  #endif
+  ffi_clear_cache (tramp_code, tramp_code + FFI_TRAMPOLINE_SIZE);
+#endif
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+
+  return FFI_OK;
+}
+
+#ifdef FFI_GO_CLOSURES
+extern void ffi_go_closure_SYSV (void) FFI_HIDDEN;
+extern void ffi_go_closure_SYSV_V (void) FFI_HIDDEN;
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif* cif,
+                     void (*fun)(ffi_cif*,void*,void**,void*))
+{
+  void (*start)(void);
+
+  if (cif->abi != FFI_SYSV)
+    return FFI_BAD_ABI;
+
+  if (cif->flags & AARCH64_FLAG_ARG_V)
+    start = ffi_go_closure_SYSV_V;
+  else
+    start = ffi_go_closure_SYSV;
+
+  closure->tramp = start;
+  closure->cif = cif;
+  closure->fun = fun;
+
+  return FFI_OK;
+}
+#endif /* FFI_GO_CLOSURES */
+
+/* Primary handler to setup and invoke a function within a closure.
+
+   A closure when invoked enters via the assembler wrapper
+   ffi_closure_SYSV(). The wrapper allocates a call context on the
+   stack, saves the interesting registers (from the perspective of
+   the calling convention) into the context then passes control to
+   ffi_closure_SYSV_inner() passing the saved context and a pointer to
+   the stack at the point ffi_closure_SYSV() was invoked.
+
+   On the return path the assembler wrapper will reload call context
+   registers.
+
+   ffi_closure_SYSV_inner() marshals the call context into ffi value
+   descriptors, invokes the wrapped function, then marshals the return
+   value back into the call context.  */
+
+int FFI_HIDDEN
+ffi_closure_SYSV_inner (ffi_cif *cif,
+			void (*fun)(ffi_cif*,void*,void**,void*),
+			void *user_data,
+			struct call_context *context,
+			void *stack, void *rvalue, void *struct_rvalue)
+{
+  void **avalue = (void**) alloca (cif->nargs * sizeof (void*));
+  int i, h, nargs, flags;
+  struct arg_state state;
+
+  arg_init (&state);
+
+  for (i = 0, nargs = cif->nargs; i < nargs; i++)
+    {
+      ffi_type *ty = cif->arg_types[i];
+      int t = ty->type;
+      size_t n, s = ty->size;
+
+      switch (t)
+	{
+	case FFI_TYPE_VOID:
+	  FFI_ASSERT (0);
+	  break;
+
+	case FFI_TYPE_INT:
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT8:
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_POINTER:
+	  avalue[i] = allocate_int_to_reg_or_stack (context, &state, stack, s);
+	  break;
+
+	case FFI_TYPE_FLOAT:
+	case FFI_TYPE_DOUBLE:
+	case FFI_TYPE_LONGDOUBLE:
+	case FFI_TYPE_STRUCT:
+	case FFI_TYPE_COMPLEX:
+	  h = is_vfp_type (ty);
+	  if (h)
+	    {
+	      n = 4 - (h & 3);
+#ifdef _M_ARM64  /* for handling armasm calling convention */
+              if (cif->is_variadic)
+                {
+                  if (state.ngrn + n <= N_X_ARG_REG)
+                    {
+                      void *reg = &context->x[state.ngrn];
+                      state.ngrn += (unsigned int)n;
+    
+                      /* Eeek! We need a pointer to the structure, however the
+                       homogeneous float elements are being passed in individual
+                       registers, therefore for float and double the structure
+                       is not represented as a contiguous sequence of bytes in
+                       our saved register context.  We don't need the original
+                       contents of the register storage, so we reformat the
+                       structure into the same memory.  */
+                      avalue[i] = compress_hfa_type(reg, reg, h);
+                    }
+                  else
+                    {
+                      state.ngrn = N_X_ARG_REG;
+                      state.nsrn = N_V_ARG_REG;
+                      avalue[i] = allocate_to_stack(&state, stack,
+                             ty->alignment, s);
+                    }
+                }
+              else
+                {
+#endif  /* for handling armasm calling convention */
+                  if (state.nsrn + n <= N_V_ARG_REG)
+                    {
+                      void *reg = &context->v[state.nsrn];
+                      state.nsrn += (unsigned int)n;
+                      avalue[i] = compress_hfa_type(reg, reg, h);
+                    }
+                  else
+                    {
+                      state.nsrn = N_V_ARG_REG;
+                      avalue[i] = allocate_to_stack(&state, stack,
+                                                   ty->alignment, s);
+                    }
+#ifdef _M_ARM64  /* for handling armasm calling convention */
+                }
+#endif  /* for handling armasm calling convention */
+            }
+          else if (s > 16)
+            {
+              /* Replace Composite type of size greater than 16 with a
+                  pointer.  */
+              avalue[i] = *(void **)
+              allocate_int_to_reg_or_stack (context, &state, stack,
+                                         sizeof (void *));
+            }
+          else
+            {
+              n = (s + 7) / 8;
+              if (state.ngrn + n <= N_X_ARG_REG)
+                {
+                  avalue[i] = &context->x[state.ngrn];
+                  state.ngrn += (unsigned int)n;
+                }
+              else
+                {
+                  state.ngrn = N_X_ARG_REG;
+                  avalue[i] = allocate_to_stack(&state, stack,
+                                           ty->alignment, s);
+                }
+            }
+          break;
+
+        default:
+          abort();
+      }
+
+#if defined (__APPLE__)
+      if (i + 1 == cif->aarch64_nfixedargs)
+	{
+	  state.ngrn = N_X_ARG_REG;
+	  state.nsrn = N_V_ARG_REG;
+	  state.allocating_variadic = 1;
+	}
+#endif
+    }
+
+  flags = cif->flags;
+  if (flags & AARCH64_RET_IN_MEM)
+    rvalue = struct_rvalue;
+
+  fun (cif, rvalue, avalue, user_data);
+
+  return flags;
+}
+
+#endif /* defined(__aarch64__) || defined(__arm64__) || defined (_M_ARM64) */
diff --git a/contrib/restricted/libffi/src/aarch64/ffitarget.h b/contrib/restricted/libffi/src/aarch64/ffitarget.h
index ddce8f21eb..ecb6d2deae 100644
--- a/contrib/restricted/libffi/src/aarch64/ffitarget.h
+++ b/contrib/restricted/libffi/src/aarch64/ffitarget.h
@@ -1,92 +1,92 @@
-/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. 
- 
-Permission is hereby granted, free of charge, to any person obtaining 
-a copy of this software and associated documentation files (the 
-``Software''), to deal in the Software without restriction, including 
-without limitation the rights to use, copy, modify, merge, publish, 
-distribute, sublicense, and/or sell copies of the Software, and to 
-permit persons to whom the Software is furnished to do so, subject to 
-the following conditions: 
- 
-The above copyright notice and this permission notice shall be 
-included in all copies or substantial portions of the Software. 
- 
-THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */ 
- 
-#ifndef LIBFFI_TARGET_H 
-#define LIBFFI_TARGET_H 
- 
-#ifndef LIBFFI_H 
-#error "Please do not include ffitarget.h directly into your source.  Use ffi.h instead." 
-#endif 
- 
-#ifndef LIBFFI_ASM 
-#ifdef __ILP32__ 
-#define FFI_SIZEOF_ARG 8 
-#define FFI_SIZEOF_JAVA_RAW  4 
-typedef unsigned long long ffi_arg; 
-typedef signed long long ffi_sarg; 
-#elif defined(_M_ARM64) 
-#define FFI_SIZEOF_ARG 8 
-typedef unsigned long long ffi_arg; 
-typedef signed long long ffi_sarg; 
-#else 
-typedef unsigned long ffi_arg; 
-typedef signed long ffi_sarg; 
-#endif 
- 
-typedef enum ffi_abi 
-  { 
-    FFI_FIRST_ABI = 0, 
-    FFI_SYSV, 
-    FFI_LAST_ABI, 
-    FFI_DEFAULT_ABI = FFI_SYSV 
-  } ffi_abi; 
-#endif 
- 
-/* ---- Definitions for closures ----------------------------------------- */ 
- 
-#define FFI_CLOSURES 1 
-#define FFI_NATIVE_RAW_API 0 
- 
-#if defined (FFI_EXEC_TRAMPOLINE_TABLE) && FFI_EXEC_TRAMPOLINE_TABLE 
- 
-#ifdef __MACH__ 
-#define FFI_TRAMPOLINE_SIZE 16 
-#define FFI_TRAMPOLINE_CLOSURE_OFFSET 16 
-#else 
-#error "No trampoline table implementation" 
-#endif 
- 
-#else 
-#define FFI_TRAMPOLINE_SIZE 24 
-#define FFI_TRAMPOLINE_CLOSURE_OFFSET FFI_TRAMPOLINE_SIZE 
-#endif 
- 
-#ifdef _M_ARM64 
-#define FFI_EXTRA_CIF_FIELDS unsigned is_variadic 
-#endif 
- 
-/* ---- Internal ---- */ 
- 
-#if defined (__APPLE__) 
-#define FFI_TARGET_SPECIFIC_VARIADIC 
-#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_nfixedargs 
-#elif !defined(_M_ARM64) 
-/* iOS and Windows reserve x18 for the system.  Disable Go closures until 
-   a new static chain is chosen.  */ 
-#define FFI_GO_CLOSURES 1 
-#endif 
- 
-#ifndef _M_ARM64 
-/* No complex type on Windows */ 
-#define FFI_TARGET_HAS_COMPLEX_TYPE 
-#endif 
- 
-#endif 
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source.  Use ffi.h instead."
+#endif
+
+#ifndef LIBFFI_ASM
+#ifdef __ILP32__
+#define FFI_SIZEOF_ARG 8
+#define FFI_SIZEOF_JAVA_RAW  4
+typedef unsigned long long ffi_arg;
+typedef signed long long ffi_sarg;
+#elif defined(_M_ARM64)
+#define FFI_SIZEOF_ARG 8
+typedef unsigned long long ffi_arg;
+typedef signed long long ffi_sarg;
+#else
+typedef unsigned long ffi_arg;
+typedef signed long ffi_sarg;
+#endif
+
+typedef enum ffi_abi
+  {
+    FFI_FIRST_ABI = 0,
+    FFI_SYSV,
+    FFI_LAST_ABI,
+    FFI_DEFAULT_ABI = FFI_SYSV
+  } ffi_abi;
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_NATIVE_RAW_API 0
+
+#if defined (FFI_EXEC_TRAMPOLINE_TABLE) && FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+#define FFI_TRAMPOLINE_SIZE 16
+#define FFI_TRAMPOLINE_CLOSURE_OFFSET 16
+#else
+#error "No trampoline table implementation"
+#endif
+
+#else
+#define FFI_TRAMPOLINE_SIZE 24
+#define FFI_TRAMPOLINE_CLOSURE_OFFSET FFI_TRAMPOLINE_SIZE
+#endif
+
+#ifdef _M_ARM64
+#define FFI_EXTRA_CIF_FIELDS unsigned is_variadic
+#endif
+
+/* ---- Internal ---- */
+
+#if defined (__APPLE__)
+#define FFI_TARGET_SPECIFIC_VARIADIC
+#define FFI_EXTRA_CIF_FIELDS unsigned aarch64_nfixedargs
+#elif !defined(_M_ARM64)
+/* iOS and Windows reserve x18 for the system.  Disable Go closures until
+   a new static chain is chosen.  */
+#define FFI_GO_CLOSURES 1
+#endif
+
+#ifndef _M_ARM64
+/* No complex type on Windows */
+#define FFI_TARGET_HAS_COMPLEX_TYPE
+#endif
+
+#endif
diff --git a/contrib/restricted/libffi/src/aarch64/internal.h b/contrib/restricted/libffi/src/aarch64/internal.h
index 2691dafa98..9c3e07725a 100644
--- a/contrib/restricted/libffi/src/aarch64/internal.h
+++ b/contrib/restricted/libffi/src/aarch64/internal.h
@@ -1,67 +1,67 @@
-/*  
-Permission is hereby granted, free of charge, to any person obtaining 
-a copy of this software and associated documentation files (the 
-``Software''), to deal in the Software without restriction, including 
-without limitation the rights to use, copy, modify, merge, publish, 
-distribute, sublicense, and/or sell copies of the Software, and to 
-permit persons to whom the Software is furnished to do so, subject to 
-the following conditions: 
- 
-The above copyright notice and this permission notice shall be 
-included in all copies or substantial portions of the Software. 
- 
-THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */ 
- 
-#define AARCH64_RET_VOID	0 
-#define AARCH64_RET_INT64	1 
-#define AARCH64_RET_INT128	2 
- 
-#define AARCH64_RET_UNUSED3	3 
-#define AARCH64_RET_UNUSED4	4 
-#define AARCH64_RET_UNUSED5	5 
-#define AARCH64_RET_UNUSED6	6 
-#define AARCH64_RET_UNUSED7	7 
- 
-/* Note that FFI_TYPE_FLOAT == 2, _DOUBLE == 3, _LONGDOUBLE == 4, 
-   so _S4 through _Q1 are layed out as (TYPE * 4) + (4 - COUNT).  */ 
-#define AARCH64_RET_S4		8 
-#define AARCH64_RET_S3		9 
-#define AARCH64_RET_S2		10 
-#define AARCH64_RET_S1		11 
- 
-#define AARCH64_RET_D4		12 
-#define AARCH64_RET_D3		13 
-#define AARCH64_RET_D2		14 
-#define AARCH64_RET_D1		15 
- 
-#define AARCH64_RET_Q4		16 
-#define AARCH64_RET_Q3		17 
-#define AARCH64_RET_Q2		18 
-#define AARCH64_RET_Q1		19 
- 
-/* Note that each of the sub-64-bit integers gets two entries.  */ 
-#define AARCH64_RET_UINT8	20 
-#define AARCH64_RET_UINT16	22 
-#define AARCH64_RET_UINT32	24 
- 
-#define AARCH64_RET_SINT8	26 
-#define AARCH64_RET_SINT16	28 
-#define AARCH64_RET_SINT32	30 
- 
-#define AARCH64_RET_MASK	31 
- 
-#define AARCH64_RET_IN_MEM	(1 << 5) 
-#define AARCH64_RET_NEED_COPY	(1 << 6) 
- 
-#define AARCH64_FLAG_ARG_V_BIT	7 
-#define AARCH64_FLAG_ARG_V	(1 << AARCH64_FLAG_ARG_V_BIT) 
- 
-#define N_X_ARG_REG		8 
-#define N_V_ARG_REG		8 
-#define CALL_CONTEXT_SIZE	(N_V_ARG_REG * 16 + N_X_ARG_REG * 8) 
+/* 
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#define AARCH64_RET_VOID	0	/* AARCH64_RET_*: entry index into the return-value tables in sysv.S */
+#define AARCH64_RET_INT64	1
+#define AARCH64_RET_INT128	2
+
+#define AARCH64_RET_UNUSED3	3
+#define AARCH64_RET_UNUSED4	4
+#define AARCH64_RET_UNUSED5	5
+#define AARCH64_RET_UNUSED6	6
+#define AARCH64_RET_UNUSED7	7
+
+/* Note that FFI_TYPE_FLOAT == 2, _DOUBLE == 3, _LONGDOUBLE == 4,
+   so _S4 through _Q1 are laid out as (TYPE * 4) + (4 - COUNT).  */
+#define AARCH64_RET_S4		8
+#define AARCH64_RET_S3		9
+#define AARCH64_RET_S2		10
+#define AARCH64_RET_S1		11
+
+#define AARCH64_RET_D4		12
+#define AARCH64_RET_D3		13
+#define AARCH64_RET_D2		14
+#define AARCH64_RET_D1		15
+
+#define AARCH64_RET_Q4		16
+#define AARCH64_RET_Q3		17
+#define AARCH64_RET_Q2		18
+#define AARCH64_RET_Q1		19
+
+/* Note that each of the sub-64-bit integers gets two entries, which is
+   why the values below advance in steps of two.  */
+#define AARCH64_RET_UINT8	20
+#define AARCH64_RET_UINT16	22
+#define AARCH64_RET_UINT32	24
+
+#define AARCH64_RET_SINT8	26
+#define AARCH64_RET_SINT16	28
+#define AARCH64_RET_SINT32	30
+
+#define AARCH64_RET_MASK	31	/* low 5 bits of the flags select the table entry */
+
+#define AARCH64_RET_IN_MEM	(1 << 5)
+#define AARCH64_RET_NEED_COPY	(1 << 6)
+
+#define AARCH64_FLAG_ARG_V_BIT	7	/* tested by ffi_call_SYSV before loading q0-q7 */
+#define AARCH64_FLAG_ARG_V	(1 << AARCH64_FLAG_ARG_V_BIT)
+
+#define N_X_ARG_REG		8
+#define N_V_ARG_REG		8
+#define CALL_CONTEXT_SIZE	(N_V_ARG_REG * 16 + N_X_ARG_REG * 8)	/* q0-q7 (16 bytes each) followed by x0-x7 */
diff --git a/contrib/restricted/libffi/src/aarch64/sysv.S b/contrib/restricted/libffi/src/aarch64/sysv.S
index 4d8d85139a..6761ee1ea9 100644
--- a/contrib/restricted/libffi/src/aarch64/sysv.S
+++ b/contrib/restricted/libffi/src/aarch64/sysv.S
@@ -1,440 +1,440 @@
-/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. 
- 
-Permission is hereby granted, free of charge, to any person obtaining 
-a copy of this software and associated documentation files (the 
-``Software''), to deal in the Software without restriction, including 
-without limitation the rights to use, copy, modify, merge, publish, 
-distribute, sublicense, and/or sell copies of the Software, and to 
-permit persons to whom the Software is furnished to do so, subject to 
-the following conditions: 
- 
-The above copyright notice and this permission notice shall be 
-included in all copies or substantial portions of the Software. 
- 
-THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */ 
- 
-#if defined(__aarch64__) || defined(__arm64__) 
-#define LIBFFI_ASM 
-#include <fficonfig.h> 
-#include <ffi.h> 
-#include <ffi_cfi.h> 
-#include "internal.h" 
- 
-#ifdef HAVE_MACHINE_ASM_H 
-#include <machine/asm.h> 
-#else 
-#ifdef __USER_LABEL_PREFIX__ 
-#define CONCAT1(a, b) CONCAT2(a, b) 
-#define CONCAT2(a, b) a ## b 
- 
-/* Use the right prefix for global labels.  */ 
-#define CNAME(x) CONCAT1 (__USER_LABEL_PREFIX__, x) 
-#else 
-#define CNAME(x) x 
-#endif 
-#endif 
- 
-#ifdef __AARCH64EB__ 
-# define BE(X)	X 
-#else 
-# define BE(X)	0 
-#endif 
- 
-#ifdef __ILP32__ 
-#define PTR_REG(n)      w##n 
-#else 
-#define PTR_REG(n)      x##n 
-#endif 
- 
-#ifdef __ILP32__ 
-#define PTR_SIZE	4 
-#else 
-#define PTR_SIZE	8 
-#endif 
- 
-	.text 
-	.align 4 
- 
-/* ffi_call_SYSV 
-   extern void ffi_call_SYSV (void *stack, void *frame, 
-			      void (*fn)(void), void *rvalue, 
-			      int flags, void *closure); 
- 
-   Therefore on entry we have: 
- 
-   x0 stack 
-   x1 frame 
-   x2 fn 
-   x3 rvalue 
-   x4 flags 
-   x5 closure 
-*/ 
- 
-	cfi_startproc 
-CNAME(ffi_call_SYSV): 
-	/* Use a stack frame allocated by our caller.  */ 
-	cfi_def_cfa(x1, 32); 
-	stp	x29, x30, [x1] 
-	mov	x29, x1 
-	mov	sp, x0 
-	cfi_def_cfa_register(x29) 
-	cfi_rel_offset (x29, 0) 
-	cfi_rel_offset (x30, 8) 
- 
-	mov	x9, x2			/* save fn */ 
-	mov	x8, x3			/* install structure return */ 
-#ifdef FFI_GO_CLOSURES 
-	mov	x18, x5			/* install static chain */ 
-#endif 
-	stp	x3, x4, [x29, #16]	/* save rvalue and flags */ 
- 
-	/* Load the vector argument passing registers, if necessary.  */ 
-	tbz	w4, #AARCH64_FLAG_ARG_V_BIT, 1f 
-	ldp     q0, q1, [sp, #0] 
-	ldp     q2, q3, [sp, #32] 
-	ldp     q4, q5, [sp, #64] 
-	ldp     q6, q7, [sp, #96] 
-1: 
-	/* Load the core argument passing registers, including 
-	   the structure return pointer.  */ 
-	ldp     x0, x1, [sp, #16*N_V_ARG_REG + 0] 
-	ldp     x2, x3, [sp, #16*N_V_ARG_REG + 16] 
-	ldp     x4, x5, [sp, #16*N_V_ARG_REG + 32] 
-	ldp     x6, x7, [sp, #16*N_V_ARG_REG + 48] 
- 
-	/* Deallocate the context, leaving the stacked arguments.  */ 
-	add	sp, sp, #CALL_CONTEXT_SIZE 
- 
-	blr     x9			/* call fn */ 
- 
-	ldp	x3, x4, [x29, #16]	/* reload rvalue and flags */ 
- 
-	/* Partially deconstruct the stack frame.  */ 
-	mov     sp, x29 
-	cfi_def_cfa_register (sp) 
-	ldp     x29, x30, [x29] 
- 
-	/* Save the return value as directed.  */ 
-	adr	x5, 0f 
-	and	w4, w4, #AARCH64_RET_MASK 
-	add	x5, x5, x4, lsl #3 
-	br	x5 
- 
-	/* Note that each table entry is 2 insns, and thus 8 bytes. 
-	   For integer data, note that we're storing into ffi_arg 
-	   and therefore we want to extend to 64 bits; these types 
-	   have two consecutive entries allocated for them.  */ 
-	.align	4 
-0:	ret				/* VOID */ 
-	nop 
-1:	str	x0, [x3]		/* INT64 */ 
-	ret 
-2:	stp	x0, x1, [x3]		/* INT128 */ 
-	ret 
-3:	brk	#1000			/* UNUSED */ 
-	ret 
-4:	brk	#1000			/* UNUSED */ 
-	ret 
-5:	brk	#1000			/* UNUSED */ 
-	ret 
-6:	brk	#1000			/* UNUSED */ 
-	ret 
-7:	brk	#1000			/* UNUSED */ 
-	ret 
-8:	st4	{ v0.s, v1.s, v2.s, v3.s }[0], [x3]	/* S4 */ 
-	ret 
-9:	st3	{ v0.s, v1.s, v2.s }[0], [x3]	/* S3 */ 
-	ret 
-10:	stp	s0, s1, [x3]		/* S2 */ 
-	ret 
-11:	str	s0, [x3]		/* S1 */ 
-	ret 
-12:	st4	{ v0.d, v1.d, v2.d, v3.d }[0], [x3]	/* D4 */ 
-	ret 
-13:	st3	{ v0.d, v1.d, v2.d }[0], [x3]	/* D3 */ 
-	ret 
-14:	stp	d0, d1, [x3]		/* D2 */ 
-	ret 
-15:	str	d0, [x3]		/* D1 */ 
-	ret 
-16:	str	q3, [x3, #48]		/* Q4 */ 
-	nop 
-17:	str	q2, [x3, #32]		/* Q3 */ 
-	nop 
-18:	stp	q0, q1, [x3]		/* Q2 */ 
-	ret 
-19:	str	q0, [x3]		/* Q1 */ 
-	ret 
-20:	uxtb	w0, w0			/* UINT8 */ 
-	str	x0, [x3] 
-21:	ret				/* reserved */ 
-	nop 
-22:	uxth	w0, w0			/* UINT16 */ 
-	str	x0, [x3] 
-23:	ret				/* reserved */ 
-	nop 
-24:	mov	w0, w0			/* UINT32 */ 
-	str	x0, [x3] 
-25:	ret				/* reserved */ 
-	nop 
-26:	sxtb	x0, w0			/* SINT8 */ 
-	str	x0, [x3] 
-27:	ret				/* reserved */ 
-	nop 
-28:	sxth	x0, w0			/* SINT16 */ 
-	str	x0, [x3] 
-29:	ret				/* reserved */ 
-	nop 
-30:	sxtw	x0, w0			/* SINT32 */ 
-	str	x0, [x3] 
-31:	ret				/* reserved */ 
-	nop 
- 
-	cfi_endproc 
- 
-	.globl	CNAME(ffi_call_SYSV) 
-	FFI_HIDDEN(CNAME(ffi_call_SYSV)) 
-#ifdef __ELF__ 
-	.type	CNAME(ffi_call_SYSV), #function 
-	.size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV) 
-#endif 
- 
-/* ffi_closure_SYSV 
- 
-   Closure invocation glue. This is the low level code invoked directly by 
-   the closure trampoline to setup and call a closure. 
- 
-   On entry x17 points to a struct ffi_closure, x16 has been clobbered 
-   all other registers are preserved. 
- 
-   We allocate a call context and save the argument passing registers, 
-   then invoked the generic C ffi_closure_SYSV_inner() function to do all 
-   the real work, on return we load the result passing registers back from 
-   the call context. 
-*/ 
- 
-#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64) 
- 
-	.align 4 
-CNAME(ffi_closure_SYSV_V): 
-	cfi_startproc 
-	stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]! 
-	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) 
-	cfi_rel_offset (x29, 0) 
-	cfi_rel_offset (x30, 8) 
- 
-	/* Save the argument passing vector registers.  */ 
-	stp     q0, q1, [sp, #16 + 0] 
-	stp     q2, q3, [sp, #16 + 32] 
-	stp     q4, q5, [sp, #16 + 64] 
-	stp     q6, q7, [sp, #16 + 96] 
-	b	0f 
-	cfi_endproc 
- 
-	.globl	CNAME(ffi_closure_SYSV_V) 
-	FFI_HIDDEN(CNAME(ffi_closure_SYSV_V)) 
-#ifdef __ELF__ 
-	.type	CNAME(ffi_closure_SYSV_V), #function 
-	.size	CNAME(ffi_closure_SYSV_V), . - CNAME(ffi_closure_SYSV_V) 
-#endif 
- 
-	.align	4 
-	cfi_startproc 
-CNAME(ffi_closure_SYSV): 
-	stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]! 
-	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) 
-	cfi_rel_offset (x29, 0) 
-	cfi_rel_offset (x30, 8) 
-0: 
-	mov     x29, sp 
- 
-	/* Save the argument passing core registers.  */ 
-	stp     x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0] 
-	stp     x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16] 
-	stp     x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32] 
-	stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48] 
- 
-	/* Load ffi_closure_inner arguments.  */ 
-	ldp	PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET]	/* load cif, fn */ 
-	ldr	PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2]	/* load user_data */ 
-.Ldo_closure: 
-	add	x3, sp, #16				/* load context */ 
-	add	x4, sp, #ffi_closure_SYSV_FS		/* load stack */ 
-	add	x5, sp, #16+CALL_CONTEXT_SIZE		/* load rvalue */ 
-	mov	x6, x8					/* load struct_rval */ 
-	bl      CNAME(ffi_closure_SYSV_inner) 
- 
-	/* Load the return value as directed.  */ 
-	adr	x1, 0f 
-	and	w0, w0, #AARCH64_RET_MASK 
-	add	x1, x1, x0, lsl #3 
-	add	x3, sp, #16+CALL_CONTEXT_SIZE 
-	br	x1 
- 
-	/* Note that each table entry is 2 insns, and thus 8 bytes.  */ 
-	.align	4 
-0:	b	99f			/* VOID */ 
-	nop 
-1:	ldr	x0, [x3]		/* INT64 */ 
-	b	99f 
-2:	ldp	x0, x1, [x3]		/* INT128 */ 
-	b	99f 
-3:	brk	#1000			/* UNUSED */ 
-	nop 
-4:	brk	#1000			/* UNUSED */ 
-	nop 
-5:	brk	#1000			/* UNUSED */ 
-	nop 
-6:	brk	#1000			/* UNUSED */ 
-	nop 
-7:	brk	#1000			/* UNUSED */ 
-	nop 
-8:	ldr	s3, [x3, #12]		/* S4 */ 
-	nop 
-9:	ldr	s2, [x3, #8]		/* S3 */ 
-	nop 
-10:	ldp	s0, s1, [x3]		/* S2 */ 
-	b	99f 
-11:	ldr	s0, [x3]		/* S1 */ 
-	b	99f 
-12:	ldr	d3, [x3, #24]		/* D4 */ 
-	nop 
-13:	ldr	d2, [x3, #16]		/* D3 */ 
-	nop 
-14:	ldp	d0, d1, [x3]		/* D2 */ 
-	b	99f 
-15:	ldr	d0, [x3]		/* D1 */ 
-	b	99f 
-16:	ldr	q3, [x3, #48]		/* Q4 */ 
-	nop 
-17:	ldr	q2, [x3, #32]		/* Q3 */ 
-	nop 
-18:	ldp	q0, q1, [x3]		/* Q2 */ 
-	b	99f 
-19:	ldr	q0, [x3]		/* Q1 */ 
-	b	99f 
-20:	ldrb	w0, [x3, #BE(7)]	/* UINT8 */ 
-	b	99f 
-21:	brk	#1000			/* reserved */ 
-	nop 
-22:	ldrh	w0, [x3, #BE(6)]	/* UINT16 */ 
-	b	99f 
-23:	brk	#1000			/* reserved */ 
-	nop 
-24:	ldr	w0, [x3, #BE(4)]	/* UINT32 */ 
-	b	99f 
-25:	brk	#1000			/* reserved */ 
-	nop 
-26:	ldrsb	x0, [x3, #BE(7)]	/* SINT8 */ 
-	b	99f 
-27:	brk	#1000			/* reserved */ 
-	nop 
-28:	ldrsh	x0, [x3, #BE(6)]	/* SINT16 */ 
-	b	99f 
-29:	brk	#1000			/* reserved */ 
-	nop 
-30:	ldrsw	x0, [x3, #BE(4)]	/* SINT32 */ 
-	nop 
-31:					/* reserved */ 
-99:	ldp     x29, x30, [sp], #ffi_closure_SYSV_FS 
-	cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS) 
-	cfi_restore (x29) 
-	cfi_restore (x30) 
-	ret 
-	cfi_endproc 
- 
-	.globl	CNAME(ffi_closure_SYSV) 
-	FFI_HIDDEN(CNAME(ffi_closure_SYSV)) 
-#ifdef __ELF__ 
-	.type	CNAME(ffi_closure_SYSV), #function 
-	.size	CNAME(ffi_closure_SYSV), . - CNAME(ffi_closure_SYSV) 
-#endif 
- 
-#if FFI_EXEC_TRAMPOLINE_TABLE 
- 
-#ifdef __MACH__ 
-#include <mach/machine/vm_param.h> 
-    .align PAGE_MAX_SHIFT 
-CNAME(ffi_closure_trampoline_table_page): 
-    .rept PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE 
-    adr x16, -PAGE_MAX_SIZE 
-    ldp x17, x16, [x16] 
-    br x16 
-	nop		/* each entry in the trampoline config page is 2*sizeof(void*) so the trampoline itself cannot be smaller that 16 bytes */ 
-    .endr 
- 
-    .globl CNAME(ffi_closure_trampoline_table_page) 
-    FFI_HIDDEN(CNAME(ffi_closure_trampoline_table_page)) 
-    #ifdef __ELF__ 
-    	.type	CNAME(ffi_closure_trampoline_table_page), #function 
-    	.size	CNAME(ffi_closure_trampoline_table_page), . - CNAME(ffi_closure_trampoline_table_page) 
-    #endif 
-#endif 
- 
-#endif /* FFI_EXEC_TRAMPOLINE_TABLE */ 
- 
-#ifdef FFI_GO_CLOSURES 
-	.align 4 
-CNAME(ffi_go_closure_SYSV_V): 
-	cfi_startproc 
-	stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]! 
-	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) 
-	cfi_rel_offset (x29, 0) 
-	cfi_rel_offset (x30, 8) 
- 
-	/* Save the argument passing vector registers.  */ 
-	stp     q0, q1, [sp, #16 + 0] 
-	stp     q2, q3, [sp, #16 + 32] 
-	stp     q4, q5, [sp, #16 + 64] 
-	stp     q6, q7, [sp, #16 + 96] 
-	b	0f 
-	cfi_endproc 
- 
-	.globl	CNAME(ffi_go_closure_SYSV_V) 
-	FFI_HIDDEN(CNAME(ffi_go_closure_SYSV_V)) 
-#ifdef __ELF__ 
-	.type	CNAME(ffi_go_closure_SYSV_V), #function 
-	.size	CNAME(ffi_go_closure_SYSV_V), . - CNAME(ffi_go_closure_SYSV_V) 
-#endif 
- 
-	.align	4 
-	cfi_startproc 
-CNAME(ffi_go_closure_SYSV): 
-	stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]! 
-	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS) 
-	cfi_rel_offset (x29, 0) 
-	cfi_rel_offset (x30, 8) 
-0: 
-	mov     x29, sp 
- 
-	/* Save the argument passing core registers.  */ 
-	stp     x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0] 
-	stp     x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16] 
-	stp     x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32] 
-	stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48] 
- 
-	/* Load ffi_closure_inner arguments.  */ 
-	ldp	PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]/* load cif, fn */ 
-	mov	x2, x18					/* load user_data */ 
-	b	.Ldo_closure 
-	cfi_endproc 
- 
-	.globl	CNAME(ffi_go_closure_SYSV) 
-	FFI_HIDDEN(CNAME(ffi_go_closure_SYSV)) 
-#ifdef __ELF__ 
-	.type	CNAME(ffi_go_closure_SYSV), #function 
-	.size	CNAME(ffi_go_closure_SYSV), . - CNAME(ffi_go_closure_SYSV) 
-#endif 
-#endif /* FFI_GO_CLOSURES */ 
-#endif /* __arm64__ */ 
- 
-#if defined __ELF__ && defined __linux__ 
-	.section .note.GNU-stack,"",%progbits 
-#endif 
- 
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#if defined(__aarch64__) || defined(__arm64__)
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+#include "internal.h"
+
+#ifdef HAVE_MACHINE_ASM_H
+#include <machine/asm.h>
+#else
+#ifdef __USER_LABEL_PREFIX__
+#define CONCAT1(a, b) CONCAT2(a, b) /* extra level so the arguments are expanded before pasting */
+#define CONCAT2(a, b) a ## b
+
+/* Prepend the platform's user label prefix to global symbol names.  */
+#define CNAME(x) CONCAT1 (__USER_LABEL_PREFIX__, x)
+#else
+#define CNAME(x) x
+#endif
+#endif
+
+#ifdef __AARCH64EB__
+# define BE(X)	X	/* big-endian: use the given byte offset into the 8-byte slot */
+#else
+# define BE(X)	0	/* little-endian: the value starts at offset 0 */
+#endif
+
+#ifdef __ILP32__
+#define PTR_REG(n)      w##n	/* ILP32: pointers are loaded through the 32-bit register view */
+#else
+#define PTR_REG(n)      x##n
+#endif
+
+#ifdef __ILP32__
+#define PTR_SIZE	4	/* bytes per pointer, used for closure field offsets */
+#else
+#define PTR_SIZE	8
+#endif
+
+	.text
+	.align 4
+
+/* ffi_call_SYSV
+   extern void ffi_call_SYSV (void *stack, void *frame,
+			      void (*fn)(void), void *rvalue,
+			      int flags, void *closure);
+
+   Therefore on entry we have:
+
+   x0 stack
+   x1 frame
+   x2 fn
+   x3 rvalue
+   x4 flags
+   x5 closure
+*/
+
+	cfi_startproc
+CNAME(ffi_call_SYSV):
+	/* Use the frame our caller allocated at x1: x29/x30 go to [x1], rvalue and flags to [x1 + 16].  */
+	cfi_def_cfa(x1, 32);
+	stp	x29, x30, [x1]
+	mov	x29, x1
+	mov	sp, x0
+	cfi_def_cfa_register(x29)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
+
+	mov	x9, x2			/* save fn */
+	mov	x8, x3			/* install structure return */
+#ifdef FFI_GO_CLOSURES
+	mov	x18, x5			/* install static chain */
+#endif
+	stp	x3, x4, [x29, #16]	/* save rvalue and flags */
+
+	/* Load the vector argument passing registers, if necessary.  */
+	tbz	w4, #AARCH64_FLAG_ARG_V_BIT, 1f	/* skip the loads when the V flag is clear */
+	ldp     q0, q1, [sp, #0]
+	ldp     q2, q3, [sp, #32]
+	ldp     q4, q5, [sp, #64]
+	ldp     q6, q7, [sp, #96]
+1:
+	/* Load the core argument passing registers x0-x7; x8, the
+	   structure return pointer, was already installed above.  */
+	ldp     x0, x1, [sp, #16*N_V_ARG_REG + 0]
+	ldp     x2, x3, [sp, #16*N_V_ARG_REG + 16]
+	ldp     x4, x5, [sp, #16*N_V_ARG_REG + 32]
+	ldp     x6, x7, [sp, #16*N_V_ARG_REG + 48]
+
+	/* Deallocate the context, leaving the stacked arguments.  */
+	add	sp, sp, #CALL_CONTEXT_SIZE
+
+	blr     x9			/* call fn */
+
+	ldp	x3, x4, [x29, #16]	/* reload rvalue and flags */
+
+	/* Partially deconstruct the stack frame.  */
+	mov     sp, x29
+	cfi_def_cfa_register (sp)
+	ldp     x29, x30, [x29]
+
+	/* Save the return value as directed: jump to table entry (flags & AARCH64_RET_MASK).  */
+	adr	x5, 0f			/* x5 = address of entry 0 below */
+	and	w4, w4, #AARCH64_RET_MASK
+	add	x5, x5, x4, lsl #3	/* each entry is 8 bytes */
+	br	x5
+
+	/* Note that each table entry is 2 insns, and thus 8 bytes.
+	   For integer data, note that we're storing into ffi_arg
+	   and therefore we want to extend to 64 bits; these types
+	   have two consecutive entries allocated for them.  */
+	.align	4
+0:	ret				/* VOID */
+	nop
+1:	str	x0, [x3]		/* INT64 */
+	ret
+2:	stp	x0, x1, [x3]		/* INT128 */
+	ret
+3:	brk	#1000			/* UNUSED */
+	ret
+4:	brk	#1000			/* UNUSED */
+	ret
+5:	brk	#1000			/* UNUSED */
+	ret
+6:	brk	#1000			/* UNUSED */
+	ret
+7:	brk	#1000			/* UNUSED */
+	ret
+8:	st4	{ v0.s, v1.s, v2.s, v3.s }[0], [x3]	/* S4 */
+	ret
+9:	st3	{ v0.s, v1.s, v2.s }[0], [x3]	/* S3 */
+	ret
+10:	stp	s0, s1, [x3]		/* S2 */
+	ret
+11:	str	s0, [x3]		/* S1 */
+	ret
+12:	st4	{ v0.d, v1.d, v2.d, v3.d }[0], [x3]	/* D4 */
+	ret
+13:	st3	{ v0.d, v1.d, v2.d }[0], [x3]	/* D3 */
+	ret
+14:	stp	d0, d1, [x3]		/* D2 */
+	ret
+15:	str	d0, [x3]		/* D1 */
+	ret
+16:	str	q3, [x3, #48]		/* Q4 */
+	nop				/* no ret: falls through to Q3 */
+17:	str	q2, [x3, #32]		/* Q3 */
+	nop				/* no ret: falls through to Q2 */
+18:	stp	q0, q1, [x3]		/* Q2 */
+	ret
+19:	str	q0, [x3]		/* Q1 */
+	ret
+20:	uxtb	w0, w0			/* UINT8 */
+	str	x0, [x3]
+21:	ret				/* reserved: second slot of UINT8 */
+	nop
+22:	uxth	w0, w0			/* UINT16 */
+	str	x0, [x3]
+23:	ret				/* reserved: second slot of UINT16 */
+	nop
+24:	mov	w0, w0			/* UINT32 */
+	str	x0, [x3]
+25:	ret				/* reserved: second slot of UINT32 */
+	nop
+26:	sxtb	x0, w0			/* SINT8 */
+	str	x0, [x3]
+27:	ret				/* reserved: second slot of SINT8 */
+	nop
+28:	sxth	x0, w0			/* SINT16 */
+	str	x0, [x3]
+29:	ret				/* reserved: second slot of SINT16 */
+	nop
+30:	sxtw	x0, w0			/* SINT32 */
+	str	x0, [x3]
+31:	ret				/* reserved: second slot of SINT32 */
+	nop
+
+	cfi_endproc
+
+	.globl	CNAME(ffi_call_SYSV)
+	FFI_HIDDEN(CNAME(ffi_call_SYSV))
+#ifdef __ELF__
+	.type	CNAME(ffi_call_SYSV), #function
+	.size CNAME(ffi_call_SYSV), .-CNAME(ffi_call_SYSV)
+#endif
+
+/* ffi_closure_SYSV
+
+   Closure invocation glue. This is the low level code invoked directly by
+   the closure trampoline to setup and call a closure.
+
+   On entry x17 points to a struct ffi_closure and x16 has been clobbered;
+   all other registers are preserved.
+
+   We allocate a call context and save the argument passing registers,
+   then invoke the generic C ffi_closure_SYSV_inner() function to do all
+   the real work; on return we load the result passing registers back
+   from the call context.
+*/
+
+#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64) /* saved x29/x30 + call context + 64-byte return value area */
+
+	.align 4
+CNAME(ffi_closure_SYSV_V):
+	cfi_startproc
+	stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
+
+	/* Save the argument passing vector registers at the start of the call context (sp + 16).  */
+	stp     q0, q1, [sp, #16 + 0]
+	stp     q2, q3, [sp, #16 + 32]
+	stp     q4, q5, [sp, #16 + 64]
+	stp     q6, q7, [sp, #16 + 96]
+	b	0f			/* continue in ffi_closure_SYSV, after its own prologue */
+	cfi_endproc
+
+	.globl	CNAME(ffi_closure_SYSV_V)
+	FFI_HIDDEN(CNAME(ffi_closure_SYSV_V))
+#ifdef __ELF__
+	.type	CNAME(ffi_closure_SYSV_V), #function
+	.size	CNAME(ffi_closure_SYSV_V), . - CNAME(ffi_closure_SYSV_V)
+#endif
+
+	.align	4
+	cfi_startproc
+CNAME(ffi_closure_SYSV):
+	stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
+0:					/* ffi_closure_SYSV_V joins here after saving q0-q7 */
+	mov     x29, sp
+
+	/* Save the argument passing core registers after the vector area of the context.  */
+	stp     x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
+	stp     x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
+	stp     x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
+	stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
+
+	/* Load ffi_closure_inner arguments.  */
+	ldp	PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET]	/* load cif, fn */
+	ldr	PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2]	/* load user_data */
+.Ldo_closure:				/* shared tail: ffi_go_closure_SYSV branches here */
+	add	x3, sp, #16				/* load context */
+	add	x4, sp, #ffi_closure_SYSV_FS		/* load stack */
+	add	x5, sp, #16+CALL_CONTEXT_SIZE		/* load rvalue */
+	mov	x6, x8					/* load struct_rval */
+	bl      CNAME(ffi_closure_SYSV_inner)
+
+	/* Load the return value as directed: w0 from the inner call selects the table entry.  */
+	adr	x1, 0f			/* x1 = address of entry 0 below */
+	and	w0, w0, #AARCH64_RET_MASK
+	add	x1, x1, x0, lsl #3	/* each entry is 8 bytes */
+	add	x3, sp, #16+CALL_CONTEXT_SIZE	/* x3 = rvalue area the entries read from */
+	br	x1
+
+	/* Note that each table entry is 2 insns, and thus 8 bytes.  */
+	.align	4
+0:	b	99f			/* VOID */
+	nop
+1:	ldr	x0, [x3]		/* INT64 */
+	b	99f
+2:	ldp	x0, x1, [x3]		/* INT128 */
+	b	99f
+3:	brk	#1000			/* UNUSED */
+	nop
+4:	brk	#1000			/* UNUSED */
+	nop
+5:	brk	#1000			/* UNUSED */
+	nop
+6:	brk	#1000			/* UNUSED */
+	nop
+7:	brk	#1000			/* UNUSED */
+	nop
+8:	ldr	s3, [x3, #12]		/* S4 */
+	nop				/* falls through to S3 */
+9:	ldr	s2, [x3, #8]		/* S3 */
+	nop				/* falls through to S2 */
+10:	ldp	s0, s1, [x3]		/* S2 */
+	b	99f
+11:	ldr	s0, [x3]		/* S1 */
+	b	99f
+12:	ldr	d3, [x3, #24]		/* D4 */
+	nop				/* falls through to D3 */
+13:	ldr	d2, [x3, #16]		/* D3 */
+	nop				/* falls through to D2 */
+14:	ldp	d0, d1, [x3]		/* D2 */
+	b	99f
+15:	ldr	d0, [x3]		/* D1 */
+	b	99f
+16:	ldr	q3, [x3, #48]		/* Q4 */
+	nop				/* falls through to Q3 */
+17:	ldr	q2, [x3, #32]		/* Q3 */
+	nop				/* falls through to Q2 */
+18:	ldp	q0, q1, [x3]		/* Q2 */
+	b	99f
+19:	ldr	q0, [x3]		/* Q1 */
+	b	99f
+20:	ldrb	w0, [x3, #BE(7)]	/* UINT8 */
+	b	99f
+21:	brk	#1000			/* reserved */
+	nop
+22:	ldrh	w0, [x3, #BE(6)]	/* UINT16 */
+	b	99f
+23:	brk	#1000			/* reserved */
+	nop
+24:	ldr	w0, [x3, #BE(4)]	/* UINT32 */
+	b	99f
+25:	brk	#1000			/* reserved */
+	nop
+26:	ldrsb	x0, [x3, #BE(7)]	/* SINT8 */
+	b	99f
+27:	brk	#1000			/* reserved */
+	nop
+28:	ldrsh	x0, [x3, #BE(6)]	/* SINT16 */
+	b	99f
+29:	brk	#1000			/* reserved */
+	nop
+30:	ldrsw	x0, [x3, #BE(4)]	/* SINT32 */
+	nop				/* falls through to the epilogue at 99 */
+31:					/* reserved */
+99:	ldp     x29, x30, [sp], #ffi_closure_SYSV_FS
+	cfi_adjust_cfa_offset (-ffi_closure_SYSV_FS)
+	cfi_restore (x29)
+	cfi_restore (x30)
+	ret
+	cfi_endproc
+
+	.globl	CNAME(ffi_closure_SYSV)
+	FFI_HIDDEN(CNAME(ffi_closure_SYSV))
+#ifdef __ELF__
+	.type	CNAME(ffi_closure_SYSV), #function
+	.size	CNAME(ffi_closure_SYSV), . - CNAME(ffi_closure_SYSV)
+#endif
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+#include <mach/machine/vm_param.h>
+    .align PAGE_MAX_SHIFT
+CNAME(ffi_closure_trampoline_table_page):
+    .rept PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE
+    adr x16, -PAGE_MAX_SIZE
+    ldp x17, x16, [x16]
+    br x16
+	nop		/* each entry in the trampoline config page is 2*sizeof(void*) so the trampoline itself cannot be smaller than 16 bytes */
+    .endr
+
+    .globl CNAME(ffi_closure_trampoline_table_page)
+    FFI_HIDDEN(CNAME(ffi_closure_trampoline_table_page))
+    #ifdef __ELF__
+    	.type	CNAME(ffi_closure_trampoline_table_page), #function
+    	.size	CNAME(ffi_closure_trampoline_table_page), . - CNAME(ffi_closure_trampoline_table_page)
+    #endif
+#endif
+
+#endif /* FFI_EXEC_TRAMPOLINE_TABLE */
+
+#ifdef FFI_GO_CLOSURES
+	.align 4
+CNAME(ffi_go_closure_SYSV_V):
+	cfi_startproc
+	stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
+
+	/* Save the argument passing vector registers at the start of the call context (sp + 16).  */
+	stp     q0, q1, [sp, #16 + 0]
+	stp     q2, q3, [sp, #16 + 32]
+	stp     q4, q5, [sp, #16 + 64]
+	stp     q6, q7, [sp, #16 + 96]
+	b	0f			/* continue in ffi_go_closure_SYSV, after its own prologue */
+	cfi_endproc
+
+	.globl	CNAME(ffi_go_closure_SYSV_V)
+	FFI_HIDDEN(CNAME(ffi_go_closure_SYSV_V))
+#ifdef __ELF__
+	.type	CNAME(ffi_go_closure_SYSV_V), #function
+	.size	CNAME(ffi_go_closure_SYSV_V), . - CNAME(ffi_go_closure_SYSV_V)
+#endif
+
+	.align	4
+	cfi_startproc
+CNAME(ffi_go_closure_SYSV):
+	stp     x29, x30, [sp, #-ffi_closure_SYSV_FS]!
+	cfi_adjust_cfa_offset (ffi_closure_SYSV_FS)
+	cfi_rel_offset (x29, 0)
+	cfi_rel_offset (x30, 8)
+0:					/* ffi_go_closure_SYSV_V joins here after saving q0-q7 */
+	mov     x29, sp
+
+	/* Save the argument passing core registers after the vector area of the context.  */
+	stp     x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
+	stp     x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
+	stp     x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
+	stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
+
+	/* Load ffi_closure_inner arguments; x18 is the static chain and points at the go closure.  */
+	ldp	PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]/* load cif, fn */
+	mov	x2, x18					/* load user_data */
+	b	.Ldo_closure				/* finish in the shared tail of ffi_closure_SYSV */
+	cfi_endproc
+
+	.globl	CNAME(ffi_go_closure_SYSV)
+	FFI_HIDDEN(CNAME(ffi_go_closure_SYSV))
+#ifdef __ELF__
+	.type	CNAME(ffi_go_closure_SYSV), #function
+	.size	CNAME(ffi_go_closure_SYSV), . - CNAME(ffi_go_closure_SYSV)
+#endif
+#endif /* FFI_GO_CLOSURES */
+#endif /* __arm64__ */
+
+#if defined __ELF__ && defined __linux__
+	.section .note.GNU-stack,"",%progbits
+#endif
+
diff --git a/contrib/restricted/libffi/src/aarch64/win64_armasm.S b/contrib/restricted/libffi/src/aarch64/win64_armasm.S
index 90b95def5c..a79f8a8aa9 100644
--- a/contrib/restricted/libffi/src/aarch64/win64_armasm.S
+++ b/contrib/restricted/libffi/src/aarch64/win64_armasm.S
@@ -1,506 +1,506 @@
-/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd. 
-Permission is hereby granted, free of charge, to any person obtaining 
-a copy of this software and associated documentation files (the 
-``Software''), to deal in the Software without restriction, including 
-without limitation the rights to use, copy, modify, merge, publish, 
-distribute, sublicense, and/or sell copies of the Software, and to 
-permit persons to whom the Software is furnished to do so, subject to 
-the following conditions: 
-The above copyright notice and this permission notice shall be 
-included in all copies or substantial portions of the Software. 
-THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 
-CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 
-TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 
-SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */ 
- 
-#define LIBFFI_ASM 
-#include <fficonfig.h> 
-#include <ffi.h> 
-#include <ffi_cfi.h> 
-#include "internal.h" 
- 
-	OPT	2 /*disable listing */ 
-/* For some macros to add unwind information */ 
-#include "ksarm64.h" 
-	OPT	1 /*re-enable listing */ 
- 
-#define BE(X)	0 
-#define PTR_REG(n)      x##n 
-#define PTR_SIZE	8 
- 
-	IMPORT ffi_closure_SYSV_inner 
-	EXPORT	ffi_call_SYSV 
-	EXPORT	ffi_closure_SYSV_V 
-	EXPORT	ffi_closure_SYSV 
-	EXPORT	extend_hfa_type 
-	EXPORT	compress_hfa_type 
-#ifdef FFI_GO_CLOSURES 
-	EXPORT	ffi_go_closure_SYSV_V 
-	EXPORT	ffi_go_closure_SYSV 
-#endif 
- 
-	TEXTAREA, ALLIGN=8 
- 
-/* ffi_call_SYSV 
-   extern void ffi_call_SYSV (void *stack, void *frame, 
-			      void (*fn)(void), void *rvalue, 
-			      int flags, void *closure); 
-   Therefore on entry we have: 
-   x0 stack 
-   x1 frame 
-   x2 fn 
-   x3 rvalue 
-   x4 flags 
-   x5 closure 
-*/ 
- 
-	NESTED_ENTRY ffi_call_SYSV_fake 
- 
-	/* For unwind information, Windows has to store fp and lr  */ 
-	PROLOG_SAVE_REG_PAIR	x29, x30, #-32! 
- 
-	ALTERNATE_ENTRY ffi_call_SYSV 
-	/* Use a stack frame allocated by our caller. */ 
-	stp	x29, x30, [x1] 
-	mov	x29, x1 
-	mov	sp, x0 
- 
-	mov	x9, x2			/* save fn */ 
-	mov	x8, x3			/* install structure return */ 
-#ifdef FFI_GO_CLOSURES 
-	/*mov	x18, x5			install static chain */ 
-#endif 
-	stp	x3, x4, [x29, #16]	/* save rvalue and flags */ 
-	 
-	/* Load the vector argument passing registers, if necessary.  */ 
-	tbz	x4, #AARCH64_FLAG_ARG_V_BIT, ffi_call_SYSV_L1 
-	ldp	q0, q1, [sp, #0] 
-	ldp	q2, q3, [sp, #32] 
-	ldp	q4, q5, [sp, #64] 
-	ldp	q6, q7, [sp, #96] 
- 
-ffi_call_SYSV_L1 
-	/* Load the core argument passing registers, including 
-	   the structure return pointer.  */ 
-	ldp     x0, x1, [sp, #16*N_V_ARG_REG + 0] 
-	ldp     x2, x3, [sp, #16*N_V_ARG_REG + 16] 
-	ldp     x4, x5, [sp, #16*N_V_ARG_REG + 32] 
-	ldp     x6, x7, [sp, #16*N_V_ARG_REG + 48] 
- 
-	/* Deallocate the context, leaving the stacked arguments.  */ 
-	add	sp, sp, #CALL_CONTEXT_SIZE	 
- 
-	blr     x9			/* call fn */ 
- 
-	ldp	x3, x4, [x29, #16]	/* reload rvalue and flags */ 
- 
-	/* Partially deconstruct the stack frame. */ 
-	mov     sp, x29  
-	ldp     x29, x30, [x29] 
- 
-	/* Save the return value as directed.  */ 
-	adr	x5, ffi_call_SYSV_return 
-	and	w4, w4, #AARCH64_RET_MASK 
-	add	x5, x5, x4, lsl #3 
-	br	x5 
-	 
-	/* Note that each table entry is 2 insns, and thus 8 bytes. 
-	   For integer data, note that we're storing into ffi_arg 
-	   and therefore we want to extend to 64 bits; these types 
-	   have two consecutive entries allocated for them.  */ 
-	ALIGN 4 
-ffi_call_SYSV_return 
-	ret				/* VOID */ 
-	nop 
-	str	x0, [x3]		/* INT64 */ 
-	ret 
-	stp	x0, x1, [x3]		/* INT128 */ 
-	ret 
-	brk	#1000			/* UNUSED */ 
-	ret 
-	brk	#1000			/* UNUSED */ 
-	ret 
-	brk	#1000			/* UNUSED */ 
-	ret 
-	brk	#1000			/* UNUSED */ 
-	ret 
-	brk	#1000			/* UNUSED */ 
-	ret 
-	st4	{ v0.s, v1.s, v2.s, v3.s }[0], [x3]	/* S4 */ 
-	ret 
-	st3	{ v0.s, v1.s, v2.s }[0], [x3]	/* S3 */ 
-	ret 
-	stp	s0, s1, [x3]		/* S2 */ 
-	ret 
-	str	s0, [x3]		/* S1 */ 
-	ret 
-	st4	{ v0.d, v1.d, v2.d, v3.d }[0], [x3]	/* D4 */ 
-	ret 
-	st3	{ v0.d, v1.d, v2.d }[0], [x3]	/* D3 */ 
-	ret 
-	stp	d0, d1, [x3]		/* D2 */ 
-	ret 
-	str	d0, [x3]		/* D1 */ 
-	ret 
-	str	q3, [x3, #48]		/* Q4 */ 
-	nop 
-	str	q2, [x3, #32]		/* Q3 */ 
-	nop 
-	stp	q0, q1, [x3]		/* Q2 */ 
-	ret 
-	str	q0, [x3]		/* Q1 */ 
-	ret 
-	uxtb	w0, w0			/* UINT8 */ 
-	str	x0, [x3] 
-	ret				/* reserved */ 
-	nop 
-	uxth	w0, w0			/* UINT16 */ 
-	str	x0, [x3] 
-	ret				/* reserved */ 
-	nop 
-	mov	w0, w0			/* UINT32 */ 
-	str	x0, [x3] 
-	ret				/* reserved */ 
-	nop 
-	sxtb	x0, w0			/* SINT8 */ 
-	str	x0, [x3] 
-	ret				/* reserved */ 
-	nop 
-	sxth	x0, w0			/* SINT16 */ 
-	str	x0, [x3] 
-	ret				/* reserved */ 
-	nop 
-	sxtw	x0, w0			/* SINT32 */ 
-	str	x0, [x3] 
-	ret				/* reserved */ 
-	nop 
-	 
-	 
-	NESTED_END ffi_call_SYSV_fake 
-	 
- 
-/* ffi_closure_SYSV 
-   Closure invocation glue. This is the low level code invoked directly by 
-   the closure trampoline to setup and call a closure. 
-   On entry x17 points to a struct ffi_closure, x16 has been clobbered 
-   all other registers are preserved. 
-   We allocate a call context and save the argument passing registers, 
-   then invoked the generic C ffi_closure_SYSV_inner() function to do all 
-   the real work, on return we load the result passing registers back from 
-   the call context. 
-*/ 
- 
-#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64) 
- 
-	NESTED_ENTRY	ffi_closure_SYSV_V 
-	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS! 
- 
-	/* Save the argument passing vector registers.  */ 
-	stp	q0, q1, [sp, #16 + 0] 
-	stp	q2, q3, [sp, #16 + 32] 
-	stp	q4, q5, [sp, #16 + 64] 
-	stp	q6, q7, [sp, #16 + 96] 
- 
-	b	ffi_closure_SYSV_save_argument 
-	NESTED_END	ffi_closure_SYSV_V 
- 
-	NESTED_ENTRY	ffi_closure_SYSV 
-	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS! 
- 
-ffi_closure_SYSV_save_argument 
-	/* Save the argument passing core registers.  */ 
-	stp     x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0] 
-	stp     x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16] 
-	stp     x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32] 
-	stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48] 
- 
-	/* Load ffi_closure_inner arguments.  */ 
-	ldp	PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET]	/* load cif, fn */ 
-	ldr	PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2]	/* load user_data */ 
- 
-do_closure 
-	add	x3, sp, #16							/* load context */ 
-	add	x4, sp, #ffi_closure_SYSV_FS		/* load stack */ 
-	add	x5, sp, #16+CALL_CONTEXT_SIZE		/* load rvalue */ 
-	mov	x6, x8					/* load struct_rval */ 
- 
-	bl	ffi_closure_SYSV_inner 
- 
-	/* Load the return value as directed.  */ 
-	adr	x1, ffi_closure_SYSV_return_base 
-	and	w0, w0, #AARCH64_RET_MASK 
-	add	x1, x1, x0, lsl #3 
-	add	x3, sp, #16+CALL_CONTEXT_SIZE 
-	br	x1 
- 
-	/* Note that each table entry is 2 insns, and thus 8 bytes.  */ 
-	ALIGN	8 
-ffi_closure_SYSV_return_base 
-	b	ffi_closure_SYSV_epilog			/* VOID */ 
-	nop 
-	ldr	x0, [x3]		/* INT64 */ 
-	b	ffi_closure_SYSV_epilog 
-	ldp	x0, x1, [x3]		/* INT128 */ 
-	b	ffi_closure_SYSV_epilog 
-	brk	#1000			/* UNUSED */ 
-	nop 
-	brk	#1000			/* UNUSED */ 
-	nop 
-	brk	#1000			/* UNUSED */ 
-	nop 
-	brk	#1000			/* UNUSED */ 
-	nop 
-	brk	#1000			/* UNUSED */ 
-	nop 
-	ldr	s3, [x3, #12]		/* S4 */ 
-	nop 
-	ldr	s2, [x3, #8]		/* S3 */ 
-	nop 
-	ldp	s0, s1, [x3]		/* S2 */ 
-	b	ffi_closure_SYSV_epilog 
-	ldr	s0, [x3]		/* S1 */ 
-	b	ffi_closure_SYSV_epilog 
-	ldr	d3, [x3, #24]		/* D4 */ 
-	nop 
-	ldr	d2, [x3, #16]		/* D3 */ 
-	nop 
-	ldp	d0, d1, [x3]		/* D2 */ 
-	b	ffi_closure_SYSV_epilog 
-	ldr	d0, [x3]		/* D1 */ 
-	b	ffi_closure_SYSV_epilog 
-	ldr	q3, [x3, #48]		/* Q4 */ 
-	nop 
-	ldr	q2, [x3, #32]		/* Q3 */ 
-	nop 
-	ldp	q0, q1, [x3]		/* Q2 */ 
-	b	ffi_closure_SYSV_epilog 
-	ldr	q0, [x3]		/* Q1 */ 
-	b	ffi_closure_SYSV_epilog 
-	ldrb	w0, [x3, #BE(7)]	/* UINT8 */ 
-	b	ffi_closure_SYSV_epilog 
-	brk	#1000			/* reserved */ 
-	nop 
-	ldrh	w0, [x3, #BE(6)]	/* UINT16 */ 
-	b	ffi_closure_SYSV_epilog 
-	brk	#1000			/* reserved */ 
-	nop 
-	ldr	w0, [x3, #BE(4)]	/* UINT32 */ 
-	b	ffi_closure_SYSV_epilog 
-	brk	#1000			/* reserved */ 
-	nop 
-	ldrsb	x0, [x3, #BE(7)]	/* SINT8 */ 
-	b	ffi_closure_SYSV_epilog 
-	brk	#1000			/* reserved */ 
-	nop 
-	ldrsh	x0, [x3, #BE(6)]	/* SINT16 */ 
-	b	ffi_closure_SYSV_epilog 
-	brk	#1000			/* reserved */ 
-	nop 
-	ldrsw	x0, [x3, #BE(4)]	/* SINT32 */ 
-	nop 
-					/* reserved */ 
- 
-ffi_closure_SYSV_epilog 
-	EPILOG_RESTORE_REG_PAIR	x29, x30, #ffi_closure_SYSV_FS! 
-	EPILOG_RETURN 
-	NESTED_END	ffi_closure_SYSV 
- 
- 
-#ifdef FFI_GO_CLOSURES 
-	NESTED_ENTRY	ffi_go_closure_SYSV_V 
-	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS! 
- 
-	/* Save the argument passing vector registers.  */ 
-	stp	q0, q1, [sp, #16 + 0] 
-	stp	q2, q3, [sp, #16 + 32] 
-	stp	q4, q5, [sp, #16 + 64] 
-	stp	q6, q7, [sp, #16 + 96] 
-	b	ffi_go_closure_SYSV_save_argument 
-	NESTED_END	ffi_go_closure_SYSV_V 
- 
-	NESTED_ENTRY	ffi_go_closure_SYSV 
-	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS! 
- 
-ffi_go_closure_SYSV_save_argument 
-	/* Save the argument passing core registers.  */ 
-	stp     x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0] 
-	stp     x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16] 
-	stp     x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32] 
-	stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48] 
- 
-	/* Load ffi_closure_inner arguments.  */ 
-	ldp	PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]/* load cif, fn */ 
-	mov	x2, x18					/* load user_data */ 
-	b	do_closure 
-	NESTED_END	ffi_go_closure_SYSV 
- 
-#endif /* FFI_GO_CLOSURES */ 
- 
- 
-/* void extend_hfa_type (void *dest, void *src, int h) */ 
- 
-	LEAF_ENTRY	extend_hfa_type 
- 
-	adr	x3, extend_hfa_type_jump_base 
-	and	w2, w2, #AARCH64_RET_MASK 
-	sub	x2, x2, #AARCH64_RET_S4 
-	add	x3, x3, x2, lsl #4 
-	br	x3 
- 
-	ALIGN	4 
-extend_hfa_type_jump_base 
-	ldp	s16, s17, [x1]		/* S4 */ 
-	ldp	s18, s19, [x1, #8] 
-	b	extend_hfa_type_store_4 
-	nop 
- 
-	ldp	s16, s17, [x1]		/* S3 */ 
-	ldr	s18, [x1, #8] 
-	b	extend_hfa_type_store_3 
-	nop 
- 
-	ldp	s16, s17, [x1]		/* S2 */ 
-	b	extend_hfa_type_store_2 
-	nop 
-	nop 
- 
-	ldr	s16, [x1]		/* S1 */ 
-	b	extend_hfa_type_store_1 
-	nop 
-	nop 
- 
-	ldp	d16, d17, [x1]		/* D4 */ 
-	ldp	d18, d19, [x1, #16] 
-	b       extend_hfa_type_store_4 
-	nop 
- 
-	ldp     d16, d17, [x1]		/* D3 */ 
-	ldr     d18, [x1, #16] 
-	b	extend_hfa_type_store_3 
-	nop 
- 
-	ldp	d16, d17, [x1]		/* D2 */ 
-	b	extend_hfa_type_store_2 
-	nop 
-	nop 
- 
-	ldr	d16, [x1]		/* D1 */ 
-	b	extend_hfa_type_store_1 
-	nop 
-	nop 
- 
-	ldp	q16, q17, [x1]		/* Q4 */ 
-	ldp	q18, q19, [x1, #16] 
-	b	extend_hfa_type_store_4 
-	nop 
- 
-	ldp	q16, q17, [x1]		/* Q3 */ 
-	ldr	q18, [x1, #16] 
-	b	extend_hfa_type_store_3 
-	nop 
- 
-	ldp	q16, q17, [x1]		/* Q2 */ 
-	b	extend_hfa_type_store_2 
-	nop 
-	nop 
- 
-	ldr	q16, [x1]		/* Q1 */ 
-	b	extend_hfa_type_store_1 
- 
-extend_hfa_type_store_4 
-	str	q19, [x0, #48] 
-extend_hfa_type_store_3 
-	str	q18, [x0, #32] 
-extend_hfa_type_store_2 
-	str	q17, [x0, #16] 
-extend_hfa_type_store_1 
-	str	q16, [x0] 
-	ret 
- 
-	LEAF_END	extend_hfa_type 
- 
- 
-/* void compress_hfa_type (void *dest, void *reg, int h) */ 
- 
-	LEAF_ENTRY	compress_hfa_type 
- 
-	adr	x3, compress_hfa_type_jump_base 
-	and	w2, w2, #AARCH64_RET_MASK 
-	sub	x2, x2, #AARCH64_RET_S4 
-	add	x3, x3, x2, lsl #4 
-	br	x3 
- 
-	ALIGN	4 
-compress_hfa_type_jump_base 
-	ldp	q16, q17, [x1]		/* S4 */ 
-	ldp	q18, q19, [x1, #32] 
-	st4	{ v16.s, v17.s, v18.s, v19.s }[0], [x0] 
-	ret 
- 
-	ldp	q16, q17, [x1]		/* S3 */ 
-	ldr	q18, [x1, #32] 
-	st3	{ v16.s, v17.s, v18.s }[0], [x0] 
-	ret 
- 
-	ldp	q16, q17, [x1]		/* S2 */ 
-	st2	{ v16.s, v17.s }[0], [x0] 
-	ret 
-	nop 
- 
-	ldr	q16, [x1]		/* S1 */ 
-	st1	{ v16.s }[0], [x0] 
-	ret 
-	nop 
- 
-	ldp	q16, q17, [x1]		/* D4 */ 
-	ldp	q18, q19, [x1, #32] 
-	st4	{ v16.d, v17.d, v18.d, v19.d }[0], [x0] 
-	ret 
- 
-	ldp	q16, q17, [x1]		/* D3 */ 
-	ldr	q18, [x1, #32] 
-	st3	{ v16.d, v17.d, v18.d }[0], [x0] 
-	ret 
- 
-	ldp	q16, q17, [x1]		/* D2 */ 
-	st2	{ v16.d, v17.d }[0], [x0] 
-	ret 
-	nop 
- 
-	ldr	q16, [x1]		/* D1 */ 
-	st1	{ v16.d }[0], [x0] 
-	ret 
-	nop 
- 
-	ldp	q16, q17, [x1]		/* Q4 */ 
-	ldp	q18, q19, [x1, #32] 
-	b	compress_hfa_type_store_q4 
-	nop 
- 
-	ldp	q16, q17, [x1]		/* Q3 */ 
-	ldr	q18, [x1, #32] 
-	b	compress_hfa_type_store_q3 
-	nop 
- 
-	ldp	q16, q17, [x1]		/* Q2 */ 
-	stp	q16, q17, [x0] 
-	ret 
-	nop 
- 
-	ldr	q16, [x1]		/* Q1 */ 
-	str	q16, [x0] 
-	ret 
- 
-compress_hfa_type_store_q4 
-	str	q19, [x0, #48] 
-compress_hfa_type_store_q3 
-	str	q18, [x0, #32] 
-	stp	q16, q17, [x0] 
-	ret 
- 
-	LEAF_END	compress_hfa_type 
- 
-	END 
-\ No newline at end of file
+/* Copyright (c) 2009, 2010, 2011, 2012 ARM Ltd.
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+``Software''), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+#include "internal.h"
+
+	OPT	2 /*disable listing */
+/* For some macros to add unwind information */
+#include "ksarm64.h"
+	OPT	1 /*re-enable listing */
+
+#define BE(X)	0		/* used as #BE(n) in the closure return loads; always 0 in this file */
+#define PTR_REG(n)      x##n	/* pointer-sized register n is the 64-bit xN */
+#define PTR_SIZE	8	/* byte size of one pointer slot in closure offsets */
+
+	IMPORT ffi_closure_SYSV_inner	/* C helper called from do_closure */
+	EXPORT	ffi_call_SYSV
+	EXPORT	ffi_closure_SYSV_V
+	EXPORT	ffi_closure_SYSV
+	EXPORT	extend_hfa_type
+	EXPORT	compress_hfa_type
+#ifdef FFI_GO_CLOSURES
+	EXPORT	ffi_go_closure_SYSV_V
+	EXPORT	ffi_go_closure_SYSV
+#endif
+
+	TEXTAREA, ALLIGN=8	/* NOTE(review): "ALLIGN" looks like a misspelling of ALIGN; confirm how TEXTAREA treats this argument */
+
+/* ffi_call_SYSV
+   extern void ffi_call_SYSV (void *stack, void *frame,
+			      void (*fn)(void), void *rvalue,
+			      int flags, void *closure);
+   Calls fn with the argument registers loaded from the context at `stack`,
+   then stores the result through rvalue as selected by flags.  On entry:
+   x0 stack   - call context: q0-q7 at offset 0, x0-x7 at 16*N_V_ARG_REG
+   x1 frame   - caller-allocated frame; receives x29/x30, rvalue and flags
+   x2 fn      - function to call
+   x3 rvalue  - where the result is stored; also installed in x8
+   x4 flags   - AARCH64_FLAG_ARG_V_BIT plus the AARCH64_RET_* selector
+   x5 closure - not used here (the x18 static-chain move is commented out) */
+
+	NESTED_ENTRY ffi_call_SYSV_fake
+
+	/* For unwind information, Windows has to store fp and lr  */
+	PROLOG_SAVE_REG_PAIR	x29, x30, #-32!
+
+	ALTERNATE_ENTRY ffi_call_SYSV	/* the exported entry point */
+	/* Use a stack frame allocated by our caller. */
+	stp	x29, x30, [x1]
+	mov	x29, x1
+	mov	sp, x0			/* sp now points at the call context */
+
+	mov	x9, x2			/* save fn */
+	mov	x8, x3			/* install structure return */
+#ifdef FFI_GO_CLOSURES
+	/*mov	x18, x5			install static chain -- left disabled in this file */
+#endif
+	stp	x3, x4, [x29, #16]	/* save rvalue and flags */
+	
+	/* Load the vector argument passing registers, if necessary.  */
+	tbz	x4, #AARCH64_FLAG_ARG_V_BIT, ffi_call_SYSV_L1	/* bit clear: skip the q0-q7 loads */
+	ldp	q0, q1, [sp, #0]
+	ldp	q2, q3, [sp, #32]
+	ldp	q4, q5, [sp, #64]
+	ldp	q6, q7, [sp, #96]
+
+ffi_call_SYSV_L1
+	/* Load the core argument passing registers, including
+	   the structure return pointer.  */
+	ldp     x0, x1, [sp, #16*N_V_ARG_REG + 0]
+	ldp     x2, x3, [sp, #16*N_V_ARG_REG + 16]
+	ldp     x4, x5, [sp, #16*N_V_ARG_REG + 32]
+	ldp     x6, x7, [sp, #16*N_V_ARG_REG + 48]
+
+	/* Deallocate the context, leaving the stacked arguments.  */
+	add	sp, sp, #CALL_CONTEXT_SIZE	/* sp now points at the stacked arguments */
+
+	blr     x9			/* call fn */
+
+	ldp	x3, x4, [x29, #16]	/* reload rvalue and flags */
+
+	/* Partially deconstruct the stack frame. */
+	mov     sp, x29 		/* sp back to the caller-provided frame */
+	ldp     x29, x30, [x29]
+
+	/* Save the return value as directed.  */
+	adr	x5, ffi_call_SYSV_return
+	and	w4, w4, #AARCH64_RET_MASK	/* keep only the return-type selector */
+	add	x5, x5, x4, lsl #3		/* table base + selector * 8 bytes */
+	br	x5
+	
+	/* Note that each table entry is 2 insns, and thus 8 bytes.
+	   For integer data, note that we're storing into ffi_arg
+	   and therefore we want to extend to 64 bits; these types
+	   have two consecutive entries allocated for them.  */
+	ALIGN 4
+ffi_call_SYSV_return
+	ret				/* VOID */
+	nop
+	str	x0, [x3]		/* INT64 */
+	ret
+	stp	x0, x1, [x3]		/* INT128 */
+	ret
+	brk	#1000			/* UNUSED */
+	ret
+	brk	#1000			/* UNUSED */
+	ret
+	brk	#1000			/* UNUSED */
+	ret
+	brk	#1000			/* UNUSED */
+	ret
+	brk	#1000			/* UNUSED */
+	ret
+	st4	{ v0.s, v1.s, v2.s, v3.s }[0], [x3]	/* S4 */
+	ret
+	st3	{ v0.s, v1.s, v2.s }[0], [x3]	/* S3 */
+	ret
+	stp	s0, s1, [x3]		/* S2 */
+	ret
+	str	s0, [x3]		/* S1 */
+	ret
+	st4	{ v0.d, v1.d, v2.d, v3.d }[0], [x3]	/* D4 */
+	ret
+	st3	{ v0.d, v1.d, v2.d }[0], [x3]	/* D3 */
+	ret
+	stp	d0, d1, [x3]		/* D2 */
+	ret
+	str	d0, [x3]		/* D1 */
+	ret
+	str	q3, [x3, #48]		/* Q4: no ret, falls through into Q3 */
+	nop
+	str	q2, [x3, #32]		/* Q3: no ret, falls through into Q2 */
+	nop
+	stp	q0, q1, [x3]		/* Q2 */
+	ret
+	str	q0, [x3]		/* Q1 */
+	ret
+	uxtb	w0, w0			/* UINT8: zero-extend, then store 64 bits */
+	str	x0, [x3]
+	ret				/* reserved */
+	nop
+	uxth	w0, w0			/* UINT16: zero-extend, then store 64 bits */
+	str	x0, [x3]
+	ret				/* reserved */
+	nop
+	mov	w0, w0			/* UINT32: writing w0 clears the upper half of x0 */
+	str	x0, [x3]
+	ret				/* reserved */
+	nop
+	sxtb	x0, w0			/* SINT8: sign-extend, then store 64 bits */
+	str	x0, [x3]
+	ret				/* reserved */
+	nop
+	sxth	x0, w0			/* SINT16: sign-extend, then store 64 bits */
+	str	x0, [x3]
+	ret				/* reserved */
+	nop
+	sxtw	x0, w0			/* SINT32: sign-extend, then store 64 bits */
+	str	x0, [x3]
+	ret				/* reserved */
+	nop
+	
+	
+	NESTED_END ffi_call_SYSV_fake
+	
+
+/* ffi_closure_SYSV
+   Closure invocation glue. This is the low level code invoked directly by
+   the closure trampoline to set up and call a closure.
+   On entry x17 points to a struct ffi_closure and x16 has been clobbered;
+   all other registers are preserved.
+   We allocate a call context and save the argument passing registers,
+   then invoke the generic C ffi_closure_SYSV_inner() function to do all
+   the real work. On return we load the result passing registers back from
+   the call context.
+*/
+
+#define ffi_closure_SYSV_FS (8*2 + CALL_CONTEXT_SIZE + 64)
+
+	NESTED_ENTRY	ffi_closure_SYSV_V	/* closure entry that also saves q0-q7 */
+	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS!
+
+	/* Save the vector argument registers q0-q7 at sp+16, above the saved x29/x30.  */
+	stp	q0, q1, [sp, #16 + 0]
+	stp	q2, q3, [sp, #16 + 32]
+	stp	q4, q5, [sp, #16 + 64]
+	stp	q6, q7, [sp, #16 + 96]
+
+	b	ffi_closure_SYSV_save_argument	/* continue in ffi_closure_SYSV, past its prolog */
+	NESTED_END	ffi_closure_SYSV_V
+
+	NESTED_ENTRY	ffi_closure_SYSV	/* closure entry that does not save the vector registers */
+	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS!
+
+ffi_closure_SYSV_save_argument
+	/* Save the core argument registers x0-x7 at sp + 16 + 16*N_V_ARG_REG.  */
+	stp     x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
+	stp     x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
+	stp     x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
+	stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
+
+	/* Load ffi_closure_inner arguments from the closure that x17 points into.  */
+	ldp	PTR_REG(0), PTR_REG(1), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET]	/* load cif, fn */
+	ldr	PTR_REG(2), [x17, #FFI_TRAMPOLINE_CLOSURE_OFFSET+PTR_SIZE*2]	/* load user_data */
+
+do_closure
+	add	x3, sp, #16							/* load context */
+	add	x4, sp, #ffi_closure_SYSV_FS		/* load stack: first byte above this frame */
+	add	x5, sp, #16+CALL_CONTEXT_SIZE		/* load rvalue: scratch area after the context */
+	mov	x6, x8					/* load struct_rval */
+
+	bl	ffi_closure_SYSV_inner
+
+	/* Load the return value as directed by the selector returned in w0.  */
+	adr	x1, ffi_closure_SYSV_return_base
+	and	w0, w0, #AARCH64_RET_MASK
+	add	x1, x1, x0, lsl #3		/* table base + selector * 8 bytes */
+	add	x3, sp, #16+CALL_CONTEXT_SIZE	/* x3 = rvalue area read by the table entries */
+	br	x1
+
+	/* Note that each table entry is 2 insns, and thus 8 bytes.  */
+	ALIGN	8
+ffi_closure_SYSV_return_base
+	b	ffi_closure_SYSV_epilog			/* VOID */
+	nop
+	ldr	x0, [x3]		/* INT64 */
+	b	ffi_closure_SYSV_epilog
+	ldp	x0, x1, [x3]		/* INT128 */
+	b	ffi_closure_SYSV_epilog
+	brk	#1000			/* UNUSED */
+	nop
+	brk	#1000			/* UNUSED */
+	nop
+	brk	#1000			/* UNUSED */
+	nop
+	brk	#1000			/* UNUSED */
+	nop
+	brk	#1000			/* UNUSED */
+	nop
+	ldr	s3, [x3, #12]		/* S4: falls through into S3 */
+	nop
+	ldr	s2, [x3, #8]		/* S3: falls through into S2 */
+	nop
+	ldp	s0, s1, [x3]		/* S2 */
+	b	ffi_closure_SYSV_epilog
+	ldr	s0, [x3]		/* S1 */
+	b	ffi_closure_SYSV_epilog
+	ldr	d3, [x3, #24]		/* D4: falls through into D3 */
+	nop
+	ldr	d2, [x3, #16]		/* D3: falls through into D2 */
+	nop
+	ldp	d0, d1, [x3]		/* D2 */
+	b	ffi_closure_SYSV_epilog
+	ldr	d0, [x3]		/* D1 */
+	b	ffi_closure_SYSV_epilog
+	ldr	q3, [x3, #48]		/* Q4: falls through into Q3 */
+	nop
+	ldr	q2, [x3, #32]		/* Q3: falls through into Q2 */
+	nop
+	ldp	q0, q1, [x3]		/* Q2 */
+	b	ffi_closure_SYSV_epilog
+	ldr	q0, [x3]		/* Q1 */
+	b	ffi_closure_SYSV_epilog
+	ldrb	w0, [x3, #BE(7)]	/* UINT8 */
+	b	ffi_closure_SYSV_epilog
+	brk	#1000			/* reserved */
+	nop
+	ldrh	w0, [x3, #BE(6)]	/* UINT16 */
+	b	ffi_closure_SYSV_epilog
+	brk	#1000			/* reserved */
+	nop
+	ldr	w0, [x3, #BE(4)]	/* UINT32 */
+	b	ffi_closure_SYSV_epilog
+	brk	#1000			/* reserved */
+	nop
+	ldrsb	x0, [x3, #BE(7)]	/* SINT8 */
+	b	ffi_closure_SYSV_epilog
+	brk	#1000			/* reserved */
+	nop
+	ldrsh	x0, [x3, #BE(6)]	/* SINT16 */
+	b	ffi_closure_SYSV_epilog
+	brk	#1000			/* reserved */
+	nop
+	ldrsw	x0, [x3, #BE(4)]	/* SINT32: last entry, falls through into the epilog */
+	nop
+					/* reserved */
+
+ffi_closure_SYSV_epilog
+	EPILOG_RESTORE_REG_PAIR	x29, x30, #ffi_closure_SYSV_FS!
+	EPILOG_RETURN
+	NESTED_END	ffi_closure_SYSV
+
+
+#ifdef FFI_GO_CLOSURES
+	NESTED_ENTRY	ffi_go_closure_SYSV_V	/* Go closure entry that also saves q0-q7 */
+	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS!
+
+	/* Save the vector argument registers q0-q7 at sp+16, above the saved x29/x30.  */
+	stp	q0, q1, [sp, #16 + 0]
+	stp	q2, q3, [sp, #16 + 32]
+	stp	q4, q5, [sp, #16 + 64]
+	stp	q6, q7, [sp, #16 + 96]
+	b	ffi_go_closure_SYSV_save_argument	/* continue in ffi_go_closure_SYSV, past its prolog */
+	NESTED_END	ffi_go_closure_SYSV_V
+
+	NESTED_ENTRY	ffi_go_closure_SYSV	/* Go closure entry that does not save the vector registers */
+	PROLOG_SAVE_REG_PAIR	x29, x30, #-ffi_closure_SYSV_FS!
+
+ffi_go_closure_SYSV_save_argument
+	/* Save the core argument registers x0-x7 at sp + 16 + 16*N_V_ARG_REG.  */
+	stp     x0, x1, [sp, #16 + 16*N_V_ARG_REG + 0]
+	stp     x2, x3, [sp, #16 + 16*N_V_ARG_REG + 16]
+	stp     x4, x5, [sp, #16 + 16*N_V_ARG_REG + 32]
+	stp     x6, x7, [sp, #16 + 16*N_V_ARG_REG + 48]
+
+	/* Load ffi_closure_inner arguments.  NOTE(review): reads x18, while ffi_call_SYSV leaves its x18 setup commented out -- confirm.  */
+	ldp	PTR_REG(0), PTR_REG(1), [x18, #PTR_SIZE]/* x0 = cif, x1 = fn, read starting PTR_SIZE past x18 */
+	mov	x2, x18					/* user_data is the closure pointer itself */
+	b	do_closure				/* shared tail inside ffi_closure_SYSV */
+	NESTED_END	ffi_go_closure_SYSV
+
+#endif /* FFI_GO_CLOSURES */
+
+
+/* void extend_hfa_type (void *dest, void *src, int h)
+   Spread the packed HFA elements at src into one 16-byte slot each at dest; h selects the S/D/Q x 1..4 case.  */
+	LEAF_ENTRY	extend_hfa_type
+
+	adr	x3, extend_hfa_type_jump_base
+	and	w2, w2, #AARCH64_RET_MASK	/* keep only the AARCH64_RET_* selector */
+	sub	x2, x2, #AARCH64_RET_S4		/* the table starts at the S4 entry */
+	add	x3, x3, x2, lsl #4		/* each entry is 4 insns, thus 16 bytes */
+	br	x3
+
+	ALIGN	4
+extend_hfa_type_jump_base
+	ldp	s16, s17, [x1]		/* S4 */
+	ldp	s18, s19, [x1, #8]
+	b	extend_hfa_type_store_4
+	nop
+
+	ldp	s16, s17, [x1]		/* S3 */
+	ldr	s18, [x1, #8]
+	b	extend_hfa_type_store_3
+	nop
+
+	ldp	s16, s17, [x1]		/* S2 */
+	b	extend_hfa_type_store_2
+	nop
+	nop
+
+	ldr	s16, [x1]		/* S1 */
+	b	extend_hfa_type_store_1
+	nop
+	nop
+
+	ldp	d16, d17, [x1]		/* D4 */
+	ldp	d18, d19, [x1, #16]
+	b       extend_hfa_type_store_4
+	nop
+
+	ldp     d16, d17, [x1]		/* D3 */
+	ldr     d18, [x1, #16]
+	b	extend_hfa_type_store_3
+	nop
+
+	ldp	d16, d17, [x1]		/* D2 */
+	b	extend_hfa_type_store_2
+	nop
+	nop
+
+	ldr	d16, [x1]		/* D1 */
+	b	extend_hfa_type_store_1
+	nop
+	nop
+
+	ldp	q16, q17, [x1]		/* Q4 */
+	ldp	q18, q19, [x1, #32]	/* elements 2 and 3 start after the 32 bytes read above (was #16) */
+	b	extend_hfa_type_store_4
+	nop
+
+	ldp	q16, q17, [x1]		/* Q3 */
+	ldr	q18, [x1, #32]		/* element 2 starts after the 32 bytes read above (was #16) */
+	b	extend_hfa_type_store_3
+	nop
+
+	ldp	q16, q17, [x1]		/* Q2 */
+	b	extend_hfa_type_store_2
+	nop
+	nop
+
+	ldr	q16, [x1]		/* Q1: last entry, so no padding is needed */
+	b	extend_hfa_type_store_1
+
+extend_hfa_type_store_4		/* each store label falls through into the next */
+	str	q19, [x0, #48]
+extend_hfa_type_store_3
+	str	q18, [x0, #32]
+extend_hfa_type_store_2
+	str	q17, [x0, #16]
+extend_hfa_type_store_1
+	str	q16, [x0]
+	ret
+
+	LEAF_END	extend_hfa_type
+
+
+/* void compress_hfa_type (void *dest, void *reg, int h)
+   Pack HFA elements held one per 16-byte slot at reg into consecutive elements at dest; h selects the case.  */
+	LEAF_ENTRY	compress_hfa_type
+
+	adr	x3, compress_hfa_type_jump_base
+	and	w2, w2, #AARCH64_RET_MASK	/* keep only the AARCH64_RET_* selector */
+	sub	x2, x2, #AARCH64_RET_S4		/* the table starts at the S4 entry */
+	add	x3, x3, x2, lsl #4		/* each entry is 4 insns, thus 16 bytes */
+	br	x3
+
+	ALIGN	4
+compress_hfa_type_jump_base
+	ldp	q16, q17, [x1]		/* S4 */
+	ldp	q18, q19, [x1, #32]
+	st4	{ v16.s, v17.s, v18.s, v19.s }[0], [x0]
+	ret
+
+	ldp	q16, q17, [x1]		/* S3 */
+	ldr	q18, [x1, #32]
+	st3	{ v16.s, v17.s, v18.s }[0], [x0]
+	ret
+
+	ldp	q16, q17, [x1]		/* S2 */
+	st2	{ v16.s, v17.s }[0], [x0]
+	ret
+	nop
+
+	ldr	q16, [x1]		/* S1 */
+	st1	{ v16.s }[0], [x0]
+	ret
+	nop
+
+	ldp	q16, q17, [x1]		/* D4 */
+	ldp	q18, q19, [x1, #32]
+	st4	{ v16.d, v17.d, v18.d, v19.d }[0], [x0]
+	ret
+
+	ldp	q16, q17, [x1]		/* D3 */
+	ldr	q18, [x1, #32]
+	st3	{ v16.d, v17.d, v18.d }[0], [x0]
+	ret
+
+	ldp	q16, q17, [x1]		/* D2 */
+	st2	{ v16.d, v17.d }[0], [x0]
+	ret
+	nop
+
+	ldr	q16, [x1]		/* D1 */
+	st1	{ v16.d }[0], [x0]
+	ret
+	nop
+
+	ldp	q16, q17, [x1]		/* Q4 */
+	ldp	q18, q19, [x1, #32]
+	b	compress_hfa_type_store_q4
+	nop
+
+	ldp	q16, q17, [x1]		/* Q3 */
+	ldr	q18, [x1, #32]
+	b	compress_hfa_type_store_q3
+	nop
+
+	ldp	q16, q17, [x1]		/* Q2 */
+	stp	q16, q17, [x0]
+	ret
+	nop
+
+	ldr	q16, [x1]		/* Q1: last entry, so no padding is needed */
+	str	q16, [x0]
+	ret
+
+compress_hfa_type_store_q4		/* falls through into the q3 stores */
+	str	q19, [x0, #48]
+compress_hfa_type_store_q3
+	str	q18, [x0, #32]
+	stp	q16, q17, [x0]
+	ret
+
+	LEAF_END	compress_hfa_type
+
+	END
+\ No newline at end of file
diff --git a/contrib/restricted/libffi/src/arm/ffi.c b/contrib/restricted/libffi/src/arm/ffi.c
index 95cebf49ee..4e270718a3 100644
--- a/contrib/restricted/libffi/src/arm/ffi.c
+++ b/contrib/restricted/libffi/src/arm/ffi.c
@@ -1,854 +1,854 @@
-/* ----------------------------------------------------------------------- 
-   ffi.c - Copyright (c) 2011 Timothy Wall 
-           Copyright (c) 2011 Plausible Labs Cooperative, Inc. 
-           Copyright (c) 2011 Anthony Green 
-	   Copyright (c) 2011 Free Software Foundation 
-           Copyright (c) 1998, 2008, 2011  Red Hat, Inc. 
- 
-   ARM Foreign Function Interface 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
-   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
-   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
-   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
-   DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-#if defined(__arm__) || defined(_M_ARM) 
-#include <fficonfig.h> 
-#include <ffi.h> 
-#include <ffi_common.h> 
-#include <stdint.h> 
-#include <stdlib.h> 
-#include "internal.h" 
- 
-#if defined(_MSC_VER) && defined(_M_ARM) 
-#define WIN32_LEAN_AND_MEAN 
-#include <windows.h> 
-#endif 
- 
-#if FFI_EXEC_TRAMPOLINE_TABLE 
- 
-#ifdef __MACH__ 
-#include <mach/machine/vm_param.h> 
-#endif 
- 
-#else 
-#ifndef _M_ARM 
-extern unsigned int ffi_arm_trampoline[2] FFI_HIDDEN; 
-#else 
-extern unsigned int ffi_arm_trampoline[3] FFI_HIDDEN; 
-#endif 
-#endif 
- 
-/* Forward declares. */ 
-static int vfp_type_p (const ffi_type *); 
-static void layout_vfp_args (ffi_cif *); 
- 
-static void * 
-ffi_align (ffi_type *ty, void *p) 
-{ 
-  /* Align if necessary */ 
-  size_t alignment; 
-#ifdef _WIN32_WCE 
-  alignment = 4; 
-#else 
-  alignment = ty->alignment; 
-  if (alignment < 4) 
-    alignment = 4; 
-#endif 
-  return (void *) FFI_ALIGN (p, alignment); 
-} 
- 
-static size_t 
-ffi_put_arg (ffi_type *ty, void *src, void *dst) 
-{ 
-  size_t z = ty->size; 
- 
-  switch (ty->type) 
-    { 
-    case FFI_TYPE_SINT8: 
-      *(UINT32 *)dst = *(SINT8 *)src; 
-      break; 
-    case FFI_TYPE_UINT8: 
-      *(UINT32 *)dst = *(UINT8 *)src; 
-      break; 
-    case FFI_TYPE_SINT16: 
-      *(UINT32 *)dst = *(SINT16 *)src; 
-      break; 
-    case FFI_TYPE_UINT16: 
-      *(UINT32 *)dst = *(UINT16 *)src; 
-      break; 
- 
-    case FFI_TYPE_INT: 
-    case FFI_TYPE_SINT32: 
-    case FFI_TYPE_UINT32: 
-    case FFI_TYPE_POINTER: 
-#ifndef _MSC_VER 
-    case FFI_TYPE_FLOAT: 
-#endif 
-      *(UINT32 *)dst = *(UINT32 *)src; 
-      break; 
- 
-#ifdef _MSC_VER 
-    // casting a float* to a UINT32* doesn't work on Windows 
-    case FFI_TYPE_FLOAT: 
-        *(uintptr_t *)dst = 0; 
-        *(float *)dst = *(float *)src; 
-        break; 
-#endif 
- 
-    case FFI_TYPE_SINT64: 
-    case FFI_TYPE_UINT64: 
-    case FFI_TYPE_DOUBLE: 
-      *(UINT64 *)dst = *(UINT64 *)src; 
-      break; 
- 
-    case FFI_TYPE_STRUCT: 
-    case FFI_TYPE_COMPLEX: 
-      memcpy (dst, src, z); 
-      break; 
- 
-    default: 
-      abort(); 
-    } 
- 
-  return FFI_ALIGN (z, 4); 
-} 
- 
-/* ffi_prep_args is called once stack space has been allocated 
-   for the function's arguments. 
- 
-   The vfp_space parameter is the load area for VFP regs, the return 
-   value is cif->vfp_used (word bitset of VFP regs used for passing 
-   arguments). These are only used for the VFP hard-float ABI. 
-*/ 
-static void 
-ffi_prep_args_SYSV (ffi_cif *cif, int flags, void *rvalue, 
-		    void **avalue, char *argp) 
-{ 
-  ffi_type **arg_types = cif->arg_types; 
-  int i, n; 
- 
-  if (flags == ARM_TYPE_STRUCT) 
-    { 
-      *(void **) argp = rvalue; 
-      argp += 4; 
-    } 
- 
-  for (i = 0, n = cif->nargs; i < n; i++) 
-    { 
-      ffi_type *ty = arg_types[i]; 
-      argp = ffi_align (ty, argp); 
-      argp += ffi_put_arg (ty, avalue[i], argp); 
-    } 
-} 
- 
-static void 
-ffi_prep_args_VFP (ffi_cif *cif, int flags, void *rvalue, 
-                   void **avalue, char *stack, char *vfp_space) 
-{ 
-  ffi_type **arg_types = cif->arg_types; 
-  int i, n, vi = 0; 
-  char *argp, *regp, *eo_regp; 
-  char stack_used = 0; 
-  char done_with_regs = 0; 
- 
-  /* The first 4 words on the stack are used for values 
-     passed in core registers.  */ 
-  regp = stack; 
-  eo_regp = argp = regp + 16; 
- 
-  /* If the function returns an FFI_TYPE_STRUCT in memory, 
-     that address is passed in r0 to the function.  */ 
-  if (flags == ARM_TYPE_STRUCT) 
-    { 
-      *(void **) regp = rvalue; 
-      regp += 4; 
-    } 
- 
-  for (i = 0, n = cif->nargs; i < n; i++) 
-    { 
-      ffi_type *ty = arg_types[i]; 
-      void *a = avalue[i]; 
-      int is_vfp_type = vfp_type_p (ty); 
- 
-      /* Allocated in VFP registers. */ 
-      if (vi < cif->vfp_nargs && is_vfp_type) 
-	{ 
-	  char *vfp_slot = vfp_space + cif->vfp_args[vi++] * 4; 
-	  ffi_put_arg (ty, a, vfp_slot); 
-	  continue; 
-	} 
-      /* Try allocating in core registers. */ 
-      else if (!done_with_regs && !is_vfp_type) 
-	{ 
-	  char *tregp = ffi_align (ty, regp); 
-	  size_t size = ty->size; 
-	  size = (size < 4) ? 4 : size;	// pad 
-	  /* Check if there is space left in the aligned register 
-	     area to place the argument.  */ 
-	  if (tregp + size <= eo_regp) 
-	    { 
-	      regp = tregp + ffi_put_arg (ty, a, tregp); 
-	      done_with_regs = (regp == argp); 
-	      // ensure we did not write into the stack area 
-	      FFI_ASSERT (regp <= argp); 
-	      continue; 
-	    } 
-	  /* In case there are no arguments in the stack area yet, 
-	     the argument is passed in the remaining core registers 
-	     and on the stack.  */ 
-	  else if (!stack_used) 
-	    { 
-	      stack_used = 1; 
-	      done_with_regs = 1; 
-	      argp = tregp + ffi_put_arg (ty, a, tregp); 
-	      FFI_ASSERT (eo_regp < argp); 
-	      continue; 
-	    } 
-	} 
-      /* Base case, arguments are passed on the stack */ 
-      stack_used = 1; 
-      argp = ffi_align (ty, argp); 
-      argp += ffi_put_arg (ty, a, argp); 
-    } 
-} 
- 
-/* Perform machine dependent cif processing */ 
-ffi_status FFI_HIDDEN 
-ffi_prep_cif_machdep (ffi_cif *cif) 
-{ 
-  int flags = 0, cabi = cif->abi; 
-  size_t bytes = cif->bytes; 
- 
-  /* Map out the register placements of VFP register args.  The VFP 
-     hard-float calling conventions are slightly more sophisticated 
-     than the base calling conventions, so we do it here instead of 
-     in ffi_prep_args(). */ 
-  if (cabi == FFI_VFP) 
-    layout_vfp_args (cif); 
- 
-  /* Set the return type flag */ 
-  switch (cif->rtype->type) 
-    { 
-    case FFI_TYPE_VOID: 
-      flags = ARM_TYPE_VOID; 
-      break; 
- 
-    case FFI_TYPE_INT: 
-    case FFI_TYPE_UINT8: 
-    case FFI_TYPE_SINT8: 
-    case FFI_TYPE_UINT16: 
-    case FFI_TYPE_SINT16: 
-    case FFI_TYPE_UINT32: 
-    case FFI_TYPE_SINT32: 
-    case FFI_TYPE_POINTER: 
-      flags = ARM_TYPE_INT; 
-      break; 
- 
-    case FFI_TYPE_SINT64: 
-    case FFI_TYPE_UINT64: 
-      flags = ARM_TYPE_INT64; 
-      break; 
- 
-    case FFI_TYPE_FLOAT: 
-      flags = (cabi == FFI_VFP ? ARM_TYPE_VFP_S : ARM_TYPE_INT); 
-      break; 
-    case FFI_TYPE_DOUBLE: 
-      flags = (cabi == FFI_VFP ? ARM_TYPE_VFP_D : ARM_TYPE_INT64); 
-      break; 
- 
-    case FFI_TYPE_STRUCT: 
-    case FFI_TYPE_COMPLEX: 
-      if (cabi == FFI_VFP) 
-	{ 
-	  int h = vfp_type_p (cif->rtype); 
- 
-	  flags = ARM_TYPE_VFP_N; 
-	  if (h == 0x100 + FFI_TYPE_FLOAT) 
-	    flags = ARM_TYPE_VFP_S; 
-	  if (h == 0x100 + FFI_TYPE_DOUBLE) 
-	    flags = ARM_TYPE_VFP_D; 
-	  if (h != 0) 
-	      break; 
-	} 
- 
-      /* A Composite Type not larger than 4 bytes is returned in r0. 
-	 A Composite Type larger than 4 bytes, or whose size cannot 
-	 be determined statically ... is stored in memory at an 
-	 address passed [in r0].  */ 
-      if (cif->rtype->size <= 4) 
-	flags = ARM_TYPE_INT; 
-      else 
-	{ 
-	  flags = ARM_TYPE_STRUCT; 
-	  bytes += 4; 
-	} 
-      break; 
- 
-    default: 
-      abort(); 
-    } 
- 
-  /* Round the stack up to a multiple of 8 bytes.  This isn't needed 
-     everywhere, but it is on some platforms, and it doesn't harm anything 
-     when it isn't needed.  */ 
-  bytes = FFI_ALIGN (bytes, 8); 
- 
-  /* Minimum stack space is the 4 register arguments that we pop.  */ 
-  if (bytes < 4*4) 
-    bytes = 4*4; 
- 
-  cif->bytes = bytes; 
-  cif->flags = flags; 
- 
-  return FFI_OK; 
-} 
- 
-/* Perform machine dependent cif processing for variadic calls */ 
-ffi_status FFI_HIDDEN 
-ffi_prep_cif_machdep_var (ffi_cif * cif, 
-			  unsigned int nfixedargs, unsigned int ntotalargs) 
-{ 
-  /* VFP variadic calls actually use the SYSV ABI */ 
-  if (cif->abi == FFI_VFP) 
-    cif->abi = FFI_SYSV; 
- 
-  return ffi_prep_cif_machdep (cif); 
-} 
- 
-/* Prototypes for assembly functions, in sysv.S.  */ 
- 
-struct call_frame 
-{ 
-  void *fp; 
-  void *lr; 
-  void *rvalue; 
-  int flags; 
-  void *closure; 
-}; 
- 
-extern void ffi_call_SYSV (void *stack, struct call_frame *, 
-			   void (*fn) (void)) FFI_HIDDEN; 
-extern void ffi_call_VFP (void *vfp_space, struct call_frame *, 
-			   void (*fn) (void), unsigned vfp_used) FFI_HIDDEN; 
- 
-static void 
-ffi_call_int (ffi_cif * cif, void (*fn) (void), void *rvalue, 
-	      void **avalue, void *closure) 
-{ 
-  int flags = cif->flags; 
-  ffi_type *rtype = cif->rtype; 
-  size_t bytes, rsize, vfp_size; 
-  char *stack, *vfp_space, *new_rvalue; 
-  struct call_frame *frame; 
- 
-  rsize = 0; 
-  if (rvalue == NULL) 
-    { 
-      /* If the return value is a struct and we don't have a return 
-	 value address then we need to make one.  Otherwise the return 
-	 value is in registers and we can ignore them.  */ 
-      if (flags == ARM_TYPE_STRUCT) 
-	rsize = rtype->size; 
-      else 
-	flags = ARM_TYPE_VOID; 
-    } 
-  else if (flags == ARM_TYPE_VFP_N) 
-    { 
-      /* Largest case is double x 4. */ 
-      rsize = 32; 
-    } 
-  else if (flags == ARM_TYPE_INT && rtype->type == FFI_TYPE_STRUCT) 
-    rsize = 4; 
- 
-  /* Largest case.  */ 
-  vfp_size = (cif->abi == FFI_VFP && cif->vfp_used ? 8*8: 0); 
- 
-  bytes = cif->bytes; 
-  stack = alloca (vfp_size + bytes + sizeof(struct call_frame) + rsize); 
- 
-  vfp_space = NULL; 
-  if (vfp_size) 
-    { 
-      vfp_space = stack; 
-      stack += vfp_size; 
-    } 
- 
-  frame = (struct call_frame *)(stack + bytes); 
- 
-  new_rvalue = rvalue; 
-  if (rsize) 
-    new_rvalue = (void *)(frame + 1); 
- 
-  frame->rvalue = new_rvalue; 
-  frame->flags = flags; 
-  frame->closure = closure; 
- 
-  if (vfp_space) 
-    { 
-      ffi_prep_args_VFP (cif, flags, new_rvalue, avalue, stack, vfp_space); 
-      ffi_call_VFP (vfp_space, frame, fn, cif->vfp_used); 
-    } 
-  else 
-    { 
-      ffi_prep_args_SYSV (cif, flags, new_rvalue, avalue, stack); 
-      ffi_call_SYSV (stack, frame, fn); 
-    } 
- 
-  if (rvalue && rvalue != new_rvalue) 
-    memcpy (rvalue, new_rvalue, rtype->size); 
-} 
- 
-void 
-ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue) 
-{ 
-  ffi_call_int (cif, fn, rvalue, avalue, NULL); 
-} 
- 
-void 
-ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, 
-	     void **avalue, void *closure) 
-{ 
-  ffi_call_int (cif, fn, rvalue, avalue, closure); 
-} 
- 
-static void * 
-ffi_prep_incoming_args_SYSV (ffi_cif *cif, void *rvalue, 
-			     char *argp, void **avalue) 
-{ 
-  ffi_type **arg_types = cif->arg_types; 
-  int i, n; 
- 
-  if (cif->flags == ARM_TYPE_STRUCT) 
-    { 
-      rvalue = *(void **) argp; 
-      argp += 4; 
-    } 
-  else 
-    { 
-      if (cif->rtype->size && cif->rtype->size < 4) 
-        *(uint32_t *) rvalue = 0; 
-    } 
- 
-  for (i = 0, n = cif->nargs; i < n; i++) 
-    { 
-      ffi_type *ty = arg_types[i]; 
-      size_t z = ty->size; 
- 
-      argp = ffi_align (ty, argp); 
-      avalue[i] = (void *) argp; 
-      argp += z; 
-    } 
- 
-  return rvalue; 
-} 
- 
-static void * 
-ffi_prep_incoming_args_VFP (ffi_cif *cif, void *rvalue, char *stack, 
-			    char *vfp_space, void **avalue) 
-{ 
-  ffi_type **arg_types = cif->arg_types; 
-  int i, n, vi = 0; 
-  char *argp, *regp, *eo_regp; 
-  char done_with_regs = 0; 
-  char stack_used = 0; 
- 
-  regp = stack; 
-  eo_regp = argp = regp + 16; 
- 
-  if (cif->flags == ARM_TYPE_STRUCT) 
-    { 
-      rvalue = *(void **) regp; 
-      regp += 4; 
-    } 
- 
-  for (i = 0, n = cif->nargs; i < n; i++) 
-    { 
-      ffi_type *ty = arg_types[i]; 
-      int is_vfp_type = vfp_type_p (ty); 
-      size_t z = ty->size; 
- 
-      if (vi < cif->vfp_nargs && is_vfp_type) 
-	{ 
-	  avalue[i] = vfp_space + cif->vfp_args[vi++] * 4; 
-	  continue; 
-	} 
-      else if (!done_with_regs && !is_vfp_type) 
-	{ 
-	  char *tregp = ffi_align (ty, regp); 
- 
-	  z = (z < 4) ? 4 : z;	// pad 
- 
-	  /* If the arguments either fits into the registers or uses registers 
-	     and stack, while we haven't read other things from the stack */ 
-	  if (tregp + z <= eo_regp || !stack_used) 
-	    { 
-	      /* Because we're little endian, this is what it turns into.  */ 
-	      avalue[i] = (void *) tregp; 
-	      regp = tregp + z; 
- 
-	      /* If we read past the last core register, make sure we 
-		 have not read from the stack before and continue 
-		 reading after regp.  */ 
-	      if (regp > eo_regp) 
-		{ 
-		  FFI_ASSERT (!stack_used); 
-		  argp = regp; 
-		} 
-	      if (regp >= eo_regp) 
-		{ 
-		  done_with_regs = 1; 
-		  stack_used = 1; 
-		} 
-	      continue; 
-	    } 
-	} 
- 
-      stack_used = 1; 
-      argp = ffi_align (ty, argp); 
-      avalue[i] = (void *) argp; 
-      argp += z; 
-    } 
- 
-  return rvalue; 
-} 
- 
-struct closure_frame 
-{ 
-  char vfp_space[8*8] __attribute__((aligned(8))); 
-  char result[8*4]; 
-  char argp[]; 
-}; 
- 
-int FFI_HIDDEN 
-ffi_closure_inner_SYSV (ffi_cif *cif, 
-		        void (*fun) (ffi_cif *, void *, void **, void *), 
-		        void *user_data, 
-		        struct closure_frame *frame) 
-{ 
-  void **avalue = (void **) alloca (cif->nargs * sizeof (void *)); 
-  void *rvalue = ffi_prep_incoming_args_SYSV (cif, frame->result, 
-					      frame->argp, avalue); 
-  fun (cif, rvalue, avalue, user_data); 
-  return cif->flags; 
-} 
- 
-int FFI_HIDDEN 
-ffi_closure_inner_VFP (ffi_cif *cif, 
-		       void (*fun) (ffi_cif *, void *, void **, void *), 
-		       void *user_data, 
-		       struct closure_frame *frame) 
-{ 
-  void **avalue = (void **) alloca (cif->nargs * sizeof (void *)); 
-  void *rvalue = ffi_prep_incoming_args_VFP (cif, frame->result, frame->argp, 
-					     frame->vfp_space, avalue); 
-  fun (cif, rvalue, avalue, user_data); 
-  return cif->flags; 
-} 
- 
-void ffi_closure_SYSV (void) FFI_HIDDEN; 
-void ffi_closure_VFP (void) FFI_HIDDEN; 
-void ffi_go_closure_SYSV (void) FFI_HIDDEN; 
-void ffi_go_closure_VFP (void) FFI_HIDDEN; 
- 
-/* the cif must already be prep'ed */ 
- 
-ffi_status 
-ffi_prep_closure_loc (ffi_closure * closure, 
-		      ffi_cif * cif, 
-		      void (*fun) (ffi_cif *, void *, void **, void *), 
-		      void *user_data, void *codeloc) 
-{ 
-  void (*closure_func) (void) = ffi_closure_SYSV; 
- 
-  if (cif->abi == FFI_VFP) 
-    { 
-      /* We only need take the vfp path if there are vfp arguments.  */ 
-      if (cif->vfp_used) 
-	closure_func = ffi_closure_VFP; 
-    } 
-  else if (cif->abi != FFI_SYSV) 
-    return FFI_BAD_ABI; 
- 
-#if FFI_EXEC_TRAMPOLINE_TABLE 
-  void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE); 
-  config[0] = closure; 
-  config[1] = closure_func; 
-#else 
- 
-#ifndef _M_ARM 
-  memcpy(closure->tramp, ffi_arm_trampoline, 8); 
-#else 
-  // cast away function type so MSVC doesn't set the lower bit of the function pointer 
-  memcpy(closure->tramp, (void*)((uintptr_t)ffi_arm_trampoline & 0xFFFFFFFE), FFI_TRAMPOLINE_CLOSURE_OFFSET); 
-#endif 
- 
-#if defined (__QNX__) 
-  msync(closure->tramp, 8, 0x1000000);	/* clear data map */ 
-  msync(codeloc, 8, 0x1000000);	/* clear insn map */ 
-#elif defined(_MSC_VER) 
-  FlushInstructionCache(GetCurrentProcess(), closure->tramp, FFI_TRAMPOLINE_SIZE); 
-#else 
-  __clear_cache(closure->tramp, closure->tramp + 8);	/* clear data map */ 
-  __clear_cache(codeloc, codeloc + 8);			/* clear insn map */ 
-#endif 
-#ifdef _M_ARM 
-  *(void(**)(void))(closure->tramp + FFI_TRAMPOLINE_CLOSURE_FUNCTION) = closure_func; 
-#else 
-  *(void (**)(void))(closure->tramp + 8) = closure_func; 
-#endif 
-#endif 
- 
-  closure->cif = cif; 
-  closure->fun = fun; 
-  closure->user_data = user_data; 
- 
-  return FFI_OK; 
-} 
- 
-ffi_status 
-ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif *cif, 
-		     void (*fun) (ffi_cif *, void *, void **, void *)) 
-{ 
-  void (*closure_func) (void) = ffi_go_closure_SYSV; 
- 
-  if (cif->abi == FFI_VFP) 
-    { 
-      /* We only need take the vfp path if there are vfp arguments.  */ 
-      if (cif->vfp_used) 
-	closure_func = ffi_go_closure_VFP; 
-    } 
-  else if (cif->abi != FFI_SYSV) 
-    return FFI_BAD_ABI; 
- 
-  closure->tramp = closure_func; 
-  closure->cif = cif; 
-  closure->fun = fun; 
- 
-  return FFI_OK; 
-} 
- 
-/* Below are routines for VFP hard-float support. */ 
- 
-/* A subroutine of vfp_type_p.  Given a structure type, return the type code 
-   of the first non-structure element.  Recurse for structure elements. 
-   Return -1 if the structure is in fact empty, i.e. no nested elements.  */ 
- 
-static int 
-is_hfa0 (const ffi_type *ty) 
-{ 
-  ffi_type **elements = ty->elements; 
-  int i, ret = -1; 
- 
-  if (elements != NULL) 
-    for (i = 0; elements[i]; ++i) 
-      { 
-        ret = elements[i]->type; 
-        if (ret == FFI_TYPE_STRUCT || ret == FFI_TYPE_COMPLEX) 
-          { 
-            ret = is_hfa0 (elements[i]); 
-            if (ret < 0) 
-              continue; 
-          } 
-        break; 
-      } 
- 
-  return ret; 
-} 
- 
-/* A subroutine of vfp_type_p.  Given a structure type, return true if all 
-   of the non-structure elements are the same as CANDIDATE.  */ 
- 
-static int 
-is_hfa1 (const ffi_type *ty, int candidate) 
-{ 
-  ffi_type **elements = ty->elements; 
-  int i; 
- 
-  if (elements != NULL) 
-    for (i = 0; elements[i]; ++i) 
-      { 
-        int t = elements[i]->type; 
-        if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX) 
-          { 
-            if (!is_hfa1 (elements[i], candidate)) 
-              return 0; 
-          } 
-        else if (t != candidate) 
-          return 0; 
-      } 
- 
-  return 1; 
-} 
- 
-/* Determine if TY is an homogenous floating point aggregate (HFA). 
-   That is, a structure consisting of 1 to 4 members of all the same type, 
-   where that type is a floating point scalar. 
- 
-   Returns non-zero iff TY is an HFA.  The result is an encoded value where 
-   bits 0-7 contain the type code, and bits 8-10 contain the element count.  */ 
- 
-static int 
-vfp_type_p (const ffi_type *ty) 
-{ 
-  ffi_type **elements; 
-  int candidate, i; 
-  size_t size, ele_count; 
- 
-  /* Quickest tests first.  */ 
-  candidate = ty->type; 
-  switch (ty->type) 
-    { 
-    default: 
-      return 0; 
-    case FFI_TYPE_FLOAT: 
-    case FFI_TYPE_DOUBLE: 
-      ele_count = 1; 
-      goto done; 
-    case FFI_TYPE_COMPLEX: 
-      candidate = ty->elements[0]->type; 
-      if (candidate != FFI_TYPE_FLOAT && candidate != FFI_TYPE_DOUBLE) 
-	return 0; 
-      ele_count = 2; 
-      goto done; 
-    case FFI_TYPE_STRUCT: 
-      break; 
-    } 
- 
-  /* No HFA types are smaller than 4 bytes, or larger than 32 bytes.  */ 
-  size = ty->size; 
-  if (size < 4 || size > 32) 
-    return 0; 
- 
-  /* Find the type of the first non-structure member.  */ 
-  elements = ty->elements; 
-  candidate = elements[0]->type; 
-  if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX) 
-    { 
-      for (i = 0; ; ++i) 
-        { 
-          candidate = is_hfa0 (elements[i]); 
-          if (candidate >= 0) 
-            break; 
-        } 
-    } 
- 
-  /* If the first member is not a floating point type, it's not an HFA. 
-     Also quickly re-check the size of the structure.  */ 
-  switch (candidate) 
-    { 
-    case FFI_TYPE_FLOAT: 
-      ele_count = size / sizeof(float); 
-      if (size != ele_count * sizeof(float)) 
-        return 0; 
-      break; 
-    case FFI_TYPE_DOUBLE: 
-      ele_count = size / sizeof(double); 
-      if (size != ele_count * sizeof(double)) 
-        return 0; 
-      break; 
-    default: 
-      return 0; 
-    } 
-  if (ele_count > 4) 
-    return 0; 
- 
-  /* Finally, make sure that all scalar elements are the same type.  */ 
-  for (i = 0; elements[i]; ++i) 
-    { 
-      int t = elements[i]->type; 
-      if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX) 
-        { 
-          if (!is_hfa1 (elements[i], candidate)) 
-            return 0; 
-        } 
-      else if (t != candidate) 
-        return 0; 
-    } 
- 
-  /* All tests succeeded.  Encode the result.  */ 
- done: 
-  return (ele_count << 8) | candidate; 
-} 
- 
-static int 
-place_vfp_arg (ffi_cif *cif, int h) 
-{ 
-  unsigned short reg = cif->vfp_reg_free; 
-  int align = 1, nregs = h >> 8; 
- 
-  if ((h & 0xff) == FFI_TYPE_DOUBLE) 
-    align = 2, nregs *= 2; 
- 
-  /* Align register number. */ 
-  if ((reg & 1) && align == 2) 
-    reg++; 
- 
-  while (reg + nregs <= 16) 
-    { 
-      int s, new_used = 0; 
-      for (s = reg; s < reg + nregs; s++) 
-	{ 
-	  new_used |= (1 << s); 
-	  if (cif->vfp_used & (1 << s)) 
-	    { 
-	      reg += align; 
-	      goto next_reg; 
-	    } 
-	} 
-      /* Found regs to allocate. */ 
-      cif->vfp_used |= new_used; 
-      cif->vfp_args[cif->vfp_nargs++] = (signed char)reg; 
- 
-      /* Update vfp_reg_free. */ 
-      if (cif->vfp_used & (1 << cif->vfp_reg_free)) 
-	{ 
-	  reg += nregs; 
-	  while (cif->vfp_used & (1 << reg)) 
-	    reg += 1; 
-	  cif->vfp_reg_free = reg; 
-	} 
-      return 0; 
-    next_reg:; 
-    } 
-  // done, mark all regs as used 
-  cif->vfp_reg_free = 16; 
-  cif->vfp_used = 0xFFFF; 
-  return 1; 
-} 
- 
-static void 
-layout_vfp_args (ffi_cif * cif) 
-{ 
-  unsigned int i; 
-  /* Init VFP fields */ 
-  cif->vfp_used = 0; 
-  cif->vfp_nargs = 0; 
-  cif->vfp_reg_free = 0; 
-  memset (cif->vfp_args, -1, 16);	/* Init to -1. */ 
- 
-  for (i = 0; i < cif->nargs; i++) 
-    { 
-      int h = vfp_type_p (cif->arg_types[i]); 
-      if (h && place_vfp_arg (cif, h) == 1) 
-	break; 
-    } 
-} 
- 
-#endif /* __arm__ or _M_ARM */ 
+/* -----------------------------------------------------------------------
+   ffi.c - Copyright (c) 2011 Timothy Wall
+           Copyright (c) 2011 Plausible Labs Cooperative, Inc.
+           Copyright (c) 2011 Anthony Green
+	   Copyright (c) 2011 Free Software Foundation
+           Copyright (c) 1998, 2008, 2011  Red Hat, Inc.
+
+   ARM Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#if defined(__arm__) || defined(_M_ARM)
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "internal.h"
+
+#if defined(_MSC_VER) && defined(_M_ARM)
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#endif
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+#include <mach/machine/vm_param.h>
+#endif
+
+#else
+#ifndef _M_ARM
+extern unsigned int ffi_arm_trampoline[2] FFI_HIDDEN;
+#else
+extern unsigned int ffi_arm_trampoline[3] FFI_HIDDEN;
+#endif
+#endif
+
+/* Forward declarations: both helpers are called below before their definitions appear.  */
+static int vfp_type_p (const ffi_type *);
+static void layout_vfp_args (ffi_cif *);
+
+static void *
+ffi_align (ffi_type *ty, void *p)
+{
+  /* Return P aligned by FFI_ALIGN to TY's alignment, never using less than 4 (always exactly 4 on _WIN32_WCE).  */
+  size_t alignment;
+#ifdef _WIN32_WCE
+  alignment = 4;
+#else
+  alignment = ty->alignment;
+  if (alignment < 4)
+    alignment = 4;	/* enforce the 4-byte minimum */
+#endif
+  return (void *) FFI_ALIGN (p, alignment);
+}
+
+static size_t	/* Store the argument of type TY found at SRC into DST; returns the bytes consumed.  */
+ffi_put_arg (ffi_type *ty, void *src, void *dst)
+{
+  size_t z = ty->size;
+
+  switch (ty->type)
+    {
+    case FFI_TYPE_SINT8:	/* 8- and 16-bit integers are converted to a full UINT32 word */
+      *(UINT32 *)dst = *(SINT8 *)src;
+      break;
+    case FFI_TYPE_UINT8:
+      *(UINT32 *)dst = *(UINT8 *)src;
+      break;
+    case FFI_TYPE_SINT16:
+      *(UINT32 *)dst = *(SINT16 *)src;
+      break;
+    case FFI_TYPE_UINT16:
+      *(UINT32 *)dst = *(UINT16 *)src;
+      break;
+
+    case FFI_TYPE_INT:
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_POINTER:
+#ifndef _MSC_VER
+    case FFI_TYPE_FLOAT:
+#endif
+      *(UINT32 *)dst = *(UINT32 *)src;	/* one word copied as-is */
+      break;
+
+#ifdef _MSC_VER
+    // casting a float* to a UINT32* doesn't work on Windows
+    case FFI_TYPE_FLOAT:
+        *(uintptr_t *)dst = 0;	/* zero the destination word before the float store */
+        *(float *)dst = *(float *)src;
+        break;
+#endif
+
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_DOUBLE:
+      *(UINT64 *)dst = *(UINT64 *)src;	/* two words copied as-is */
+      break;
+
+    case FFI_TYPE_STRUCT:
+    case FFI_TYPE_COMPLEX:
+      memcpy (dst, src, z);	/* aggregates are copied byte for byte, ty->size bytes */
+      break;
+
+    default:
+      abort();	/* no other type code is handled here */
+    }
+
+  return FFI_ALIGN (z, 4);	/* ty->size aligned to 4 by FFI_ALIGN */
+}
+
+/* ffi_prep_args_SYSV is called once stack space has been allocated
+   for the function's arguments; ARGP points at that space.
+
+   If FLAGS is ARM_TYPE_STRUCT, RVALUE is stored in the first word.
+   Each argument is then aligned (ffi_align) and written (ffi_put_arg)
+   in order.  Nothing is returned.  The VFP load area used for the
+   hard-float ABI is handled by ffi_prep_args_VFP instead.  */
+static void
+ffi_prep_args_SYSV (ffi_cif *cif, int flags, void *rvalue,
+		    void **avalue, char *argp)
+{
+  ffi_type **arg_types = cif->arg_types;
+  int i, n;
+
+  if (flags == ARM_TYPE_STRUCT)
+    {
+      *(void **) argp = rvalue;	/* the result address occupies the first word */
+      argp += 4;
+    }
+
+  for (i = 0, n = cif->nargs; i < n; i++)
+    {
+      ffi_type *ty = arg_types[i];
+      argp = ffi_align (ty, argp);
+      argp += ffi_put_arg (ty, avalue[i], argp);	/* advance by the size ffi_put_arg reports */
+    }
+}
+
+static void	/* Lay out the arguments for a VFP call: VFP_SPACE slots, then the 16-byte core-register area at STACK, then the stack.  */
+ffi_prep_args_VFP (ffi_cif *cif, int flags, void *rvalue,
+                   void **avalue, char *stack, char *vfp_space)
+{
+  ffi_type **arg_types = cif->arg_types;
+  int i, n, vi = 0;	/* vi counts the entries of cif->vfp_args consumed so far */
+  char *argp, *regp, *eo_regp;
+  char stack_used = 0;
+  char done_with_regs = 0;
+
+  /* The first 4 words on the stack are used for values
+     passed in core registers.  */
+  regp = stack;
+  eo_regp = argp = regp + 16;
+
+  /* If the function returns an FFI_TYPE_STRUCT in memory,
+     that address is passed in r0 to the function.  */
+  if (flags == ARM_TYPE_STRUCT)
+    {
+      *(void **) regp = rvalue;
+      regp += 4;
+    }
+
+  for (i = 0, n = cif->nargs; i < n; i++)
+    {
+      ffi_type *ty = arg_types[i];
+      void *a = avalue[i];
+      int is_vfp_type = vfp_type_p (ty);
+
+      /* Allocated in VFP registers. */
+      if (vi < cif->vfp_nargs && is_vfp_type)
+	{
+	  char *vfp_slot = vfp_space + cif->vfp_args[vi++] * 4;	/* vfp_args[] entries are offsets in 4-byte units */
+	  ffi_put_arg (ty, a, vfp_slot);
+	  continue;
+	}
+      /* Try allocating in core registers. */
+      else if (!done_with_regs && !is_vfp_type)
+	{
+	  char *tregp = ffi_align (ty, regp);
+	  size_t size = ty->size;
+	  size = (size < 4) ? 4 : size;	// pad
+	  /* Check if there is space left in the aligned register
+	     area to place the argument.  */
+	  if (tregp + size <= eo_regp)
+	    {
+	      regp = tregp + ffi_put_arg (ty, a, tregp);
+	      done_with_regs = (regp == argp);
+	      // ensure we did not write into the stack area
+	      FFI_ASSERT (regp <= argp);
+	      continue;
+	    }
+	  /* In case there are no arguments in the stack area yet,
+	     the argument is passed in the remaining core registers
+	     and on the stack.  */
+	  else if (!stack_used)
+	    {
+	      stack_used = 1;
+	      done_with_regs = 1;
+	      argp = tregp + ffi_put_arg (ty, a, tregp);	/* later stack arguments continue after this one */
+	      FFI_ASSERT (eo_regp < argp);
+	      continue;
+	    }
+	}
+      /* Base case, arguments are passed on the stack */
+      stack_used = 1;
+      argp = ffi_align (ty, argp);
+      argp += ffi_put_arg (ty, a, argp);
+    }
+}
+
+/* Perform machine dependent cif processing: derive cif->flags from the return type and finalize cif->bytes.  Returns FFI_OK; aborts on an unknown return type.  */
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+  int flags = 0, cabi = cif->abi;
+  size_t bytes = cif->bytes;
+
+  /* Map out the register placements of VFP register args.  The VFP
+     hard-float calling conventions are slightly more sophisticated
+     than the base calling conventions, so we do it here instead of
+     in ffi_prep_args(). */
+  if (cabi == FFI_VFP)
+    layout_vfp_args (cif);
+
+  /* Set the return type flag */
+  switch (cif->rtype->type)
+    {
+    case FFI_TYPE_VOID:
+      flags = ARM_TYPE_VOID;
+      break;
+
+    case FFI_TYPE_INT:
+    case FFI_TYPE_UINT8:
+    case FFI_TYPE_SINT8:
+    case FFI_TYPE_UINT16:
+    case FFI_TYPE_SINT16:
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_POINTER:
+      flags = ARM_TYPE_INT;
+      break;
+
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_UINT64:
+      flags = ARM_TYPE_INT64;
+      break;
+
+    case FFI_TYPE_FLOAT:
+      flags = (cabi == FFI_VFP ? ARM_TYPE_VFP_S : ARM_TYPE_INT);	/* without VFP a float is flagged like an int */
+      break;
+    case FFI_TYPE_DOUBLE:
+      flags = (cabi == FFI_VFP ? ARM_TYPE_VFP_D : ARM_TYPE_INT64);	/* without VFP a double is flagged like an int64 */
+      break;
+
+    case FFI_TYPE_STRUCT:
+    case FFI_TYPE_COMPLEX:
+      if (cabi == FFI_VFP)
+	{
+	  int h = vfp_type_p (cif->rtype);	/* element count in bits 8 and up, element type code in bits 0-7; 0 if not an HFA */
+
+	  flags = ARM_TYPE_VFP_N;
+	  if (h == 0x100 + FFI_TYPE_FLOAT)	/* exactly one float element */
+	    flags = ARM_TYPE_VFP_S;
+	  if (h == 0x100 + FFI_TYPE_DOUBLE)	/* exactly one double element */
+	    flags = ARM_TYPE_VFP_D;
+	  if (h != 0)
+	      break;
+	}
+
+      /* A Composite Type not larger than 4 bytes is returned in r0.
+	 A Composite Type larger than 4 bytes, or whose size cannot
+	 be determined statically ... is stored in memory at an
+	 address passed [in r0].  */
+      if (cif->rtype->size <= 4)
+	flags = ARM_TYPE_INT;
+      else
+	{
+	  flags = ARM_TYPE_STRUCT;
+	  bytes += 4;	/* one more word, matching the result address the ffi_prep_args_* routines store first */
+	}
+      break;
+
+    default:
+      abort();
+    }
+
+  /* Round the stack up to a multiple of 8 bytes.  This isn't needed
+     everywhere, but it is on some platforms, and it doesn't harm anything
+     when it isn't needed.  */
+  bytes = FFI_ALIGN (bytes, 8);
+
+  /* Minimum stack space is the 4 register arguments that we pop.  */
+  if (bytes < 4*4)
+    bytes = 4*4;
+
+  cif->bytes = bytes;
+  cif->flags = flags;
+
+  return FFI_OK;
+}
+
+/* Perform machine dependent cif processing for variadic calls.  NFIXEDARGS and NTOTALARGS are not used; the result is that of ffi_prep_cif_machdep.  */
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep_var (ffi_cif * cif,
+			  unsigned int nfixedargs, unsigned int ntotalargs)
+{
+  /* VFP variadic calls actually use the SYSV ABI */
+  if (cif->abi == FFI_VFP)
+    cif->abi = FFI_SYSV;	/* note: this overwrites the ABI stored in the caller's cif */
+
+  return ffi_prep_cif_machdep (cif);
+}
+
+/* Prototypes for assembly functions, in sysv.S.
+
+   struct call_frame is the bookkeeping record that ffi_call_int places
+   directly above the outgoing argument area and hands to the assembly
+   stubs.  ffi_call_int fills in rvalue, flags and closure; it never
+   writes fp or lr.  The stubs in sysv.S address the members by fixed
+   byte offsets, so their order must not change.  */
+
+struct call_frame
+{
+  void *fp;		/* caller's fp, stored by the stub on entry (stm r1, {fp, lr}) */
+  void *lr;		/* caller's lr, stored by the same instruction */
+  void *rvalue;		/* where the stub stores the result after the call */
+  int flags;		/* ARM_TYPE_* code selecting how the result is stored */
+  void *closure;	/* loaded into ip before the call (static chain) */
+};
+
+extern void ffi_call_SYSV (void *stack, struct call_frame *,
+			   void (*fn) (void)) FFI_HIDDEN;	/* core-register-only call */
+extern void ffi_call_VFP (void *vfp_space, struct call_frame *,
+			   void (*fn) (void), unsigned vfp_used) FFI_HIDDEN;	/* loads VFP regs, then falls into ffi_call_SYSV */
+
+/* Common implementation behind ffi_call and ffi_call_go.
+
+   One alloca block is carved into, from low to high addresses: an
+   optional image of the VFP argument registers, the outgoing argument
+   area of cif->bytes bytes, a struct call_frame, and optional scratch
+   space for the return value.  The arguments are marshalled into it
+   and control passes to the matching assembly stub.  CLOSURE is stored
+   in the frame unchanged.  */
+static void
+ffi_call_int (ffi_cif * cif, void (*fn) (void), void *rvalue,
+	      void **avalue, void *closure)
+{
+  ffi_type *rtype = cif->rtype;
+  int ret_code = cif->flags;
+  size_t scratch = 0;
+  size_t vfp_bytes = 0;
+  size_t arg_bytes;
+  char *arg_area, *vfp_image, *result_buf;
+  struct call_frame *frame;
+
+  if (rvalue == NULL)
+    {
+      /* Without a caller buffer, only a struct returned in memory still
+	 needs somewhere to land; a register result is simply dropped.  */
+      if (ret_code != ARM_TYPE_STRUCT)
+	ret_code = ARM_TYPE_VOID;
+      else
+	scratch = rtype->size;
+    }
+  else
+    switch (ret_code)
+      {
+      case ARM_TYPE_VFP_N:
+	/* Largest case is double x 4. */
+	scratch = 32;
+	break;
+
+      case ARM_TYPE_INT:
+	/* A small struct comes back in r0 and is stored as a full word,
+	   so bounce it through a word-sized temporary.  */
+	if (rtype->type == FFI_TYPE_STRUCT)
+	  scratch = 4;
+	break;
+
+      default:
+	break;
+      }
+
+  /* Room for the whole d0-d7 image, but only if VFP registers are used.  */
+  if (cif->abi == FFI_VFP && cif->vfp_used)
+    vfp_bytes = 8*8;
+
+  arg_bytes = cif->bytes;
+  arg_area = alloca (vfp_bytes + arg_bytes + sizeof(struct call_frame)
+		     + scratch);
+
+  vfp_image = NULL;
+  if (vfp_bytes != 0)
+    {
+      vfp_image = arg_area;
+      arg_area += vfp_bytes;
+    }
+
+  /* The frame sits right after the argument area, the scratch result
+     buffer (if any) right after the frame.  */
+  frame = (struct call_frame *)(arg_area + arg_bytes);
+  result_buf = scratch ? (char *)(frame + 1) : (char *) rvalue;
+
+  frame->closure = closure;
+  frame->flags = ret_code;
+  frame->rvalue = result_buf;
+
+  if (vfp_image == NULL)
+    {
+      ffi_prep_args_SYSV (cif, ret_code, result_buf, avalue, arg_area);
+      ffi_call_SYSV (arg_area, frame, fn);
+    }
+  else
+    {
+      ffi_prep_args_VFP (cif, ret_code, result_buf, avalue, arg_area,
+			 vfp_image);
+      ffi_call_VFP (vfp_image, frame, fn, cif->vfp_used);
+    }
+
+  /* Copy back only the bytes the caller's buffer is known to hold.  */
+  if (rvalue != NULL && rvalue != result_buf)
+    memcpy (rvalue, result_buf, rtype->size);
+}
+
+/* Call FN with the arguments in AVALUE as described by CIF, storing the
+   result through RVALUE.  No closure pointer is supplied to the callee.  */
+void
+ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
+{
+  void *no_closure = NULL;
+
+  ffi_call_int (cif, fn, rvalue, avalue, no_closure);
+}
+
+/* Same as ffi_call, except that CLOSURE is forwarded to ffi_call_int,
+   which records it in the call frame for the assembly stub.  */
+void
+ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue,
+	     void **avalue, void *closure)
+{
+  void *static_chain = closure;
+
+  ffi_call_int (cif, fn, rvalue, avalue, static_chain);
+}
+
+/* Locate the incoming arguments of a closure entered through the SYSV
+   path.
+
+   ARGP points at the saved argument words.  Each AVALUE[i] is set to the
+   address of argument i inside that area, after aligning with ffi_align.
+   Returns the address the result must be written to: the pointer found
+   in the first argument word when the cif returns a struct in memory,
+   RVALUE otherwise.  */
+static void *
+ffi_prep_incoming_args_SYSV (ffi_cif *cif, void *rvalue,
+			     char *argp, void **avalue)
+{
+  ffi_type **types = cif->arg_types;
+  char *cursor = argp;
+  int idx, count;
+
+  if (cif->flags != ARM_TYPE_STRUCT)
+    {
+      /* A result narrower than a word: clear the whole word up front.  */
+      if (cif->rtype->size != 0 && cif->rtype->size < 4)
+        *(uint32_t *) rvalue = 0;
+    }
+  else
+    {
+      /* The caller passed the result address as a hidden first word.  */
+      rvalue = *(void **) cursor;
+      cursor += 4;
+    }
+
+  count = cif->nargs;
+  idx = 0;
+  while (idx < count)
+    {
+      ffi_type *ty = types[idx];
+      size_t len = ty->size;
+
+      cursor = ffi_align (ty, cursor);
+      avalue[idx] = (void *) cursor;
+      cursor += len;
+      idx++;
+    }
+
+  return rvalue;
+}
+
+/* Locate the incoming arguments of a closure entered through the VFP
+   path.
+
+   STACK points at the four saved core-register words, which are
+   followed by the stack arguments; VFP_SPACE is the saved VFP register
+   image.  Each AVALUE[i] is pointed at the place argument i lives in:
+   the VFP image for arguments that were given VFP registers by
+   layout_vfp_args, the core-register words while they last, and the
+   stack after that.  Returns the address the result must be written to,
+   as for the SYSV variant.  */
+static void *
+ffi_prep_incoming_args_VFP (ffi_cif *cif, void *rvalue, char *stack,
+			    char *vfp_space, void **avalue)
+{
+  ffi_type **types = cif->arg_types;
+  char *core = stack;			/* next free core-register word */
+  char *core_end = stack + 16;		/* one past the core-register words */
+  char *overflow = core_end;		/* next free stack position */
+  int next_vfp = 0;			/* index into cif->vfp_args */
+  char core_full = 0;
+  char touched_stack = 0;
+  int idx, count;
+
+  if (cif->flags == ARM_TYPE_STRUCT)
+    {
+      rvalue = *(void **) core;
+      core += 4;
+    }
+
+  count = cif->nargs;
+  for (idx = 0; idx < count; idx++)
+    {
+      ffi_type *ty = types[idx];
+      int hfa = vfp_type_p (ty);
+      size_t len = ty->size;
+
+      /* Floating point candidates take the VFP slots recorded in the cif
+	 for as long as any were assigned.  */
+      if (hfa && next_vfp < cif->vfp_nargs)
+	{
+	  avalue[idx] = vfp_space + cif->vfp_args[next_vfp] * 4;
+	  next_vfp++;
+	  continue;
+	}
+
+      if (!hfa && !core_full)
+	{
+	  char *slot = ffi_align (ty, core);
+
+	  if (len < 4)
+	    len = 4;	// pad
+
+	  /* Accept the slot if the argument fits into the registers, or
+	     if it straddles registers and stack while nothing else has
+	     been read from the stack yet.  */
+	  if (!touched_stack || slot + len <= core_end)
+	    {
+	      /* Because we're little endian, this is what it turns into.  */
+	      avalue[idx] = (void *) slot;
+	      core = slot + len;
+
+	      /* Having run past the last core register, the stack
+		 arguments continue right after this one.  */
+	      if (core > core_end)
+		{
+		  FFI_ASSERT (!touched_stack);
+		  overflow = core;
+		}
+	      if (core >= core_end)
+		{
+		  core_full = 1;
+		  touched_stack = 1;
+		}
+	      continue;
+	    }
+	}
+
+      /* Everything else is read from the stack.  */
+      touched_stack = 1;
+      overflow = ffi_align (ty, overflow);
+      avalue[idx] = (void *) overflow;
+      overflow += len;
+    }
+
+  return rvalue;
+}
+
+struct closure_frame
+{ /* Stack record built by the closure entry stubs in sysv.S and passed
+     to ffi_closure_inner_SYSV / ffi_closure_inner_VFP.  The stubs
+     address it by fixed offsets, so member order and sizes matter.  */
+  char vfp_space[8*8] __attribute__((aligned(8)));	/* d0-d7 as saved by ffi_closure_VFP */
+  char result[8*4];	/* return value buffer handed to the user function */
+  char argp[];		/* saved r0-r3, followed by the caller's stack arguments */
+};
+
+/* C half of the SYSV closure entry.  Builds the argument pointer table
+   from FRAME, invokes FUN and returns cif->flags, which the assembly
+   caller uses to select how the result is loaded.  */
+int FFI_HIDDEN
+ffi_closure_inner_SYSV (ffi_cif *cif,
+		        void (*fun) (ffi_cif *, void *, void **, void *),
+		        void *user_data,
+		        struct closure_frame *frame)
+{
+  size_t table_bytes = cif->nargs * sizeof (void *);
+  void **arg_ptrs = (void **) alloca (table_bytes);
+  void *result;
+
+  result = ffi_prep_incoming_args_SYSV (cif, frame->result,
+					frame->argp, arg_ptrs);
+  fun (cif, result, arg_ptrs, user_data);
+
+  return cif->flags;
+}
+
+/* C half of the VFP closure entry.  Identical to the SYSV variant
+   except that the saved VFP register image in FRAME is also consulted
+   when locating the arguments.  Returns cif->flags.  */
+int FFI_HIDDEN
+ffi_closure_inner_VFP (ffi_cif *cif,
+		       void (*fun) (ffi_cif *, void *, void **, void *),
+		       void *user_data,
+		       struct closure_frame *frame)
+{
+  size_t table_bytes = cif->nargs * sizeof (void *);
+  void **arg_ptrs = (void **) alloca (table_bytes);
+  void *result;
+
+  result = ffi_prep_incoming_args_VFP (cif, frame->result, frame->argp,
+				       frame->vfp_space, arg_ptrs);
+  fun (cif, result, arg_ptrs, user_data);
+
+  return cif->flags;
+}
+
+void ffi_closure_SYSV (void) FFI_HIDDEN;	/* closure entry stub; saves core registers only */
+void ffi_closure_VFP (void) FFI_HIDDEN;	/* closure entry stub; also saves d0-d7 */
+void ffi_go_closure_SYSV (void) FFI_HIDDEN;	/* Go closure entry; chosen when no VFP registers are used */
+void ffi_go_closure_VFP (void) FFI_HIDDEN;	/* Go closure entry; chosen when VFP registers are used */
+
+/* the cif must already be prep'ed
+
+   Initialise CLOSURE so that a call through its trampoline reaches one
+   of the assembly entry stubs, which in turn calls
+   FUN (cif, result, args, USER_DATA) via ffi_closure_inner_*.
+
+   The VFP stub is chosen only when CIF uses FFI_VFP and at least one
+   VFP register was assigned; every other accepted case uses the SYSV
+   stub.  Returns FFI_BAD_ABI when cif->abi is neither FFI_VFP nor
+   FFI_SYSV, FFI_OK otherwise.
+
+   With FFI_EXEC_TRAMPOLINE_TABLE the closure and stub addresses are
+   written into the config slot PAGE_MAX_SIZE bytes before CODELOC.
+   Otherwise the ffi_arm_trampoline template is copied into
+   closure->tramp, the caches are flushed, and the stub address is
+   stored after the template.  */
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure * closure,
+		      ffi_cif * cif,
+		      void (*fun) (ffi_cif *, void *, void **, void *),
+		      void *user_data, void *codeloc)
+{
+  void (*closure_func) (void) = ffi_closure_SYSV;
+
+  if (cif->abi == FFI_VFP)
+    {
+      /* We only need to take the vfp path if there are vfp arguments.  */
+      if (cif->vfp_used)
+	closure_func = ffi_closure_VFP;
+    }
+  else if (cif->abi != FFI_SYSV)
+    return FFI_BAD_ABI;
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+  void **config = (void **)((uint8_t *)codeloc - PAGE_MAX_SIZE);
+  config[0] = closure;		/* read back by the entry stub (ldr ip, [ip]) */
+  config[1] = closure_func;	/* jump target of the table trampoline */
+#else
+
+#ifndef _M_ARM
+  memcpy(closure->tramp, ffi_arm_trampoline, 8);
+#else
+  // cast away function type so MSVC doesn't set the lower bit of the function pointer
+  memcpy(closure->tramp, (void*)((uintptr_t)ffi_arm_trampoline & 0xFFFFFFFE), FFI_TRAMPOLINE_CLOSURE_OFFSET);
+#endif
+
+#if defined (__QNX__)
+  msync(closure->tramp, 8, 0x1000000);	/* clear data map */
+  msync(codeloc, 8, 0x1000000);	/* clear insn map */
+#elif defined(_MSC_VER)
+  FlushInstructionCache(GetCurrentProcess(), closure->tramp, FFI_TRAMPOLINE_SIZE);
+#else
+  __clear_cache(closure->tramp, closure->tramp + 8);	/* clear data map */
+  __clear_cache(codeloc, codeloc + 8);			/* clear insn map */
+#endif
+#ifdef _M_ARM
+  *(void(**)(void))(closure->tramp + FFI_TRAMPOLINE_CLOSURE_FUNCTION) = closure_func;
+#else
+  *(void (**)(void))(closure->tramp + 8) = closure_func;	/* fills the template's trailing .long slot */
+#endif
+#endif
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+
+  return FFI_OK;
+}
+
+/* Initialise a Go closure: record CIF and FUN in CLOSURE and point its
+   tramp member at the matching assembly entry stub.  The VFP stub is
+   used only for FFI_VFP cifs that were assigned VFP registers.  Returns
+   FFI_BAD_ABI, leaving CLOSURE untouched, when cif->abi is neither
+   FFI_VFP nor FFI_SYSV; FFI_OK otherwise.  */
+ffi_status
+ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif *cif,
+		     void (*fun) (ffi_cif *, void *, void **, void *))
+{
+  void (*entry) (void);
+
+  switch (cif->abi)
+    {
+    case FFI_SYSV:
+      entry = ffi_go_closure_SYSV;
+      break;
+
+    case FFI_VFP:
+      /* We only need take the vfp path if there are vfp arguments.  */
+      entry = cif->vfp_used ? ffi_go_closure_VFP : ffi_go_closure_SYSV;
+      break;
+
+    default:
+      return FFI_BAD_ABI;
+    }
+
+  closure->fun = fun;
+  closure->cif = cif;
+  closure->tramp = entry;
+
+  return FFI_OK;
+}
+
+/* Below are routines for VFP hard-float support. */
+
+/* Helper for vfp_type_p.  Walk the members of aggregate TY in order and
+   return the type code of the first one that is not itself a structure
+   or complex type, descending into nested aggregates as needed.
+   Returns -1 when TY has no member table or no such member is found.  */
+
+static int
+is_hfa0 (const ffi_type *ty)
+{
+  ffi_type **member = ty->elements;
+  int result = -1;
+
+  if (member == NULL)
+    return result;
+
+  for (; *member != NULL; ++member)
+    {
+      result = (*member)->type;
+      if (result != FFI_TYPE_STRUCT && result != FFI_TYPE_COMPLEX)
+        break;
+
+      /* Nested aggregate: use its first scalar, or move on to the next
+         member if it turns out to contain none.  */
+      result = is_hfa0 (*member);
+      if (result >= 0)
+        break;
+    }
+
+  return result;
+}
+
+/* Helper for vfp_type_p.  Return 1 if every scalar member of aggregate
+   TY, including those of nested structure and complex members, has the
+   type code CANDIDATE; return 0 as soon as one differs.  A type without
+   a member table yields 1.  */
+
+static int
+is_hfa1 (const ffi_type *ty, int candidate)
+{
+  ffi_type **member = ty->elements;
+
+  if (member == NULL)
+    return 1;
+
+  while (*member != NULL)
+    {
+      ffi_type *sub = *member++;
+      int code = sub->type;
+      int nested = (code == FFI_TYPE_STRUCT || code == FFI_TYPE_COMPLEX);
+
+      if (nested ? !is_hfa1 (sub, candidate) : code != candidate)
+        return 0;
+    }
+
+  return 1;
+}
+
+/* Determine if TY is a homogeneous floating point aggregate (HFA).
+   That is, a structure consisting of 1 to 4 members of all the same type,
+   where that type is a floating point scalar.
+
+   Returns non-zero iff TY is an HFA.  The result is an encoded value where
+   bits 0-7 contain the type code, and bits 8-10 contain the element count.
+
+   A plain float or double is reported as one element, and a complex
+   float or double as two elements of the underlying type.  */
+
+static int
+vfp_type_p (const ffi_type *ty)
+{
+  ffi_type **elements;
+  int candidate, i;
+  size_t size, ele_count;
+
+  /* Quickest tests first.  */
+  candidate = ty->type;
+  switch (ty->type)
+    {
+    default:
+      return 0;
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_DOUBLE:
+      ele_count = 1;
+      goto done;
+    case FFI_TYPE_COMPLEX:
+      candidate = ty->elements[0]->type;
+      if (candidate != FFI_TYPE_FLOAT && candidate != FFI_TYPE_DOUBLE)
+	return 0;
+      ele_count = 2;
+      goto done;
+    case FFI_TYPE_STRUCT:
+      break;
+    }
+
+  /* No HFA types are smaller than 4 bytes, or larger than 32 bytes.  */
+  size = ty->size;
+  if (size < 4 || size > 32)
+    return 0;
+
+  /* A structure without a member table cannot be an HFA.  */
+  elements = ty->elements;
+  if (elements == NULL)
+    return 0;
+
+  /* Find the type of the first non-structure member.  Nested aggregates
+     are searched with is_hfa0, and ones holding no scalar are skipped.
+     The scan stops at the NULL terminator, so a structure without any
+     scalar leaves CANDIDATE negative and is rejected by the switch
+     below rather than reading past the end of the table.  */
+  candidate = -1;
+  for (i = 0; elements[i]; ++i)
+    {
+      candidate = elements[i]->type;
+      if (candidate == FFI_TYPE_STRUCT || candidate == FFI_TYPE_COMPLEX)
+        candidate = is_hfa0 (elements[i]);
+      if (candidate >= 0)
+        break;
+    }
+
+  /* If the first member is not a floating point type, it's not an HFA.
+     Also quickly re-check the size of the structure.  */
+  switch (candidate)
+    {
+    case FFI_TYPE_FLOAT:
+      ele_count = size / sizeof(float);
+      if (size != ele_count * sizeof(float))
+        return 0;
+      break;
+    case FFI_TYPE_DOUBLE:
+      ele_count = size / sizeof(double);
+      if (size != ele_count * sizeof(double))
+        return 0;
+      break;
+    default:
+      return 0;
+    }
+  if (ele_count > 4)
+    return 0;
+
+  /* Finally, make sure that all scalar elements are the same type.  */
+  for (i = 0; elements[i]; ++i)
+    {
+      int t = elements[i]->type;
+      if (t == FFI_TYPE_STRUCT || t == FFI_TYPE_COMPLEX)
+        {
+          if (!is_hfa1 (elements[i], candidate))
+            return 0;
+        }
+      else if (t != candidate)
+        return 0;
+    }
+
+  /* All tests succeeded.  Encode the result.  */
+ done:
+  return (ele_count << 8) | candidate;
+}
+
+/* Try to reserve VFP registers for one argument.
+
+   H is the encoding produced by vfp_type_p: element type in the low
+   byte, element count above it.  Registers are tracked as sixteen
+   slots in the cif->vfp_used bitmask; a double element takes two slots
+   and must start on an even one.  On success the starting slot is
+   appended to cif->vfp_args and 0 is returned.  When no suitable run of
+   free slots remains, every slot is marked used and 1 is returned.  */
+static int
+place_vfp_arg (ffi_cif *cif, int h)
+{
+  unsigned short first = cif->vfp_reg_free;
+  int count = h >> 8;
+  int step = 1;
+
+  if ((h & 0xff) == FFI_TYPE_DOUBLE)
+    {
+      step = 2;
+      count *= 2;
+    }
+
+  /* Align register number. */
+  if (step == 2 && (first & 1))
+    first++;
+
+  while (first + count <= 16)
+    {
+      int slot, mask = 0, clash = 0;
+
+      for (slot = first; slot < first + count; slot++)
+	{
+	  if (cif->vfp_used & (1 << slot))
+	    {
+	      clash = 1;
+	      break;
+	    }
+	  mask |= (1 << slot);
+	}
+
+      if (clash)
+	{
+	  /* Something inside this window is taken; slide it and retry.  */
+	  first += step;
+	  continue;
+	}
+
+      /* Found regs to allocate. */
+      cif->vfp_used |= mask;
+      cif->vfp_args[cif->vfp_nargs++] = (signed char)first;
+
+      /* Update vfp_reg_free. */
+      if (cif->vfp_used & (1 << cif->vfp_reg_free))
+	{
+	  first += count;
+	  while (cif->vfp_used & (1 << first))
+	    first += 1;
+	  cif->vfp_reg_free = first;
+	}
+      return 0;
+    }
+
+  // done, mark all regs as used
+  cif->vfp_reg_free = 16;
+  cif->vfp_used = 0xFFFF;
+  return 1;
+}
+
+/* Reset the VFP bookkeeping in CIF and assign VFP registers to the
+   arguments in order.  Arguments for which vfp_type_p returns 0 are
+   skipped.  Assignment stops at the first argument place_vfp_arg
+   cannot fit.  */
+static void
+layout_vfp_args (ffi_cif * cif)
+{
+  unsigned int idx = 0;
+
+  /* Init VFP fields */
+  memset (cif->vfp_args, -1, 16);	/* Init to -1. */
+  cif->vfp_reg_free = 0;
+  cif->vfp_nargs = 0;
+  cif->vfp_used = 0;
+
+  while (idx < cif->nargs)
+    {
+      int h = vfp_type_p (cif->arg_types[idx]);
+
+      if (h != 0 && place_vfp_arg (cif, h) == 1)
+	break;
+      idx++;
+    }
+}
+
+#endif /* __arm__ or _M_ARM */
diff --git a/contrib/restricted/libffi/src/arm/ffitarget.h b/contrib/restricted/libffi/src/arm/ffitarget.h
index 24e1de72b9..cb57b84880 100644
--- a/contrib/restricted/libffi/src/arm/ffitarget.h
+++ b/contrib/restricted/libffi/src/arm/ffitarget.h
@@ -1,89 +1,89 @@
-/* -----------------------------------------------------------------*-C-*- 
-   ffitarget.h - Copyright (c) 2012  Anthony Green 
-                 Copyright (c) 2010  CodeSourcery 
-                 Copyright (c) 1996-2003  Red Hat, Inc. 
- 
-   Target configuration macros for ARM. 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
-   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
-   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
-   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
-   DEALINGS IN THE SOFTWARE. 
- 
-   ----------------------------------------------------------------------- */ 
- 
-#ifndef LIBFFI_TARGET_H 
-#define LIBFFI_TARGET_H 
- 
-#ifndef LIBFFI_H 
-#error "Please do not include ffitarget.h directly into your source.  Use ffi.h instead." 
-#endif 
- 
-#ifndef LIBFFI_ASM 
-typedef unsigned long          ffi_arg; 
-typedef signed long            ffi_sarg; 
- 
-typedef enum ffi_abi { 
-  FFI_FIRST_ABI = 0, 
-  FFI_SYSV, 
-  FFI_VFP, 
-  FFI_LAST_ABI, 
-#if defined(__ARM_PCS_VFP) || defined(_M_ARM) 
-  FFI_DEFAULT_ABI = FFI_VFP, 
-#else 
-  FFI_DEFAULT_ABI = FFI_SYSV, 
-#endif 
-} ffi_abi; 
-#endif 
- 
-#define FFI_EXTRA_CIF_FIELDS			\ 
-  int vfp_used;					\ 
-  unsigned short vfp_reg_free, vfp_nargs;	\ 
-  signed char vfp_args[16]			\ 
- 
-#define FFI_TARGET_SPECIFIC_VARIADIC 
-#ifndef _M_ARM 
-#define FFI_TARGET_HAS_COMPLEX_TYPE 
-#endif 
- 
-/* ---- Definitions for closures ----------------------------------------- */ 
- 
-#define FFI_CLOSURES 1 
-#define FFI_GO_CLOSURES 1 
-#define FFI_NATIVE_RAW_API 0 
- 
-#if defined (FFI_EXEC_TRAMPOLINE_TABLE) && FFI_EXEC_TRAMPOLINE_TABLE 
- 
-#ifdef __MACH__ 
-#define FFI_TRAMPOLINE_SIZE 12 
-#define FFI_TRAMPOLINE_CLOSURE_OFFSET 8 
-#else 
-#error "No trampoline table implementation" 
-#endif 
- 
-#else 
-#ifdef _MSC_VER 
-#define FFI_TRAMPOLINE_SIZE 16 
-#define FFI_TRAMPOLINE_CLOSURE_FUNCTION 12 
-#else 
-#define FFI_TRAMPOLINE_SIZE 12 
-#endif 
-#define FFI_TRAMPOLINE_CLOSURE_OFFSET FFI_TRAMPOLINE_SIZE 
-#endif 
- 
-#endif 
+/* -----------------------------------------------------------------*-C-*-
+   ffitarget.h - Copyright (c) 2012  Anthony Green
+                 Copyright (c) 2010  CodeSourcery
+                 Copyright (c) 1996-2003  Red Hat, Inc.
+
+   Target configuration macros for ARM.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+
+   ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source.  Use ffi.h instead."
+#endif
+
+#ifndef LIBFFI_ASM
+typedef unsigned long          ffi_arg;
+typedef signed long            ffi_sarg;
+
+typedef enum ffi_abi {	/* calling conventions selectable for an ARM cif */
+  FFI_FIRST_ABI = 0,	/* marker below the first real ABI */
+  FFI_SYSV,		/* arguments in core registers and on the stack */
+  FFI_VFP,		/* floating point candidates may use VFP registers */
+  FFI_LAST_ABI,		/* marker above the last real ABI */
+#if defined(__ARM_PCS_VFP) || defined(_M_ARM)
+  FFI_DEFAULT_ABI = FFI_VFP,	/* hard-float toolchain or MSVC ARM */
+#else
+  FFI_DEFAULT_ABI = FFI_SYSV,	/* everything else */
+#endif
+} ffi_abi;
+#endif
+
+#define FFI_EXTRA_CIF_FIELDS	/* VFP bookkeeping filled in by layout_vfp_args */		\
+  int vfp_used;	/* bit N set when VFP register slot N (0-15) is taken */				\
+  unsigned short vfp_reg_free, vfp_nargs;	/* slot the next search starts at; entries used in vfp_args */	\
+  signed char vfp_args[16]	/* starting slot of each VFP-passed argument, -1 if unused */		\
+
+#define FFI_TARGET_SPECIFIC_VARIADIC
+#ifndef _M_ARM
+#define FFI_TARGET_HAS_COMPLEX_TYPE
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_GO_CLOSURES 1
+#define FFI_NATIVE_RAW_API 0
+
+#if defined (FFI_EXEC_TRAMPOLINE_TABLE) && FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+#define FFI_TRAMPOLINE_SIZE 12
+#define FFI_TRAMPOLINE_CLOSURE_OFFSET 8
+#else
+#error "No trampoline table implementation"
+#endif
+
+#else
+#ifdef _MSC_VER
+#define FFI_TRAMPOLINE_SIZE 16
+#define FFI_TRAMPOLINE_CLOSURE_FUNCTION 12
+#else
+#define FFI_TRAMPOLINE_SIZE 12
+#endif
+#define FFI_TRAMPOLINE_CLOSURE_OFFSET FFI_TRAMPOLINE_SIZE
+#endif
+
+#endif
diff --git a/contrib/restricted/libffi/src/arm/internal.h b/contrib/restricted/libffi/src/arm/internal.h
index 418dc460f3..6cf0b2ae5d 100644
--- a/contrib/restricted/libffi/src/arm/internal.h
+++ b/contrib/restricted/libffi/src/arm/internal.h
@@ -1,7 +1,7 @@
-#define ARM_TYPE_VFP_S	0 
-#define ARM_TYPE_VFP_D	1 
-#define ARM_TYPE_VFP_N	2 
-#define ARM_TYPE_INT64	3 
-#define ARM_TYPE_INT	4 
-#define ARM_TYPE_VOID	5 
-#define ARM_TYPE_STRUCT	6 
+#define ARM_TYPE_VFP_S	0	/* result in s0 */
+#define ARM_TYPE_VFP_D	1	/* result in d0 */
+#define ARM_TYPE_VFP_N	2	/* result in d0-d3 */
+#define ARM_TYPE_INT64	3	/* result in r0 and r1 */
+#define ARM_TYPE_INT	4	/* result in r0 */
+#define ARM_TYPE_VOID	5	/* nothing is stored or loaded */
+#define ARM_TYPE_STRUCT	6	/* returned in memory; the stubs move nothing */
diff --git a/contrib/restricted/libffi/src/arm/sysv.S b/contrib/restricted/libffi/src/arm/sysv.S
index dec168db0b..63180a4639 100644
--- a/contrib/restricted/libffi/src/arm/sysv.S
+++ b/contrib/restricted/libffi/src/arm/sysv.S
@@ -1,385 +1,385 @@
-/* ----------------------------------------------------------------------- 
-   sysv.S - Copyright (c) 1998, 2008, 2011 Red Hat, Inc. 
-	    Copyright (c) 2011 Plausible Labs Cooperative, Inc. 
- 
-   ARM Foreign Function Interface 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
-   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
-   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
-   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
-   DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-#ifdef __arm__ 
-#define LIBFFI_ASM 
-#include <fficonfig.h> 
-#include <ffi.h> 
-#include <ffi_cfi.h> 
-#include "internal.h" 
- 
-/* GCC 4.8 provides __ARM_ARCH; construct it otherwise.  */ 
-#ifndef __ARM_ARCH 
-# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \ 
-     || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \ 
-     || defined(__ARM_ARCH_7EM__) 
-#  define __ARM_ARCH 7 
-# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \ 
-        || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \ 
-        || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \ 
-	|| defined(__ARM_ARCH_6M__) 
-#  define __ARM_ARCH 6 
-# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \ 
-	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \ 
-	|| defined(__ARM_ARCH_5TEJ__) 
-#  define __ARM_ARCH 5 
-# else 
-#  define __ARM_ARCH 4 
-# endif 
-#endif 
- 
-/* Conditionally compile unwinder directives.  */ 
-#ifdef __ARM_EABI__ 
-# define UNWIND(...)	__VA_ARGS__ 
-#else 
-# define UNWIND(...) 
-#endif 
- 
-#if defined(HAVE_AS_CFI_PSEUDO_OP) && defined(__ARM_EABI__) 
-	.cfi_sections	.debug_frame 
-#endif 
- 
-#define CONCAT(a, b)	CONCAT2(a, b) 
-#define CONCAT2(a, b)	a ## b 
- 
-#ifdef __USER_LABEL_PREFIX__ 
-# define CNAME(X)	CONCAT (__USER_LABEL_PREFIX__, X) 
-#else 
-# define CNAME(X)	X 
-#endif 
-#ifdef __ELF__ 
-# define SIZE(X)	.size CNAME(X), . - CNAME(X) 
-# define TYPE(X, Y)	.type CNAME(X), Y 
-#else 
-# define SIZE(X) 
-# define TYPE(X, Y) 
-#endif 
- 
-#define ARM_FUNC_START_LOCAL(name)	\ 
-	.align	3;			\ 
-	TYPE(CNAME(name), %function);	\ 
-	CNAME(name): 
- 
-#define ARM_FUNC_START(name)		\ 
-	.globl CNAME(name);		\ 
-	FFI_HIDDEN(CNAME(name));	\ 
-	ARM_FUNC_START_LOCAL(name) 
- 
-#define ARM_FUNC_END(name) \ 
-	SIZE(name) 
- 
-/* Aid in defining a jump table with 8 bytes between entries.  */ 
-/* ??? The clang assembler doesn't handle .if with symbolic expressions.  */ 
-#ifdef __clang__ 
-# define E(index) 
-#else 
-# define E(index)				\ 
-	.if . - 0b - 8*index;			\ 
-	.error "type table out of sync";	\ 
-	.endif 
-#endif 
- 
-	.text 
-	.syntax unified 
-	.arm 
- 
-#ifndef __clang__ 
-	/* We require interworking on LDM, which implies ARMv5T, 
-	   which implies the existance of BLX.  */ 
- 	.arch	armv5t 
-#endif 
- 
-	/* Note that we use STC and LDC to encode VFP instructions, 
-	   so that we do not need ".fpu vfp", nor get that added to 
-	   the object file attributes.  These will not be executed 
-	   unless the FFI_VFP abi is used.  */ 
- 
-	@ r0:   stack 
-	@ r1:   frame 
-	@ r2:   fn 
-	@ r3:	vfp_used 
- 
-ARM_FUNC_START(ffi_call_VFP) 
-	UNWIND(.fnstart) 
-	cfi_startproc 
- 
-	cmp	r3, #3			@ load only d0 if possible 
-#ifdef __clang__ 
-	vldrle d0, [sp] 
-	vldmgt sp, {d0-d7} 
-#else 
-	ldcle	p11, cr0, [r0]		@ vldrle d0, [sp] 
-	ldcgt	p11, cr0, [r0], {16}	@ vldmgt sp, {d0-d7} 
-#endif 
-	add	r0, r0, #64		@ discard the vfp register args 
-	/* FALLTHRU */ 
-ARM_FUNC_END(ffi_call_VFP) 
- 
-ARM_FUNC_START(ffi_call_SYSV) 
-	stm	r1, {fp, lr} 
-	mov	fp, r1 
- 
-	@ This is a bit of a lie wrt the origin of the unwind info, but 
-	@ now we've got the usual frame pointer and two saved registers. 
-	UNWIND(.save {fp,lr}) 
-	UNWIND(.setfp fp, sp) 
-	cfi_def_cfa(fp, 8) 
-	cfi_rel_offset(fp, 0) 
-	cfi_rel_offset(lr, 4) 
- 
-	mov	sp, r0		@ install the stack pointer 
-	mov	lr, r2		@ move the fn pointer out of the way 
-	ldr	ip, [fp, #16]	@ install the static chain 
-	ldmia	sp!, {r0-r3}	@ move first 4 parameters in registers. 
-	blx	lr		@ call fn 
- 
-	@ Load r2 with the pointer to storage for the return value 
-	@ Load r3 with the return type code 
-	ldr	r2, [fp, #8] 
-	ldr	r3, [fp, #12] 
- 
-	@ Deallocate the stack with the arguments. 
-	mov	sp, fp 
-	cfi_def_cfa_register(sp) 
- 
-	@ Store values stored in registers. 
-	.align	3 
-	add	pc, pc, r3, lsl #3 
-	nop 
-0: 
-E(ARM_TYPE_VFP_S) 
-#ifdef __clang__ 
-	vstr s0, [r2] 
-#else 
-	stc	p10, cr0, [r2]		@ vstr s0, [r2] 
-#endif 
-	pop	{fp,pc} 
-E(ARM_TYPE_VFP_D) 
-#ifdef __clang__ 
-	vstr d0, [r2] 
-#else 
-	stc	p11, cr0, [r2]		@ vstr d0, [r2] 
-#endif 
-	pop	{fp,pc} 
-E(ARM_TYPE_VFP_N) 
-#ifdef __clang__ 
-	vstm r2, {d0-d3} 
-#else 
-	stc	p11, cr0, [r2], {8}	@ vstm r2, {d0-d3} 
-#endif 
-	pop	{fp,pc} 
-E(ARM_TYPE_INT64) 
-	str	r1, [r2, #4] 
-	nop 
-E(ARM_TYPE_INT) 
-	str	r0, [r2] 
-	pop	{fp,pc} 
-E(ARM_TYPE_VOID) 
-	pop	{fp,pc} 
-	nop 
-E(ARM_TYPE_STRUCT) 
-	pop	{fp,pc} 
- 
-	cfi_endproc 
-	UNWIND(.fnend) 
-ARM_FUNC_END(ffi_call_SYSV) 
- 
- 
-/* 
-	int ffi_closure_inner_* (cif, fun, user_data, frame) 
-*/ 
- 
-ARM_FUNC_START(ffi_go_closure_SYSV) 
-	cfi_startproc 
-	stmdb	sp!, {r0-r3}			@ save argument regs 
-	cfi_adjust_cfa_offset(16) 
-	ldr	r0, [ip, #4]			@ load cif 
-	ldr	r1, [ip, #8]			@ load fun 
-	mov	r2, ip				@ load user_data 
-	b	0f 
-	cfi_endproc 
-ARM_FUNC_END(ffi_go_closure_SYSV) 
- 
-ARM_FUNC_START(ffi_closure_SYSV) 
-	UNWIND(.fnstart) 
-	cfi_startproc 
-	stmdb	sp!, {r0-r3}			@ save argument regs 
-	cfi_adjust_cfa_offset(16) 
- 
-#if FFI_EXEC_TRAMPOLINE_TABLE 
-	ldr ip, [ip]				@ ip points to the config page, dereference to get the ffi_closure* 
-#endif 
-	ldr	r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET]	@ load cif 
-	ldr	r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4]  @ load fun 
-	ldr	r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8]  @ load user_data 
-0: 
-	add	ip, sp, #16			@ compute entry sp 
-	sub	sp, sp, #64+32			@ allocate frame 
-	cfi_adjust_cfa_offset(64+32) 
-	stmdb	sp!, {ip,lr} 
- 
-	/* Remember that EABI unwind info only applies at call sites. 
-	   We need do nothing except note the save of the stack pointer 
-	   and the link registers.  */ 
-	UNWIND(.save {sp,lr}) 
-	cfi_adjust_cfa_offset(8) 
-	cfi_rel_offset(lr, 4) 
- 
-	add	r3, sp, #8			@ load frame 
-	bl	CNAME(ffi_closure_inner_SYSV) 
- 
-	@ Load values returned in registers. 
-	add	r2, sp, #8+64			@ load result 
-	adr	r3, CNAME(ffi_closure_ret) 
-	add	pc, r3, r0, lsl #3 
-	cfi_endproc 
-	UNWIND(.fnend) 
-ARM_FUNC_END(ffi_closure_SYSV) 
- 
-ARM_FUNC_START(ffi_go_closure_VFP) 
-	cfi_startproc 
-	stmdb	sp!, {r0-r3}			@ save argument regs 
-	cfi_adjust_cfa_offset(16) 
-	ldr	r0, [ip, #4]			@ load cif 
-	ldr	r1, [ip, #8]			@ load fun 
-	mov	r2, ip				@ load user_data 
-	b	0f 
-	cfi_endproc 
-ARM_FUNC_END(ffi_go_closure_VFP) 
- 
-ARM_FUNC_START(ffi_closure_VFP) 
-	UNWIND(.fnstart) 
-	cfi_startproc 
-	stmdb	sp!, {r0-r3}			@ save argument regs 
-	cfi_adjust_cfa_offset(16) 
- 
-#if FFI_EXEC_TRAMPOLINE_TABLE 
-	ldr ip, [ip]				@ ip points to the config page, dereference to get the ffi_closure* 
-#endif 
-	ldr	r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET]	@ load cif 
-	ldr	r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4]  @ load fun 
-	ldr	r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8]  @ load user_data 
-0: 
-	add	ip, sp, #16 
-	sub	sp, sp, #64+32			@ allocate frame 
-	cfi_adjust_cfa_offset(64+32) 
-#ifdef __clang__ 
-	vstm sp, {d0-d7} 
-#else 
-	stc	p11, cr0, [sp], {16}		@ vstm sp, {d0-d7} 
-#endif 
-	stmdb	sp!, {ip,lr} 
- 
-	/* See above.  */ 
-	UNWIND(.save {sp,lr}) 
-	cfi_adjust_cfa_offset(8) 
-	cfi_rel_offset(lr, 4) 
- 
-	add	r3, sp, #8			@ load frame 
-	bl	CNAME(ffi_closure_inner_VFP) 
- 
-	@ Load values returned in registers. 
-	add	r2, sp, #8+64			@ load result 
-	adr	r3, CNAME(ffi_closure_ret) 
-	add	pc, r3, r0, lsl #3 
-	cfi_endproc 
-	UNWIND(.fnend) 
-ARM_FUNC_END(ffi_closure_VFP) 
- 
-/* Load values returned in registers for both closure entry points. 
-   Note that we use LDM with SP in the register set.  This is deprecated 
-   by ARM, but not yet unpredictable.  */ 
- 
-ARM_FUNC_START_LOCAL(ffi_closure_ret) 
-	cfi_startproc 
-	cfi_rel_offset(sp, 0) 
-	cfi_rel_offset(lr, 4) 
-0: 
-E(ARM_TYPE_VFP_S) 
-#ifdef __clang__ 
-	vldr s0, [r2] 
-#else 
-	ldc	p10, cr0, [r2]			@ vldr s0, [r2] 
-#endif 
-	ldm	sp, {sp,pc} 
-E(ARM_TYPE_VFP_D) 
-#ifdef __clang__ 
-	vldr d0, [r2] 
-#else 
-	ldc	p11, cr0, [r2]			@ vldr d0, [r2] 
-#endif 
-	ldm	sp, {sp,pc} 
-E(ARM_TYPE_VFP_N) 
-#ifdef __clang__ 
-	vldm r2, {d0-d3} 
-#else 
-	ldc	p11, cr0, [r2], {8}		@ vldm r2, {d0-d3} 
-#endif 
-	ldm	sp, {sp,pc} 
-E(ARM_TYPE_INT64) 
-	ldr	r1, [r2, #4] 
-	nop 
-E(ARM_TYPE_INT) 
-	ldr	r0, [r2] 
-	ldm	sp, {sp,pc} 
-E(ARM_TYPE_VOID) 
-	ldm	sp, {sp,pc} 
-	nop 
-E(ARM_TYPE_STRUCT) 
-	ldm	sp, {sp,pc} 
-	cfi_endproc 
-ARM_FUNC_END(ffi_closure_ret) 
- 
-#if FFI_EXEC_TRAMPOLINE_TABLE 
- 
-#ifdef __MACH__ 
-#include <mach/machine/vm_param.h> 
- 
-.align	PAGE_MAX_SHIFT 
-ARM_FUNC_START(ffi_closure_trampoline_table_page) 
-.rept	PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE 
-	adr ip, #-PAGE_MAX_SIZE   @ the config page is PAGE_MAX_SIZE behind the trampoline page 
-	sub ip, #8				  @ account for pc bias 
-	ldr	pc, [ip, #4]		  @ jump to ffi_closure_SYSV or ffi_closure_VFP 
-.endr 
-ARM_FUNC_END(ffi_closure_trampoline_table_page) 
-#endif 
- 
-#else 
- 
-ARM_FUNC_START(ffi_arm_trampoline) 
-0:	adr	ip, 0b 
-	ldr	pc, 1f 
-1:	.long	0 
-ARM_FUNC_END(ffi_arm_trampoline) 
- 
-#endif /* FFI_EXEC_TRAMPOLINE_TABLE */ 
-#endif /* __arm__ */ 
- 
-#if defined __ELF__ && defined __linux__ 
-	.section	.note.GNU-stack,"",%progbits 
-#endif 
+/* -----------------------------------------------------------------------
+   sysv.S - Copyright (c) 1998, 2008, 2011 Red Hat, Inc.
+	    Copyright (c) 2011 Plausible Labs Cooperative, Inc.
+
+   ARM Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifdef __arm__
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+#include "internal.h"
+
+/* GCC 4.8 provides __ARM_ARCH; construct it otherwise.  */
+#ifndef __ARM_ARCH
+# if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) \
+     || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) \
+     || defined(__ARM_ARCH_7EM__)
+#  define __ARM_ARCH 7
+# elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) \
+        || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) \
+        || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) \
+	|| defined(__ARM_ARCH_6M__)
+#  define __ARM_ARCH 6
+# elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__) \
+	|| defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) \
+	|| defined(__ARM_ARCH_5TEJ__)
+#  define __ARM_ARCH 5
+# else
+#  define __ARM_ARCH 4
+# endif
+#endif
+
+/* Conditionally compile unwinder directives.  */
+#ifdef __ARM_EABI__
+# define UNWIND(...)	__VA_ARGS__
+#else
+# define UNWIND(...)
+#endif
+
+#if defined(HAVE_AS_CFI_PSEUDO_OP) && defined(__ARM_EABI__)
+	.cfi_sections	.debug_frame
+#endif
+
+#define CONCAT(a, b)	CONCAT2(a, b)
+#define CONCAT2(a, b)	a ## b
+
+#ifdef __USER_LABEL_PREFIX__
+# define CNAME(X)	CONCAT (__USER_LABEL_PREFIX__, X)
+#else
+# define CNAME(X)	X
+#endif
+#ifdef __ELF__
+# define SIZE(X)	.size CNAME(X), . - CNAME(X)
+# define TYPE(X, Y)	.type CNAME(X), Y
+#else
+# define SIZE(X)
+# define TYPE(X, Y)
+#endif
+
+#define ARM_FUNC_START_LOCAL(name)	\
+	.align	3;			\
+	TYPE(CNAME(name), %function);	\
+	CNAME(name):
+
+#define ARM_FUNC_START(name)		\
+	.globl CNAME(name);		\
+	FFI_HIDDEN(CNAME(name));	\
+	ARM_FUNC_START_LOCAL(name)
+
+#define ARM_FUNC_END(name) \
+	SIZE(name)
+
+/* Aid in defining a jump table with 8 bytes between entries.  */
+/* ??? The clang assembler doesn't handle .if with symbolic expressions.  */
+#ifdef __clang__
+# define E(index)
+#else
+# define E(index)				\
+	.if . - 0b - 8*index;			\
+	.error "type table out of sync";	\
+	.endif
+#endif
+
+	.text
+	.syntax unified
+	.arm
+
+#ifndef __clang__
+	/* We require interworking on LDM, which implies ARMv5T,
+	   which implies the existence of BLX.  */
+ 	.arch	armv5t
+#endif
+
+	/* Note that we use STC and LDC to encode VFP instructions,
+	   so that we do not need ".fpu vfp", nor get that added to
+	   the object file attributes.  These will not be executed
+	   unless the FFI_VFP abi is used.  */
+
+	@ r0:   stack
+	@ r1:   frame
+	@ r2:   fn
+	@ r3:	vfp_used
+
+ARM_FUNC_START(ffi_call_VFP)
+	UNWIND(.fnstart)
+	cfi_startproc
+
+	cmp	r3, #3			@ load only d0 if possible
+#ifdef __clang__
+	vldrle d0, [r0]			@ r0 = arg block; sp is not installed yet
+	vldmgt r0, {d0-d7}
+#else
+	ldcle	p11, cr0, [r0]		@ vldrle d0, [r0]
+	ldcgt	p11, cr0, [r0], {16}	@ vldmgt r0, {d0-d7}
+#endif
+	add	r0, r0, #64		@ discard the vfp register args
+	/* FALLTHRU */
+ARM_FUNC_END(ffi_call_VFP)
+
+ARM_FUNC_START(ffi_call_SYSV)
+	stm	r1, {fp, lr}
+	mov	fp, r1
+
+	@ This is a bit of a lie wrt the origin of the unwind info, but
+	@ now we've got the usual frame pointer and two saved registers.
+	UNWIND(.save {fp,lr})
+	UNWIND(.setfp fp, sp)
+	cfi_def_cfa(fp, 8)
+	cfi_rel_offset(fp, 0)
+	cfi_rel_offset(lr, 4)
+
+	mov	sp, r0		@ install the stack pointer
+	mov	lr, r2		@ move the fn pointer out of the way
+	ldr	ip, [fp, #16]	@ install the static chain
+	ldmia	sp!, {r0-r3}	@ move first 4 parameters in registers.
+	blx	lr		@ call fn
+
+	@ Load r2 with the pointer to storage for the return value
+	@ Load r3 with the return type code
+	ldr	r2, [fp, #8]
+	ldr	r3, [fp, #12]
+
+	@ Deallocate the stack with the arguments.
+	mov	sp, fp
+	cfi_def_cfa_register(sp)
+
+	@ Store values stored in registers.
+	.align	3
+	add	pc, pc, r3, lsl #3
+	nop
+0:
+E(ARM_TYPE_VFP_S)
+#ifdef __clang__
+	vstr s0, [r2]
+#else
+	stc	p10, cr0, [r2]		@ vstr s0, [r2]
+#endif
+	pop	{fp,pc}
+E(ARM_TYPE_VFP_D)
+#ifdef __clang__
+	vstr d0, [r2]
+#else
+	stc	p11, cr0, [r2]		@ vstr d0, [r2]
+#endif
+	pop	{fp,pc}
+E(ARM_TYPE_VFP_N)
+#ifdef __clang__
+	vstm r2, {d0-d3}
+#else
+	stc	p11, cr0, [r2], {8}	@ vstm r2, {d0-d3}
+#endif
+	pop	{fp,pc}
+E(ARM_TYPE_INT64)
+	str	r1, [r2, #4]
+	nop
+E(ARM_TYPE_INT)
+	str	r0, [r2]
+	pop	{fp,pc}
+E(ARM_TYPE_VOID)
+	pop	{fp,pc}
+	nop
+E(ARM_TYPE_STRUCT)
+	pop	{fp,pc}
+
+	cfi_endproc
+	UNWIND(.fnend)
+ARM_FUNC_END(ffi_call_SYSV)
+
+
+/*
+	int ffi_closure_inner_* (cif, fun, user_data, frame)
+*/
+
+ARM_FUNC_START(ffi_go_closure_SYSV)
+	cfi_startproc
+	stmdb	sp!, {r0-r3}			@ save argument regs
+	cfi_adjust_cfa_offset(16)
+	ldr	r0, [ip, #4]			@ load cif
+	ldr	r1, [ip, #8]			@ load fun
+	mov	r2, ip				@ load user_data
+	b	0f
+	cfi_endproc
+ARM_FUNC_END(ffi_go_closure_SYSV)
+
+ARM_FUNC_START(ffi_closure_SYSV)
+	UNWIND(.fnstart)
+	cfi_startproc
+	stmdb	sp!, {r0-r3}			@ save argument regs
+	cfi_adjust_cfa_offset(16)
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+	ldr ip, [ip]				@ ip points to the config page, dereference to get the ffi_closure*
+#endif
+	ldr	r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET]	@ load cif
+	ldr	r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4]  @ load fun
+	ldr	r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8]  @ load user_data
+0:
+	add	ip, sp, #16			@ compute entry sp
+	sub	sp, sp, #64+32			@ allocate frame
+	cfi_adjust_cfa_offset(64+32)
+	stmdb	sp!, {ip,lr}
+
+	/* Remember that EABI unwind info only applies at call sites.
+	   We need do nothing except note the save of the stack pointer
+	   and the link registers.  */
+	UNWIND(.save {sp,lr})
+	cfi_adjust_cfa_offset(8)
+	cfi_rel_offset(lr, 4)
+
+	add	r3, sp, #8			@ load frame
+	bl	CNAME(ffi_closure_inner_SYSV)
+
+	@ Load values returned in registers.
+	add	r2, sp, #8+64			@ load result
+	adr	r3, CNAME(ffi_closure_ret)
+	add	pc, r3, r0, lsl #3
+	cfi_endproc
+	UNWIND(.fnend)
+ARM_FUNC_END(ffi_closure_SYSV)
+
+ARM_FUNC_START(ffi_go_closure_VFP)
+	cfi_startproc
+	stmdb	sp!, {r0-r3}			@ save argument regs
+	cfi_adjust_cfa_offset(16)
+	ldr	r0, [ip, #4]			@ load cif
+	ldr	r1, [ip, #8]			@ load fun
+	mov	r2, ip				@ load user_data
+	b	0f
+	cfi_endproc
+ARM_FUNC_END(ffi_go_closure_VFP)
+
+ARM_FUNC_START(ffi_closure_VFP)
+	UNWIND(.fnstart)
+	cfi_startproc
+	stmdb	sp!, {r0-r3}			@ save argument regs
+	cfi_adjust_cfa_offset(16)
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+	ldr ip, [ip]				@ ip points to the config page, dereference to get the ffi_closure*
+#endif
+	ldr	r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET]	@ load cif
+	ldr	r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4]  @ load fun
+	ldr	r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8]  @ load user_data
+0:
+	add	ip, sp, #16
+	sub	sp, sp, #64+32			@ allocate frame
+	cfi_adjust_cfa_offset(64+32)
+#ifdef __clang__
+	vstm sp, {d0-d7}
+#else
+	stc	p11, cr0, [sp], {16}		@ vstm sp, {d0-d7}
+#endif
+	stmdb	sp!, {ip,lr}
+
+	/* See above.  */
+	UNWIND(.save {sp,lr})
+	cfi_adjust_cfa_offset(8)
+	cfi_rel_offset(lr, 4)
+
+	add	r3, sp, #8			@ load frame
+	bl	CNAME(ffi_closure_inner_VFP)
+
+	@ Load values returned in registers.
+	add	r2, sp, #8+64			@ load result
+	adr	r3, CNAME(ffi_closure_ret)
+	add	pc, r3, r0, lsl #3
+	cfi_endproc
+	UNWIND(.fnend)
+ARM_FUNC_END(ffi_closure_VFP)
+
+/* Load values returned in registers for both closure entry points.
+   Note that we use LDM with SP in the register set.  This is deprecated
+   by ARM, but not yet unpredictable.  */
+
+ARM_FUNC_START_LOCAL(ffi_closure_ret)
+	cfi_startproc
+	cfi_rel_offset(sp, 0)
+	cfi_rel_offset(lr, 4)
+0:
+E(ARM_TYPE_VFP_S)
+#ifdef __clang__
+	vldr s0, [r2]
+#else
+	ldc	p10, cr0, [r2]			@ vldr s0, [r2]
+#endif
+	ldm	sp, {sp,pc}
+E(ARM_TYPE_VFP_D)
+#ifdef __clang__
+	vldr d0, [r2]
+#else
+	ldc	p11, cr0, [r2]			@ vldr d0, [r2]
+#endif
+	ldm	sp, {sp,pc}
+E(ARM_TYPE_VFP_N)
+#ifdef __clang__
+	vldm r2, {d0-d3}
+#else
+	ldc	p11, cr0, [r2], {8}		@ vldm r2, {d0-d3}
+#endif
+	ldm	sp, {sp,pc}
+E(ARM_TYPE_INT64)
+	ldr	r1, [r2, #4]
+	nop
+E(ARM_TYPE_INT)
+	ldr	r0, [r2]
+	ldm	sp, {sp,pc}
+E(ARM_TYPE_VOID)
+	ldm	sp, {sp,pc}
+	nop
+E(ARM_TYPE_STRUCT)
+	ldm	sp, {sp,pc}
+	cfi_endproc
+ARM_FUNC_END(ffi_closure_ret)
+
+#if FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+#include <mach/machine/vm_param.h>
+
+.align	PAGE_MAX_SHIFT
+ARM_FUNC_START(ffi_closure_trampoline_table_page)
+.rept	PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE
+	adr ip, #-PAGE_MAX_SIZE   @ the config page is PAGE_MAX_SIZE behind the trampoline page
+	sub ip, #8				  @ account for pc bias
+	ldr	pc, [ip, #4]		  @ jump to ffi_closure_SYSV or ffi_closure_VFP
+.endr
+ARM_FUNC_END(ffi_closure_trampoline_table_page)
+#endif
+
+#else
+
+ARM_FUNC_START(ffi_arm_trampoline)
+0:	adr	ip, 0b
+	ldr	pc, 1f
+1:	.long	0
+ARM_FUNC_END(ffi_arm_trampoline)
+
+#endif /* FFI_EXEC_TRAMPOLINE_TABLE */
+#endif /* __arm__ */
+
+#if defined __ELF__ && defined __linux__
+	.section	.note.GNU-stack,"",%progbits
+#endif
diff --git a/contrib/restricted/libffi/src/arm/sysv_msvc_arm32.S b/contrib/restricted/libffi/src/arm/sysv_msvc_arm32.S
index 29ab997467..5c99d0207a 100644
--- a/contrib/restricted/libffi/src/arm/sysv_msvc_arm32.S
+++ b/contrib/restricted/libffi/src/arm/sysv_msvc_arm32.S
@@ -1,311 +1,311 @@
-/* ----------------------------------------------------------------------- 
-   sysv.S - Copyright (c) 1998, 2008, 2011 Red Hat, Inc. 
-        Copyright (c) 2011 Plausible Labs Cooperative, Inc. 
-        Copyright (c) 2019 Microsoft Corporation. 
- 
-   ARM Foreign Function Interface 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
-   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
-   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
-   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
-   DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-#define LIBFFI_ASM 
-#include <fficonfig.h> 
-#include <ffi.h> 
-#include <ffi_cfi.h> 
-#include "internal.h" 
-#include "ksarm.h" 
- 
- 
-        ; 8 byte aligned AREA to support 8 byte aligned jump tables 
-        MACRO 
-        NESTED_ENTRY_FFI $FuncName, $AreaName, $ExceptHandler 
- 
-        ; compute the function's labels 
-        __DeriveFunctionLabels $FuncName 
- 
-        ; determine the area we will put the function into 
-__FuncArea   SETS    "|.text|" 
-        IF "$AreaName" != "" 
-__FuncArea   SETS    "$AreaName" 
-        ENDIF 
- 
-        ; set up the exception handler itself 
-__FuncExceptionHandler SETS "" 
-        IF "$ExceptHandler" != "" 
-__FuncExceptionHandler SETS    "|$ExceptHandler|" 
-        ENDIF 
- 
-        ; switch to the specified area, jump tables require 8 byte alignment 
-        AREA    $__FuncArea,CODE,CODEALIGN,ALIGN=3,READONLY 
- 
-        ; export the function name 
-        __ExportProc $FuncName 
- 
-        ; flush any pending literal pool stuff 
-        ROUT 
- 
-        ; reset the state of the unwind code tracking 
-        __ResetUnwindState 
- 
-        MEND 
- 
-;        MACRO 
-;        TABLE_ENTRY $Type, $Table 
-;$Type_$Table 
-;        MEND 
- 
-#define E(index,table) return_##index##_##table 
- 
-    ; r0:   stack 
-    ; r1:   frame 
-    ; r2:   fn 
-    ; r3:   vfp_used 
- 
-    ; fake entry point exists only to generate exists only to  
-    ; generate .pdata for exception unwinding 
-    NESTED_ENTRY_FFI ffi_call_VFP_fake 
-    PROLOG_PUSH  {r11, lr}          ; save fp and lr for unwind 
- 
-    ALTERNATE_ENTRY ffi_call_VFP 
-    cmp    r3, #3                   ; load only d0 if possible 
-    vldrle d0, [r0] 
-    vldmgt r0, {d0-d7} 
-    add    r0, r0, #64              ; discard the vfp register args 
-    b ffi_call_SYSV 
-    NESTED_END ffi_call_VFP_fake 
- 
-    ; fake entry point exists only to generate exists only to  
-    ; generate .pdata for exception unwinding 
-    NESTED_ENTRY_FFI ffi_call_SYSV_fake 
-    PROLOG_PUSH  {r11, lr}          ; save fp and lr for unwind 
- 
-    ALTERNATE_ENTRY ffi_call_SYSV 
-    stm    r1, {fp, lr} 
-    mov    fp, r1 
- 
-    mov    sp, r0                   ; install the stack pointer 
-    mov    lr, r2                   ; move the fn pointer out of the way 
-    ldr    ip, [fp, #16]            ; install the static chain 
-    ldmia  sp!, {r0-r3}             ; move first 4 parameters in registers. 
-    blx    lr                       ; call fn 
- 
-    ; Load r2 with the pointer to storage for the return value 
-    ; Load r3 with the return type code 
-    ldr    r2, [fp, #8] 
-    ldr    r3, [fp, #12] 
- 
-    ; Deallocate the stack with the arguments. 
-    mov    sp, fp 
- 
-    ; Store values stored in registers. 
-    ALIGN 8 
-    lsl     r3, #3 
-    add     r3, r3, pc 
-    add     r3, #8 
-    mov     pc, r3 
- 
- 
-E(ARM_TYPE_VFP_S, ffi_call) 
-    ALIGN 8 
-    vstr s0, [r2] 
-    pop    {fp,pc} 
-E(ARM_TYPE_VFP_D, ffi_call) 
-    ALIGN 8 
-    vstr d0, [r2] 
-    pop    {fp,pc} 
-E(ARM_TYPE_VFP_N, ffi_call) 
-    ALIGN 8 
-    vstm r2, {d0-d3} 
-    pop    {fp,pc} 
-E(ARM_TYPE_INT64, ffi_call) 
-    ALIGN 8 
-    str    r1, [r2, #4] 
-    nop 
-E(ARM_TYPE_INT, ffi_call) 
-    ALIGN 8 
-    str    r0, [r2] 
-    pop    {fp,pc} 
-E(ARM_TYPE_VOID, ffi_call) 
-    ALIGN 8 
-    pop    {fp,pc} 
-    nop 
-E(ARM_TYPE_STRUCT, ffi_call) 
-    ALIGN 8 
-    cmp r3, #ARM_TYPE_STRUCT 
-    pop    {fp,pc} 
-    NESTED_END ffi_call_SYSV_fake 
- 
-    IMPORT |ffi_closure_inner_SYSV| 
-    /* 
-    int ffi_closure_inner_SYSV 
-    ( 
-        cif,        ; r0 
-        fun,        ; r1 
-        user_data,  ; r2 
-        frame       ; r3 
-    ) 
-    */ 
- 
-    NESTED_ENTRY_FFI ffi_go_closure_SYSV 
-    stmdb   sp!, {r0-r3}            ; save argument regs 
-    ldr     r0, [ip, #4]            ; load cif 
-    ldr     r1, [ip, #8]            ; load fun 
-    mov     r2, ip                  ; load user_data 
-    b       ffi_go_closure_SYSV_0 
-    NESTED_END ffi_go_closure_SYSV 
- 
-    ; r3:    ffi_closure 
- 
-    ; fake entry point exists only to generate exists only to  
-    ; generate .pdata for exception unwinding 
-    NESTED_ENTRY_FFI ffi_closure_SYSV_fake   
-    PROLOG_PUSH  {r11, lr}          ; save fp and lr for unwind 
-    ALTERNATE_ENTRY ffi_closure_SYSV 
-    ldmfd   sp!, {ip,r0}            ; restore fp (r0 is used for stack alignment) 
-    stmdb   sp!, {r0-r3}            ; save argument regs 
- 
-    ldr     r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET]    ; ffi_closure->cif 
-    ldr     r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4]  ; ffi_closure->fun 
-    ldr     r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8]  ; ffi_closure->user_data 
- 
-    ALTERNATE_ENTRY ffi_go_closure_SYSV_0 
-    add     ip, sp, #16             ; compute entry sp 
- 
-    sub     sp, sp, #64+32          ; allocate frame parameter (sizeof(vfp_space) = 64, sizeof(result) = 32) 
-    mov     r3, sp                  ; set frame parameter 
-    stmdb   sp!, {ip,lr} 
- 
-    bl      ffi_closure_inner_SYSV  ; call the Python closure 
- 
-                                    ; Load values returned in registers. 
-    add     r2, sp, #64+8           ; address of closure_frame->result 
-    bl      ffi_closure_ret         ; move result to correct register or memory for type 
- 
-    ldmfd   sp!, {ip,lr} 
-    mov     sp, ip                  ; restore stack pointer 
-    mov     pc, lr 
-    NESTED_END ffi_closure_SYSV_fake 
- 
-    IMPORT |ffi_closure_inner_VFP| 
-    /* 
-    int ffi_closure_inner_VFP 
-    ( 
-        cif,        ; r0 
-        fun,        ; r1 
-        user_data,  ; r2 
-        frame       ; r3 
-    ) 
-    */ 
- 
-    NESTED_ENTRY_FFI ffi_go_closure_VFP 
-    stmdb   sp!, {r0-r3}			; save argument regs 
-    ldr	r0, [ip, #4]			; load cif 
-    ldr	r1, [ip, #8]			; load fun 
-    mov	r2, ip				; load user_data 
-    b	ffi_go_closure_VFP_0 
-    NESTED_END ffi_go_closure_VFP 
- 
-    ; fake entry point exists only to generate exists only to  
-    ; generate .pdata for exception unwinding 
-    ; r3:    closure 
-    NESTED_ENTRY_FFI ffi_closure_VFP_fake 
-    PROLOG_PUSH  {r11, lr}          ; save fp and lr for unwind 
- 
-    ALTERNATE_ENTRY ffi_closure_VFP 
-    ldmfd   sp!, {ip,r0}            ; restore fp (r0 is used for stack alignment) 
-    stmdb   sp!, {r0-r3}            ; save argument regs 
- 
-    ldr     r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET]    ; load cif 
-    ldr     r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4]  ; load fun 
-    ldr     r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8]  ; load user_data 
- 
-    ALTERNATE_ENTRY ffi_go_closure_VFP_0 
-    add     ip, sp, #16             ; compute entry sp 
-    sub     sp, sp, #32             ; save space for closure_frame->result 
-    vstmdb  sp!, {d0-d7}            ; push closure_frame->vfp_space 
- 
-    mov     r3, sp                  ; save closure_frame 
-    stmdb   sp!, {ip,lr} 
- 
-    bl      ffi_closure_inner_VFP 
- 
-    ; Load values returned in registers. 
-    add     r2, sp, #64+8           ; load result 
-    bl      ffi_closure_ret 
-    ldmfd   sp!, {ip,lr} 
-    mov     sp, ip                  ; restore stack pointer 
-    mov     pc, lr 
-    NESTED_END ffi_closure_VFP_fake 
- 
-/* Load values returned in registers for both closure entry points. 
-   Note that we use LDM with SP in the register set.  This is deprecated 
-   by ARM, but not yet unpredictable.  */ 
- 
-    NESTED_ENTRY_FFI ffi_closure_ret 
-    stmdb sp!, {fp,lr} 
- 
-    ALIGN 8 
-    lsl     r0, #3 
-    add     r0, r0, pc 
-    add     r0, #8 
-    mov     pc, r0 
- 
-E(ARM_TYPE_VFP_S, ffi_closure) 
-    ALIGN 8 
-    vldr s0, [r2] 
-    b call_epilogue 
-E(ARM_TYPE_VFP_D, ffi_closure) 
-    ALIGN 8 
-    vldr d0, [r2] 
-    b call_epilogue 
-E(ARM_TYPE_VFP_N, ffi_closure) 
-    ALIGN 8 
-    vldm r2, {d0-d3} 
-    b call_epilogue 
-E(ARM_TYPE_INT64, ffi_closure) 
-    ALIGN 8 
-    ldr    r1, [r2, #4] 
-    nop 
-E(ARM_TYPE_INT, ffi_closure) 
-    ALIGN 8 
-    ldr    r0, [r2] 
-    b call_epilogue 
-E(ARM_TYPE_VOID, ffi_closure) 
-    ALIGN 8 
-    b call_epilogue 
-    nop 
-E(ARM_TYPE_STRUCT, ffi_closure) 
-    ALIGN 8 
-    b call_epilogue 
-call_epilogue 
-    ldmfd sp!, {fp,pc} 
-    NESTED_END ffi_closure_ret 
- 
-    AREA |.trampoline|, DATA, THUMB, READONLY 
-    EXPORT |ffi_arm_trampoline| 
-|ffi_arm_trampoline| DATA 
-thisproc    adr    ip, thisproc 
-            stmdb  sp!, {ip, r0} 
-            ldr    pc, [pc, #0] 
-            DCD    0 
-            ;ENDP 
- 
-    END 
-\ No newline at end of file
+/* -----------------------------------------------------------------------
+   sysv.S - Copyright (c) 1998, 2008, 2011 Red Hat, Inc.
+        Copyright (c) 2011 Plausible Labs Cooperative, Inc.
+        Copyright (c) 2019 Microsoft Corporation.
+
+   ARM Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+#include "internal.h"
+#include "ksarm.h"
+
+
+        ; 8 byte aligned AREA to support 8 byte aligned jump tables
+        MACRO
+        NESTED_ENTRY_FFI $FuncName, $AreaName, $ExceptHandler
+
+        ; compute the function's labels
+        __DeriveFunctionLabels $FuncName
+
+        ; determine the area we will put the function into
+__FuncArea   SETS    "|.text|"
+        IF "$AreaName" != ""
+__FuncArea   SETS    "$AreaName"
+        ENDIF
+
+        ; set up the exception handler itself
+__FuncExceptionHandler SETS ""
+        IF "$ExceptHandler" != ""
+__FuncExceptionHandler SETS    "|$ExceptHandler|"
+        ENDIF
+
+        ; switch to the specified area, jump tables require 8 byte alignment
+        AREA    $__FuncArea,CODE,CODEALIGN,ALIGN=3,READONLY
+
+        ; export the function name
+        __ExportProc $FuncName
+
+        ; flush any pending literal pool stuff
+        ROUT
+
+        ; reset the state of the unwind code tracking
+        __ResetUnwindState
+
+        MEND
+
+;        MACRO
+;        TABLE_ENTRY $Type, $Table
+;$Type_$Table
+;        MEND
+
+#define E(index,table) return_##index##_##table
+
+    ; r0:   stack
+    ; r1:   frame
+    ; r2:   fn
+    ; r3:   vfp_used
+
+    ; fake entry point exists only to
+    ; generate .pdata for exception unwinding
+    NESTED_ENTRY_FFI ffi_call_VFP_fake
+    PROLOG_PUSH  {r11, lr}          ; save fp and lr for unwind
+
+    ALTERNATE_ENTRY ffi_call_VFP
+    cmp    r3, #3                   ; load only d0 if possible
+    vldrle d0, [r0]
+    vldmgt r0, {d0-d7}
+    add    r0, r0, #64              ; discard the vfp register args
+    b ffi_call_SYSV
+    NESTED_END ffi_call_VFP_fake
+
+    ; fake entry point exists only to
+    ; generate .pdata for exception unwinding
+    NESTED_ENTRY_FFI ffi_call_SYSV_fake
+    PROLOG_PUSH  {r11, lr}          ; save fp and lr for unwind
+
+    ALTERNATE_ENTRY ffi_call_SYSV
+    stm    r1, {fp, lr}
+    mov    fp, r1
+
+    mov    sp, r0                   ; install the stack pointer
+    mov    lr, r2                   ; move the fn pointer out of the way
+    ldr    ip, [fp, #16]            ; install the static chain
+    ldmia  sp!, {r0-r3}             ; move first 4 parameters in registers.
+    blx    lr                       ; call fn
+
+    ; Load r2 with the pointer to storage for the return value
+    ; Load r3 with the return type code
+    ldr    r2, [fp, #8]
+    ldr    r3, [fp, #12]
+
+    ; Deallocate the stack with the arguments.
+    mov    sp, fp
+
+    ; Store values stored in registers.
+    ALIGN 8
+    lsl     r3, #3
+    add     r3, r3, pc
+    add     r3, #8
+    mov     pc, r3
+
+
+E(ARM_TYPE_VFP_S, ffi_call)
+    ALIGN 8
+    vstr s0, [r2]
+    pop    {fp,pc}
+E(ARM_TYPE_VFP_D, ffi_call)
+    ALIGN 8
+    vstr d0, [r2]
+    pop    {fp,pc}
+E(ARM_TYPE_VFP_N, ffi_call)
+    ALIGN 8
+    vstm r2, {d0-d3}
+    pop    {fp,pc}
+E(ARM_TYPE_INT64, ffi_call)
+    ALIGN 8
+    str    r1, [r2, #4]
+    nop
+E(ARM_TYPE_INT, ffi_call)
+    ALIGN 8
+    str    r0, [r2]
+    pop    {fp,pc}
+E(ARM_TYPE_VOID, ffi_call)
+    ALIGN 8
+    pop    {fp,pc}
+    nop
+E(ARM_TYPE_STRUCT, ffi_call)
+    ALIGN 8
+    cmp r3, #ARM_TYPE_STRUCT
+    pop    {fp,pc}
+    NESTED_END ffi_call_SYSV_fake
+
+    IMPORT |ffi_closure_inner_SYSV|
+    /*
+    int ffi_closure_inner_SYSV
+    (
+        cif,        ; r0
+        fun,        ; r1
+        user_data,  ; r2
+        frame       ; r3
+    )
+    */
+
+    NESTED_ENTRY_FFI ffi_go_closure_SYSV
+    stmdb   sp!, {r0-r3}            ; save argument regs
+    ldr     r0, [ip, #4]            ; load cif
+    ldr     r1, [ip, #8]            ; load fun
+    mov     r2, ip                  ; load user_data
+    b       ffi_go_closure_SYSV_0
+    NESTED_END ffi_go_closure_SYSV
+
+    ; r3:    ffi_closure
+
+    ; fake entry point exists only to
+    ; generate .pdata for exception unwinding
+    NESTED_ENTRY_FFI ffi_closure_SYSV_fake  
+    PROLOG_PUSH  {r11, lr}          ; save fp and lr for unwind
+    ALTERNATE_ENTRY ffi_closure_SYSV
+    ldmfd   sp!, {ip,r0}            ; restore ip (pushed with r0 by the trampoline; r0 is used for stack alignment)
+    stmdb   sp!, {r0-r3}            ; save argument regs
+
+    ldr     r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET]    ; ffi_closure->cif
+    ldr     r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4]  ; ffi_closure->fun
+    ldr     r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8]  ; ffi_closure->user_data
+
+    ALTERNATE_ENTRY ffi_go_closure_SYSV_0
+    add     ip, sp, #16             ; compute entry sp
+
+    sub     sp, sp, #64+32          ; allocate frame parameter (sizeof(vfp_space) = 64, sizeof(result) = 32)
+    mov     r3, sp                  ; set frame parameter
+    stmdb   sp!, {ip,lr}
+
+    bl      ffi_closure_inner_SYSV  ; call the closure handler (imported above)
+
+                                    ; Load values returned in registers.
+    add     r2, sp, #64+8           ; address of closure_frame->result
+    bl      ffi_closure_ret         ; move result to correct register or memory for type
+
+    ldmfd   sp!, {ip,lr}
+    mov     sp, ip                  ; restore stack pointer
+    mov     pc, lr
+    NESTED_END ffi_closure_SYSV_fake
+
+    IMPORT |ffi_closure_inner_VFP|
+    /*
+    int ffi_closure_inner_VFP
+    (
+        cif,        ; r0
+        fun,        ; r1
+        user_data,  ; r2
+        frame       ; r3
+    )
+    */
+
+    NESTED_ENTRY_FFI ffi_go_closure_VFP
+    stmdb   sp!, {r0-r3}			; save argument regs
+    ldr	r0, [ip, #4]			; load cif
+    ldr	r1, [ip, #8]			; load fun
+    mov	r2, ip				; load user_data
+    b	ffi_go_closure_VFP_0
+    NESTED_END ffi_go_closure_VFP
+
+    ; fake entry point exists only to
+    ; generate .pdata for exception unwinding
+    ; r3:    closure
+    NESTED_ENTRY_FFI ffi_closure_VFP_fake
+    PROLOG_PUSH  {r11, lr}          ; save fp and lr for unwind
+
+    ALTERNATE_ENTRY ffi_closure_VFP
+    ldmfd   sp!, {ip,r0}            ; restore ip (pushed with r0 by the trampoline; r0 is used for stack alignment)
+    stmdb   sp!, {r0-r3}            ; save argument regs
+
+    ldr     r0, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET]    ; load cif
+    ldr     r1, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+4]  ; load fun
+    ldr     r2, [ip, #FFI_TRAMPOLINE_CLOSURE_OFFSET+8]  ; load user_data
+
+    ALTERNATE_ENTRY ffi_go_closure_VFP_0
+    add     ip, sp, #16             ; compute entry sp
+    sub     sp, sp, #32             ; save space for closure_frame->result
+    vstmdb  sp!, {d0-d7}            ; push closure_frame->vfp_space
+
+    mov     r3, sp                  ; save closure_frame
+    stmdb   sp!, {ip,lr}
+
+    bl      ffi_closure_inner_VFP
+
+    ; Load values returned in registers.
+    add     r2, sp, #64+8           ; load result
+    bl      ffi_closure_ret
+    ldmfd   sp!, {ip,lr}
+    mov     sp, ip                  ; restore stack pointer
+    mov     pc, lr
+    NESTED_END ffi_closure_VFP_fake
+
+/* Load values returned in registers for both closure entry points.
+   Note that we use LDM with SP in the register set.  This is deprecated
+   by ARM, but not yet unpredictable.  */
+
+    NESTED_ENTRY_FFI ffi_closure_ret
+    stmdb sp!, {fp,lr}
+
+    ALIGN 8
+    lsl     r0, #3
+    add     r0, r0, pc
+    add     r0, #8
+    mov     pc, r0
+
+E(ARM_TYPE_VFP_S, ffi_closure)
+    ALIGN 8
+    vldr s0, [r2]
+    b call_epilogue
+E(ARM_TYPE_VFP_D, ffi_closure)
+    ALIGN 8
+    vldr d0, [r2]
+    b call_epilogue
+E(ARM_TYPE_VFP_N, ffi_closure)
+    ALIGN 8
+    vldm r2, {d0-d3}
+    b call_epilogue
+E(ARM_TYPE_INT64, ffi_closure)
+    ALIGN 8
+    ldr    r1, [r2, #4]
+    nop
+E(ARM_TYPE_INT, ffi_closure)
+    ALIGN 8
+    ldr    r0, [r2]
+    b call_epilogue
+E(ARM_TYPE_VOID, ffi_closure)
+    ALIGN 8
+    b call_epilogue
+    nop
+E(ARM_TYPE_STRUCT, ffi_closure)
+    ALIGN 8
+    b call_epilogue
+call_epilogue
+    ldmfd sp!, {fp,pc}
+    NESTED_END ffi_closure_ret
+
+    AREA |.trampoline|, DATA, THUMB, READONLY
+    EXPORT |ffi_arm_trampoline|
+|ffi_arm_trampoline| DATA
+thisproc    adr    ip, thisproc
+            stmdb  sp!, {ip, r0}
+            ldr    pc, [pc, #0]
+            DCD    0
+            ;ENDP
+
+    END
+\ No newline at end of file
diff --git a/contrib/restricted/libffi/src/closures.c b/contrib/restricted/libffi/src/closures.c
index 2c65fa0a6a..5120021652 100644
--- a/contrib/restricted/libffi/src/closures.c
+++ b/contrib/restricted/libffi/src/closures.c
@@ -1,6 +1,6 @@
 /* -----------------------------------------------------------------------
-   closures.c - Copyright (c) 2019 Anthony Green 
-                Copyright (c) 2007, 2009, 2010 Red Hat, Inc. 
+   closures.c - Copyright (c) 2019 Anthony Green
+                Copyright (c) 2007, 2009, 2010 Red Hat, Inc.
                 Copyright (C) 2007, 2009, 2010 Free Software Foundation, Inc
                 Copyright (c) 2011 Plausible Labs Cooperative, Inc.
 
@@ -31,88 +31,88 @@
 #define _GNU_SOURCE 1
 #endif
 
-#include <fficonfig.h> 
+#include <fficonfig.h>
 #include <ffi.h>
 #include <ffi_common.h>
 
-#ifdef __NetBSD__ 
-#include <sys/param.h> 
-#endif 
- 
-#if __NetBSD_Version__ - 0 >= 799007200 
-/* NetBSD with PROT_MPROTECT */ 
-#include <sys/mman.h> 
- 
-#include <stddef.h> 
-#include <unistd.h> 
- 
-static const size_t overhead = 
-  (sizeof(max_align_t) > sizeof(void *) + sizeof(size_t)) ? 
-    sizeof(max_align_t) 
-    : sizeof(void *) + sizeof(size_t); 
- 
-#define ADD_TO_POINTER(p, d) ((void *)((uintptr_t)(p) + (d))) 
- 
-void * 
-ffi_closure_alloc (size_t size, void **code) 
-{ 
-  static size_t page_size; 
-  size_t rounded_size; 
-  void *codeseg, *dataseg; 
-  int prot; 
- 
-  /* Expect that PAX mprotect is active and a separate code mapping is necessary. */ 
-  if (!code) 
-    return NULL; 
- 
-  /* Obtain system page size. */ 
-  if (!page_size) 
-    page_size = sysconf(_SC_PAGESIZE); 
- 
-  /* Round allocation size up to the next page, keeping in mind the size field and pointer to code map. */ 
-  rounded_size = (size + overhead + page_size - 1) & ~(page_size - 1); 
- 
-  /* Primary mapping is RW, but request permission to switch to PROT_EXEC later. */ 
-  prot = PROT_READ | PROT_WRITE | PROT_MPROTECT(PROT_EXEC); 
-  dataseg = mmap(NULL, rounded_size, prot, MAP_ANON | MAP_PRIVATE, -1, 0); 
-  if (dataseg == MAP_FAILED) 
-    return NULL; 
- 
-  /* Create secondary mapping and switch it to RX. */ 
-  codeseg = mremap(dataseg, rounded_size, NULL, rounded_size, MAP_REMAPDUP); 
-  if (codeseg == MAP_FAILED) { 
-    munmap(dataseg, rounded_size); 
-    return NULL; 
-  } 
-  if (mprotect(codeseg, rounded_size, PROT_READ | PROT_EXEC) == -1) { 
-    munmap(codeseg, rounded_size); 
-    munmap(dataseg, rounded_size); 
-    return NULL; 
-  } 
- 
-  /* Remember allocation size and location of the secondary mapping for ffi_closure_free. */ 
-  memcpy(dataseg, &rounded_size, sizeof(rounded_size)); 
-  memcpy(ADD_TO_POINTER(dataseg, sizeof(size_t)), &codeseg, sizeof(void *)); 
-  *code = ADD_TO_POINTER(codeseg, overhead); 
-  return ADD_TO_POINTER(dataseg, overhead); 
-} 
- 
-void 
-ffi_closure_free (void *ptr) 
-{ 
-  void *codeseg, *dataseg; 
-  size_t rounded_size; 
- 
-  dataseg = ADD_TO_POINTER(ptr, -overhead); 
-  memcpy(&rounded_size, dataseg, sizeof(rounded_size)); 
-  memcpy(&codeseg, ADD_TO_POINTER(dataseg, sizeof(size_t)), sizeof(void *)); 
-  munmap(dataseg, rounded_size); 
-  munmap(codeseg, rounded_size); 
-} 
-#else /* !NetBSD with PROT_MPROTECT */ 
- 
+#ifdef __NetBSD__
+#include <sys/param.h>
+#endif
+
+#if __NetBSD_Version__ - 0 >= 799007200
+/* NetBSD with PROT_MPROTECT */
+#include <sys/mman.h>
+
+#include <stddef.h>
+#include <unistd.h>
+
+static const size_t overhead =
+  (sizeof(max_align_t) > sizeof(void *) + sizeof(size_t)) ?
+    sizeof(max_align_t)
+    : sizeof(void *) + sizeof(size_t);
+
+#define ADD_TO_POINTER(p, d) ((void *)((uintptr_t)(p) + (d)))
+
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+  static size_t page_size;
+  size_t rounded_size;
+  void *codeseg, *dataseg;
+  int prot;
+
+  /* Expect that PAX mprotect is active and a separate code mapping is necessary. */
+  if (!code)
+    return NULL;
+
+  /* Obtain system page size. */
+  if (!page_size)
+    page_size = sysconf(_SC_PAGESIZE);
+
+  /* Round allocation size up to the next page, keeping in mind the size field and pointer to code map. */
+  rounded_size = (size + overhead + page_size - 1) & ~(page_size - 1);
+
+  /* Primary mapping is RW, but request permission to switch to PROT_EXEC later. */
+  prot = PROT_READ | PROT_WRITE | PROT_MPROTECT(PROT_EXEC);
+  dataseg = mmap(NULL, rounded_size, prot, MAP_ANON | MAP_PRIVATE, -1, 0);
+  if (dataseg == MAP_FAILED)
+    return NULL;
+
+  /* Create secondary mapping and switch it to RX. */
+  codeseg = mremap(dataseg, rounded_size, NULL, rounded_size, MAP_REMAPDUP);
+  if (codeseg == MAP_FAILED) {
+    munmap(dataseg, rounded_size);
+    return NULL;
+  }
+  if (mprotect(codeseg, rounded_size, PROT_READ | PROT_EXEC) == -1) {
+    munmap(codeseg, rounded_size);
+    munmap(dataseg, rounded_size);
+    return NULL;
+  }
+
+  /* Remember allocation size and location of the secondary mapping for ffi_closure_free. */
+  memcpy(dataseg, &rounded_size, sizeof(rounded_size));
+  memcpy(ADD_TO_POINTER(dataseg, sizeof(size_t)), &codeseg, sizeof(void *));
+  *code = ADD_TO_POINTER(codeseg, overhead);
+  return ADD_TO_POINTER(dataseg, overhead);
+}
+
+void
+ffi_closure_free (void *ptr)
+{
+  void *codeseg, *dataseg;
+  size_t rounded_size;
+
+  dataseg = ADD_TO_POINTER(ptr, -overhead);
+  memcpy(&rounded_size, dataseg, sizeof(rounded_size));
+  memcpy(&codeseg, ADD_TO_POINTER(dataseg, sizeof(size_t)), sizeof(void *));
+  munmap(dataseg, rounded_size);
+  munmap(codeseg, rounded_size);
+}
+#else /* !NetBSD with PROT_MPROTECT */
+
 #if !FFI_MMAP_EXEC_WRIT && !FFI_EXEC_TRAMPOLINE_TABLE
-# if __linux__ && !defined(__ANDROID__) 
+# if __linux__ && !defined(__ANDROID__)
 /* This macro indicates it may be forbidden to map anonymous memory
    with both write and execute permission.  Code compiled when this
    option is defined will attempt to map such pages once, but if it
@@ -123,7 +123,7 @@ ffi_closure_free (void *ptr)
 #  define FFI_MMAP_EXEC_WRIT 1
 #  define HAVE_MNTENT 1
 # endif
-# if defined(X86_WIN32) || defined(X86_WIN64) || defined(_M_ARM64) || defined(__OS2__) 
+# if defined(X86_WIN32) || defined(X86_WIN64) || defined(_M_ARM64) || defined(__OS2__)
 /* Windows systems may have Data Execution Protection (DEP) enabled, 
    which requires the use of VirtualMalloc/VirtualFree to alloc/free
    executable memory. */
@@ -132,7 +132,7 @@ ffi_closure_free (void *ptr)
 #endif
 
 #if FFI_MMAP_EXEC_WRIT && !defined FFI_MMAP_EXEC_SELINUX
-# if defined(__linux__) && !defined(__ANDROID__) 
+# if defined(__linux__) && !defined(__ANDROID__)
 /* When defined to 1 check for SELinux and if SELinux is active,
    don't attempt PROT_EXEC|PROT_WRITE mapping at all, as that
    might cause audit messages.  */
@@ -142,216 +142,216 @@ ffi_closure_free (void *ptr)
 
 #if FFI_CLOSURES
 
-#if FFI_EXEC_TRAMPOLINE_TABLE 
-
-#ifdef __MACH__ 
- 
-#include <mach/mach.h> 
-#include <pthread.h> 
-#include <stdio.h> 
-#include <stdlib.h> 
- 
-extern void *ffi_closure_trampoline_table_page; 
- 
-typedef struct ffi_trampoline_table ffi_trampoline_table; 
-typedef struct ffi_trampoline_table_entry ffi_trampoline_table_entry; 
- 
-struct ffi_trampoline_table 
-{ 
-  /* contiguous writable and executable pages */ 
-  vm_address_t config_page; 
-  vm_address_t trampoline_page; 
- 
-  /* free list tracking */ 
-  uint16_t free_count; 
-  ffi_trampoline_table_entry *free_list; 
-  ffi_trampoline_table_entry *free_list_pool; 
- 
-  ffi_trampoline_table *prev; 
-  ffi_trampoline_table *next; 
-}; 
- 
-struct ffi_trampoline_table_entry 
-{ 
-  void *(*trampoline) (void); 
-  ffi_trampoline_table_entry *next; 
-}; 
- 
-/* Total number of trampolines that fit in one trampoline table */ 
-#define FFI_TRAMPOLINE_COUNT (PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE) 
- 
-static pthread_mutex_t ffi_trampoline_lock = PTHREAD_MUTEX_INITIALIZER; 
-static ffi_trampoline_table *ffi_trampoline_tables = NULL; 
- 
-static ffi_trampoline_table * 
-ffi_trampoline_table_alloc (void) 
-{ 
-  ffi_trampoline_table *table; 
-  vm_address_t config_page; 
-  vm_address_t trampoline_page; 
-  vm_address_t trampoline_page_template; 
-  vm_prot_t cur_prot; 
-  vm_prot_t max_prot; 
-  kern_return_t kt; 
-  uint16_t i; 
- 
-  /* Allocate two pages -- a config page and a placeholder page */ 
-  config_page = 0x0; 
-  kt = vm_allocate (mach_task_self (), &config_page, PAGE_MAX_SIZE * 2, 
-		    VM_FLAGS_ANYWHERE); 
-  if (kt != KERN_SUCCESS) 
-    return NULL; 
- 
-  /* Remap the trampoline table on top of the placeholder page */ 
-  trampoline_page = config_page + PAGE_MAX_SIZE; 
-  trampoline_page_template = (vm_address_t)&ffi_closure_trampoline_table_page; 
-#ifdef __arm__ 
-  /* ffi_closure_trampoline_table_page can be thumb-biased on some ARM archs */ 
-  trampoline_page_template &= ~1UL; 
-#endif 
-  kt = vm_remap (mach_task_self (), &trampoline_page, PAGE_MAX_SIZE, 0x0, 
-		 VM_FLAGS_OVERWRITE, mach_task_self (), trampoline_page_template, 
-		 FALSE, &cur_prot, &max_prot, VM_INHERIT_SHARE); 
-  if (kt != KERN_SUCCESS) 
-    { 
-      vm_deallocate (mach_task_self (), config_page, PAGE_MAX_SIZE * 2); 
-      return NULL; 
-    } 
- 
-  /* We have valid trampoline and config pages */ 
-  table = calloc (1, sizeof (ffi_trampoline_table)); 
-  table->free_count = FFI_TRAMPOLINE_COUNT; 
-  table->config_page = config_page; 
-  table->trampoline_page = trampoline_page; 
- 
-  /* Create and initialize the free list */ 
-  table->free_list_pool = 
-    calloc (FFI_TRAMPOLINE_COUNT, sizeof (ffi_trampoline_table_entry)); 
- 
-  for (i = 0; i < table->free_count; i++) 
-    { 
-      ffi_trampoline_table_entry *entry = &table->free_list_pool[i]; 
-      entry->trampoline = 
-	(void *) (table->trampoline_page + (i * FFI_TRAMPOLINE_SIZE)); 
- 
-      if (i < table->free_count - 1) 
-	entry->next = &table->free_list_pool[i + 1]; 
-    } 
- 
-  table->free_list = table->free_list_pool; 
- 
-  return table; 
-} 
- 
-static void 
-ffi_trampoline_table_free (ffi_trampoline_table *table) 
-{ 
-  /* Remove from the list */ 
-  if (table->prev != NULL) 
-    table->prev->next = table->next; 
- 
-  if (table->next != NULL) 
-    table->next->prev = table->prev; 
- 
-  /* Deallocate pages */ 
-  vm_deallocate (mach_task_self (), table->config_page, PAGE_MAX_SIZE * 2); 
- 
-  /* Deallocate free list */ 
-  free (table->free_list_pool); 
-  free (table); 
-} 
- 
-void * 
-ffi_closure_alloc (size_t size, void **code) 
-{ 
-  /* Create the closure */ 
-  ffi_closure *closure = malloc (size); 
-  if (closure == NULL) 
-    return NULL; 
- 
-  pthread_mutex_lock (&ffi_trampoline_lock); 
- 
-  /* Check for an active trampoline table with available entries. */ 
-  ffi_trampoline_table *table = ffi_trampoline_tables; 
-  if (table == NULL || table->free_list == NULL) 
-    { 
-      table = ffi_trampoline_table_alloc (); 
-      if (table == NULL) 
-	{ 
-	  pthread_mutex_unlock (&ffi_trampoline_lock); 
-	  free (closure); 
-	  return NULL; 
-	} 
- 
-      /* Insert the new table at the top of the list */ 
-      table->next = ffi_trampoline_tables; 
-      if (table->next != NULL) 
-	table->next->prev = table; 
- 
-      ffi_trampoline_tables = table; 
-    } 
- 
-  /* Claim the free entry */ 
-  ffi_trampoline_table_entry *entry = ffi_trampoline_tables->free_list; 
-  ffi_trampoline_tables->free_list = entry->next; 
-  ffi_trampoline_tables->free_count--; 
-  entry->next = NULL; 
- 
-  pthread_mutex_unlock (&ffi_trampoline_lock); 
- 
-  /* Initialize the return values */ 
-  *code = entry->trampoline; 
-  closure->trampoline_table = table; 
-  closure->trampoline_table_entry = entry; 
- 
-  return closure; 
-} 
- 
-void 
-ffi_closure_free (void *ptr) 
-{ 
-  ffi_closure *closure = ptr; 
- 
-  pthread_mutex_lock (&ffi_trampoline_lock); 
- 
-  /* Fetch the table and entry references */ 
-  ffi_trampoline_table *table = closure->trampoline_table; 
-  ffi_trampoline_table_entry *entry = closure->trampoline_table_entry; 
- 
-  /* Return the entry to the free list */ 
-  entry->next = table->free_list; 
-  table->free_list = entry; 
-  table->free_count++; 
- 
-  /* If all trampolines within this table are free, and at least one other table exists, deallocate 
-   * the table */ 
-  if (table->free_count == FFI_TRAMPOLINE_COUNT 
-      && ffi_trampoline_tables != table) 
-    { 
-      ffi_trampoline_table_free (table); 
-    } 
-  else if (ffi_trampoline_tables != table) 
-    { 
-      /* Otherwise, bump this table to the top of the list */ 
-      table->prev = NULL; 
-      table->next = ffi_trampoline_tables; 
-      if (ffi_trampoline_tables != NULL) 
-	ffi_trampoline_tables->prev = table; 
- 
-      ffi_trampoline_tables = table; 
-    } 
- 
-  pthread_mutex_unlock (&ffi_trampoline_lock); 
- 
-  /* Free the closure */ 
-  free (closure); 
-} 
- 
-#endif 
- 
+#if FFI_EXEC_TRAMPOLINE_TABLE
+
+#ifdef __MACH__
+
+#include <mach/mach.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+extern void *ffi_closure_trampoline_table_page;
+
+typedef struct ffi_trampoline_table ffi_trampoline_table;
+typedef struct ffi_trampoline_table_entry ffi_trampoline_table_entry;
+
+struct ffi_trampoline_table
+{
+  /* contiguous writable and executable pages */
+  vm_address_t config_page;
+  vm_address_t trampoline_page;
+
+  /* free list tracking */
+  uint16_t free_count;
+  ffi_trampoline_table_entry *free_list;
+  ffi_trampoline_table_entry *free_list_pool;
+
+  ffi_trampoline_table *prev;
+  ffi_trampoline_table *next;
+};
+
+struct ffi_trampoline_table_entry
+{
+  void *(*trampoline) (void);
+  ffi_trampoline_table_entry *next;
+};
+
+/* Total number of trampolines that fit in one trampoline table */
+#define FFI_TRAMPOLINE_COUNT (PAGE_MAX_SIZE / FFI_TRAMPOLINE_SIZE)
+
+static pthread_mutex_t ffi_trampoline_lock = PTHREAD_MUTEX_INITIALIZER;
+static ffi_trampoline_table *ffi_trampoline_tables = NULL;
+
+static ffi_trampoline_table *
+ffi_trampoline_table_alloc (void)
+{
+  ffi_trampoline_table *table;
+  vm_address_t config_page;
+  vm_address_t trampoline_page;
+  vm_address_t trampoline_page_template;
+  vm_prot_t cur_prot;
+  vm_prot_t max_prot;
+  kern_return_t kt;
+  uint16_t i;
+
+  /* Allocate two pages -- a config page and a placeholder page */
+  config_page = 0x0;
+  kt = vm_allocate (mach_task_self (), &config_page, PAGE_MAX_SIZE * 2,
+		    VM_FLAGS_ANYWHERE);
+  if (kt != KERN_SUCCESS)
+    return NULL;
+
+  /* Remap the trampoline table on top of the placeholder page */
+  trampoline_page = config_page + PAGE_MAX_SIZE;
+  trampoline_page_template = (vm_address_t)&ffi_closure_trampoline_table_page;
+#ifdef __arm__
+  /* ffi_closure_trampoline_table_page can be thumb-biased on some ARM archs */
+  trampoline_page_template &= ~1UL;
+#endif
+  kt = vm_remap (mach_task_self (), &trampoline_page, PAGE_MAX_SIZE, 0x0,
+		 VM_FLAGS_OVERWRITE, mach_task_self (), trampoline_page_template,
+		 FALSE, &cur_prot, &max_prot, VM_INHERIT_SHARE);
+  if (kt != KERN_SUCCESS)
+    {
+      vm_deallocate (mach_task_self (), config_page, PAGE_MAX_SIZE * 2);
+      return NULL;
+    }
+
+  /* We have valid trampoline and config pages */
+  table = calloc (1, sizeof (ffi_trampoline_table));
+  table->free_count = FFI_TRAMPOLINE_COUNT;
+  table->config_page = config_page;
+  table->trampoline_page = trampoline_page;
+
+  /* Create and initialize the free list */
+  table->free_list_pool =
+    calloc (FFI_TRAMPOLINE_COUNT, sizeof (ffi_trampoline_table_entry));
+
+  for (i = 0; i < table->free_count; i++)
+    {
+      ffi_trampoline_table_entry *entry = &table->free_list_pool[i];
+      entry->trampoline =
+	(void *) (table->trampoline_page + (i * FFI_TRAMPOLINE_SIZE));
+
+      if (i < table->free_count - 1)
+	entry->next = &table->free_list_pool[i + 1];
+    }
+
+  table->free_list = table->free_list_pool;
+
+  return table;
+}
+
+static void
+ffi_trampoline_table_free (ffi_trampoline_table *table)
+{
+  /* Remove from the list */
+  if (table->prev != NULL)
+    table->prev->next = table->next;
+
+  if (table->next != NULL)
+    table->next->prev = table->prev;
+
+  /* Deallocate pages */
+  vm_deallocate (mach_task_self (), table->config_page, PAGE_MAX_SIZE * 2);
+
+  /* Deallocate free list */
+  free (table->free_list_pool);
+  free (table);
+}
+
+void *
+ffi_closure_alloc (size_t size, void **code)
+{
+  /* Create the closure */
+  ffi_closure *closure = malloc (size);
+  if (closure == NULL)
+    return NULL;
+
+  pthread_mutex_lock (&ffi_trampoline_lock);
+
+  /* Check for an active trampoline table with available entries. */
+  ffi_trampoline_table *table = ffi_trampoline_tables;
+  if (table == NULL || table->free_list == NULL)
+    {
+      table = ffi_trampoline_table_alloc ();
+      if (table == NULL)
+	{
+	  pthread_mutex_unlock (&ffi_trampoline_lock);
+	  free (closure);
+	  return NULL;
+	}
+
+      /* Insert the new table at the top of the list */
+      table->next = ffi_trampoline_tables;
+      if (table->next != NULL)
+	table->next->prev = table;
+
+      ffi_trampoline_tables = table;
+    }
+
+  /* Claim the free entry */
+  ffi_trampoline_table_entry *entry = ffi_trampoline_tables->free_list;
+  ffi_trampoline_tables->free_list = entry->next;
+  ffi_trampoline_tables->free_count--;
+  entry->next = NULL;
+
+  pthread_mutex_unlock (&ffi_trampoline_lock);
+
+  /* Initialize the return values */
+  *code = entry->trampoline;
+  closure->trampoline_table = table;
+  closure->trampoline_table_entry = entry;
+
+  return closure;
+}
+
+void
+ffi_closure_free (void *ptr)
+{
+  ffi_closure *closure = ptr;
+
+  pthread_mutex_lock (&ffi_trampoline_lock);
+
+  /* Fetch the table and entry references */
+  ffi_trampoline_table *table = closure->trampoline_table;
+  ffi_trampoline_table_entry *entry = closure->trampoline_table_entry;
+
+  /* Return the entry to the free list */
+  entry->next = table->free_list;
+  table->free_list = entry;
+  table->free_count++;
+
+  /* If all trampolines within this table are free, and at least one other table exists, deallocate
+   * the table */
+  if (table->free_count == FFI_TRAMPOLINE_COUNT
+      && ffi_trampoline_tables != table)
+    {
+      ffi_trampoline_table_free (table);
+    }
+  else if (ffi_trampoline_tables != table)
+    {
+      /* Otherwise, bump this table to the top of the list */
+      table->prev = NULL;
+      table->next = ffi_trampoline_tables;
+      if (ffi_trampoline_tables != NULL)
+	ffi_trampoline_tables->prev = table;
+
+      ffi_trampoline_tables = table;
+    }
+
+  pthread_mutex_unlock (&ffi_trampoline_lock);
+
+  /* Free the closure */
+  free (closure);
+}
+
+#endif
+
 // Per-target implementation; It's unclear what can reasonable be shared between two OS/architecture implementations.
 
-#elif FFI_MMAP_EXEC_WRIT /* !FFI_EXEC_TRAMPOLINE_TABLE */ 
+#elif FFI_MMAP_EXEC_WRIT /* !FFI_EXEC_TRAMPOLINE_TABLE */
 
 #define USE_LOCKS 1
 #define USE_DL_PREFIX 1
@@ -386,7 +386,7 @@ ffi_closure_free (void *ptr)
 #endif
 #include <string.h>
 #include <stdio.h>
-#if !defined(X86_WIN32) && !defined(X86_WIN64) && !defined(_M_ARM64) 
+#if !defined(X86_WIN32) && !defined(X86_WIN64) && !defined(_M_ARM64)
 #ifdef HAVE_MNTENT
 #include <mntent.h>
 #endif /* HAVE_MNTENT */
@@ -456,26 +456,26 @@ static int emutramp_enabled = -1;
 static int
 emutramp_enabled_check (void)
 {
-  char *buf = NULL; 
-  size_t len = 0; 
-  FILE *f; 
-  int ret; 
-  f = fopen ("/proc/self/status", "r"); 
-  if (f == NULL) 
+  char *buf = NULL;
+  size_t len = 0;
+  FILE *f;
+  int ret;
+  f = fopen ("/proc/self/status", "r");
+  if (f == NULL)
     return 0;
-  ret = 0; 
- 
-  while (getline (&buf, &len, f) != -1) 
-    if (!strncmp (buf, "PaX:", 4)) 
-      { 
-        char emutramp; 
-        if (sscanf (buf, "%*s %*c%c", &emutramp) == 1) 
-          ret = (emutramp == 'E'); 
-        break; 
-      } 
-  free (buf); 
-  fclose (f); 
-  return ret; 
+  ret = 0;
+
+  while (getline (&buf, &len, f) != -1)
+    if (!strncmp (buf, "PaX:", 4))
+      {
+        char emutramp;
+        if (sscanf (buf, "%*s %*c%c", &emutramp) == 1)
+          ret = (emutramp == 'E');
+        break;
+      }
+  free (buf);
+  fclose (f);
+  return ret;
 }
 
 #define is_emutramp_enabled() (emutramp_enabled >= 0 ? emutramp_enabled \
@@ -512,7 +512,7 @@ static int dlmalloc_trim(size_t) MAYBE_UNUSED;
 static size_t dlmalloc_usable_size(void*) MAYBE_UNUSED;
 static void dlmalloc_stats(void) MAYBE_UNUSED;
 
-#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(_M_ARM64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) 
+#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(_M_ARM64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX)
 /* Use these for mmap and munmap within dlmalloc.c.  */
 static void *dlmmap(void *, size_t, int, int, int, off_t);
 static int dlmunmap(void *, size_t);
@@ -526,7 +526,7 @@ static int dlmunmap(void *, size_t);
 #undef mmap
 #undef munmap
 
-#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(_M_ARM64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) 
+#if !(defined(X86_WIN32) || defined(X86_WIN64) || defined(_M_ARM64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX)
 
 /* A mutex used to synchronize access to *exec* variables in this file.  */
 static pthread_mutex_t open_temp_exec_file_mutex = PTHREAD_MUTEX_INITIALIZER;
@@ -540,16 +540,16 @@ static size_t execsize = 0;
 
 /* Open a temporary file name, and immediately unlink it.  */
 static int
-open_temp_exec_file_name (char *name, int flags) 
+open_temp_exec_file_name (char *name, int flags)
 {
-  int fd; 
-
-#ifdef HAVE_MKOSTEMP 
-  fd = mkostemp (name, flags); 
-#else 
-  fd = mkstemp (name); 
-#endif 
- 
+  int fd;
+
+#ifdef HAVE_MKOSTEMP
+  fd = mkostemp (name, flags);
+#else
+  fd = mkstemp (name);
+#endif
+
   if (fd != -1)
     unlink (name);
 
@@ -561,38 +561,38 @@ static int
 open_temp_exec_file_dir (const char *dir)
 {
   static const char suffix[] = "/ffiXXXXXX";
-  int lendir, flags; 
-  char *tempname; 
-#ifdef O_TMPFILE 
-  int fd; 
-#endif 
-
-#ifdef O_CLOEXEC 
-  flags = O_CLOEXEC; 
-#else 
-  flags = 0; 
-#endif 
- 
-#ifdef O_TMPFILE 
-  fd = open (dir, flags | O_RDWR | O_EXCL | O_TMPFILE, 0700); 
-  /* If the running system does not support the O_TMPFILE flag then retry without it. */ 
-  if (fd != -1 || (errno != EINVAL && errno != EISDIR && errno != EOPNOTSUPP)) { 
-    return fd; 
-  } else { 
-    errno = 0; 
-  } 
-#endif 
- 
-  lendir = (int) strlen (dir); 
-  tempname = __builtin_alloca (lendir + sizeof (suffix)); 
- 
+  int lendir, flags;
+  char *tempname;
+#ifdef O_TMPFILE
+  int fd;
+#endif
+
+#ifdef O_CLOEXEC
+  flags = O_CLOEXEC;
+#else
+  flags = 0;
+#endif
+
+#ifdef O_TMPFILE
+  fd = open (dir, flags | O_RDWR | O_EXCL | O_TMPFILE, 0700);
+  /* If the running system does not support the O_TMPFILE flag then retry without it. */
+  if (fd != -1 || (errno != EINVAL && errno != EISDIR && errno != EOPNOTSUPP)) {
+    return fd;
+  } else {
+    errno = 0;
+  }
+#endif
+
+  lendir = (int) strlen (dir);
+  tempname = __builtin_alloca (lendir + sizeof (suffix));
+
   if (!tempname)
     return -1;
 
   memcpy (tempname, dir, lendir);
   memcpy (tempname + lendir, suffix, sizeof (suffix));
 
-  return open_temp_exec_file_name (tempname, flags); 
+  return open_temp_exec_file_name (tempname, flags);
 }
 
 /* Open a temporary file in the directory in the named environment
@@ -701,7 +701,7 @@ open_temp_exec_file_opts_next (void)
 }
 
 /* Return a file descriptor of a temporary zero-sized file in a
-   writable and executable filesystem.  */ 
+   writable and executable filesystem.  */
 static int
 open_temp_exec_file (void)
 {
@@ -724,36 +724,36 @@ open_temp_exec_file (void)
   return fd;
 }
 
-/* We need to allocate space in a file that will be backing a writable 
-   mapping.  Several problems exist with the usual approaches: 
-   - fallocate() is Linux-only 
-   - posix_fallocate() is not available on all platforms 
-   - ftruncate() does not allocate space on filesystems with sparse files 
-   Failure to allocate the space will cause SIGBUS to be thrown when 
-   the mapping is subsequently written to.  */ 
-static int 
-allocate_space (int fd, off_t offset, off_t len) 
-{ 
-  static size_t page_size; 
- 
-  /* Obtain system page size. */ 
-  if (!page_size) 
-    page_size = sysconf(_SC_PAGESIZE); 
- 
-  unsigned char buf[page_size]; 
-  memset (buf, 0, page_size); 
- 
-  while (len > 0) 
-    { 
-      off_t to_write = (len < page_size) ? len : page_size; 
-      if (write (fd, buf, to_write) < to_write) 
-        return -1; 
-      len -= to_write; 
-    } 
- 
-  return 0; 
-} 
- 
+/* We need to allocate space in a file that will be backing a writable
+   mapping.  Several problems exist with the usual approaches:
+   - fallocate() is Linux-only
+   - posix_fallocate() is not available on all platforms
+   - ftruncate() does not allocate space on filesystems with sparse files
+   Failure to allocate the space will cause SIGBUS to be thrown when
+   the mapping is subsequently written to.  */
+static int
+allocate_space (int fd, off_t offset, off_t len)
+{
+  static size_t page_size;
+
+  /* Obtain system page size. */
+  if (!page_size)
+    page_size = sysconf(_SC_PAGESIZE);
+
+  unsigned char buf[page_size];
+  memset (buf, 0, page_size);
+
+  while (len > 0)
+    {
+      off_t to_write = (len < page_size) ? len : page_size;
+      if (write (fd, buf, to_write) < to_write)
+        return -1;
+      len -= to_write;
+    }
+
+  return 0;
+}
+
 /* Map in a chunk of memory from the temporary exec file into separate
    locations in the virtual memory address space, one writable and one
    executable.  Returns the address of the writable portion, after
@@ -775,7 +775,7 @@ dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset)
 
   offset = execsize;
 
-  if (allocate_space (execfd, offset, length)) 
+  if (allocate_space (execfd, offset, length))
     return MFAIL;
 
   flags &= ~(MAP_PRIVATE | MAP_ANONYMOUS);
@@ -790,13 +790,13 @@ dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset)
 	  close (execfd);
 	  goto retry_open;
 	}
-      if (ftruncate (execfd, offset) != 0) 
-      { 
-        /* Fixme : Error logs can be added here. Returning an error for 
-         * ftruncte() will not add any advantage as it is being 
-         * validating in the error case. */ 
-      } 
- 
+      if (ftruncate (execfd, offset) != 0)
+      {
+        /* Fixme : Error logs can be added here. Returning an error for
+         * ftruncte() will not add any advantage as it is being
+         * validating in the error case. */
+      }
+
       return MFAIL;
     }
   else if (!offset
@@ -808,12 +808,12 @@ dlmmap_locked (void *start, size_t length, int prot, int flags, off_t offset)
   if (start == MFAIL)
     {
       munmap (ptr, length);
-      if (ftruncate (execfd, offset) != 0) 
-      { 
-        /* Fixme : Error logs can be added here. Returning an error for 
-         * ftruncte() will not add any advantage as it is being 
-         * validating in the error case. */ 
-      } 
+      if (ftruncate (execfd, offset) != 0)
+      {
+        /* Fixme : Error logs can be added here. Returning an error for
+         * ftruncte() will not add any advantage as it is being
+         * validating in the error case. */
+      }
       return start;
     }
 
@@ -908,7 +908,7 @@ segment_holding_code (mstate m, char* addr)
 }
 #endif
 
-#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(_M_ARM64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */ 
+#endif /* !(defined(X86_WIN32) || defined(X86_WIN64) || defined(_M_ARM64) || defined(__OS2__)) || defined (__CYGWIN__) || defined(__INTERIX) */
 
 /* Allocate a chunk of memory with the given size.  Returns a pointer
    to the writable address, and sets *CODE to the executable
@@ -933,20 +933,20 @@ ffi_closure_alloc (size_t size, void **code)
   return ptr;
 }
 
-void * 
-ffi_data_to_code_pointer (void *data) 
-{ 
-  msegmentptr seg = segment_holding (gm, data); 
-  /* We expect closures to be allocated with ffi_closure_alloc(), in 
-     which case seg will be non-NULL.  However, some users take on the 
-     burden of managing this memory themselves, in which case this 
-     we'll just return data. */ 
-  if (seg) 
-    return add_segment_exec_offset (data, seg); 
-  else 
-    return data; 
-} 
- 
+void *
+ffi_data_to_code_pointer (void *data)
+{
+  msegmentptr seg = segment_holding (gm, data);
+  /* We expect closures to be allocated with ffi_closure_alloc(), in
+     which case seg will be non-NULL.  However, some users take on the
+     burden of managing this memory themselves, in which case this
+     we'll just return data. */
+  if (seg)
+    return add_segment_exec_offset (data, seg);
+  else
+    return data;
+}
+
 /* Release a chunk of memory allocated with ffi_closure_alloc.  If
    FFI_CLOSURE_FREE_CODE is nonzero, the given address can be the
    writable or the executable address given.  Otherwise, only the
@@ -986,13 +986,13 @@ ffi_closure_free (void *ptr)
   free (ptr);
 }
 
-void * 
-ffi_data_to_code_pointer (void *data) 
-{ 
-  return data; 
-} 
- 
+void *
+ffi_data_to_code_pointer (void *data)
+{
+  return data;
+}
+
 # endif /* ! FFI_MMAP_EXEC_WRIT */
 #endif /* FFI_CLOSURES */
- 
-#endif /* NetBSD with PROT_MPROTECT */ 
+
+#endif /* NetBSD with PROT_MPROTECT */
diff --git a/contrib/restricted/libffi/src/dlmalloc.c b/contrib/restricted/libffi/src/dlmalloc.c
index c859e4373a..ec85fcec2a 100644
--- a/contrib/restricted/libffi/src/dlmalloc.c
+++ b/contrib/restricted/libffi/src/dlmalloc.c
@@ -438,11 +438,11 @@ DEFAULT_MMAP_THRESHOLD       default: 256K
 
 */
 
-#if defined __linux__ && !defined _GNU_SOURCE 
-/* mremap() on Linux requires this via sys/mman.h */ 
-#define _GNU_SOURCE 1 
-#endif 
- 
+#if defined __linux__ && !defined _GNU_SOURCE
+/* mremap() on Linux requires this via sys/mman.h */
+#define _GNU_SOURCE 1
+#endif
+
 #ifndef WIN32
 #ifdef _WIN32
 #define WIN32 1
@@ -1260,7 +1260,7 @@ extern void*     sbrk(ptrdiff_t);
 #define SIZE_T_BITSIZE      (sizeof(size_t) << 3)
 
 /* Some constants coerced to size_t */
-/* Annoying but necessary to avoid errors on some platforms */ 
+/* Annoying but necessary to avoid errors on some platforms */
 #define SIZE_T_ZERO         ((size_t)0)
 #define SIZE_T_ONE          ((size_t)1)
 #define SIZE_T_TWO          ((size_t)2)
@@ -1414,7 +1414,7 @@ static int win32munmap(void* ptr, size_t size) {
 #define CALL_MORECORE(S)     MFAIL
 #endif /* HAVE_MORECORE */
 
-/* mstate bit set if contiguous morecore disabled or failed */ 
+/* mstate bit set if contiguous morecore disabled or failed */
 #define USE_NONCONTIGUOUS_BIT (4U)
 
 /* segment bit set in create_mspace_with_base */
@@ -1666,7 +1666,7 @@ struct malloc_chunk {
 typedef struct malloc_chunk  mchunk;
 typedef struct malloc_chunk* mchunkptr;
 typedef struct malloc_chunk* sbinptr;  /* The type of bins of chunks */
-typedef size_t bindex_t;               /* Described below */ 
+typedef size_t bindex_t;               /* Described below */
 typedef unsigned int binmap_t;         /* Described below */
 typedef unsigned int flag_t;           /* The type of various bit flag sets */
 
@@ -2296,7 +2296,7 @@ static size_t traverse_and_check(mstate m);
 #define treebin_at(M,i)     (&((M)->treebins[i]))
 
 /* assign tree index for size S to variable I */
-#if defined(__GNUC__) && defined(__i386__) 
+#if defined(__GNUC__) && defined(__i386__)
 #define compute_tree_index(S, I)\
 {\
   size_t X = S >> TREEBIN_SHIFT;\
@@ -2361,7 +2361,7 @@ static size_t traverse_and_check(mstate m);
 
 /* index corresponding to given bit */
 
-#if defined(__GNUC__) && defined(__i386__) 
+#if defined(__GNUC__) && defined(__i386__)
 #define compute_bit2idx(X, I)\
 {\
   unsigned int J;\
@@ -3095,8 +3095,8 @@ static void internal_malloc_stats(mstate m) {
      and choose its bk node as its replacement.
   2. If x was the last node of its size, but not a leaf node, it must
      be replaced with a leaf node (not merely one with an open left or
-     right), to make sure that lefts and rights of descendants 
-     correspond properly to bit masks.  We use the rightmost descendant 
+     right), to make sure that lefts and rights of descendants
+     correspond properly to bit masks.  We use the rightmost descendant
      of x.  We could use any other leaf, but this is easy to locate and
      tends to counteract removal of leftmosts elsewhere, and so keeps
      paths shorter than minimally guaranteed.  This doesn't loop much
@@ -3393,7 +3393,7 @@ static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) {
   *ss = m->seg; /* Push current record */
   m->seg.base = tbase;
   m->seg.size = tsize;
-  (void)set_segment_flags(&m->seg, mmapped); 
+  (void)set_segment_flags(&m->seg, mmapped);
   m->seg.next = ss;
 
   /* Insert trailing fenceposts */
@@ -3553,7 +3553,7 @@ static void* sys_alloc(mstate m, size_t nb) {
     if (!is_initialized(m)) { /* first-time initialization */
       m->seg.base = m->least_addr = tbase;
       m->seg.size = tsize;
-      (void)set_segment_flags(&m->seg, mmap_flag); 
+      (void)set_segment_flags(&m->seg, mmap_flag);
       m->magic = mparams.magic;
       init_bins(m);
       if (is_global(m)) 
@@ -5096,10 +5096,10 @@ History:
         Wolfram Gloger (Gloger@lrz.uni-muenchen.de).
       * Use last_remainder in more cases.
       * Pack bins using idea from  colin@nyx10.cs.du.edu
-      * Use ordered bins instead of best-fit threshold 
+      * Use ordered bins instead of best-fit threshold
       * Eliminate block-local decls to simplify tracing and debugging.
       * Support another case of realloc via move into top
-      * Fix error occurring when initial sbrk_base not word-aligned. 
+      * Fix error occurring when initial sbrk_base not word-aligned.
       * Rely on page size for units instead of SBRK_UNIT to
         avoid surprises about sbrk alignment conventions.
       * Add mallinfo, mallopt. Thanks to Raymond Nijssen
diff --git a/contrib/restricted/libffi/src/java_raw_api.c b/contrib/restricted/libffi/src/java_raw_api.c
index f56e8cee07..114d3e47fc 100644
--- a/contrib/restricted/libffi/src/java_raw_api.c
+++ b/contrib/restricted/libffi/src/java_raw_api.c
@@ -39,7 +39,7 @@
 #include <ffi_common.h>
 #include <stdlib.h>
 
-#if !defined(NO_JAVA_RAW_API) 
+#if !defined(NO_JAVA_RAW_API)
 
 size_t
 ffi_java_raw_size (ffi_cif *cif)
@@ -60,9 +60,9 @@ ffi_java_raw_size (ffi_cif *cif)
 	case FFI_TYPE_STRUCT:
 	  /* No structure parameters in Java.	*/
 	  abort();
-	case FFI_TYPE_COMPLEX: 
-	  /* Not supported yet.  */ 
-	  abort(); 
+	case FFI_TYPE_COMPLEX:
+	  /* Not supported yet.  */
+	  abort();
 	default:
 	  result += FFI_SIZEOF_JAVA_RAW;
       }
@@ -107,14 +107,14 @@ ffi_java_raw_to_ptrarray (ffi_cif *cif, ffi_java_raw *raw, void **args)
 	  *args = (void*) &(raw++)->ptr;
 	  break;
 
-	case FFI_TYPE_COMPLEX: 
-	  /* Not supported yet.  */ 
-	  abort(); 
- 
+	case FFI_TYPE_COMPLEX:
+	  /* Not supported yet.  */
+	  abort();
+
 	default:
 	  *args = raw;
 	  raw +=
-	    FFI_ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw); 
+	    FFI_ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
 	}
     }
 
@@ -133,16 +133,16 @@ ffi_java_raw_to_ptrarray (ffi_cif *cif, ffi_java_raw *raw, void **args)
 	  *args = (void*) raw;
 	  raw += 2;
 	  break;
-	case FFI_TYPE_COMPLEX: 
-	  /* Not supported yet.  */ 
-	  abort(); 
+	case FFI_TYPE_COMPLEX:
+	  /* Not supported yet.  */
+	  abort();
 	default:
 	  *args = (void*) raw++;
       }
 #else /* FFI_SIZEOF_JAVA_RAW != 8 */
 	*args = (void*) raw;
 	raw +=
-	  FFI_ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw); 
+	  FFI_ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
 #endif /* FFI_SIZEOF_JAVA_RAW == 8 */
     }
 
@@ -234,7 +234,7 @@ ffi_java_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_java_raw *raw)
 #else
 	  memcpy ((void*) raw->data, (void*)*args, (*tp)->size);
 	  raw +=
-	    FFI_ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw); 
+	    FFI_ALIGN ((*tp)->size, sizeof(ffi_java_raw)) / sizeof(ffi_java_raw);
 #endif
 	}
     }
@@ -264,10 +264,10 @@ ffi_java_rvalue_to_raw (ffi_cif *cif, void *rvalue)
       *(SINT64 *)rvalue <<= 32;
       break;
 
-    case FFI_TYPE_COMPLEX: 
-      /* Not supported yet.  */ 
-      abort(); 
- 
+    case FFI_TYPE_COMPLEX:
+      /* Not supported yet.  */
+      abort();
+
     default:
       break;
     }
@@ -293,10 +293,10 @@ ffi_java_raw_to_rvalue (ffi_cif *cif, void *rvalue)
       *(SINT64 *)rvalue >>= 32;
       break;
 
-    case FFI_TYPE_COMPLEX: 
-      /* Not supported yet.  */ 
-      abort(); 
- 
+    case FFI_TYPE_COMPLEX:
+      /* Not supported yet.  */
+      abort();
+
     default:
       break;
     }
@@ -371,4 +371,4 @@ ffi_prep_java_raw_closure (ffi_java_raw_closure* cl,
 
 #endif /* FFI_CLOSURES */
 #endif /* !FFI_NATIVE_RAW_API */
-#endif /* !NO_JAVA_RAW_API */ 
+#endif /* !NO_JAVA_RAW_API */
diff --git a/contrib/restricted/libffi/src/powerpc/aix.S b/contrib/restricted/libffi/src/powerpc/aix.S
index 60544b489d..7ba541595f 100644
--- a/contrib/restricted/libffi/src/powerpc/aix.S
+++ b/contrib/restricted/libffi/src/powerpc/aix.S
@@ -1,566 +1,566 @@
-/* ----------------------------------------------------------------------- 
-   aix.S - Copyright (c) 2002, 2009 Free Software Foundation, Inc. 
-   based on darwin.S by John Hornkvist 
- 
-   PowerPC Assembly glue. 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR 
-   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
-   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
-   OTHER DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-	.set r0,0 
-	.set r1,1 
-	.set r2,2 
-	.set r3,3 
-	.set r4,4 
-	.set r5,5 
-	.set r6,6 
-	.set r7,7 
-	.set r8,8 
-	.set r9,9 
-	.set r10,10 
-	.set r11,11 
-	.set r12,12 
-	.set r13,13 
-	.set r14,14 
-	.set r15,15 
-	.set r16,16 
-	.set r17,17 
-	.set r18,18 
-	.set r19,19 
-	.set r20,20 
-	.set r21,21 
-	.set r22,22 
-	.set r23,23 
-	.set r24,24 
-	.set r25,25 
-	.set r26,26 
-	.set r27,27 
-	.set r28,28 
-	.set r29,29 
-	.set r30,30 
-	.set r31,31 
-	.set f0,0 
-	.set f1,1 
-	.set f2,2 
-	.set f3,3 
-	.set f4,4 
-	.set f5,5 
-	.set f6,6 
-	.set f7,7 
-	.set f8,8 
-	.set f9,9 
-	.set f10,10 
-	.set f11,11 
-	.set f12,12 
-	.set f13,13 
-	.set f14,14 
-	.set f15,15 
-	.set f16,16 
-	.set f17,17 
-	.set f18,18 
-	.set f19,19 
-	.set f20,20 
-	.set f21,21 
- 
-	.extern .ffi_prep_args 
- 
-#define LIBFFI_ASM 
-#include <fficonfig.h> 
-#include <ffi.h> 
-#define JUMPTARGET(name) name 
-#define L(x) x 
-	.file "aix.S" 
-	.toc 
- 
-	/* void ffi_call_AIX(extended_cif *ecif, unsigned long bytes, 
-	 *		     unsigned int flags, unsigned int *rvalue, 
-	 *		     void (*fn)(), 
-	 *		     void (*prep_args)(extended_cif*, unsigned *const)); 
-	 * r3=ecif, r4=bytes, r5=flags, r6=rvalue, r7=fn, r8=prep_args 
-	 */ 
- 
-.csect .text[PR] 
-	.align 2 
-	.globl ffi_call_AIX 
-	.globl .ffi_call_AIX 
-.csect ffi_call_AIX[DS] 
-ffi_call_AIX: 
-#ifdef __64BIT__ 
-	.llong .ffi_call_AIX, TOC[tc0], 0 
-	.csect .text[PR] 
-.ffi_call_AIX: 
-	.function .ffi_call_AIX,.ffi_call_AIX,16,044,LFE..0-LFB..0 
-	.bf __LINE__ 
-	.line 1 
-LFB..0: 
-	/* Save registers we use.  */ 
-	mflr	r0 
- 
-	std	r28,-32(r1) 
-	std	r29,-24(r1) 
-	std	r30,-16(r1) 
-	std	r31, -8(r1) 
- 
-	std	r0, 16(r1) 
-LCFI..0: 
-	mr	r28, r1		/* our AP.  */ 
-	stdux	r1, r1, r4 
-LCFI..1: 
- 
-	/* Save arguments over call...  */ 
-	mr	r31, r5	/* flags, */ 
-	mr	r30, r6	/* rvalue, */ 
-	mr	r29, r7	/* function address.  */ 
-	std	r2, 40(r1) 
- 
-	/* Call ffi_prep_args.  */ 
-	mr	r4, r1 
-	bl	.ffi_prep_args 
-	nop 
- 
-	/* Now do the call.  */ 
-	ld	r0, 0(r29) 
-	ld	r2, 8(r29) 
-	ld	r11, 16(r29) 
-	/* Set up cr1 with bits 4-7 of the flags.  */ 
-	mtcrf	0x40, r31 
-	mtctr	r0 
-	/* Load all those argument registers.  */ 
-	/* We have set up a nice stack frame, just load it into registers. */ 
-	ld	r3, 40+(1*8)(r1) 
-	ld	r4, 40+(2*8)(r1) 
-	ld	r5, 40+(3*8)(r1) 
-	ld	r6, 40+(4*8)(r1) 
-	nop 
-	ld	r7, 40+(5*8)(r1) 
-	ld	r8, 40+(6*8)(r1) 
-	ld	r9, 40+(7*8)(r1) 
-	ld	r10,40+(8*8)(r1) 
- 
-L1: 
-	/* Load all the FP registers.  */ 
-	bf	6,L2 /* 2f + 0x18 */ 
-	lfd	f1,-32-(13*8)(r28) 
-	lfd	f2,-32-(12*8)(r28) 
-	lfd	f3,-32-(11*8)(r28) 
-	lfd	f4,-32-(10*8)(r28) 
-	nop 
-	lfd	f5,-32-(9*8)(r28) 
-	lfd	f6,-32-(8*8)(r28) 
-	lfd	f7,-32-(7*8)(r28) 
-	lfd	f8,-32-(6*8)(r28) 
-	nop 
-	lfd	f9,-32-(5*8)(r28) 
-	lfd	f10,-32-(4*8)(r28) 
-	lfd	f11,-32-(3*8)(r28) 
-	lfd	f12,-32-(2*8)(r28) 
-	nop 
-	lfd	f13,-32-(1*8)(r28) 
- 
-L2: 
-	/* Make the call.  */ 
-	bctrl 
-	ld	r2, 40(r1) 
- 
-	/* Now, deal with the return value.  */ 
-	mtcrf	0x01, r31 
- 
-	bt	30, L(done_return_value) 
-	bt	29, L(fp_return_value) 
-	std	r3, 0(r30) 
- 
-	/* Fall through...  */ 
- 
-L(done_return_value): 
-	/* Restore the registers we used and return.  */ 
-	mr	r1, r28 
-	ld	r0, 16(r28) 
-	ld	r28, -32(r1) 
-	mtlr	r0 
-	ld	r29, -24(r1) 
-	ld	r30, -16(r1) 
-	ld	r31, -8(r1) 
-	blr 
- 
-L(fp_return_value): 
-	bf	28, L(float_return_value) 
-	stfd	f1, 0(r30) 
-	bf	31, L(done_return_value) 
-	stfd	f2, 8(r30) 
-	b	L(done_return_value) 
-L(float_return_value): 
-	stfs	f1, 0(r30) 
-	b	L(done_return_value) 
-LFE..0: 
-#else /* ! __64BIT__ */ 
-	 
-	.long .ffi_call_AIX, TOC[tc0], 0 
-	.csect .text[PR] 
-.ffi_call_AIX: 
-	.function .ffi_call_AIX,.ffi_call_AIX,16,044,LFE..0-LFB..0 
-	.bf __LINE__ 
-	.line 1 
-LFB..0: 
-	/* Save registers we use.  */ 
-	mflr	r0 
- 
-	stw	r28,-16(r1) 
-	stw	r29,-12(r1) 
-	stw	r30, -8(r1) 
-	stw	r31, -4(r1) 
- 
-	stw	r0, 8(r1) 
-LCFI..0: 
-	mr	r28, r1		/* out AP.  */ 
-	stwux	r1, r1, r4 
-LCFI..1: 
- 
-	/* Save arguments over call...  */ 
-	mr	r31, r5	/* flags, */ 
-	mr	r30, r6	/* rvalue, */ 
-	mr	r29, r7	/* function address, */ 
-	stw	r2, 20(r1) 
- 
-	/* Call ffi_prep_args.  */ 
-	mr	r4, r1 
-	bl	.ffi_prep_args 
-	nop 
- 
-	/* Now do the call.  */ 
-	lwz	r0, 0(r29) 
-	lwz	r2, 4(r29) 
-	lwz	r11, 8(r29) 
-	/* Set up cr1 with bits 4-7 of the flags.  */ 
-	mtcrf	0x40, r31 
-	mtctr	r0 
-	/* Load all those argument registers.  */ 
-	/* We have set up a nice stack frame, just load it into registers. */ 
-	lwz	r3, 20+(1*4)(r1) 
-	lwz	r4, 20+(2*4)(r1) 
-	lwz	r5, 20+(3*4)(r1) 
-	lwz	r6, 20+(4*4)(r1) 
-	nop 
-	lwz	r7, 20+(5*4)(r1) 
-	lwz	r8, 20+(6*4)(r1) 
-	lwz	r9, 20+(7*4)(r1) 
-	lwz	r10,20+(8*4)(r1) 
- 
-L1: 
-	/* Load all the FP registers.  */ 
-	bf	6,L2 /* 2f + 0x18 */ 
-	lfd	f1,-16-(13*8)(r28) 
-	lfd	f2,-16-(12*8)(r28) 
-	lfd	f3,-16-(11*8)(r28) 
-	lfd	f4,-16-(10*8)(r28) 
-	nop 
-	lfd	f5,-16-(9*8)(r28) 
-	lfd	f6,-16-(8*8)(r28) 
-	lfd	f7,-16-(7*8)(r28) 
-	lfd	f8,-16-(6*8)(r28) 
-	nop 
-	lfd	f9,-16-(5*8)(r28) 
-	lfd	f10,-16-(4*8)(r28) 
-	lfd	f11,-16-(3*8)(r28) 
-	lfd	f12,-16-(2*8)(r28) 
-	nop 
-	lfd	f13,-16-(1*8)(r28) 
- 
-L2: 
-	/* Make the call.  */ 
-	bctrl 
-	lwz	r2, 20(r1) 
- 
-	/* Now, deal with the return value.  */ 
-	mtcrf	0x01, r31 
- 
-	bt	30, L(done_return_value) 
-	bt	29, L(fp_return_value) 
-	stw	r3, 0(r30) 
-	bf	28, L(done_return_value) 
-	stw	r4, 4(r30) 
- 
-	/* Fall through...  */ 
- 
-L(done_return_value): 
-	/* Restore the registers we used and return.  */ 
-	mr	r1, r28 
-	lwz	r0, 8(r28) 
-	lwz	r28,-16(r1) 
-	mtlr	r0 
-	lwz	r29,-12(r1) 
-	lwz	r30, -8(r1) 
-	lwz	r31, -4(r1) 
-	blr 
- 
-L(fp_return_value): 
-	bf	28, L(float_return_value) 
-	stfd	f1, 0(r30) 
-	b	L(done_return_value) 
-L(float_return_value): 
-	stfs	f1, 0(r30) 
-	b	L(done_return_value) 
-LFE..0: 
-#endif 
-	.ef __LINE__ 
-	.long 0 
-	.byte 0,0,0,1,128,4,0,0 
-/* END(ffi_call_AIX) */ 
- 
-	/* void ffi_call_go_AIX(extended_cif *ecif, unsigned long bytes, 
-	 *		        unsigned int flags, unsigned int *rvalue, 
-	 *		        void (*fn)(), 
-	 *		        void (*prep_args)(extended_cif*, unsigned *const), 
-	 *                      void *closure); 
-	 * r3=ecif, r4=bytes, r5=flags, r6=rvalue, r7=fn, r8=prep_args, r9=closure 
-	 */ 
- 
-.csect .text[PR] 
-	.align 2 
-	.globl ffi_call_go_AIX 
-	.globl .ffi_call_go_AIX 
-.csect ffi_call_go_AIX[DS] 
-ffi_call_go_AIX: 
-#ifdef __64BIT__ 
-	.llong .ffi_call_go_AIX, TOC[tc0], 0 
-	.csect .text[PR] 
-.ffi_call_go_AIX: 
-	.function .ffi_call_go_AIX,.ffi_call_go_AIX,16,044,LFE..1-LFB..1 
-	.bf __LINE__ 
-	.line 1 
-LFB..1: 
-	/* Save registers we use.  */ 
-	mflr	r0 
- 
-	std	r28,-32(r1) 
-	std	r29,-24(r1) 
-	std	r30,-16(r1) 
-	std	r31, -8(r1) 
- 
-	std	r9, 8(r1)	/* closure, saved in cr field. */ 
-	std	r0, 16(r1) 
-LCFI..2: 
-	mr	r28, r1		/* our AP.  */ 
-	stdux	r1, r1, r4 
-LCFI..3: 
- 
-	/* Save arguments over call...  */ 
-	mr	r31, r5	/* flags, */ 
-	mr	r30, r6	/* rvalue, */ 
-	mr	r29, r7	/* function address,  */ 
-	std	r2, 40(r1) 
- 
-	/* Call ffi_prep_args.  */ 
-	mr	r4, r1 
-	bl	.ffi_prep_args 
-	nop 
- 
-	/* Now do the call.  */ 
-	ld	r0, 0(r29) 
-	ld	r2, 8(r29) 
-	ld      r11, 8(r28)	/* closure */ 
-	/* Set up cr1 with bits 4-7 of the flags.  */ 
-	mtcrf	0x40, r31 
-	mtctr	r0 
-	/* Load all those argument registers.  */ 
-	/* We have set up a nice stack frame, just load it into registers. */ 
-	ld	r3, 40+(1*8)(r1) 
-	ld	r4, 40+(2*8)(r1) 
-	ld	r5, 40+(3*8)(r1) 
-	ld	r6, 40+(4*8)(r1) 
-	nop 
-	ld	r7, 40+(5*8)(r1) 
-	ld	r8, 40+(6*8)(r1) 
-	ld	r9, 40+(7*8)(r1) 
-	ld	r10,40+(8*8)(r1) 
- 
-	b	L1 
-LFE..1: 
-#else /* ! __64BIT__ */ 
-	 
-	.long .ffi_call_go_AIX, TOC[tc0], 0 
-	.csect .text[PR] 
-.ffi_call_go_AIX: 
-	.function .ffi_call_go_AIX,.ffi_call_go_AIX,16,044,LFE..1-LFB..1 
-	.bf __LINE__ 
-	.line 1 
-	/* Save registers we use.  */ 
-LFB..1: 
-	mflr	r0 
- 
-	stw	r28,-16(r1) 
-	stw	r29,-12(r1) 
-	stw	r30, -8(r1) 
-	stw	r31, -4(r1) 
- 
-	stw	r9, 4(r1)	/* closure, saved in cr field.  */ 
-	stw	r0, 8(r1) 
-LCFI..2: 
-	mr	r28, r1		/* out AP.  */ 
-	stwux	r1, r1, r4 
-LCFI..3: 
- 
-	/* Save arguments over call...  */ 
-	mr	r31, r5	/* flags, */ 
-	mr	r30, r6	/* rvalue, */ 
-	mr	r29, r7	/* function address, */ 
-	stw	r2, 20(r1) 
- 
-	/* Call ffi_prep_args.  */ 
-	mr	r4, r1 
-	bl	.ffi_prep_args 
-	nop 
- 
-	/* Now do the call.  */ 
-	lwz	r0, 0(r29) 
-	lwz	r2, 4(r29) 
-	lwz	r11, 4(r28)	/* closure */ 
-	/* Set up cr1 with bits 4-7 of the flags.  */ 
-	mtcrf	0x40, r31 
-	mtctr	r0 
-	/* Load all those argument registers.  */ 
-	/* We have set up a nice stack frame, just load it into registers. */ 
-	lwz	r3, 20+(1*4)(r1) 
-	lwz	r4, 20+(2*4)(r1) 
-	lwz	r5, 20+(3*4)(r1) 
-	lwz	r6, 20+(4*4)(r1) 
-	nop 
-	lwz	r7, 20+(5*4)(r1) 
-	lwz	r8, 20+(6*4)(r1) 
-	lwz	r9, 20+(7*4)(r1) 
-	lwz	r10,20+(8*4)(r1) 
- 
-	b	L1 
-LFE..1: 
-#endif 
-	.ef __LINE__ 
-	.long 0 
-	.byte 0,0,0,1,128,4,0,0 
-/* END(ffi_call_go_AIX) */ 
- 
-.csect .text[PR] 
-	.align 2 
-	.globl ffi_call_DARWIN 
-	.globl .ffi_call_DARWIN 
-.csect ffi_call_DARWIN[DS] 
-ffi_call_DARWIN: 
-#ifdef __64BIT__ 
-	.llong .ffi_call_DARWIN, TOC[tc0], 0 
-#else 
-	.long .ffi_call_DARWIN, TOC[tc0], 0 
-#endif 
-	.csect .text[PR] 
-.ffi_call_DARWIN: 
-	blr 
-	.long 0 
-	.byte 0,0,0,0,0,0,0,0 
-/* END(ffi_call_DARWIN) */ 
- 
-/* EH frame stuff.  */ 
- 
-#define LR_REGNO		0x41		/* Link Register (65), see rs6000.md */ 
-#ifdef __64BIT__ 
-#define PTRSIZE			8 
-#define LOG2_PTRSIZE		3 
-#define FDE_ENCODING		0x1c		/* DW_EH_PE_pcrel|DW_EH_PE_sdata8 */ 
-#define EH_DATA_ALIGN_FACT	0x78		/* LEB128 -8 */ 
-#else 
-#define PTRSIZE			4 
-#define LOG2_PTRSIZE		2 
-#define FDE_ENCODING		0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4 */ 
-#define EH_DATA_ALIGN_FACT	0x7c		/* LEB128 -4 */ 
-#endif 
-	.csect	_unwind.ro_[RO],4 
-	.align	LOG2_PTRSIZE 
-	.globl	_GLOBAL__F_libffi_src_powerpc_aix 
-_GLOBAL__F_libffi_src_powerpc_aix: 
-Lframe..1: 
-	.vbyte	4,LECIE..1-LSCIE..1	/* CIE Length */ 
-LSCIE..1: 
-	.vbyte	4,0			/* CIE Identifier Tag */ 
-	.byte	0x3			/* CIE Version */ 
-	.byte	"zR"			/* CIE Augmentation */ 
-	.byte	0 
-	.byte	0x1			/* uleb128 0x1; CIE Code Alignment Factor */ 
-	.byte	EH_DATA_ALIGN_FACT	/* leb128 -4/-8; CIE Data Alignment Factor */ 
-	.byte	0x41			/* CIE RA Column */ 
-	.byte	0x1			/* uleb128 0x1; Augmentation size */ 
-	.byte	FDE_ENCODING		/* FDE Encoding (pcrel|sdata4/8) */ 
-	.byte	0xc			/* DW_CFA_def_cfa */ 
-	.byte	0x1			/*     uleb128 0x1; Register r1 */ 
-	.byte	0			/*     uleb128 0x0; Offset 0 */ 
-	.align	LOG2_PTRSIZE 
-LECIE..1: 
-LSFDE..1: 
-	.vbyte	4,LEFDE..1-LASFDE..1	/* FDE Length */ 
-LASFDE..1: 
-	.vbyte	4,LASFDE..1-Lframe..1	/* FDE CIE offset */ 
-	.vbyte	PTRSIZE,LFB..0-$	/* FDE initial location */ 
-	.vbyte	PTRSIZE,LFE..0-LFB..0	/* FDE address range */ 
-	.byte   0			/* uleb128 0x0; Augmentation size */ 
-	.byte	0x4			/* DW_CFA_advance_loc4 */ 
-	.vbyte	4,LCFI..0-LFB..0 
-	.byte	0x11			/* DW_CFA_def_offset_extended_sf */ 
-	.byte	LR_REGNO		/*     uleb128 LR_REGNO; Register LR */ 
-	.byte	0x7e			/*     leb128 -2; Offset -2 (8/16) */ 
-	.byte	0x9f			/* DW_CFA_offset Register r31 */ 
-	.byte	0x1			/*     uleb128 0x1; Offset 1 (-4/-8) */ 
-	.byte	0x9e			/* DW_CFA_offset Register r30 */ 
-	.byte	0x2			/*     uleb128 0x2; Offset 2 (-8/-16) */ 
-	.byte	0x9d			/* DW_CFA_offset Register r29 */ 
-	.byte	0x3			/*     uleb128 0x3; Offset 3 (-12/-24) */ 
-	.byte	0x9c			/* DW_CFA_offset Register r28 */ 
-	.byte	0x4			/*     uleb128 0x4; Offset 4 (-16/-32) */ 
-	.byte	0x4			/* DW_CFA_advance_loc4 */ 
-	.vbyte	4,LCFI..1-LCFI..0 
-	.byte	0xd			/* DW_CFA_def_cfa_register */ 
-	.byte	0x1c			/*     uleb128 28; Register r28 */ 
-	.align	LOG2_PTRSIZE 
-LEFDE..1: 
-LSFDE..2: 
-	.vbyte	4,LEFDE..2-LASFDE..2	/* FDE Length */ 
-LASFDE..2: 
-	.vbyte	4,LASFDE..2-Lframe..1	/* FDE CIE offset */ 
-	.vbyte	PTRSIZE,LFB..1-$	/* FDE initial location */ 
-	.vbyte	PTRSIZE,LFE..1-LFB..1	/* FDE address range */ 
-	.byte   0			/* uleb128 0x0; Augmentation size */ 
-	.byte	0x4			/* DW_CFA_advance_loc4 */ 
-	.vbyte	4,LCFI..2-LFB..1 
-	.byte	0x11			/* DW_CFA_def_offset_extended_sf */ 
-	.byte	LR_REGNO		/*     uleb128 LR_REGNO; Register LR */ 
-	.byte	0x7e			/*     leb128 -2; Offset -2 (8/16) */ 
-	.byte	0x9f			/* DW_CFA_offset Register r31 */ 
-	.byte	0x1			/*     uleb128 0x1; Offset 1 (-4/-8) */ 
-	.byte	0x9e			/* DW_CFA_offset Register r30 */ 
-	.byte	0x2			/*     uleb128 0x2; Offset 2 (-8/-16) */ 
-	.byte	0x9d			/* DW_CFA_offset Register r29 */ 
-	.byte	0x3			/*     uleb128 0x3; Offset 3 (-12/-24) */ 
-	.byte	0x9c			/* DW_CFA_offset Register r28 */ 
-	.byte	0x4			/*     uleb128 0x4; Offset 4 (-16/-32) */ 
-	.byte	0x4			/* DW_CFA_advance_loc4 */ 
-	.vbyte	4,LCFI..3-LCFI..2 
-	.byte	0xd			/* DW_CFA_def_cfa_register */ 
-	.byte	0x1c			/*     uleb128 28; Register r28 */ 
-	.align	LOG2_PTRSIZE 
-LEFDE..2: 
-	.vbyte	4,0			/* End of FDEs */ 
- 
-	.csect	.text[PR] 
-	.ref	_GLOBAL__F_libffi_src_powerpc_aix	/* Prevents garbage collection by AIX linker */ 
- 
+/* -----------------------------------------------------------------------
+   aix.S - Copyright (c) 2002, 2009 Free Software Foundation, Inc.
+   based on darwin.S by John Hornkvist
+
+   PowerPC Assembly glue.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+	.set r0,0
+	.set r1,1
+	.set r2,2
+	.set r3,3
+	.set r4,4
+	.set r5,5
+	.set r6,6
+	.set r7,7
+	.set r8,8
+	.set r9,9
+	.set r10,10
+	.set r11,11
+	.set r12,12
+	.set r13,13
+	.set r14,14
+	.set r15,15
+	.set r16,16
+	.set r17,17
+	.set r18,18
+	.set r19,19
+	.set r20,20
+	.set r21,21
+	.set r22,22
+	.set r23,23
+	.set r24,24
+	.set r25,25
+	.set r26,26
+	.set r27,27
+	.set r28,28
+	.set r29,29
+	.set r30,30
+	.set r31,31
+	.set f0,0
+	.set f1,1
+	.set f2,2
+	.set f3,3
+	.set f4,4
+	.set f5,5
+	.set f6,6
+	.set f7,7
+	.set f8,8
+	.set f9,9
+	.set f10,10
+	.set f11,11
+	.set f12,12
+	.set f13,13
+	.set f14,14
+	.set f15,15
+	.set f16,16
+	.set f17,17
+	.set f18,18
+	.set f19,19
+	.set f20,20
+	.set f21,21
+
+	.extern .ffi_prep_args
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#define JUMPTARGET(name) name
+#define L(x) x
+	.file "aix.S"
+	.toc
+
+	/* void ffi_call_AIX(extended_cif *ecif, unsigned long bytes,
+	 *		     unsigned int flags, unsigned int *rvalue,
+	 *		     void (*fn)(),
+	 *		     void (*prep_args)(extended_cif*, unsigned *const));
+	 * r3=ecif, r4=bytes, r5=flags, r6=rvalue, r7=fn, r8=prep_args
+	 * (r8 is never read below; .ffi_prep_args is called directly.)  */
+
+.csect .text[PR]
+	.align 2
+	.globl ffi_call_AIX
+	.globl .ffi_call_AIX
+.csect ffi_call_AIX[DS]
+ffi_call_AIX:
+#ifdef __64BIT__
+	.llong .ffi_call_AIX, TOC[tc0], 0
+	.csect .text[PR]
+.ffi_call_AIX:
+	.function .ffi_call_AIX,.ffi_call_AIX,16,044,LFE..0-LFB..0
+	.bf __LINE__
+	.line 1
+LFB..0:
+	/* Save registers we use.  */
+	mflr	r0
+
+	std	r28,-32(r1)
+	std	r29,-24(r1)
+	std	r30,-16(r1)
+	std	r31, -8(r1)
+
+	std	r0, 16(r1)
+LCFI..0:
+	mr	r28, r1		/* our AP.  */
+	stdux	r1, r1, r4	/* r1 += r4; the old r1 is stored at the new r1.  */
+LCFI..1:
+
+	/* Save arguments over call...  */
+	mr	r31, r5	/* flags, */
+	mr	r30, r6	/* rvalue, */
+	mr	r29, r7	/* function address.  */
+	std	r2, 40(r1)	/* our r2; reloaded right after bctrl.  */
+
+	/* Call ffi_prep_args.  */
+	mr	r4, r1	/* 2nd argument = new stack pointer; r3 still holds ecif.  */
+	bl	.ffi_prep_args
+	nop
+
+	/* Now do the call: fn is read as three words (target, r2, r11).  */
+	ld	r0, 0(r29)
+	ld	r2, 8(r29)
+	ld	r11, 16(r29)
+	/* Set up cr1 with bits 4-7 of the flags.  */
+	mtcrf	0x40, r31
+	mtctr	r0
+	/* Load all those argument registers.  */
+	/* We have set up a nice stack frame, just load it into registers. */
+	ld	r3, 40+(1*8)(r1)
+	ld	r4, 40+(2*8)(r1)
+	ld	r5, 40+(3*8)(r1)
+	ld	r6, 40+(4*8)(r1)
+	nop
+	ld	r7, 40+(5*8)(r1)
+	ld	r8, 40+(6*8)(r1)
+	ld	r9, 40+(7*8)(r1)
+	ld	r10,40+(8*8)(r1)
+
+L1:
+	/* Load all the FP registers (ffi_call_go_AIX also branches to L1).  */
+	bf	6,L2 /* 2f + 0x18 */
+	lfd	f1,-32-(13*8)(r28)
+	lfd	f2,-32-(12*8)(r28)
+	lfd	f3,-32-(11*8)(r28)
+	lfd	f4,-32-(10*8)(r28)
+	nop
+	lfd	f5,-32-(9*8)(r28)
+	lfd	f6,-32-(8*8)(r28)
+	lfd	f7,-32-(7*8)(r28)
+	lfd	f8,-32-(6*8)(r28)
+	nop
+	lfd	f9,-32-(5*8)(r28)
+	lfd	f10,-32-(4*8)(r28)
+	lfd	f11,-32-(3*8)(r28)
+	lfd	f12,-32-(2*8)(r28)
+	nop
+	lfd	f13,-32-(1*8)(r28)
+
+L2:
+	/* Make the call.  */
+	bctrl
+	ld	r2, 40(r1)
+
+	/* Now, deal with the return value.  */
+	mtcrf	0x01, r31	/* low 4 bits of flags -> cr bits 28-31.  */
+
+	bt	30, L(done_return_value)	/* bit 30 set: store nothing.  */
+	bt	29, L(fp_return_value)
+	std	r3, 0(r30)
+
+	/* Fall through...  */
+
+L(done_return_value):
+	/* Restore the registers we used and return.  */
+	mr	r1, r28
+	ld	r0, 16(r28)
+	ld	r28, -32(r1)
+	mtlr	r0
+	ld	r29, -24(r1)
+	ld	r30, -16(r1)
+	ld	r31, -8(r1)
+	blr
+
+L(fp_return_value):
+	bf	28, L(float_return_value)	/* bit 28 clear: store f1 as a single.  */
+	stfd	f1, 0(r30)
+	bf	31, L(done_return_value)	/* bit 31 set: f2 is stored as well.  */
+	stfd	f2, 8(r30)
+	b	L(done_return_value)
+L(float_return_value):
+	stfs	f1, 0(r30)
+	b	L(done_return_value)
+LFE..0:
+#else /* ! __64BIT__ */
+	
+	.long .ffi_call_AIX, TOC[tc0], 0
+	.csect .text[PR]
+.ffi_call_AIX:
+	.function .ffi_call_AIX,.ffi_call_AIX,16,044,LFE..0-LFB..0
+	.bf __LINE__
+	.line 1
+LFB..0:
+	/* Save registers we use.  */
+	mflr	r0
+
+	stw	r28,-16(r1)
+	stw	r29,-12(r1)
+	stw	r30, -8(r1)
+	stw	r31, -4(r1)
+
+	stw	r0, 8(r1)
+LCFI..0:
+	mr	r28, r1		/* our AP.  */
+	stwux	r1, r1, r4	/* r1 += r4; the old r1 is stored at the new r1.  */
+LCFI..1:
+
+	/* Save arguments over call...  */
+	mr	r31, r5	/* flags, */
+	mr	r30, r6	/* rvalue, */
+	mr	r29, r7	/* function address, */
+	stw	r2, 20(r1)	/* our r2; reloaded right after bctrl.  */
+
+	/* Call ffi_prep_args.  */
+	mr	r4, r1	/* 2nd argument = new stack pointer; r3 still holds ecif.  */
+	bl	.ffi_prep_args
+	nop
+
+	/* Now do the call: fn is read as three words (target, r2, r11).  */
+	lwz	r0, 0(r29)
+	lwz	r2, 4(r29)
+	lwz	r11, 8(r29)
+	/* Set up cr1 with bits 4-7 of the flags.  */
+	mtcrf	0x40, r31
+	mtctr	r0
+	/* Load all those argument registers.  */
+	/* We have set up a nice stack frame, just load it into registers. */
+	lwz	r3, 20+(1*4)(r1)
+	lwz	r4, 20+(2*4)(r1)
+	lwz	r5, 20+(3*4)(r1)
+	lwz	r6, 20+(4*4)(r1)
+	nop
+	lwz	r7, 20+(5*4)(r1)
+	lwz	r8, 20+(6*4)(r1)
+	lwz	r9, 20+(7*4)(r1)
+	lwz	r10,20+(8*4)(r1)
+
+L1:
+	/* Load all the FP registers (ffi_call_go_AIX also branches to L1).  */
+	bf	6,L2 /* 2f + 0x18 */
+	lfd	f1,-16-(13*8)(r28)
+	lfd	f2,-16-(12*8)(r28)
+	lfd	f3,-16-(11*8)(r28)
+	lfd	f4,-16-(10*8)(r28)
+	nop
+	lfd	f5,-16-(9*8)(r28)
+	lfd	f6,-16-(8*8)(r28)
+	lfd	f7,-16-(7*8)(r28)
+	lfd	f8,-16-(6*8)(r28)
+	nop
+	lfd	f9,-16-(5*8)(r28)
+	lfd	f10,-16-(4*8)(r28)
+	lfd	f11,-16-(3*8)(r28)
+	lfd	f12,-16-(2*8)(r28)
+	nop
+	lfd	f13,-16-(1*8)(r28)
+
+L2:
+	/* Make the call.  */
+	bctrl
+	lwz	r2, 20(r1)
+
+	/* Now, deal with the return value.  */
+	mtcrf	0x01, r31	/* low 4 bits of flags -> cr bits 28-31.  */
+
+	bt	30, L(done_return_value)	/* bit 30 set: store nothing.  */
+	bt	29, L(fp_return_value)
+	stw	r3, 0(r30)
+	bf	28, L(done_return_value)	/* bit 28 set: r4 is stored as well.  */
+	stw	r4, 4(r30)
+
+	/* Fall through...  */
+
+L(done_return_value):
+	/* Restore the registers we used and return.  */
+	mr	r1, r28
+	lwz	r0, 8(r28)
+	lwz	r28,-16(r1)
+	mtlr	r0
+	lwz	r29,-12(r1)
+	lwz	r30, -8(r1)
+	lwz	r31, -4(r1)
+	blr
+
+L(fp_return_value):
+	bf	28, L(float_return_value)	/* bit 28 clear: store f1 as a single.  */
+	stfd	f1, 0(r30)
+	b	L(done_return_value)
+L(float_return_value):
+	stfs	f1, 0(r30)
+	b	L(done_return_value)
+LFE..0:
+#endif
+	.ef __LINE__
+	.long 0
+	.byte 0,0,0,1,128,4,0,0
+/* END(ffi_call_AIX) */
+
+	/* void ffi_call_go_AIX(extended_cif *ecif, unsigned long bytes,
+	 *		        unsigned int flags, unsigned int *rvalue,
+	 *		        void (*fn)(),
+	 *		        void (*prep_args)(extended_cif*, unsigned *const),
+	 *                      void *closure);
+	 * r3=ecif, r4=bytes, r5=flags, r6=rvalue, r7=fn, r8=prep_args, r9=closure
+	 * Loads r11 from the saved closure, then continues at L1 in ffi_call_AIX.  */
+
+.csect .text[PR]
+	.align 2
+	.globl ffi_call_go_AIX
+	.globl .ffi_call_go_AIX
+.csect ffi_call_go_AIX[DS]
+ffi_call_go_AIX:
+#ifdef __64BIT__
+	.llong .ffi_call_go_AIX, TOC[tc0], 0
+	.csect .text[PR]
+.ffi_call_go_AIX:
+	.function .ffi_call_go_AIX,.ffi_call_go_AIX,16,044,LFE..1-LFB..1
+	.bf __LINE__
+	.line 1
+LFB..1:
+	/* Save registers we use.  */
+	mflr	r0
+
+	std	r28,-32(r1)
+	std	r29,-24(r1)
+	std	r30,-16(r1)
+	std	r31, -8(r1)
+
+	std	r9, 8(r1)	/* closure, saved in cr field. */
+	std	r0, 16(r1)
+LCFI..2:
+	mr	r28, r1		/* our AP.  */
+	stdux	r1, r1, r4	/* r1 += r4; the old r1 is stored at the new r1.  */
+LCFI..3:
+
+	/* Save arguments over call...  */
+	mr	r31, r5	/* flags, */
+	mr	r30, r6	/* rvalue, */
+	mr	r29, r7	/* function address,  */
+	std	r2, 40(r1)
+
+	/* Call ffi_prep_args.  */
+	mr	r4, r1	/* 2nd argument = new stack pointer; r3 still holds ecif.  */
+	bl	.ffi_prep_args
+	nop
+
+	/* Now do the call.  */
+	ld	r0, 0(r29)
+	ld	r2, 8(r29)
+	ld      r11, 8(r28)	/* closure */
+	/* Set up cr1 with bits 4-7 of the flags.  */
+	mtcrf	0x40, r31
+	mtctr	r0
+	/* Load all those argument registers.  */
+	/* We have set up a nice stack frame, just load it into registers. */
+	ld	r3, 40+(1*8)(r1)
+	ld	r4, 40+(2*8)(r1)
+	ld	r5, 40+(3*8)(r1)
+	ld	r6, 40+(4*8)(r1)
+	nop
+	ld	r7, 40+(5*8)(r1)
+	ld	r8, 40+(6*8)(r1)
+	ld	r9, 40+(7*8)(r1)
+	ld	r10,40+(8*8)(r1)
+
+	b	L1	/* FP loads, call and return handling are shared with ffi_call_AIX.  */
+LFE..1:
+#else /* ! __64BIT__ */
+	
+	.long .ffi_call_go_AIX, TOC[tc0], 0
+	.csect .text[PR]
+.ffi_call_go_AIX:
+	.function .ffi_call_go_AIX,.ffi_call_go_AIX,16,044,LFE..1-LFB..1
+	.bf __LINE__
+	.line 1
+	/* Save registers we use.  */
+LFB..1:
+	mflr	r0
+
+	stw	r28,-16(r1)
+	stw	r29,-12(r1)
+	stw	r30, -8(r1)
+	stw	r31, -4(r1)
+
+	stw	r9, 4(r1)	/* closure, saved in cr field.  */
+	stw	r0, 8(r1)
+LCFI..2:
+	mr	r28, r1		/* our AP.  */
+	stwux	r1, r1, r4	/* r1 += r4; the old r1 is stored at the new r1.  */
+LCFI..3:
+
+	/* Save arguments over call...  */
+	mr	r31, r5	/* flags, */
+	mr	r30, r6	/* rvalue, */
+	mr	r29, r7	/* function address, */
+	stw	r2, 20(r1)
+
+	/* Call ffi_prep_args.  */
+	mr	r4, r1	/* 2nd argument = new stack pointer; r3 still holds ecif.  */
+	bl	.ffi_prep_args
+	nop
+
+	/* Now do the call.  */
+	lwz	r0, 0(r29)
+	lwz	r2, 4(r29)
+	lwz	r11, 4(r28)	/* closure */
+	/* Set up cr1 with bits 4-7 of the flags.  */
+	mtcrf	0x40, r31
+	mtctr	r0
+	/* Load all those argument registers.  */
+	/* We have set up a nice stack frame, just load it into registers. */
+	lwz	r3, 20+(1*4)(r1)
+	lwz	r4, 20+(2*4)(r1)
+	lwz	r5, 20+(3*4)(r1)
+	lwz	r6, 20+(4*4)(r1)
+	nop
+	lwz	r7, 20+(5*4)(r1)
+	lwz	r8, 20+(6*4)(r1)
+	lwz	r9, 20+(7*4)(r1)
+	lwz	r10,20+(8*4)(r1)
+
+	b	L1	/* FP loads, call and return handling are shared with ffi_call_AIX.  */
+LFE..1:
+#endif
+	.ef __LINE__
+	.long 0
+	.byte 0,0,0,1,128,4,0,0
+/* END(ffi_call_go_AIX) */
+
+.csect .text[PR]
+	.align 2
+	.globl ffi_call_DARWIN
+	.globl .ffi_call_DARWIN
+.csect ffi_call_DARWIN[DS]
+ffi_call_DARWIN:
+#ifdef __64BIT__
+	.llong .ffi_call_DARWIN, TOC[tc0], 0
+#else
+	.long .ffi_call_DARWIN, TOC[tc0], 0
+#endif
+	.csect .text[PR]
+.ffi_call_DARWIN:
+	blr	/* Stub: the whole body is this return; nothing is called.  */
+	.long 0
+	.byte 0,0,0,0,0,0,0,0
+/* END(ffi_call_DARWIN) */
+
+/* EH frame stuff: one CIE, then one FDE each for LFB..0 and LFB..1.  */
+
+#define LR_REGNO		0x41		/* Link Register (65), see rs6000.md */
+#ifdef __64BIT__
+#define PTRSIZE			8
+#define LOG2_PTRSIZE		3
+#define FDE_ENCODING		0x1c		/* DW_EH_PE_pcrel|DW_EH_PE_sdata8 */
+#define EH_DATA_ALIGN_FACT	0x78		/* LEB128 -8 */
+#else
+#define PTRSIZE			4
+#define LOG2_PTRSIZE		2
+#define FDE_ENCODING		0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4 */
+#define EH_DATA_ALIGN_FACT	0x7c		/* LEB128 -4 */
+#endif
+	.csect	_unwind.ro_[RO],4
+	.align	LOG2_PTRSIZE
+	.globl	_GLOBAL__F_libffi_src_powerpc_aix
+_GLOBAL__F_libffi_src_powerpc_aix:
+Lframe..1:
+	.vbyte	4,LECIE..1-LSCIE..1	/* CIE Length */
+LSCIE..1:
+	.vbyte	4,0			/* CIE Identifier Tag */
+	.byte	0x3			/* CIE Version */
+	.byte	"zR"			/* CIE Augmentation */
+	.byte	0
+	.byte	0x1			/* uleb128 0x1; CIE Code Alignment Factor */
+	.byte	EH_DATA_ALIGN_FACT	/* leb128 -4/-8; CIE Data Alignment Factor */
+	.byte	0x41			/* CIE RA Column (same value as LR_REGNO) */
+	.byte	0x1			/* uleb128 0x1; Augmentation size */
+	.byte	FDE_ENCODING		/* FDE Encoding (pcrel|sdata4/8) */
+	.byte	0xc			/* DW_CFA_def_cfa */
+	.byte	0x1			/*     uleb128 0x1; Register r1 */
+	.byte	0			/*     uleb128 0x0; Offset 0 */
+	.align	LOG2_PTRSIZE
+LECIE..1:
+LSFDE..1:
+	.vbyte	4,LEFDE..1-LASFDE..1	/* FDE Length (FDE for LFB..0 to LFE..0) */
+LASFDE..1:
+	.vbyte	4,LASFDE..1-Lframe..1	/* FDE CIE offset */
+	.vbyte	PTRSIZE,LFB..0-$	/* FDE initial location */
+	.vbyte	PTRSIZE,LFE..0-LFB..0	/* FDE address range */
+	.byte   0			/* uleb128 0x0; Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.vbyte	4,LCFI..0-LFB..0
+	.byte	0x11			/* DW_CFA_offset_extended_sf */
+	.byte	LR_REGNO		/*     uleb128 LR_REGNO; Register LR */
+	.byte	0x7e			/*     leb128 -2; Offset -2 (8/16) */
+	.byte	0x9f			/* DW_CFA_offset Register r31 */
+	.byte	0x1			/*     uleb128 0x1; Offset 1 (-4/-8) */
+	.byte	0x9e			/* DW_CFA_offset Register r30 */
+	.byte	0x2			/*     uleb128 0x2; Offset 2 (-8/-16) */
+	.byte	0x9d			/* DW_CFA_offset Register r29 */
+	.byte	0x3			/*     uleb128 0x3; Offset 3 (-12/-24) */
+	.byte	0x9c			/* DW_CFA_offset Register r28 */
+	.byte	0x4			/*     uleb128 0x4; Offset 4 (-16/-32) */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.vbyte	4,LCFI..1-LCFI..0
+	.byte	0xd			/* DW_CFA_def_cfa_register */
+	.byte	0x1c			/*     uleb128 28; Register r28 */
+	.align	LOG2_PTRSIZE
+LEFDE..1:
+LSFDE..2:
+	.vbyte	4,LEFDE..2-LASFDE..2	/* FDE Length (FDE for LFB..1 to LFE..1) */
+LASFDE..2:
+	.vbyte	4,LASFDE..2-Lframe..1	/* FDE CIE offset */
+	.vbyte	PTRSIZE,LFB..1-$	/* FDE initial location */
+	.vbyte	PTRSIZE,LFE..1-LFB..1	/* FDE address range */
+	.byte   0			/* uleb128 0x0; Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.vbyte	4,LCFI..2-LFB..1
+	.byte	0x11			/* DW_CFA_offset_extended_sf */
+	.byte	LR_REGNO		/*     uleb128 LR_REGNO; Register LR */
+	.byte	0x7e			/*     leb128 -2; Offset -2 (8/16) */
+	.byte	0x9f			/* DW_CFA_offset Register r31 */
+	.byte	0x1			/*     uleb128 0x1; Offset 1 (-4/-8) */
+	.byte	0x9e			/* DW_CFA_offset Register r30 */
+	.byte	0x2			/*     uleb128 0x2; Offset 2 (-8/-16) */
+	.byte	0x9d			/* DW_CFA_offset Register r29 */
+	.byte	0x3			/*     uleb128 0x3; Offset 3 (-12/-24) */
+	.byte	0x9c			/* DW_CFA_offset Register r28 */
+	.byte	0x4			/*     uleb128 0x4; Offset 4 (-16/-32) */
+	.byte	0x4			/* DW_CFA_advance_loc4 */
+	.vbyte	4,LCFI..3-LCFI..2
+	.byte	0xd			/* DW_CFA_def_cfa_register */
+	.byte	0x1c			/*     uleb128 28; Register r28 */
+	.align	LOG2_PTRSIZE
+LEFDE..2:
+	.vbyte	4,0			/* End of FDEs */
+
+	.csect	.text[PR]
+	.ref	_GLOBAL__F_libffi_src_powerpc_aix	/* Prevents garbage collection by AIX linker */
+
diff --git a/contrib/restricted/libffi/src/powerpc/aix_closure.S b/contrib/restricted/libffi/src/powerpc/aix_closure.S
index be836056e6..132c785edd 100644
--- a/contrib/restricted/libffi/src/powerpc/aix_closure.S
+++ b/contrib/restricted/libffi/src/powerpc/aix_closure.S
@@ -1,694 +1,694 @@
-/* ----------------------------------------------------------------------- 
-   aix_closure.S - Copyright (c) 2002, 2003, 2009 Free Software Foundation, Inc. 
-   based on darwin_closure.S 
- 
-   PowerPC Assembly glue. 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR 
-   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
-   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
-   OTHER DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-	.set r0,0 
-	.set r1,1 
-	.set r2,2 
-	.set r3,3 
-	.set r4,4 
-	.set r5,5 
-	.set r6,6 
-	.set r7,7 
-	.set r8,8 
-	.set r9,9 
-	.set r10,10 
-	.set r11,11 
-	.set r12,12 
-	.set r13,13 
-	.set r14,14 
-	.set r15,15 
-	.set r16,16 
-	.set r17,17 
-	.set r18,18 
-	.set r19,19 
-	.set r20,20 
-	.set r21,21 
-	.set r22,22 
-	.set r23,23 
-	.set r24,24 
-	.set r25,25 
-	.set r26,26 
-	.set r27,27 
-	.set r28,28 
-	.set r29,29 
-	.set r30,30 
-	.set r31,31 
-	.set f0,0 
-	.set f1,1 
-	.set f2,2 
-	.set f3,3 
-	.set f4,4 
-	.set f5,5 
-	.set f6,6 
-	.set f7,7 
-	.set f8,8 
-	.set f9,9 
-	.set f10,10 
-	.set f11,11 
-	.set f12,12 
-	.set f13,13 
-	.set f14,14 
-	.set f15,15 
-	.set f16,16 
-	.set f17,17 
-	.set f18,18 
-	.set f19,19 
-	.set f20,20 
-	.set f21,21 
- 
-	.extern .ffi_closure_helper_DARWIN 
-	.extern .ffi_go_closure_helper_DARWIN 
- 
-#define LIBFFI_ASM 
-#define JUMPTARGET(name) name 
-#define L(x) x 
-	.file "aix_closure.S" 
-	.toc 
-LC..60: 
-	.tc L..60[TC],L..60 
-	.csect .text[PR] 
-	.align 2 
- 
-.csect .text[PR] 
-	.align 2 
-	.globl ffi_closure_ASM 
-	.globl .ffi_closure_ASM 
-.csect ffi_closure_ASM[DS] 
-ffi_closure_ASM: 
-#ifdef __64BIT__ 
-	.llong .ffi_closure_ASM, TOC[tc0], 0 
-	.csect .text[PR] 
-.ffi_closure_ASM: 
-	.function .ffi_closure_ASM,.ffi_closure_ASM,16,044,LFE..0-LFB..0 
-	.bf __LINE__ 
-	.line 1 
-LFB..0: 
-/* we want to build up an area for the parameters passed */ 
-/* in registers (both floating point and integer) */ 
- 
-	/* we store gpr 3 to gpr 10 (aligned to 4) 
-	in the parents outgoing area  */ 
-	std   r3, 48+(0*8)(r1) 
-	std   r4, 48+(1*8)(r1) 
-	std   r5, 48+(2*8)(r1) 
-	std   r6, 48+(3*8)(r1) 
-	mflr  r0 
- 
-	std   r7, 48+(4*8)(r1) 
-	std   r8, 48+(5*8)(r1) 
-	std   r9, 48+(6*8)(r1) 
-	std   r10, 48+(7*8)(r1) 
-	std   r0, 16(r1)	/* save the return address */ 
-LCFI..0: 
-	/* 48  Bytes (Linkage Area) */ 
-	/* 64  Bytes (params) */ 
-	/* 16  Bytes (result) */ 
-	/* 104 Bytes (13*8 from FPR) */ 
-	/* 8   Bytes (alignment) */ 
-	/* 240 Bytes */ 
- 
-	stdu  r1, -240(r1)	/* skip over caller save area 
-				   keep stack aligned to 16  */ 
-LCFI..1: 
- 
-	/* next save fpr 1 to fpr 13 (aligned to 8) */ 
-	stfd  f1, 128+(0*8)(r1) 
-	stfd  f2, 128+(1*8)(r1) 
-	stfd  f3, 128+(2*8)(r1) 
-	stfd  f4, 128+(3*8)(r1) 
-	stfd  f5, 128+(4*8)(r1) 
-	stfd  f6, 128+(5*8)(r1) 
-	stfd  f7, 128+(6*8)(r1) 
-	stfd  f8, 128+(7*8)(r1) 
-	stfd  f9, 128+(8*8)(r1) 
-	stfd  f10, 128+(9*8)(r1) 
-	stfd  f11, 128+(10*8)(r1) 
-	stfd  f12, 128+(11*8)(r1) 
-	stfd  f13, 128+(12*8)(r1) 
- 
-	/* set up registers for the routine that actually does the work */ 
-	/* get the context pointer from the trampoline */ 
-	mr r3, r11 
- 
-	/* now load up the pointer to the result storage */ 
-	addi r4, r1, 112 
- 
-	/* now load up the pointer to the saved gpr registers */ 
-	addi r5, r1, 288 
- 
-	/* now load up the pointer to the saved fpr registers */ 
-	addi r6, r1, 128 
- 
-	/* make the call */ 
-	bl .ffi_closure_helper_DARWIN 
-	nop 
- 
-.Ldoneclosure: 
- 
-	/* now r3 contains the return type */ 
-	/* so use it to look up in a table */ 
-	/* so we know how to deal with each type */ 
- 
-	/* look up the proper starting point in table  */ 
-	/* by using return type as offset */ 
-	lhz	r3, 10(r3)	/* load type from return type */ 
-	ld	r4, LC..60(2)	/* get address of jump table */ 
-	sldi	r3, r3, 4	/* now multiply return type by 16 */ 
-	ld	r0, 240+16(r1)	/* load return address */ 
-	add	r3, r3, r4	/* add contents of table to table address */ 
-	mtctr	r3 
-	bctr			/* jump to it */ 
- 
-/* Each fragment must be exactly 16 bytes long (4 instructions). 
-   Align to 16 byte boundary for cache and dispatch efficiency.  */ 
-	.align 4 
- 
-L..60: 
-/* case FFI_TYPE_VOID */ 
-	mtlr r0 
-	addi r1, r1, 240 
-	blr 
-	nop 
- 
-/* case FFI_TYPE_INT */ 
-	lwa r3, 112+4(r1) 
-	mtlr r0 
-	addi r1, r1, 240 
-	blr 
- 
-/* case FFI_TYPE_FLOAT */ 
-	lfs f1, 112+0(r1) 
-	mtlr r0 
-	addi r1, r1, 240 
-	blr 
- 
-/* case FFI_TYPE_DOUBLE */ 
-	lfd f1, 112+0(r1) 
-	mtlr r0 
-	addi r1, r1, 240 
-	blr 
- 
-/* case FFI_TYPE_LONGDOUBLE */ 
-	lfd f1, 112+0(r1) 
-	mtlr r0 
-	lfd f2, 112+8(r1) 
-	b L..finish 
- 
-/* case FFI_TYPE_UINT8 */ 
-	lbz r3, 112+7(r1) 
-	mtlr r0 
-	addi r1, r1, 240 
-	blr 
- 
-/* case FFI_TYPE_SINT8 */ 
-	lbz r3, 112+7(r1) 
-	mtlr r0 
-	extsb r3, r3 
-	b L..finish 
- 
-/* case FFI_TYPE_UINT16 */ 
-	lhz r3, 112+6(r1) 
-	mtlr r0 
-L..finish: 
-	addi r1, r1, 240 
-	blr 
- 
-/* case FFI_TYPE_SINT16 */ 
-	lha r3, 112+6(r1) 
-	mtlr r0 
-	addi r1, r1, 240 
-	blr 
- 
-/* case FFI_TYPE_UINT32 */ 
-	lwz r3, 112+4(r1) 
-	mtlr r0 
-	addi r1, r1, 240 
-	blr 
- 
-/* case FFI_TYPE_SINT32 */ 
-	lwa r3, 112+4(r1) 
-	mtlr r0 
-	addi r1, r1, 240 
-	blr 
- 
-/* case FFI_TYPE_UINT64 */ 
-	ld r3, 112+0(r1) 
-	mtlr r0 
-	addi r1, r1, 240 
-	blr 
- 
-/* case FFI_TYPE_SINT64 */ 
-	ld r3, 112+0(r1) 
-	mtlr r0 
-	addi r1, r1, 240 
-	blr 
- 
-/* case FFI_TYPE_STRUCT */ 
-	mtlr r0 
-	addi r1, r1, 240 
-	blr 
-	nop 
- 
-/* case FFI_TYPE_POINTER */ 
-	ld r3, 112+0(r1) 
-	mtlr r0 
-	addi r1, r1, 240 
-	blr 
-LFE..0: 
- 
-#else /* ! __64BIT__ */ 
-	 
-	.long .ffi_closure_ASM, TOC[tc0], 0 
-	.csect .text[PR] 
-.ffi_closure_ASM: 
-	.function .ffi_closure_ASM,.ffi_closure_ASM,16,044,LFE..0-LFB..0 
-	.bf __LINE__ 
-	.line 1 
-LFB..0: 
-/* we want to build up an area for the parameters passed */ 
-/* in registers (both floating point and integer) */ 
- 
-	/* we store gpr 3 to gpr 10 (aligned to 4) 
-	in the parents outgoing area  */ 
-	stw   r3, 24+(0*4)(r1) 
-	stw   r4, 24+(1*4)(r1) 
-	stw   r5, 24+(2*4)(r1) 
-	stw   r6, 24+(3*4)(r1) 
-	mflr  r0 
- 
-	stw   r7, 24+(4*4)(r1) 
-	stw   r8, 24+(5*4)(r1) 
-	stw   r9, 24+(6*4)(r1) 
-	stw   r10, 24+(7*4)(r1) 
-	stw   r0, 8(r1) 
-LCFI..0: 
-	/* 24 Bytes (Linkage Area) */ 
-	/* 32 Bytes (params) */ 
-	/* 16  Bytes (result) */ 
-	/* 104 Bytes (13*8 from FPR) */ 
-	/* 176 Bytes */ 
- 
-	stwu  r1, -176(r1)	/* skip over caller save area 
-				   keep stack aligned to 16  */ 
-LCFI..1: 
- 
-	/* next save fpr 1 to fpr 13 (aligned to 8) */ 
-	stfd  f1, 72+(0*8)(r1) 
-	stfd  f2, 72+(1*8)(r1) 
-	stfd  f3, 72+(2*8)(r1) 
-	stfd  f4, 72+(3*8)(r1) 
-	stfd  f5, 72+(4*8)(r1) 
-	stfd  f6, 72+(5*8)(r1) 
-	stfd  f7, 72+(6*8)(r1) 
-	stfd  f8, 72+(7*8)(r1) 
-	stfd  f9, 72+(8*8)(r1) 
-	stfd  f10, 72+(9*8)(r1) 
-	stfd  f11, 72+(10*8)(r1) 
-	stfd  f12, 72+(11*8)(r1) 
-	stfd  f13, 72+(12*8)(r1) 
- 
-	/* set up registers for the routine that actually does the work */ 
-	/* get the context pointer from the trampoline */ 
-	mr r3, r11 
- 
-	/* now load up the pointer to the result storage */ 
-	addi r4, r1, 56 
- 
-	/* now load up the pointer to the saved gpr registers */ 
-	addi r5, r1, 200 
- 
-	/* now load up the pointer to the saved fpr registers */ 
-	addi r6, r1, 72 
- 
-	/* make the call */ 
-	bl .ffi_closure_helper_DARWIN 
-	nop 
- 
-.Ldoneclosure: 
- 
-	/* now r3 contains the return type */ 
-	/* so use it to look up in a table */ 
-	/* so we know how to deal with each type */ 
- 
-	/* look up the proper starting point in table  */ 
-	/* by using return type as offset */ 
-	lhz	r3, 6(r3)	/* load type from return type */ 
-	lwz	r4, LC..60(2)	/* get address of jump table */ 
-	slwi	r3, r3, 4	/* now multiply return type by 16 */ 
-	lwz	r0, 176+8(r1)	/* load return address */ 
-	add	r3, r3, r4	/* add contents of table to table address */ 
-	mtctr	r3 
-	bctr			/* jump to it */ 
- 
-/* Each fragment must be exactly 16 bytes long (4 instructions). 
-   Align to 16 byte boundary for cache and dispatch efficiency.  */ 
-	.align 4 
- 
-L..60: 
-/* case FFI_TYPE_VOID */ 
-	mtlr r0 
-	addi r1, r1, 176 
-	blr 
-	nop 
- 
-/* case FFI_TYPE_INT */ 
-	lwz r3, 56+0(r1) 
-	mtlr r0 
-	addi r1, r1, 176 
-	blr 
- 
-/* case FFI_TYPE_FLOAT */ 
-	lfs f1, 56+0(r1) 
-	mtlr r0 
-	addi r1, r1, 176 
-	blr 
- 
-/* case FFI_TYPE_DOUBLE */ 
-	lfd f1, 56+0(r1) 
-	mtlr r0 
-	addi r1, r1, 176 
-	blr 
- 
-/* case FFI_TYPE_LONGDOUBLE */ 
-	lfd f1, 56+0(r1) 
-	mtlr r0 
-	lfd f2, 56+8(r1) 
-	b L..finish 
- 
-/* case FFI_TYPE_UINT8 */ 
-	lbz r3, 56+3(r1) 
-	mtlr r0 
-	addi r1, r1, 176 
-	blr 
- 
-/* case FFI_TYPE_SINT8 */ 
-	lbz r3, 56+3(r1) 
-	mtlr r0 
-	extsb r3, r3 
-	b L..finish 
- 
-/* case FFI_TYPE_UINT16 */ 
-	lhz r3, 56+2(r1) 
-	mtlr r0 
-	addi r1, r1, 176 
-	blr 
- 
-/* case FFI_TYPE_SINT16 */ 
-	lha r3, 56+2(r1) 
-	mtlr r0 
-	addi r1, r1, 176 
-	blr 
- 
-/* case FFI_TYPE_UINT32 */ 
-	lwz r3, 56+0(r1) 
-	mtlr r0 
-	addi r1, r1, 176 
-	blr 
- 
-/* case FFI_TYPE_SINT32 */ 
-	lwz r3, 56+0(r1) 
-	mtlr r0 
-	addi r1, r1, 176 
-	blr 
- 
-/* case FFI_TYPE_UINT64 */ 
-	lwz r3, 56+0(r1) 
-	mtlr r0 
-	lwz r4, 56+4(r1) 
-	b L..finish 
- 
-/* case FFI_TYPE_SINT64 */ 
-	lwz r3, 56+0(r1) 
-	mtlr r0 
-	lwz r4, 56+4(r1) 
-	b L..finish 
- 
-/* case FFI_TYPE_STRUCT */ 
-	mtlr r0 
-	addi r1, r1, 176 
-	blr 
-	nop 
- 
-/* case FFI_TYPE_POINTER */ 
-	lwz r3, 56+0(r1) 
-	mtlr r0 
-L..finish: 
-	addi r1, r1, 176 
-	blr 
-LFE..0: 
-#endif 
-	.ef __LINE__ 
-/* END(ffi_closure_ASM) */ 
- 
- 
-.csect .text[PR] 
-	.align 2 
-	.globl ffi_go_closure_ASM 
-	.globl .ffi_go_closure_ASM 
-.csect ffi_go_closure_ASM[DS] 
-ffi_go_closure_ASM: 
-#ifdef __64BIT__ 
-	.llong .ffi_go_closure_ASM, TOC[tc0], 0 
-	.csect .text[PR] 
-.ffi_go_closure_ASM: 
-	.function .ffi_go_closure_ASM,.ffi_go_closure_ASM,16,044,LFE..1-LFB..1 
-	.bf __LINE__ 
-	.line 1 
-LFB..1: 
-/* we want to build up an area for the parameters passed */ 
-/* in registers (both floating point and integer) */ 
- 
-	/* we store gpr 3 to gpr 10 (aligned to 4) 
-	in the parents outgoing area  */ 
-	std   r3, 48+(0*8)(r1) 
-	std   r4, 48+(1*8)(r1) 
-	std   r5, 48+(2*8)(r1) 
-	std   r6, 48+(3*8)(r1) 
-	mflr  r0 
- 
-	std   r7, 48+(4*8)(r1) 
-	std   r8, 48+(5*8)(r1) 
-	std   r9, 48+(6*8)(r1) 
-	std   r10, 48+(7*8)(r1) 
-	std   r0, 16(r1)	/* save the return address */ 
-LCFI..2: 
-	/* 48  Bytes (Linkage Area) */ 
-	/* 64  Bytes (params) */ 
-	/* 16  Bytes (result) */ 
-	/* 104 Bytes (13*8 from FPR) */ 
-	/* 8   Bytes (alignment) */ 
-	/* 240 Bytes */ 
- 
-	stdu  r1, -240(r1)	/* skip over caller save area 
-				   keep stack aligned to 16  */ 
-LCFI..3: 
- 
-	/* next save fpr 1 to fpr 13 (aligned to 8) */ 
-	stfd  f1, 128+(0*8)(r1) 
-	stfd  f2, 128+(1*8)(r1) 
-	stfd  f3, 128+(2*8)(r1) 
-	stfd  f4, 128+(3*8)(r1) 
-	stfd  f5, 128+(4*8)(r1) 
-	stfd  f6, 128+(5*8)(r1) 
-	stfd  f7, 128+(6*8)(r1) 
-	stfd  f8, 128+(7*8)(r1) 
-	stfd  f9, 128+(8*8)(r1) 
-	stfd  f10, 128+(9*8)(r1) 
-	stfd  f11, 128+(10*8)(r1) 
-	stfd  f12, 128+(11*8)(r1) 
-	stfd  f13, 128+(12*8)(r1) 
- 
-	/* set up registers for the routine that actually does the work */ 
-	mr r3, r11	/* go closure */ 
- 
-	/* now load up the pointer to the result storage */ 
-	addi r4, r1, 112 
- 
-	/* now load up the pointer to the saved gpr registers */ 
-	addi r5, r1, 288 
- 
-	/* now load up the pointer to the saved fpr registers */ 
-	addi r6, r1, 128 
- 
-	/* make the call */ 
-	bl .ffi_go_closure_helper_DARWIN 
-	nop 
- 
-	b .Ldoneclosure 
-LFE..1: 
- 
-#else /* ! __64BIT__ */ 
-	 
-	.long .ffi_go_closure_ASM, TOC[tc0], 0 
-	.csect .text[PR] 
-.ffi_go_closure_ASM: 
-	.function .ffi_go_closure_ASM,.ffi_go_closure_ASM,16,044,LFE..1-LFB..1 
-	.bf __LINE__ 
-	.line 1 
-LFB..1: 
-/* we want to build up an area for the parameters passed */ 
-/* in registers (both floating point and integer) */ 
- 
-	/* we store gpr 3 to gpr 10 (aligned to 4) 
-	in the parents outgoing area  */ 
-	stw   r3, 24+(0*4)(r1) 
-	stw   r4, 24+(1*4)(r1) 
-	stw   r5, 24+(2*4)(r1) 
-	stw   r6, 24+(3*4)(r1) 
-	mflr  r0 
- 
-	stw   r7, 24+(4*4)(r1) 
-	stw   r8, 24+(5*4)(r1) 
-	stw   r9, 24+(6*4)(r1) 
-	stw   r10, 24+(7*4)(r1) 
-	stw   r0, 8(r1) 
-LCFI..2: 
-	/* 24 Bytes (Linkage Area) */ 
-	/* 32 Bytes (params) */ 
-	/* 16  Bytes (result) */ 
-	/* 104 Bytes (13*8 from FPR) */ 
-	/* 176 Bytes */ 
- 
-	stwu  r1, -176(r1)	/* skip over caller save area 
-				   keep stack aligned to 16  */ 
-LCFI..3: 
- 
-	/* next save fpr 1 to fpr 13 (aligned to 8) */ 
-	stfd  f1, 72+(0*8)(r1) 
-	stfd  f2, 72+(1*8)(r1) 
-	stfd  f3, 72+(2*8)(r1) 
-	stfd  f4, 72+(3*8)(r1) 
-	stfd  f5, 72+(4*8)(r1) 
-	stfd  f6, 72+(5*8)(r1) 
-	stfd  f7, 72+(6*8)(r1) 
-	stfd  f8, 72+(7*8)(r1) 
-	stfd  f9, 72+(8*8)(r1) 
-	stfd  f10, 72+(9*8)(r1) 
-	stfd  f11, 72+(10*8)(r1) 
-	stfd  f12, 72+(11*8)(r1) 
-	stfd  f13, 72+(12*8)(r1) 
- 
-	/* set up registers for the routine that actually does the work */ 
-	mr   r3, 11	/* go closure */ 
- 
-	/* now load up the pointer to the result storage */ 
-	addi r4, r1, 56 
- 
-	/* now load up the pointer to the saved gpr registers */ 
-	addi r5, r1, 200 
- 
-	/* now load up the pointer to the saved fpr registers */ 
-	addi r6, r1, 72 
- 
-	/* make the call */ 
-	bl .ffi_go_closure_helper_DARWIN 
-	nop 
- 
-	b    .Ldoneclosure 
-LFE..1: 
-#endif 
-	.ef __LINE__ 
-/* END(ffi_go_closure_ASM) */ 
- 
-/* EH frame stuff.  */ 
- 
-#define LR_REGNO		0x41		/* Link Register (65), see rs6000.md */ 
-#ifdef __64BIT__ 
-#define PTRSIZE			8 
-#define LOG2_PTRSIZE		3 
-#define CFA_OFFSET		0xf0,0x01	/* LEB128 240 */ 
-#define FDE_ENCODING		0x1c		/* DW_EH_PE_pcrel|DW_EH_PE_sdata8 */ 
-#define EH_DATA_ALIGN_FACT	0x78		/* LEB128 -8 */ 
-#else 
-#define PTRSIZE			4 
-#define LOG2_PTRSIZE		2 
-#define CFA_OFFSET		0xb0,0x01	/* LEB128 176 */ 
-#define FDE_ENCODING		0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4 */ 
-#define EH_DATA_ALIGN_FACT	0x7c		/* LEB128 -4 */ 
-#endif 
- 
-	.csect	_unwind.ro_[RO],4 
-	.align	LOG2_PTRSIZE 
-	.globl	_GLOBAL__F_libffi_src_powerpc_aix_closure 
-_GLOBAL__F_libffi_src_powerpc_aix_closure: 
-Lframe..1: 
-	.vbyte	4,LECIE..1-LSCIE..1	/* CIE Length */ 
-LSCIE..1: 
-	.vbyte	4,0			/* CIE Identifier Tag */ 
-	.byte	0x3			/* CIE Version */ 
-	.byte	"zR"			/* CIE Augmentation */ 
-	.byte	0 
-	.byte	0x1			/* uleb128 0x1; CIE Code Alignment Factor */ 
-	.byte	EH_DATA_ALIGN_FACT	/* leb128 -4/-8; CIE Data Alignment Factor */ 
-	.byte	LR_REGNO		/* CIE RA Column */ 
-	.byte	0x1			/* uleb128 0x1; Augmentation size */ 
-	.byte	FDE_ENCODING		/* FDE Encoding (pcrel|sdata4/8) */ 
-	.byte	0xc			/* DW_CFA_def_cfa */ 
-	.byte	0x1			/*     uleb128 0x1; Register r1 */ 
-	.byte	0			/*     uleb128 0x0; Offset 0 */ 
-	.align	LOG2_PTRSIZE 
-LECIE..1: 
-LSFDE..1: 
-	.vbyte	4,LEFDE..1-LASFDE..1	/* FDE Length */ 
-LASFDE..1: 
-	.vbyte	4,LASFDE..1-Lframe..1	/* FDE CIE offset */ 
-	.vbyte	PTRSIZE,LFB..0-$	/* FDE initial location */ 
-	.vbyte	PTRSIZE,LFE..0-LFB..0	/* FDE address range */ 
-	.byte	0			/* uleb128 0x0; Augmentation size */ 
-	.byte	0x4			/* DW_CFA_advance_loc4 */ 
-	.vbyte	4,LCFI..1-LCFI..0 
-	.byte	0xe			/* DW_CFA_def_cfa_offset */ 
-	.byte	CFA_OFFSET		/*     uleb128 176/240 */ 
-	.byte	0x4			/* DW_CFA_advance_loc4 */ 
-	.vbyte	4,LCFI..0-LFB..0 
-	.byte	0x11			/* DW_CFA_offset_extended_sf */ 
-	.byte	LR_REGNO		/*     uleb128 LR_REGNO; Register LR */ 
-	.byte	0x7e			/*     leb128 -2; Offset -2 (8/16) */ 
-	.align	LOG2_PTRSIZE 
-LEFDE..1: 
-LSFDE..2: 
-	.vbyte	4,LEFDE..2-LASFDE..2	/* FDE Length */ 
-LASFDE..2: 
-	.vbyte	4,LASFDE..2-Lframe..1	/* FDE CIE offset */ 
-	.vbyte	PTRSIZE,LFB..1-$	/* FDE initial location */ 
-	.vbyte	PTRSIZE,LFE..1-LFB..1	/* FDE address range */ 
-	.byte	0			/* uleb128 0x0; Augmentation size */ 
-	.byte	0x4			/* DW_CFA_advance_loc4 */ 
-	.vbyte	4,LCFI..3-LCFI..2 
-	.byte	0xe			/* DW_CFA_def_cfa_offset */ 
-	.byte	CFA_OFFSET		/*     uleb128 176/240 */ 
-	.byte	0x4			/* DW_CFA_advance_loc4 */ 
-	.vbyte	4,LCFI..2-LFB..1 
-	.byte	0x11			/* DW_CFA_offset_extended_sf */ 
-	.byte	LR_REGNO		/*     uleb128 LR_REGNO; Register LR */ 
-	.byte	0x7e			/*     leb128 -2; Offset -2 (8/16) */ 
-	.align	LOG2_PTRSIZE 
-LEFDE..2: 
-	.vbyte	4,0			/* End of FDEs */ 
- 
-	.csect	.text[PR] 
-	.ref	_GLOBAL__F_libffi_src_powerpc_aix_closure	/* Prevents garbage collection by AIX linker */ 
- 
+/* -----------------------------------------------------------------------
+   aix_closure.S - Copyright (c) 2002, 2003, 2009 Free Software Foundation, Inc.
+   based on darwin_closure.S
+
+   PowerPC Assembly glue.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+	.set r0,0
+	.set r1,1
+	.set r2,2
+	.set r3,3
+	.set r4,4
+	.set r5,5
+	.set r6,6
+	.set r7,7
+	.set r8,8
+	.set r9,9
+	.set r10,10
+	.set r11,11
+	.set r12,12
+	.set r13,13
+	.set r14,14
+	.set r15,15
+	.set r16,16
+	.set r17,17
+	.set r18,18
+	.set r19,19
+	.set r20,20
+	.set r21,21
+	.set r22,22
+	.set r23,23
+	.set r24,24
+	.set r25,25
+	.set r26,26
+	.set r27,27
+	.set r28,28
+	.set r29,29
+	.set r30,30
+	.set r31,31
+	.set f0,0
+	.set f1,1
+	.set f2,2
+	.set f3,3
+	.set f4,4
+	.set f5,5
+	.set f6,6
+	.set f7,7
+	.set f8,8
+	.set f9,9
+	.set f10,10
+	.set f11,11
+	.set f12,12
+	.set f13,13
+	.set f14,14
+	.set f15,15
+	.set f16,16
+	.set f17,17
+	.set f18,18
+	.set f19,19
+	.set f20,20
+	.set f21,21
+
+	.extern .ffi_closure_helper_DARWIN
+	.extern .ffi_go_closure_helper_DARWIN
+
+#define LIBFFI_ASM
+#define JUMPTARGET(name) name
+#define L(x) x
+	.file "aix_closure.S"
+	.toc
+LC..60:
+	.tc L..60[TC],L..60
+	.csect .text[PR]
+	.align 2
+
+.csect .text[PR]
+	.align 2
+	.globl ffi_closure_ASM
+	.globl .ffi_closure_ASM
+.csect ffi_closure_ASM[DS]
+ffi_closure_ASM:
+#ifdef __64BIT__
+	.llong .ffi_closure_ASM, TOC[tc0], 0
+	.csect .text[PR]
+.ffi_closure_ASM:
+	.function .ffi_closure_ASM,.ffi_closure_ASM,16,044,LFE..0-LFB..0
+	.bf __LINE__
+	.line 1
+LFB..0:
+/* 64-bit closure entry: build up an area for the parameters passed */
+/* in registers (both floating point and integer) */
+
+	/* store gpr 3 to gpr 10 in consecutive 8-byte slots from 48(r1),
+	in the parent's outgoing area  */
+	std   r3, 48+(0*8)(r1)
+	std   r4, 48+(1*8)(r1)
+	std   r5, 48+(2*8)(r1)
+	std   r6, 48+(3*8)(r1)
+	mflr  r0
+
+	std   r7, 48+(4*8)(r1)
+	std   r8, 48+(5*8)(r1)
+	std   r9, 48+(6*8)(r1)
+	std   r10, 48+(7*8)(r1)
+	std   r0, 16(r1)	/* save the return address */
+LCFI..0:
+	/* 48  Bytes (Linkage Area) */
+	/* 64  Bytes (params) */
+	/* 16  Bytes (result) */
+	/* 104 Bytes (13*8 from FPR) */
+	/* 8   Bytes (alignment) */
+	/* 240 Bytes */
+
+	stdu  r1, -240(r1)	/* skip over caller save area
+				   keep stack aligned to 16  */
+LCFI..1:
+
+	/* next save fpr 1 to fpr 13 (aligned to 8) */
+	stfd  f1, 128+(0*8)(r1)
+	stfd  f2, 128+(1*8)(r1)
+	stfd  f3, 128+(2*8)(r1)
+	stfd  f4, 128+(3*8)(r1)
+	stfd  f5, 128+(4*8)(r1)
+	stfd  f6, 128+(5*8)(r1)
+	stfd  f7, 128+(6*8)(r1)
+	stfd  f8, 128+(7*8)(r1)
+	stfd  f9, 128+(8*8)(r1)
+	stfd  f10, 128+(9*8)(r1)
+	stfd  f11, 128+(10*8)(r1)
+	stfd  f12, 128+(11*8)(r1)
+	stfd  f13, 128+(12*8)(r1)
+
+	/* set up registers for the routine that actually does the work */
+	/* get the context pointer from the trampoline */
+	mr r3, r11
+
+	/* now load up the pointer to the result storage (48 + 64) */
+	addi r4, r1, 112
+
+	/* now load up the pointer to the saved gpr registers (240 + 48) */
+	addi r5, r1, 288
+
+	/* now load up the pointer to the saved fpr registers */
+	addi r6, r1, 128
+
+	/* make the call */
+	bl .ffi_closure_helper_DARWIN
+	nop
+
+.Ldoneclosure:
+
+	/* now r3 points at the return type (it is dereferenced below) */
+	/* so use it to look up in a table */
+	/* so we know how to deal with each type */
+
+	/* look up the proper starting point in table  */
+	/* by using return type as offset */
+	lhz	r3, 10(r3)	/* load 16-bit type code at offset 10 */
+	ld	r4, LC..60(2)	/* get address of jump table */
+	sldi	r3, r3, 4	/* now multiply return type by 16 */
+	ld	r0, 240+16(r1)	/* load return address */
+	add	r3, r3, r4	/* add scaled type code to table address */
+	mtctr	r3
+	bctr			/* jump to it */
+
+/* Each fragment must be exactly 16 bytes long (4 instructions).
+   Align to 16 byte boundary for cache and dispatch efficiency.  */
+	.align 4
+
+L..60:
+/* case FFI_TYPE_VOID */
+	mtlr r0
+	addi r1, r1, 240
+	blr
+	nop
+
+/* case FFI_TYPE_INT */
+	lwa r3, 112+4(r1)
+	mtlr r0
+	addi r1, r1, 240
+	blr
+
+/* case FFI_TYPE_FLOAT */
+	lfs f1, 112+0(r1)
+	mtlr r0
+	addi r1, r1, 240
+	blr
+
+/* case FFI_TYPE_DOUBLE */
+	lfd f1, 112+0(r1)
+	mtlr r0
+	addi r1, r1, 240
+	blr
+
+/* case FFI_TYPE_LONGDOUBLE */
+	lfd f1, 112+0(r1)
+	mtlr r0
+	lfd f2, 112+8(r1)
+	b L..finish
+
+/* case FFI_TYPE_UINT8 */
+	lbz r3, 112+7(r1)
+	mtlr r0
+	addi r1, r1, 240
+	blr
+
+/* case FFI_TYPE_SINT8 */
+	lbz r3, 112+7(r1)
+	mtlr r0
+	extsb r3, r3
+	b L..finish
+
+/* case FFI_TYPE_UINT16 */
+	lhz r3, 112+6(r1)
+	mtlr r0
+L..finish:	/* shared tail: target of the 'b L..finish' fragments */
+	addi r1, r1, 240
+	blr
+
+/* case FFI_TYPE_SINT16 */
+	lha r3, 112+6(r1)
+	mtlr r0
+	addi r1, r1, 240
+	blr
+
+/* case FFI_TYPE_UINT32 */
+	lwz r3, 112+4(r1)
+	mtlr r0
+	addi r1, r1, 240
+	blr
+
+/* case FFI_TYPE_SINT32 */
+	lwa r3, 112+4(r1)
+	mtlr r0
+	addi r1, r1, 240
+	blr
+
+/* case FFI_TYPE_UINT64 */
+	ld r3, 112+0(r1)
+	mtlr r0
+	addi r1, r1, 240
+	blr
+
+/* case FFI_TYPE_SINT64 */
+	ld r3, 112+0(r1)
+	mtlr r0
+	addi r1, r1, 240
+	blr
+
+/* case FFI_TYPE_STRUCT */
+	mtlr r0
+	addi r1, r1, 240
+	blr
+	nop
+
+/* case FFI_TYPE_POINTER */
+	ld r3, 112+0(r1)
+	mtlr r0
+	addi r1, r1, 240
+	blr
+LFE..0:
+
+#else /* ! __64BIT__ */
+	
+	.long .ffi_closure_ASM, TOC[tc0], 0
+	.csect .text[PR]
+.ffi_closure_ASM:
+	.function .ffi_closure_ASM,.ffi_closure_ASM,16,044,LFE..0-LFB..0
+	.bf __LINE__
+	.line 1
+LFB..0:
+/* 32-bit closure entry: build up an area for the parameters passed */
+/* in registers (both floating point and integer) */
+
+	/* store gpr 3 to gpr 10 in consecutive 4-byte slots from 24(r1),
+	in the parent's outgoing area  */
+	stw   r3, 24+(0*4)(r1)
+	stw   r4, 24+(1*4)(r1)
+	stw   r5, 24+(2*4)(r1)
+	stw   r6, 24+(3*4)(r1)
+	mflr  r0
+
+	stw   r7, 24+(4*4)(r1)
+	stw   r8, 24+(5*4)(r1)
+	stw   r9, 24+(6*4)(r1)
+	stw   r10, 24+(7*4)(r1)
+	stw   r0, 8(r1)	/* save the return address */
+LCFI..0:
+	/* 24 Bytes (Linkage Area) */
+	/* 32 Bytes (params) */
+	/* 16  Bytes (result) */
+	/* 104 Bytes (13*8 from FPR) */
+	/* 176 Bytes */
+
+	stwu  r1, -176(r1)	/* skip over caller save area
+				   keep stack aligned to 16  */
+LCFI..1:
+
+	/* next save fpr 1 to fpr 13 (aligned to 8) */
+	stfd  f1, 72+(0*8)(r1)
+	stfd  f2, 72+(1*8)(r1)
+	stfd  f3, 72+(2*8)(r1)
+	stfd  f4, 72+(3*8)(r1)
+	stfd  f5, 72+(4*8)(r1)
+	stfd  f6, 72+(5*8)(r1)
+	stfd  f7, 72+(6*8)(r1)
+	stfd  f8, 72+(7*8)(r1)
+	stfd  f9, 72+(8*8)(r1)
+	stfd  f10, 72+(9*8)(r1)
+	stfd  f11, 72+(10*8)(r1)
+	stfd  f12, 72+(11*8)(r1)
+	stfd  f13, 72+(12*8)(r1)
+
+	/* set up registers for the routine that actually does the work */
+	/* get the context pointer from the trampoline */
+	mr r3, r11
+
+	/* now load up the pointer to the result storage (24 + 32) */
+	addi r4, r1, 56
+
+	/* now load up the pointer to the saved gpr registers (176 + 24) */
+	addi r5, r1, 200
+
+	/* now load up the pointer to the saved fpr registers */
+	addi r6, r1, 72
+
+	/* make the call */
+	bl .ffi_closure_helper_DARWIN
+	nop
+
+.Ldoneclosure:
+
+	/* now r3 points at the return type (it is dereferenced below) */
+	/* so use it to look up in a table */
+	/* so we know how to deal with each type */
+
+	/* look up the proper starting point in table  */
+	/* by using return type as offset */
+	lhz	r3, 6(r3)	/* load 16-bit type code at offset 6 */
+	lwz	r4, LC..60(2)	/* get address of jump table */
+	slwi	r3, r3, 4	/* now multiply return type by 16 */
+	lwz	r0, 176+8(r1)	/* load return address */
+	add	r3, r3, r4	/* add scaled type code to table address */
+	mtctr	r3
+	bctr			/* jump to it */
+
+/* Each fragment must be exactly 16 bytes long (4 instructions).
+   Align to 16 byte boundary for cache and dispatch efficiency.  */
+	.align 4
+
+L..60:
+/* case FFI_TYPE_VOID */
+	mtlr r0
+	addi r1, r1, 176
+	blr
+	nop
+
+/* case FFI_TYPE_INT */
+	lwz r3, 56+0(r1)
+	mtlr r0
+	addi r1, r1, 176
+	blr
+
+/* case FFI_TYPE_FLOAT */
+	lfs f1, 56+0(r1)
+	mtlr r0
+	addi r1, r1, 176
+	blr
+
+/* case FFI_TYPE_DOUBLE */
+	lfd f1, 56+0(r1)
+	mtlr r0
+	addi r1, r1, 176
+	blr
+
+/* case FFI_TYPE_LONGDOUBLE */
+	lfd f1, 56+0(r1)
+	mtlr r0
+	lfd f2, 56+8(r1)
+	b L..finish
+
+/* case FFI_TYPE_UINT8 */
+	lbz r3, 56+3(r1)
+	mtlr r0
+	addi r1, r1, 176
+	blr
+
+/* case FFI_TYPE_SINT8 */
+	lbz r3, 56+3(r1)
+	mtlr r0
+	extsb r3, r3
+	b L..finish
+
+/* case FFI_TYPE_UINT16 */
+	lhz r3, 56+2(r1)
+	mtlr r0
+	addi r1, r1, 176
+	blr
+
+/* case FFI_TYPE_SINT16 */
+	lha r3, 56+2(r1)
+	mtlr r0
+	addi r1, r1, 176
+	blr
+
+/* case FFI_TYPE_UINT32 */
+	lwz r3, 56+0(r1)
+	mtlr r0
+	addi r1, r1, 176
+	blr
+
+/* case FFI_TYPE_SINT32 */
+	lwz r3, 56+0(r1)
+	mtlr r0
+	addi r1, r1, 176
+	blr
+
+/* case FFI_TYPE_UINT64 */
+	lwz r3, 56+0(r1)
+	mtlr r0
+	lwz r4, 56+4(r1)
+	b L..finish
+
+/* case FFI_TYPE_SINT64 */
+	lwz r3, 56+0(r1)
+	mtlr r0
+	lwz r4, 56+4(r1)
+	b L..finish
+
+/* case FFI_TYPE_STRUCT */
+	mtlr r0
+	addi r1, r1, 176
+	blr
+	nop
+
+/* case FFI_TYPE_POINTER */
+	lwz r3, 56+0(r1)
+	mtlr r0
+L..finish:	/* shared tail: target of the 'b L..finish' fragments */
+	addi r1, r1, 176
+	blr
+LFE..0:
+#endif
+	.ef __LINE__
+/* END(ffi_closure_ASM) */
+
+
+.csect .text[PR]
+	.align 2
+	.globl ffi_go_closure_ASM
+	.globl .ffi_go_closure_ASM
+.csect ffi_go_closure_ASM[DS]
+ffi_go_closure_ASM:
+#ifdef __64BIT__
+	.llong .ffi_go_closure_ASM, TOC[tc0], 0
+	.csect .text[PR]
+.ffi_go_closure_ASM:
+	.function .ffi_go_closure_ASM,.ffi_go_closure_ASM,16,044,LFE..1-LFB..1
+	.bf __LINE__
+	.line 1
+LFB..1:
+/* 64-bit go closure entry: build up an area for the parameters passed */
+/* in registers (both floating point and integer) */
+
+	/* store gpr 3 to gpr 10 in consecutive 8-byte slots from 48(r1),
+	in the parent's outgoing area  */
+	std   r3, 48+(0*8)(r1)
+	std   r4, 48+(1*8)(r1)
+	std   r5, 48+(2*8)(r1)
+	std   r6, 48+(3*8)(r1)
+	mflr  r0
+
+	std   r7, 48+(4*8)(r1)
+	std   r8, 48+(5*8)(r1)
+	std   r9, 48+(6*8)(r1)
+	std   r10, 48+(7*8)(r1)
+	std   r0, 16(r1)	/* save the return address */
+LCFI..2:
+	/* 48  Bytes (Linkage Area) */
+	/* 64  Bytes (params) */
+	/* 16  Bytes (result) */
+	/* 104 Bytes (13*8 from FPR) */
+	/* 8   Bytes (alignment) */
+	/* 240 Bytes */
+
+	stdu  r1, -240(r1)	/* skip over caller save area
+				   keep stack aligned to 16  */
+LCFI..3:
+
+	/* next save fpr 1 to fpr 13 (aligned to 8) */
+	stfd  f1, 128+(0*8)(r1)
+	stfd  f2, 128+(1*8)(r1)
+	stfd  f3, 128+(2*8)(r1)
+	stfd  f4, 128+(3*8)(r1)
+	stfd  f5, 128+(4*8)(r1)
+	stfd  f6, 128+(5*8)(r1)
+	stfd  f7, 128+(6*8)(r1)
+	stfd  f8, 128+(7*8)(r1)
+	stfd  f9, 128+(8*8)(r1)
+	stfd  f10, 128+(9*8)(r1)
+	stfd  f11, 128+(10*8)(r1)
+	stfd  f12, 128+(11*8)(r1)
+	stfd  f13, 128+(12*8)(r1)
+
+	/* set up registers for the routine that actually does the work */
+	mr r3, r11	/* go closure */
+
+	/* now load up the pointer to the result storage (48 + 64) */
+	addi r4, r1, 112
+
+	/* now load up the pointer to the saved gpr registers (240 + 48) */
+	addi r5, r1, 288
+
+	/* now load up the pointer to the saved fpr registers */
+	addi r6, r1, 128
+
+	/* make the call */
+	bl .ffi_go_closure_helper_DARWIN
+	nop
+
+	b .Ldoneclosure	/* share the return-type dispatch in .ffi_closure_ASM */
+LFE..1:
+
+#else /* ! __64BIT__ */
+	
+	.long .ffi_go_closure_ASM, TOC[tc0], 0
+	.csect .text[PR]
+.ffi_go_closure_ASM:
+	.function .ffi_go_closure_ASM,.ffi_go_closure_ASM,16,044,LFE..1-LFB..1
+	.bf __LINE__
+	.line 1
+LFB..1:
+/* 32-bit go closure entry: build up an area for the parameters passed */
+/* in registers (both floating point and integer) */
+
+	/* store gpr 3 to gpr 10 in consecutive 4-byte slots from 24(r1),
+	in the parent's outgoing area  */
+	stw   r3, 24+(0*4)(r1)
+	stw   r4, 24+(1*4)(r1)
+	stw   r5, 24+(2*4)(r1)
+	stw   r6, 24+(3*4)(r1)
+	mflr  r0
+
+	stw   r7, 24+(4*4)(r1)
+	stw   r8, 24+(5*4)(r1)
+	stw   r9, 24+(6*4)(r1)
+	stw   r10, 24+(7*4)(r1)
+	stw   r0, 8(r1)	/* save the return address */
+LCFI..2:
+	/* 24 Bytes (Linkage Area) */
+	/* 32 Bytes (params) */
+	/* 16  Bytes (result) */
+	/* 104 Bytes (13*8 from FPR) */
+	/* 176 Bytes */
+
+	stwu  r1, -176(r1)	/* skip over caller save area
+				   keep stack aligned to 16  */
+LCFI..3:
+
+	/* next save fpr 1 to fpr 13 (aligned to 8) */
+	stfd  f1, 72+(0*8)(r1)
+	stfd  f2, 72+(1*8)(r1)
+	stfd  f3, 72+(2*8)(r1)
+	stfd  f4, 72+(3*8)(r1)
+	stfd  f5, 72+(4*8)(r1)
+	stfd  f6, 72+(5*8)(r1)
+	stfd  f7, 72+(6*8)(r1)
+	stfd  f8, 72+(7*8)(r1)
+	stfd  f9, 72+(8*8)(r1)
+	stfd  f10, 72+(9*8)(r1)
+	stfd  f11, 72+(10*8)(r1)
+	stfd  f12, 72+(11*8)(r1)
+	stfd  f13, 72+(12*8)(r1)
+
+	/* set up registers for the routine that actually does the work */
+	mr   r3, r11	/* go closure */
+
+	/* now load up the pointer to the result storage (24 + 32) */
+	addi r4, r1, 56
+
+	/* now load up the pointer to the saved gpr registers (176 + 24) */
+	addi r5, r1, 200
+
+	/* now load up the pointer to the saved fpr registers */
+	addi r6, r1, 72
+
+	/* make the call */
+	bl .ffi_go_closure_helper_DARWIN
+	nop
+
+	b    .Ldoneclosure	/* share the return-type dispatch in .ffi_closure_ASM */
+LFE..1:
+#endif
+	.ef __LINE__
+/* END(ffi_go_closure_ASM) */
+
+/* EH frame stuff.  */
+
+#define LR_REGNO		0x41		/* Link Register (65), see rs6000.md */
+#ifdef __64BIT__
+#define PTRSIZE			8
+#define LOG2_PTRSIZE		3
+#define CFA_OFFSET		0xf0,0x01	/* LEB128 240 */
+#define FDE_ENCODING		0x1c		/* DW_EH_PE_pcrel|DW_EH_PE_sdata8 */
+#define EH_DATA_ALIGN_FACT	0x78		/* LEB128 -8 */
+#else
+#define PTRSIZE			4
+#define LOG2_PTRSIZE		2
+#define CFA_OFFSET		0xb0,0x01	/* LEB128 176 */
+#define FDE_ENCODING		0x1b		/* DW_EH_PE_pcrel|DW_EH_PE_sdata4 */
+#define EH_DATA_ALIGN_FACT	0x7c		/* LEB128 -4 */
+#endif
+
+	.csect	_unwind.ro_[RO],4
+	.align	LOG2_PTRSIZE
+	.globl	_GLOBAL__F_libffi_src_powerpc_aix_closure
+_GLOBAL__F_libffi_src_powerpc_aix_closure:
+Lframe..1:
+	.vbyte	4,LECIE..1-LSCIE..1	/* CIE Length */
+LSCIE..1:
+	.vbyte	4,0			/* CIE Identifier Tag */
+	.byte	0x3			/* CIE Version */
+	.byte	"zR"			/* CIE Augmentation */
+	.byte	0
+	.byte	0x1			/* uleb128 0x1; CIE Code Alignment Factor */
+	.byte	EH_DATA_ALIGN_FACT	/* leb128 -4/-8; CIE Data Alignment Factor */
+	.byte	LR_REGNO		/* CIE RA Column */
+	.byte	0x1			/* uleb128 0x1; Augmentation size */
+	.byte	FDE_ENCODING		/* FDE Encoding (pcrel|sdata4/8) */
+	.byte	0xc			/* DW_CFA_def_cfa */
+	.byte	0x1			/*     uleb128 0x1; Register r1 */
+	.byte	0			/*     uleb128 0x0; Offset 0 */
+	.align	LOG2_PTRSIZE
+LECIE..1:
+LSFDE..1:
+	.vbyte	4,LEFDE..1-LASFDE..1	/* FDE Length */
+LASFDE..1:
+	.vbyte	4,LASFDE..1-Lframe..1	/* FDE CIE offset */
+	.vbyte	PTRSIZE,LFB..0-$	/* FDE initial location */
+	.vbyte	PTRSIZE,LFE..0-LFB..0	/* FDE address range */
+	.byte	0			/* uleb128 0x0; Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4: up to LCFI..0 (LR stored) */
+	.vbyte	4,LCFI..0-LFB..0
+	.byte	0x11			/* DW_CFA_offset_extended_sf */
+	.byte	LR_REGNO		/*     uleb128 LR_REGNO; Register LR */
+	.byte	0x7e			/*     leb128 -2; Offset -2 (8/16) */
+	.byte	0x4			/* DW_CFA_advance_loc4: up to LCFI..1 (frame pushed) */
+	.vbyte	4,LCFI..1-LCFI..0
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	CFA_OFFSET		/*     uleb128 176/240 */
+	.align	LOG2_PTRSIZE
+LEFDE..1:
+LSFDE..2:
+	.vbyte	4,LEFDE..2-LASFDE..2	/* FDE Length */
+LASFDE..2:
+	.vbyte	4,LASFDE..2-Lframe..1	/* FDE CIE offset */
+	.vbyte	PTRSIZE,LFB..1-$	/* FDE initial location */
+	.vbyte	PTRSIZE,LFE..1-LFB..1	/* FDE address range */
+	.byte	0			/* uleb128 0x0; Augmentation size */
+	.byte	0x4			/* DW_CFA_advance_loc4: up to LCFI..2 (LR stored) */
+	.vbyte	4,LCFI..2-LFB..1
+	.byte	0x11			/* DW_CFA_offset_extended_sf */
+	.byte	LR_REGNO		/*     uleb128 LR_REGNO; Register LR */
+	.byte	0x7e			/*     leb128 -2; Offset -2 (8/16) */
+	.byte	0x4			/* DW_CFA_advance_loc4: up to LCFI..3 (frame pushed) */
+	.vbyte	4,LCFI..3-LCFI..2
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	CFA_OFFSET		/*     uleb128 176/240 */
+	.align	LOG2_PTRSIZE
+LEFDE..2:
+	.vbyte	4,0			/* End of FDEs */
+
+	.csect	.text[PR]
+	.ref	_GLOBAL__F_libffi_src_powerpc_aix_closure	/* Prevents garbage collection by AIX linker */
+
diff --git a/contrib/restricted/libffi/src/powerpc/asm.h b/contrib/restricted/libffi/src/powerpc/asm.h
index 4f8a4b4c6b..27b22f670a 100644
--- a/contrib/restricted/libffi/src/powerpc/asm.h
+++ b/contrib/restricted/libffi/src/powerpc/asm.h
@@ -1,125 +1,125 @@
-/* ----------------------------------------------------------------------- 
-   asm.h - Copyright (c) 1998 Geoffrey Keating 
- 
-   PowerPC Assembly glue. 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR 
-   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
-   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
-   OTHER DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-#define ASM_GLOBAL_DIRECTIVE .globl 
- 
- 
-#define C_SYMBOL_NAME(name) name 
-/* Macro for a label.  */ 
-#ifdef	__STDC__ 
-#define C_LABEL(name)		name##: 
-#else 
-#define C_LABEL(name)		name/**/: 
-#endif 
- 
-/* This seems to always be the case on PPC.  */ 
-#define ALIGNARG(log2) log2 
-/* For ELF we need the `.type' directive to make shared libs work right.  */ 
-#define ASM_TYPE_DIRECTIVE(name,typearg) .type name,typearg; 
-#define ASM_SIZE_DIRECTIVE(name) .size name,.-name 
- 
-/* If compiled for profiling, call `_mcount' at the start of each function.  */ 
-#ifdef	PROF 
-/* The mcount code relies on the return address being on the stack 
-   to locate our caller and so it can restore it; so store one just 
-   for its benefit.  */ 
-#ifdef PIC 
-#define CALL_MCOUNT							      \ 
-  .pushsection;								      \ 
-  .section ".data";							      \ 
-  .align ALIGNARG(2);							      \ 
-0:.long 0;								      \ 
-  .previous;								      \ 
-  mflr  %r0;								      \ 
-  stw   %r0,4(%r1);							      \ 
-  bl    _GLOBAL_OFFSET_TABLE_@local-4;					      \ 
-  mflr  %r11;								      \ 
-  lwz   %r0,0b@got(%r11);						      \ 
-  bl    JUMPTARGET(_mcount); 
-#else  /* PIC */ 
-#define CALL_MCOUNT							      \ 
-  .section ".data";							      \ 
-  .align ALIGNARG(2);							      \ 
-0:.long 0;								      \ 
-  .previous;								      \ 
-  mflr  %r0;								      \ 
-  lis   %r11,0b@ha;							      \ 
-  stw   %r0,4(%r1);							      \ 
-  addi  %r0,%r11,0b@l;							      \ 
-  bl    JUMPTARGET(_mcount); 
-#endif /* PIC */ 
-#else  /* PROF */ 
-#define CALL_MCOUNT		/* Do nothing.  */ 
-#endif /* PROF */ 
- 
-#define	ENTRY(name)							      \ 
-  ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name);				      \ 
-  ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function)			      \ 
-  .align ALIGNARG(2);							      \ 
-  C_LABEL(name)								      \ 
-  CALL_MCOUNT 
- 
-#define EALIGN_W_0  /* No words to insert.  */ 
-#define EALIGN_W_1  nop 
-#define EALIGN_W_2  nop;nop 
-#define EALIGN_W_3  nop;nop;nop 
-#define EALIGN_W_4  EALIGN_W_3;nop 
-#define EALIGN_W_5  EALIGN_W_4;nop 
-#define EALIGN_W_6  EALIGN_W_5;nop 
-#define EALIGN_W_7  EALIGN_W_6;nop 
- 
-/* EALIGN is like ENTRY, but does alignment to 'words'*4 bytes 
-   past a 2^align boundary.  */ 
-#ifdef PROF 
-#define EFFI_ALIGN(name, alignt, words)					      \ 
-  ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name);				      \ 
-  ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function)			      \ 
-  .align ALIGNARG(2);							      \ 
-  C_LABEL(name)								      \ 
-  CALL_MCOUNT								      \ 
-  b 0f;									      \ 
-  .align ALIGNARG(alignt);						      \ 
-  EALIGN_W_##words;							      \ 
-  0: 
-#else /* PROF */ 
-#define EFFI_ALIGN(name, alignt, words)					      \ 
-  ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name);				      \ 
-  ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function)			      \ 
-  .align ALIGNARG(alignt);						      \ 
-  EALIGN_W_##words;							      \ 
-  C_LABEL(name) 
-#endif 
- 
-#define END(name)							      \ 
-  ASM_SIZE_DIRECTIVE(name) 
- 
-#ifdef PIC 
-#define JUMPTARGET(name) name##@plt 
-#else 
-#define JUMPTARGET(name) name 
-#endif 
- 
-/* Local labels stripped out by the linker.  */ 
-#define L(x) .L##x 
+/* -----------------------------------------------------------------------
+   asm.h - Copyright (c) 1998 Geoffrey Keating
+
+   PowerPC Assembly glue.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#define ASM_GLOBAL_DIRECTIVE .globl
+
+
+#define C_SYMBOL_NAME(name) name
+/* Macro for a label.  */
+#ifdef	__STDC__
+#define C_LABEL(name)		name##:
+#else
+#define C_LABEL(name)		name/**/:
+#endif
+
+/* This seems to always be the case on PPC.  */
+#define ALIGNARG(log2) log2
+/* For ELF we need the `.type' directive to make shared libs work right.  */
+#define ASM_TYPE_DIRECTIVE(name,typearg) .type name,typearg;
+#define ASM_SIZE_DIRECTIVE(name) .size name,.-name
+
+/* If compiled for profiling, call `_mcount' at the start of each function.  */
+#ifdef	PROF
+/* The mcount code relies on the return address being on the stack
+   to locate our caller and so it can restore it; so store one just
+   for its benefit.  */
+#ifdef PIC
+#define CALL_MCOUNT							      \
+  .pushsection;								      \
+  .section ".data";							      \
+  .align ALIGNARG(2);							      \
+0:.long 0;								      \
+  .previous;								      \
+  mflr  %r0;								      \
+  stw   %r0,4(%r1);							      \
+  bl    _GLOBAL_OFFSET_TABLE_@local-4;					      \
+  mflr  %r11;								      \
+  lwz   %r0,0b@got(%r11);						      \
+  bl    JUMPTARGET(_mcount);
+#else  /* PIC */
+#define CALL_MCOUNT							      \
+  .section ".data";							      \
+  .align ALIGNARG(2);							      \
+0:.long 0;								      \
+  .previous;								      \
+  mflr  %r0;								      \
+  lis   %r11,0b@ha;							      \
+  stw   %r0,4(%r1);							      \
+  addi  %r0,%r11,0b@l;							      \
+  bl    JUMPTARGET(_mcount);
+#endif /* PIC */
+#else  /* PROF */
+#define CALL_MCOUNT		/* Do nothing.  */
+#endif /* PROF */
+
+#define	ENTRY(name)							      \
+  ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name);				      \
+  ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function)			      \
+  .align ALIGNARG(2);							      \
+  C_LABEL(name)								      \
+  CALL_MCOUNT
+
+#define EALIGN_W_0  /* No words to insert.  */
+#define EALIGN_W_1  nop
+#define EALIGN_W_2  nop;nop
+#define EALIGN_W_3  nop;nop;nop
+#define EALIGN_W_4  EALIGN_W_3;nop
+#define EALIGN_W_5  EALIGN_W_4;nop
+#define EALIGN_W_6  EALIGN_W_5;nop
+#define EALIGN_W_7  EALIGN_W_6;nop
+
+/* EALIGN is like ENTRY, but does alignment to 'words'*4 bytes
+   past a 2^align boundary.  */
+#ifdef PROF
+#define EFFI_ALIGN(name, alignt, words)					      \
+  ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name);				      \
+  ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function)			      \
+  .align ALIGNARG(2);							      \
+  C_LABEL(name)								      \
+  CALL_MCOUNT								      \
+  b 0f;									      \
+  .align ALIGNARG(alignt);						      \
+  EALIGN_W_##words;							      \
+  0:
+#else /* PROF */
+#define EFFI_ALIGN(name, alignt, words)					      \
+  ASM_GLOBAL_DIRECTIVE C_SYMBOL_NAME(name);				      \
+  ASM_TYPE_DIRECTIVE (C_SYMBOL_NAME(name),@function)			      \
+  .align ALIGNARG(alignt);						      \
+  EALIGN_W_##words;							      \
+  C_LABEL(name)
+#endif
+
+#define END(name)							      \
+  ASM_SIZE_DIRECTIVE(name)
+
+#ifdef PIC
+#define JUMPTARGET(name) name##@plt
+#else
+#define JUMPTARGET(name) name
+#endif
+
+/* Local labels stripped out by the linker.  */
+#define L(x) .L##x
diff --git a/contrib/restricted/libffi/src/powerpc/darwin.S b/contrib/restricted/libffi/src/powerpc/darwin.S
index 5012ffba65..066eb82efe 100644
--- a/contrib/restricted/libffi/src/powerpc/darwin.S
+++ b/contrib/restricted/libffi/src/powerpc/darwin.S
@@ -1,378 +1,378 @@
-/* ----------------------------------------------------------------------- 
-   darwin.S - Copyright (c) 2000 John Hornkvist 
-	      Copyright (c) 2004, 2010 Free Software Foundation, Inc. 
- 
-   PowerPC Assembly glue. 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR 
-   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
-   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
-   OTHER DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-#define LIBFFI_ASM 
-#if defined(__ppc64__) 
-#define MODE_CHOICE(x, y) y 
-#else 
-#define MODE_CHOICE(x, y) x 
-#endif 
- 
-#define machine_choice	MODE_CHOICE(ppc7400,ppc64) 
- 
-; Define some pseudo-opcodes for size-independent load & store of GPRs ... 
-#define lgu		MODE_CHOICE(lwzu, ldu) 
-#define lg		MODE_CHOICE(lwz,ld) 
-#define sg		MODE_CHOICE(stw,std) 
-#define sgu		MODE_CHOICE(stwu,stdu) 
-#define sgux		MODE_CHOICE(stwux,stdux) 
- 
-; ... and the size of GPRs and their storage indicator. 
-#define GPR_BYTES	MODE_CHOICE(4,8) 
-#define LOG2_GPR_BYTES	MODE_CHOICE(2,3)	/* log2(GPR_BYTES) */ 
-#define g_long		MODE_CHOICE(long, quad)	/* usage is ".g_long" */ 
- 
-; From the ABI doc: "Mac OS X ABI Function Call Guide" Version 2009-02-04. 
-#define LINKAGE_SIZE	MODE_CHOICE(24,48) 
-#define PARAM_AREA	MODE_CHOICE(32,64) 
-#define SAVED_LR_OFFSET	MODE_CHOICE(8,16)	/* save position for lr */ 
- 
-/* If there is any FP stuff we make space for all of the regs.  */ 
-#define SAVED_FPR_COUNT 13 
-#define FPR_SIZE	8 
-#define RESULT_BYTES	16 
- 
-/* This should be kept in step with the same value in ffi_darwin.c.  */ 
-#define ASM_NEEDS_REGISTERS 4 
-#define SAVE_REGS_SIZE (ASM_NEEDS_REGISTERS * GPR_BYTES) 
- 
-#include <fficonfig.h> 
-#include <ffi.h> 
- 
-#define JUMPTARGET(name) name 
-#define L(x) x 
- 
-	.text 
-	.align 2 
-	.globl _ffi_prep_args 
- 
-	.align 2 
-	.globl _ffi_call_DARWIN 
- 
-	/* We arrive here with: 
-	   r3 = ptr to extended cif. 
-	   r4 = -bytes. 
-	   r5 = cif flags. 
-	   r6 = ptr to return value. 
-	   r7 = fn pointer (user func). 
-	   r8 = fn pointer (ffi_prep_args). 
-	   r9 = ffi_type* for the ret val.  */ 
- 
-_ffi_call_DARWIN: 
-Lstartcode: 
-	mr   	r12,r8	/* We only need r12 until the call, 
-			   so it does not have to be saved.  */ 
-LFB1: 
-	/* Save the old stack pointer as AP.  */ 
-	mr	r8,r1 
-LCFI0: 
-	 
-	/* Save the retval type in parents frame.  */ 
-	sg	r9,(LINKAGE_SIZE+6*GPR_BYTES)(r8) 
- 
-	/* Allocate the stack space we need.  */ 
-	sgux	r1,r1,r4 
- 
-	/* Save registers we use.  */ 
-	mflr	r9 
-	sg	r9,SAVED_LR_OFFSET(r8) 
- 
-	sg	r28,-(4 * GPR_BYTES)(r8)	 
-	sg	r29,-(3 * GPR_BYTES)(r8) 
-	sg	r30,-(2 * GPR_BYTES)(r8) 
-	sg	r31,-(    GPR_BYTES)(r8) 
- 
-#if !defined(POWERPC_DARWIN) 
-	/* The TOC slot is reserved in the Darwin ABI and r2 is volatile.  */ 
-	sg	r2,(5 * GPR_BYTES)(r1) 
-#endif 
- 
-LCFI1: 
- 
-	/* Save arguments over call.  */ 
-	mr	r31,r5	/* flags,  */ 
-	mr	r30,r6	/* rvalue,  */ 
-	mr	r29,r7	/* function address,  */ 
-	mr	r28,r8	/* our AP.  */ 
-LCFI2: 
-	/* Call ffi_prep_args. r3 = extended cif, r4 = stack ptr copy.  */ 
-	mr	r4,r1 
-	li	r9,0 
- 
-	mtctr	r12 /* r12 holds address of _ffi_prep_args.  */ 
-	bctrl 
- 
-#if !defined(POWERPC_DARWIN) 
-	/* The TOC slot is reserved in the Darwin ABI and r2 is volatile.  */ 
-	lg     r2,(5 * GPR_BYTES)(r1) 
-#endif 
-	/* Now do the call. 
-	   Set up cr1 with bits 4-7 of the flags.  */ 
-	mtcrf	0x40,r31 
-	/* Get the address to call into CTR.  */ 
-	mtctr	r29 
-	/* Load all those argument registers. 
-	   We have set up a nice stack frame, just load it into registers.  */ 
-	lg     r3, (LINKAGE_SIZE                )(r1) 
-	lg     r4, (LINKAGE_SIZE +     GPR_BYTES)(r1) 
-	lg     r5, (LINKAGE_SIZE + 2 * GPR_BYTES)(r1) 
-	lg     r6, (LINKAGE_SIZE + 3 * GPR_BYTES)(r1) 
-	nop 
-	lg     r7, (LINKAGE_SIZE + 4 * GPR_BYTES)(r1) 
-	lg     r8, (LINKAGE_SIZE + 5 * GPR_BYTES)(r1) 
-	lg     r9, (LINKAGE_SIZE + 6 * GPR_BYTES)(r1) 
-	lg     r10,(LINKAGE_SIZE + 7 * GPR_BYTES)(r1) 
- 
-L1: 
-	/* ... Load all the FP registers.  */ 
-	bf	6,L2	/* No floats to load.  */ 
-	lfd	f1, -SAVE_REGS_SIZE-(13*FPR_SIZE)(r28) 
-	lfd	f2, -SAVE_REGS_SIZE-(12*FPR_SIZE)(r28) 
-	lfd	f3, -SAVE_REGS_SIZE-(11*FPR_SIZE)(r28) 
-	lfd	f4, -SAVE_REGS_SIZE-(10*FPR_SIZE)(r28) 
-	nop 
-	lfd	f5, -SAVE_REGS_SIZE-( 9*FPR_SIZE)(r28) 
-	lfd	f6, -SAVE_REGS_SIZE-( 8*FPR_SIZE)(r28) 
-	lfd	f7, -SAVE_REGS_SIZE-( 7*FPR_SIZE)(r28) 
-	lfd	f8, -SAVE_REGS_SIZE-( 6*FPR_SIZE)(r28) 
-	nop 
-	lfd     f9, -SAVE_REGS_SIZE-( 5*FPR_SIZE)(r28) 
-	lfd     f10,-SAVE_REGS_SIZE-( 4*FPR_SIZE)(r28) 
-	lfd     f11,-SAVE_REGS_SIZE-( 3*FPR_SIZE)(r28) 
-	lfd     f12,-SAVE_REGS_SIZE-( 2*FPR_SIZE)(r28) 
-	nop 
-	lfd     f13,-SAVE_REGS_SIZE-( 1*FPR_SIZE)(r28) 
- 
-L2: 
-	mr	r12,r29	/* Put the target address in r12 as specified.  */ 
-	mtctr  	r12 
-	nop 
-	nop 
- 
-	/* Make the call.  */ 
-	bctrl 
- 
-	/* Now, deal with the return value.  */ 
- 
-	/* m64 structure returns can occupy the same set of registers as 
-	   would be used to pass such a structure as arg0 - so take care  
-	   not to step on any possibly hot regs.  */ 
- 
-	/* Get the flags.. */ 
-	mtcrf	0x03,r31 ; we need c6 & cr7 now. 
-	; FLAG_RETURNS_NOTHING also covers struct ret-by-ref. 
-	bt	30,L(done_return_value)	  ; FLAG_RETURNS_NOTHING 
-	bf	27,L(scalar_return_value) ; not FLAG_RETURNS_STRUCT 
-	 
-	/* OK, so we have a struct.  */ 
-#if defined(__ppc64__) 
-	bt	31,L(maybe_return_128) ; FLAG_RETURNS_128BITS, special case  
- 
-	/* OK, we have to map the return back to a mem struct. 
-	   We are about to trample the parents param area, so recover the 
-	   return type.  r29 is free, since the call is done.  */ 
-	lg	r29,(LINKAGE_SIZE + 6 * GPR_BYTES)(r28) 
- 
-	sg	r3, (LINKAGE_SIZE                )(r28) 
-	sg	r4, (LINKAGE_SIZE +     GPR_BYTES)(r28) 
-	sg	r5, (LINKAGE_SIZE + 2 * GPR_BYTES)(r28) 
-	sg	r6, (LINKAGE_SIZE + 3 * GPR_BYTES)(r28) 
-	nop 
-	sg	r7, (LINKAGE_SIZE + 4 * GPR_BYTES)(r28) 
-	sg	r8, (LINKAGE_SIZE + 5 * GPR_BYTES)(r28) 
-	sg	r9, (LINKAGE_SIZE + 6 * GPR_BYTES)(r28) 
-	sg	r10,(LINKAGE_SIZE + 7 * GPR_BYTES)(r28) 
-	/* OK, so do the block move - we trust that memcpy will not trample 
-	   the fprs...  */ 
-	mr 	r3,r30 ; dest 
-	addi	r4,r28,LINKAGE_SIZE ; source 
-	/* The size is a size_t, should be long.  */ 
-	lg	r5,0(r29) 
-	/* Figure out small structs */ 
-	cmpi	0,r5,4 
-	bgt	L3	; 1, 2 and 4 bytes have special rules. 
-	cmpi	0,r5,3 
-	beq	L3	; not 3 
-	addi	r4,r4,8 
-	subf	r4,r5,r4 
-L3: 
-	bl	_memcpy 
-	 
-	/* ... do we need the FP registers? - recover the flags.. */ 
-	mtcrf	0x03,r31 ; we need c6 & cr7 now. 
-	bf	29,L(done_return_value)	/* No floats in the struct.  */ 
-	stfd	f1, -SAVE_REGS_SIZE-(13*FPR_SIZE)(r28) 
-	stfd	f2, -SAVE_REGS_SIZE-(12*FPR_SIZE)(r28) 
-	stfd	f3, -SAVE_REGS_SIZE-(11*FPR_SIZE)(r28) 
-	stfd	f4, -SAVE_REGS_SIZE-(10*FPR_SIZE)(r28) 
-	nop 
-	stfd	f5, -SAVE_REGS_SIZE-( 9*FPR_SIZE)(r28) 
-	stfd	f6, -SAVE_REGS_SIZE-( 8*FPR_SIZE)(r28) 
-	stfd	f7, -SAVE_REGS_SIZE-( 7*FPR_SIZE)(r28) 
-	stfd	f8, -SAVE_REGS_SIZE-( 6*FPR_SIZE)(r28) 
-	nop 
-	stfd	f9, -SAVE_REGS_SIZE-( 5*FPR_SIZE)(r28) 
-	stfd	f10,-SAVE_REGS_SIZE-( 4*FPR_SIZE)(r28) 
-	stfd	f11,-SAVE_REGS_SIZE-( 3*FPR_SIZE)(r28) 
-	stfd	f12,-SAVE_REGS_SIZE-( 2*FPR_SIZE)(r28) 
-	nop 
-	stfd	f13,-SAVE_REGS_SIZE-( 1*FPR_SIZE)(r28) 
- 
-	mr	r3,r29	; ffi_type * 
-	mr	r4,r30	; dest 
-	addi	r5,r28,-SAVE_REGS_SIZE-(13*FPR_SIZE) ; fprs 
-	xor	r6,r6,r6 
-	sg	r6,(LINKAGE_SIZE + 7 * GPR_BYTES)(r28) 
-	addi	r6,r28,(LINKAGE_SIZE + 7 * GPR_BYTES) ; point to a zeroed counter. 
-	bl 	_darwin64_struct_floats_to_mem 
- 
-	b L(done_return_value) 
-#else 
-	stw	r3,0(r30) ; m32 the only struct return in reg is 4 bytes. 
-#endif 
-	b L(done_return_value) 
- 
-L(fp_return_value): 
-	/* Do we have long double to store?  */ 
-	bf	31,L(fd_return_value) ; FLAG_RETURNS_128BITS 
-	stfd	f1,0(r30) 
-	stfd	f2,FPR_SIZE(r30) 
-	b	L(done_return_value) 
- 
-L(fd_return_value): 
-	/* Do we have double to store?  */ 
-	bf	28,L(float_return_value) 
-	stfd	f1,0(r30) 
-	b	L(done_return_value) 
- 
-L(float_return_value): 
-	/* We only have a float to store.  */ 
-	stfs	f1,0(r30) 
-	b	L(done_return_value) 
- 
-L(scalar_return_value): 
-	bt	29,L(fp_return_value)	; FLAG_RETURNS_FP 
-	; ffi_arg is defined as unsigned long.  
-	sg	r3,0(r30)		; Save the reg. 
-	bf	28,L(done_return_value) ; not FLAG_RETURNS_64BITS  
- 
-#if defined(__ppc64__) 
-L(maybe_return_128): 
-	std	r3,0(r30) 
-	bf	31,L(done_return_value) ; not FLAG_RETURNS_128BITS  
-	std	r4,8(r30) 
-#else 
-	stw	r4,4(r30) 
-#endif 
- 
-	/* Fall through.  */ 
-	/* We want this at the end to simplify eh epilog computation.  */ 
- 
-L(done_return_value): 
-	/* Restore the registers we used and return.  */ 
-	lg	r29,SAVED_LR_OFFSET(r28) 
-	; epilog 
-	lg	r31,-(1 * GPR_BYTES)(r28) 
-	mtlr	r29 
-	lg	r30,-(2 * GPR_BYTES)(r28) 
-	lg	r29,-(3 * GPR_BYTES)(r28) 
-	lg	r28,-(4 * GPR_BYTES)(r28) 
-	lg	r1,0(r1) 
-	blr 
-LFE1: 
-	.align	1 
-/* END(_ffi_call_DARWIN)  */ 
- 
-/* Provide a null definition of _ffi_call_AIX.  */ 
-	.text 
-	.globl _ffi_call_AIX 
-	.align 2 
-_ffi_call_AIX: 
-	blr 
-/* END(_ffi_call_AIX)  */ 
- 
-/* EH stuff.  */ 
- 
-#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78) 
- 
-	.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support 
-EH_frame1: 
-	.set	L$set$0,LECIE1-LSCIE1 
-	.long	L$set$0	; Length of Common Information Entry 
-LSCIE1: 
-	.long	0x0	; CIE Identifier Tag 
-	.byte	0x1	; CIE Version 
-	.ascii	"zR\0"	; CIE Augmentation 
-	.byte	0x1	; uleb128 0x1; CIE Code Alignment Factor 
-	.byte	EH_DATA_ALIGN_FACT ; sleb128 -4; CIE Data Alignment Factor 
-	.byte	0x41	; CIE RA Column 
-	.byte	0x1	; uleb128 0x1; Augmentation size 
-	.byte	0x10	; FDE Encoding (pcrel) 
-	.byte	0xc	; DW_CFA_def_cfa 
-	.byte	0x1	; uleb128 0x1 
-	.byte	0x0	; uleb128 0x0 
-	.align	LOG2_GPR_BYTES 
-LECIE1: 
- 
-	.globl _ffi_call_DARWIN.eh 
-_ffi_call_DARWIN.eh: 
-LSFDE1: 
-	.set	L$set$1,LEFDE1-LASFDE1 
-	.long	L$set$1	; FDE Length 
-LASFDE1: 
-	.long	LASFDE1-EH_frame1 ; FDE CIE offset 
-	.g_long	Lstartcode-.	; FDE initial location 
-	.set	L$set$3,LFE1-Lstartcode 
-	.g_long	L$set$3	; FDE address range 
-	.byte   0x0     ; uleb128 0x0; Augmentation size 
-	.byte	0x4	; DW_CFA_advance_loc4 
-	.set	L$set$4,LCFI0-Lstartcode 
-	.long	L$set$4 
-	.byte	0xd	; DW_CFA_def_cfa_register 
-	.byte	0x08	; uleb128 0x08 
-	.byte	0x4	; DW_CFA_advance_loc4 
-	.set	L$set$5,LCFI1-LCFI0 
-	.long	L$set$5 
-	.byte   0x11    ; DW_CFA_offset_extended_sf 
-	.byte	0x41	; uleb128 0x41 
-	.byte   0x7e    ; sleb128 -2 
-	.byte	0x9f	; DW_CFA_offset, column 0x1f 
-	.byte	0x1	; uleb128 0x1 
-	.byte	0x9e	; DW_CFA_offset, column 0x1e 
-	.byte	0x2	; uleb128 0x2 
-	.byte	0x9d	; DW_CFA_offset, column 0x1d 
-	.byte	0x3	; uleb128 0x3 
-	.byte	0x9c	; DW_CFA_offset, column 0x1c 
-	.byte	0x4	; uleb128 0x4 
-	.byte	0x4	; DW_CFA_advance_loc4 
-	.set	L$set$6,LCFI2-LCFI1 
-	.long	L$set$6 
-	.byte	0xd	; DW_CFA_def_cfa_register 
-	.byte	0x1c	; uleb128 0x1c 
-	.align LOG2_GPR_BYTES 
-LEFDE1: 
-	.align 1 
- 
+/* -----------------------------------------------------------------------
+   darwin.S - Copyright (c) 2000 John Hornkvist
+	      Copyright (c) 2004, 2010 Free Software Foundation, Inc.
+
+   PowerPC Assembly glue.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#if defined(__ppc64__)
+#define MODE_CHOICE(x, y) y
+#else
+#define MODE_CHOICE(x, y) x
+#endif
+
+#define machine_choice	MODE_CHOICE(ppc7400,ppc64)
+
+; Define some pseudo-opcodes for size-independent load & store of GPRs ...
+#define lgu		MODE_CHOICE(lwzu, ldu)
+#define lg		MODE_CHOICE(lwz,ld)
+#define sg		MODE_CHOICE(stw,std)
+#define sgu		MODE_CHOICE(stwu,stdu)
+#define sgux		MODE_CHOICE(stwux,stdux)
+
+; ... and the size of GPRs and their storage indicator.
+#define GPR_BYTES	MODE_CHOICE(4,8)
+#define LOG2_GPR_BYTES	MODE_CHOICE(2,3)	/* log2(GPR_BYTES) */
+#define g_long		MODE_CHOICE(long, quad)	/* usage is ".g_long" */
+
+; From the ABI doc: "Mac OS X ABI Function Call Guide" Version 2009-02-04.
+#define LINKAGE_SIZE	MODE_CHOICE(24,48)
+#define PARAM_AREA	MODE_CHOICE(32,64)
+#define SAVED_LR_OFFSET	MODE_CHOICE(8,16)	/* save position for lr */
+
+/* If there is any FP stuff we make space for all of the regs.  */
+#define SAVED_FPR_COUNT 13
+#define FPR_SIZE	8
+#define RESULT_BYTES	16
+
+/* This should be kept in step with the same value in ffi_darwin.c.  */
+#define ASM_NEEDS_REGISTERS 4
+#define SAVE_REGS_SIZE (ASM_NEEDS_REGISTERS * GPR_BYTES)
+
+#include <fficonfig.h>
+#include <ffi.h>
+
+#define JUMPTARGET(name) name
+#define L(x) x
+
+	.text
+	.align 2
+	.globl _ffi_prep_args
+
+	.align 2
+	.globl _ffi_call_DARWIN
+
+	/* We arrive here with:
+	   r3 = ptr to extended cif.
+	   r4 = -bytes.
+	   r5 = cif flags.
+	   r6 = ptr to return value.
+	   r7 = fn pointer (user func).
+	   r8 = fn pointer (ffi_prep_args).
+	   r9 = ffi_type* for the ret val.  */
+
+_ffi_call_DARWIN:
+Lstartcode:
+	mr   	r12,r8	/* We only need r12 until the call,
+			   so it does not have to be saved.  */
+LFB1:
+	/* Save the old stack pointer as AP.  */
+	mr	r8,r1
+LCFI0:
+	
+	/* Save the retval type in parents frame.  */
+	sg	r9,(LINKAGE_SIZE+6*GPR_BYTES)(r8)
+
+	/* Allocate the stack space we need.  */
+	sgux	r1,r1,r4
+
+	/* Save registers we use.  */
+	mflr	r9
+	sg	r9,SAVED_LR_OFFSET(r8)
+
+	sg	r28,-(4 * GPR_BYTES)(r8)	
+	sg	r29,-(3 * GPR_BYTES)(r8)
+	sg	r30,-(2 * GPR_BYTES)(r8)
+	sg	r31,-(    GPR_BYTES)(r8)
+
+#if !defined(POWERPC_DARWIN)
+	/* The TOC slot is reserved in the Darwin ABI and r2 is volatile.  */
+	sg	r2,(5 * GPR_BYTES)(r1)
+#endif
+
+LCFI1:
+
+	/* Save arguments over call.  */
+	mr	r31,r5	/* flags,  */
+	mr	r30,r6	/* rvalue,  */
+	mr	r29,r7	/* function address,  */
+	mr	r28,r8	/* our AP.  */
+LCFI2:
+	/* Call ffi_prep_args. r3 = extended cif, r4 = stack ptr copy.  */
+	mr	r4,r1
+	li	r9,0
+
+	mtctr	r12 /* r12 holds address of _ffi_prep_args.  */
+	bctrl
+
+#if !defined(POWERPC_DARWIN)
+	/* The TOC slot is reserved in the Darwin ABI and r2 is volatile.  */
+	lg     r2,(5 * GPR_BYTES)(r1)
+#endif
+	/* Now do the call.
+	   Set up cr1 with bits 4-7 of the flags.  */
+	mtcrf	0x40,r31
+	/* Get the address to call into CTR.  */
+	mtctr	r29
+	/* Load all those argument registers.
+	   We have set up a nice stack frame, just load it into registers.  */
+	lg     r3, (LINKAGE_SIZE                )(r1)
+	lg     r4, (LINKAGE_SIZE +     GPR_BYTES)(r1)
+	lg     r5, (LINKAGE_SIZE + 2 * GPR_BYTES)(r1)
+	lg     r6, (LINKAGE_SIZE + 3 * GPR_BYTES)(r1)
+	nop
+	lg     r7, (LINKAGE_SIZE + 4 * GPR_BYTES)(r1)
+	lg     r8, (LINKAGE_SIZE + 5 * GPR_BYTES)(r1)
+	lg     r9, (LINKAGE_SIZE + 6 * GPR_BYTES)(r1)
+	lg     r10,(LINKAGE_SIZE + 7 * GPR_BYTES)(r1)
+
+L1:
+	/* ... Load all the FP registers.  */
+	bf	6,L2	/* No floats to load.  */
+	lfd	f1, -SAVE_REGS_SIZE-(13*FPR_SIZE)(r28)
+	lfd	f2, -SAVE_REGS_SIZE-(12*FPR_SIZE)(r28)
+	lfd	f3, -SAVE_REGS_SIZE-(11*FPR_SIZE)(r28)
+	lfd	f4, -SAVE_REGS_SIZE-(10*FPR_SIZE)(r28)
+	nop
+	lfd	f5, -SAVE_REGS_SIZE-( 9*FPR_SIZE)(r28)
+	lfd	f6, -SAVE_REGS_SIZE-( 8*FPR_SIZE)(r28)
+	lfd	f7, -SAVE_REGS_SIZE-( 7*FPR_SIZE)(r28)
+	lfd	f8, -SAVE_REGS_SIZE-( 6*FPR_SIZE)(r28)
+	nop
+	lfd     f9, -SAVE_REGS_SIZE-( 5*FPR_SIZE)(r28)
+	lfd     f10,-SAVE_REGS_SIZE-( 4*FPR_SIZE)(r28)
+	lfd     f11,-SAVE_REGS_SIZE-( 3*FPR_SIZE)(r28)
+	lfd     f12,-SAVE_REGS_SIZE-( 2*FPR_SIZE)(r28)
+	nop
+	lfd     f13,-SAVE_REGS_SIZE-( 1*FPR_SIZE)(r28)
+
+L2:
+	mr	r12,r29	/* Put the target address in r12 as specified.  */
+	mtctr  	r12
+	nop
+	nop
+
+	/* Make the call.  */
+	bctrl
+
+	/* Now, deal with the return value.  */
+
+	/* m64 structure returns can occupy the same set of registers as
+	   would be used to pass such a structure as arg0 - so take care 
+	   not to step on any possibly hot regs.  */
+
+	/* Get the flags.. */
+	mtcrf	0x03,r31 ; we need c6 & cr7 now.
+	; FLAG_RETURNS_NOTHING also covers struct ret-by-ref.
+	bt	30,L(done_return_value)	  ; FLAG_RETURNS_NOTHING
+	bf	27,L(scalar_return_value) ; not FLAG_RETURNS_STRUCT
+	
+	/* OK, so we have a struct.  */
+#if defined(__ppc64__)
+	bt	31,L(maybe_return_128) ; FLAG_RETURNS_128BITS, special case 
+
+	/* OK, we have to map the return back to a mem struct.
+	   We are about to trample the parents param area, so recover the
+	   return type.  r29 is free, since the call is done.  */
+	lg	r29,(LINKAGE_SIZE + 6 * GPR_BYTES)(r28)
+
+	sg	r3, (LINKAGE_SIZE                )(r28)
+	sg	r4, (LINKAGE_SIZE +     GPR_BYTES)(r28)
+	sg	r5, (LINKAGE_SIZE + 2 * GPR_BYTES)(r28)
+	sg	r6, (LINKAGE_SIZE + 3 * GPR_BYTES)(r28)
+	nop
+	sg	r7, (LINKAGE_SIZE + 4 * GPR_BYTES)(r28)
+	sg	r8, (LINKAGE_SIZE + 5 * GPR_BYTES)(r28)
+	sg	r9, (LINKAGE_SIZE + 6 * GPR_BYTES)(r28)
+	sg	r10,(LINKAGE_SIZE + 7 * GPR_BYTES)(r28)
+	/* OK, so do the block move - we trust that memcpy will not trample
+	   the fprs...  */
+	mr 	r3,r30 ; dest
+	addi	r4,r28,LINKAGE_SIZE ; source
+	/* The size is a size_t, should be long.  */
+	lg	r5,0(r29)
+	/* Figure out small structs */
+	cmpi	0,r5,4
+	bgt	L3	; 1, 2 and 4 bytes have special rules.
+	cmpi	0,r5,3
+	beq	L3	; not 3
+	addi	r4,r4,8
+	subf	r4,r5,r4
+L3:
+	bl	_memcpy
+	
+	/* ... do we need the FP registers? - recover the flags.. */
+	mtcrf	0x03,r31 ; we need c6 & cr7 now.
+	bf	29,L(done_return_value)	/* No floats in the struct.  */
+	stfd	f1, -SAVE_REGS_SIZE-(13*FPR_SIZE)(r28)
+	stfd	f2, -SAVE_REGS_SIZE-(12*FPR_SIZE)(r28)
+	stfd	f3, -SAVE_REGS_SIZE-(11*FPR_SIZE)(r28)
+	stfd	f4, -SAVE_REGS_SIZE-(10*FPR_SIZE)(r28)
+	nop
+	stfd	f5, -SAVE_REGS_SIZE-( 9*FPR_SIZE)(r28)
+	stfd	f6, -SAVE_REGS_SIZE-( 8*FPR_SIZE)(r28)
+	stfd	f7, -SAVE_REGS_SIZE-( 7*FPR_SIZE)(r28)
+	stfd	f8, -SAVE_REGS_SIZE-( 6*FPR_SIZE)(r28)
+	nop
+	stfd	f9, -SAVE_REGS_SIZE-( 5*FPR_SIZE)(r28)
+	stfd	f10,-SAVE_REGS_SIZE-( 4*FPR_SIZE)(r28)
+	stfd	f11,-SAVE_REGS_SIZE-( 3*FPR_SIZE)(r28)
+	stfd	f12,-SAVE_REGS_SIZE-( 2*FPR_SIZE)(r28)
+	nop
+	stfd	f13,-SAVE_REGS_SIZE-( 1*FPR_SIZE)(r28)
+
+	mr	r3,r29	; ffi_type *
+	mr	r4,r30	; dest
+	addi	r5,r28,-SAVE_REGS_SIZE-(13*FPR_SIZE) ; fprs
+	xor	r6,r6,r6
+	sg	r6,(LINKAGE_SIZE + 7 * GPR_BYTES)(r28)
+	addi	r6,r28,(LINKAGE_SIZE + 7 * GPR_BYTES) ; point to a zeroed counter.
+	bl 	_darwin64_struct_floats_to_mem
+
+	b L(done_return_value)
+#else
+	stw	r3,0(r30) ; m32 the only struct return in reg is 4 bytes.
+#endif
+	b L(done_return_value)
+
+L(fp_return_value):
+	/* Do we have long double to store?  */
+	bf	31,L(fd_return_value) ; FLAG_RETURNS_128BITS
+	stfd	f1,0(r30)
+	stfd	f2,FPR_SIZE(r30)
+	b	L(done_return_value)
+
+L(fd_return_value):
+	/* Do we have double to store?  */
+	bf	28,L(float_return_value)
+	stfd	f1,0(r30)
+	b	L(done_return_value)
+
+L(float_return_value):
+	/* We only have a float to store.  */
+	stfs	f1,0(r30)
+	b	L(done_return_value)
+
+L(scalar_return_value):
+	bt	29,L(fp_return_value)	; FLAG_RETURNS_FP
+	; ffi_arg is defined as unsigned long. 
+	sg	r3,0(r30)		; Save the reg.
+	bf	28,L(done_return_value) ; not FLAG_RETURNS_64BITS 
+
+#if defined(__ppc64__)
+L(maybe_return_128):
+	std	r3,0(r30)
+	bf	31,L(done_return_value) ; not FLAG_RETURNS_128BITS 
+	std	r4,8(r30)
+#else
+	stw	r4,4(r30)
+#endif
+
+	/* Fall through.  */
+	/* We want this at the end to simplify eh epilog computation.  */
+
+L(done_return_value):
+	/* Restore the registers we used and return.  */
+	lg	r29,SAVED_LR_OFFSET(r28)
+	; epilog
+	lg	r31,-(1 * GPR_BYTES)(r28)
+	mtlr	r29
+	lg	r30,-(2 * GPR_BYTES)(r28)
+	lg	r29,-(3 * GPR_BYTES)(r28)
+	lg	r28,-(4 * GPR_BYTES)(r28)
+	lg	r1,0(r1)
+	blr
+LFE1:
+	.align	1
+/* END(_ffi_call_DARWIN)  */
+
+/* Provide a null definition of _ffi_call_AIX.  */
+	.text
+	.globl _ffi_call_AIX
+	.align 2
+_ffi_call_AIX:
+	blr
+/* END(_ffi_call_AIX)  */
+
+/* EH stuff.  */
+
+#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78)
+
+	.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EH_frame1:
+	.set	L$set$0,LECIE1-LSCIE1
+	.long	L$set$0	; Length of Common Information Entry
+LSCIE1:
+	.long	0x0	; CIE Identifier Tag
+	.byte	0x1	; CIE Version
+	.ascii	"zR\0"	; CIE Augmentation
+	.byte	0x1	; uleb128 0x1; CIE Code Alignment Factor
+	.byte	EH_DATA_ALIGN_FACT ; sleb128 -4; CIE Data Alignment Factor
+	.byte	0x41	; CIE RA Column
+	.byte	0x1	; uleb128 0x1; Augmentation size
+	.byte	0x10	; FDE Encoding (pcrel)
+	.byte	0xc	; DW_CFA_def_cfa
+	.byte	0x1	; uleb128 0x1
+	.byte	0x0	; uleb128 0x0
+	.align	LOG2_GPR_BYTES
+LECIE1:
+
+	.globl _ffi_call_DARWIN.eh
+_ffi_call_DARWIN.eh:
+LSFDE1:
+	.set	L$set$1,LEFDE1-LASFDE1
+	.long	L$set$1	; FDE Length
+LASFDE1:
+	.long	LASFDE1-EH_frame1 ; FDE CIE offset
+	.g_long	Lstartcode-.	; FDE initial location
+	.set	L$set$3,LFE1-Lstartcode
+	.g_long	L$set$3	; FDE address range
+	.byte   0x0     ; uleb128 0x0; Augmentation size
+	.byte	0x4	; DW_CFA_advance_loc4
+	.set	L$set$4,LCFI0-Lstartcode
+	.long	L$set$4
+	.byte	0xd	; DW_CFA_def_cfa_register
+	.byte	0x08	; uleb128 0x08
+	.byte	0x4	; DW_CFA_advance_loc4
+	.set	L$set$5,LCFI1-LCFI0
+	.long	L$set$5
+	.byte   0x11    ; DW_CFA_offset_extended_sf
+	.byte	0x41	; uleb128 0x41
+	.byte   0x7e    ; sleb128 -2
+	.byte	0x9f	; DW_CFA_offset, column 0x1f
+	.byte	0x1	; uleb128 0x1
+	.byte	0x9e	; DW_CFA_offset, column 0x1e
+	.byte	0x2	; uleb128 0x2
+	.byte	0x9d	; DW_CFA_offset, column 0x1d
+	.byte	0x3	; uleb128 0x3
+	.byte	0x9c	; DW_CFA_offset, column 0x1c
+	.byte	0x4	; uleb128 0x4
+	.byte	0x4	; DW_CFA_advance_loc4
+	.set	L$set$6,LCFI2-LCFI1
+	.long	L$set$6
+	.byte	0xd	; DW_CFA_def_cfa_register
+	.byte	0x1c	; uleb128 0x1c
+	.align LOG2_GPR_BYTES
+LEFDE1:
+	.align 1
+
diff --git a/contrib/restricted/libffi/src/powerpc/darwin_closure.S b/contrib/restricted/libffi/src/powerpc/darwin_closure.S
index 0e111edd35..3121e6ac26 100644
--- a/contrib/restricted/libffi/src/powerpc/darwin_closure.S
+++ b/contrib/restricted/libffi/src/powerpc/darwin_closure.S
@@ -1,571 +1,571 @@
-/* ----------------------------------------------------------------------- 
-   darwin_closure.S - Copyright (c) 2002, 2003, 2004, 2010,  
-   Free Software Foundation, Inc.  
-   based on ppc_closure.S 
- 
-   PowerPC Assembly glue. 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR 
-   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
-   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
-   OTHER DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-#define LIBFFI_ASM 
-#define L(x) x 
- 
-#if defined(__ppc64__) 
-#define MODE_CHOICE(x, y) y 
-#else 
-#define MODE_CHOICE(x, y) x 
-#endif 
- 
-#define machine_choice	MODE_CHOICE(ppc7400,ppc64) 
- 
-; Define some pseudo-opcodes for size-independent load & store of GPRs ... 
-#define lgu		MODE_CHOICE(lwzu, ldu) 
-#define lg		MODE_CHOICE(lwz,ld) 
-#define sg		MODE_CHOICE(stw,std) 
-#define sgu		MODE_CHOICE(stwu,stdu) 
- 
-; ... and the size of GPRs and their storage indicator. 
-#define GPR_BYTES	MODE_CHOICE(4,8) 
-#define LOG2_GPR_BYTES	MODE_CHOICE(2,3)	/* log2(GPR_BYTES) */ 
-#define g_long		MODE_CHOICE(long, quad)	/* usage is ".g_long" */ 
- 
-; From the ABI doc: "Mac OS X ABI Function Call Guide" Version 2009-02-04. 
-#define LINKAGE_SIZE	MODE_CHOICE(24,48) 
-#define PARAM_AREA	MODE_CHOICE(32,64) 
- 
-#define SAVED_CR_OFFSET	MODE_CHOICE(4,8)	/* save position for CR */ 
-#define SAVED_LR_OFFSET	MODE_CHOICE(8,16)	/* save position for lr */ 
- 
-/* WARNING: if ffi_type is changed... here be monsters.   
-   Offsets of items within the result type.  */ 
-#define FFI_TYPE_TYPE	MODE_CHOICE(6,10) 
-#define FFI_TYPE_ELEM	MODE_CHOICE(8,16) 
- 
-#define SAVED_FPR_COUNT 13 
-#define FPR_SIZE	8 
-/* biggest m64 struct ret is 8GPRS + 13FPRS = 168 bytes - rounded to 16bytes = 176. */ 
-#define RESULT_BYTES	MODE_CHOICE(16,176) 
- 
-; The whole stack frame **MUST** be 16byte-aligned. 
-#define SAVE_SIZE (((LINKAGE_SIZE+PARAM_AREA+SAVED_FPR_COUNT*FPR_SIZE+RESULT_BYTES)+15) & -16LL) 
-#define PAD_SIZE (SAVE_SIZE-(LINKAGE_SIZE+PARAM_AREA+SAVED_FPR_COUNT*FPR_SIZE+RESULT_BYTES)) 
- 
-#define PARENT_PARM_BASE (SAVE_SIZE+LINKAGE_SIZE) 
-#define FP_SAVE_BASE (LINKAGE_SIZE+PARAM_AREA) 
- 
-#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050 
-; We no longer need the pic symbol stub for Darwin >= 9. 
-#define BLCLS_HELP _ffi_closure_helper_DARWIN 
-#define STRUCT_RETVALUE_P _darwin64_struct_ret_by_value_p 
-#define PASS_STR_FLOATS _darwin64_pass_struct_floats 
-#undef WANT_STUB 
-#else 
-#define BLCLS_HELP L_ffi_closure_helper_DARWIN$stub 
-#define STRUCT_RETVALUE_P L_darwin64_struct_ret_by_value_p$stub 
-#define PASS_STR_FLOATS L_darwin64_pass_struct_floats$stub 
-#define WANT_STUB 
-#endif 
- 
-/* m32/m64 
- 
-   The stack layout looks like this: 
- 
-   |   Additional params...			| |     Higher address 
-   ~						~ ~ 
-   |   Parameters      (at least 8*4/8=32/64)	| | NUM_GPR_ARG_REGISTERS 
-   |--------------------------------------------| | 
-   |   TOC=R2 (AIX) Reserved (Darwin)   4/8	| | 
-   |--------------------------------------------| | 
-   |   Reserved                       2*4/8	| | 
-   |--------------------------------------------| | 
-   |   Space for callee`s LR		4/8	| | 
-   |--------------------------------------------| | 
-   |   Saved CR [low word for m64]      4/8	| | 
-   |--------------------------------------------| | 
-   |   Current backchain pointer	4/8	|-/ Parent`s frame. 
-   |--------------------------------------------| <+ <<< on entry to 
-   |   Result Bytes		       16/176	| | 
-   |--------------------------------------------| | 
-   ~   padding to 16-byte alignment		~ ~ 
-   |--------------------------------------------| | 
-   |   NUM_FPR_ARG_REGISTERS slots		| | 
-   |   here fp13 .. fp1		       13*8	| | 
-   |--------------------------------------------| | 
-   |   R3..R10			  8*4/8=32/64	| | NUM_GPR_ARG_REGISTERS 
-   |--------------------------------------------| | 
-   |   TOC=R2 (AIX) Reserved (Darwin)   4/8	| | 
-   |--------------------------------------------| |	stack	| 
-   |   Reserved [compiler,binder]     2*4/8	| |	grows	| 
-   |--------------------------------------------| |	down	V 
-   |   Space for callees LR		4/8	| | 
-   |--------------------------------------------| |	lower addresses 
-   |   Saved CR [low word for m64]      4/8	| | 
-   |--------------------------------------------| |     stack pointer here 
-   |   Current backchain pointer	4/8	|-/	during 
-   |--------------------------------------------|   <<<	call. 
- 
-*/ 
- 
-	.file	"darwin_closure.S" 
- 
-	.machine machine_choice 
- 
-	.text 
-	.globl _ffi_closure_ASM 
-	.align LOG2_GPR_BYTES 
-_ffi_closure_ASM: 
-LFB1: 
-Lstartcode: 
-	mflr	r0			/* extract return address  */ 
-	sg	r0,SAVED_LR_OFFSET(r1)	/* save the return address  */ 
-LCFI0: 
-	sgu	r1,-SAVE_SIZE(r1)	/* skip over caller save area 
-					keep stack aligned to 16.  */ 
-LCFI1: 
-	/* We want to build up an area for the parameters passed 
-	   in registers. (both floating point and integer)  */ 
- 
-	/* Put gpr 3 to gpr 10 in the parents outgoing area... 
-	   ... the remainder of any params that overflowed the regs will 
-	   follow here.  */ 
-	sg	r3, (PARENT_PARM_BASE                )(r1) 
-	sg	r4, (PARENT_PARM_BASE + GPR_BYTES    )(r1) 
-	sg	r5, (PARENT_PARM_BASE + GPR_BYTES * 2)(r1) 
-	sg	r6, (PARENT_PARM_BASE + GPR_BYTES * 3)(r1) 
-	sg	r7, (PARENT_PARM_BASE + GPR_BYTES * 4)(r1) 
-	sg	r8, (PARENT_PARM_BASE + GPR_BYTES * 5)(r1) 
-	sg	r9, (PARENT_PARM_BASE + GPR_BYTES * 6)(r1) 
-	sg	r10,(PARENT_PARM_BASE + GPR_BYTES * 7)(r1) 
- 
-	/* We save fpr 1 to fpr 14 in our own save frame.  */ 
-	stfd	f1, (FP_SAVE_BASE                 )(r1) 
-	stfd	f2, (FP_SAVE_BASE +  FPR_SIZE     )(r1) 
-	stfd	f3, (FP_SAVE_BASE +  FPR_SIZE * 2 )(r1) 
-	stfd	f4, (FP_SAVE_BASE +  FPR_SIZE * 3 )(r1) 
-	stfd	f5, (FP_SAVE_BASE +  FPR_SIZE * 4 )(r1) 
-	stfd	f6, (FP_SAVE_BASE +  FPR_SIZE * 5 )(r1) 
-	stfd	f7, (FP_SAVE_BASE +  FPR_SIZE * 6 )(r1) 
-	stfd	f8, (FP_SAVE_BASE +  FPR_SIZE * 7 )(r1) 
-	stfd	f9, (FP_SAVE_BASE +  FPR_SIZE * 8 )(r1) 
-	stfd	f10,(FP_SAVE_BASE +  FPR_SIZE * 9 )(r1) 
-	stfd	f11,(FP_SAVE_BASE +  FPR_SIZE * 10)(r1) 
-	stfd	f12,(FP_SAVE_BASE +  FPR_SIZE * 11)(r1) 
-	stfd	f13,(FP_SAVE_BASE +  FPR_SIZE * 12)(r1) 
- 
-	/* Set up registers for the routine that actually does the work 
-	   get the context pointer from the trampoline.  */ 
-	mr	r3,r11 
- 
-	/* Now load up the pointer to the result storage.  */ 
-	addi	r4,r1,(SAVE_SIZE-RESULT_BYTES) 
- 
-	/* Now load up the pointer to the saved gpr registers.  */ 
-	addi	r5,r1,PARENT_PARM_BASE 
- 
-	/* Now load up the pointer to the saved fpr registers.  */ 
-	addi	r6,r1,FP_SAVE_BASE 
- 
-	/* Make the call.  */ 
-	bl	BLCLS_HELP 
- 
-	/* r3 contains the rtype pointer... save it since we will need 
-	   it later.  */ 
-	sg	r3,LINKAGE_SIZE(r1)	; ffi_type * result_type 
-	lg	r0,0(r3)		; size => r0 
-	lhz	r3,FFI_TYPE_TYPE(r3)	; type => r3 
- 
-	/* The helper will have intercepted structure returns and inserted 
-	   the caller`s destination address for structs returned by ref.  */ 
- 
-	/* r3 contains the return type  so use it to look up in a table 
-	   so we know how to deal with each type.  */ 
- 
-	addi	r5,r1,(SAVE_SIZE-RESULT_BYTES) /* Otherwise, our return is here.  */ 
-	bl	Lget_ret_type0_addr	/* Get pointer to Lret_type0 into LR.  */ 
-	mflr	r4			/* Move to r4.  */ 
-	slwi	r3,r3,4			/* Now multiply return type by 16.  */ 
-	add	r3,r3,r4		/* Add contents of table to table address.  */ 
-	mtctr	r3 
-	bctr			 	 /* Jump to it.  */ 
-LFE1: 
-/* Each of the ret_typeX code fragments has to be exactly 16 bytes long 
-   (4 instructions). For cache effectiveness we align to a 16 byte boundary 
-   first.  */ 
- 
-	.align 4 
- 
-	nop 
-	nop 
-	nop 
-Lget_ret_type0_addr: 
-	blrl 
- 
-/* case FFI_TYPE_VOID  */ 
-Lret_type0: 
-	b	Lfinish 
-	nop 
-	nop 
-	nop 
- 
-/* case FFI_TYPE_INT  */ 
-Lret_type1: 
-	lg	r3,0(r5) 
-	b	Lfinish 
-	nop 
-	nop 
- 
-/* case FFI_TYPE_FLOAT  */ 
-Lret_type2: 
-	lfs	f1,0(r5) 
-	b	Lfinish 
-	nop 
-	nop 
- 
-/* case FFI_TYPE_DOUBLE  */ 
-Lret_type3: 
-	lfd	f1,0(r5) 
-	b	Lfinish 
-	nop 
-	nop 
- 
-/* case FFI_TYPE_LONGDOUBLE  */ 
-Lret_type4: 
-	lfd	f1,0(r5) 
-	lfd	f2,8(r5) 
-	b	Lfinish 
-	nop 
- 
-/* case FFI_TYPE_UINT8  */ 
-Lret_type5: 
-#if defined(__ppc64__) 
-	lbz	r3,7(r5) 
-#else 
-	lbz	r3,3(r5) 
-#endif 
-	b	Lfinish 
-	nop 
-	nop 
- 
-/* case FFI_TYPE_SINT8  */ 
-Lret_type6: 
-#if defined(__ppc64__) 
-	lbz	r3,7(r5) 
-#else 
-	lbz	r3,3(r5) 
-#endif 
-	extsb	r3,r3 
-	b	Lfinish 
-	nop 
- 
-/* case FFI_TYPE_UINT16  */ 
-Lret_type7: 
-#if defined(__ppc64__) 
-	lhz	r3,6(r5) 
-#else 
-	lhz	r3,2(r5) 
-#endif 
-	b	Lfinish 
-	nop 
-	nop 
- 
-/* case FFI_TYPE_SINT16  */ 
-Lret_type8: 
-#if defined(__ppc64__) 
-	lha	r3,6(r5) 
-#else 
-	lha	r3,2(r5) 
-#endif 
-	b	Lfinish 
-	nop 
-	nop 
- 
-/* case FFI_TYPE_UINT32  */ 
-Lret_type9: 
-#if defined(__ppc64__) 
-	lwz	r3,4(r5) 
-#else 
-	lwz	r3,0(r5) 
-#endif 
-	b	Lfinish 
-	nop 
-	nop 
- 
-/* case FFI_TYPE_SINT32  */ 
-Lret_type10: 
-#if defined(__ppc64__) 
-	lwz	r3,4(r5) 
-#else 
-	lwz	r3,0(r5) 
-#endif 
-	b	Lfinish 
-	nop 
-	nop 
- 
-/* case FFI_TYPE_UINT64  */ 
-Lret_type11: 
-#if defined(__ppc64__) 
-	lg	r3,0(r5) 
-	b	Lfinish 
-	nop 
-#else 
-	lwz	r3,0(r5) 
-	lwz	r4,4(r5) 
-	b	Lfinish 
-#endif 
-	nop 
- 
-/* case FFI_TYPE_SINT64  */ 
-Lret_type12: 
-#if defined(__ppc64__) 
-	lg	r3,0(r5) 
-	b	Lfinish 
-	nop 
-#else 
-	lwz	r3,0(r5) 
-	lwz	r4,4(r5) 
-	b	Lfinish 
-#endif 
-	nop 
- 
-/* case FFI_TYPE_STRUCT  */ 
-Lret_type13: 
-#if defined(__ppc64__) 
-	lg	r3,0(r5)		; we need at least this... 
-	cmpi	0,r0,4 
-	bgt	Lstructend		; not a special small case 
-	b	Lsmallstruct		; see if we need more. 
-#else 
-	cmpwi	0,r0,4 
-	bgt	Lfinish		; not by value 
-	lg	r3,0(r5) 
-	b	Lfinish 
-#endif 
-/* case FFI_TYPE_POINTER  */ 
-Lret_type14: 
-	lg	r3,0(r5) 
-	b	Lfinish 
-	nop 
-	nop 
- 
-#if defined(__ppc64__) 
-Lsmallstruct: 
-	beq	Lfour			; continuation of Lret13. 
-	cmpi	0,r0,3 
-	beq	Lfinish			; don`t adjust this - can`t be any floats here... 
-	srdi	r3,r3,48 
-	cmpi	0,r0,2 
-	beq	Lfinish			; .. or here .. 
-	srdi	r3,r3,8 
-	b 	Lfinish			; .. or here. 
- 
-Lfour: 
-	lg	r6,LINKAGE_SIZE(r1)	; get the result type 
-	lg	r6,FFI_TYPE_ELEM(r6)	; elements array pointer 
-	lg	r6,0(r6)		; first element 
-	lhz	r0,FFI_TYPE_TYPE(r6)	; OK go the type 
-	cmpi	0,r0,2			; FFI_TYPE_FLOAT 
-	bne	Lfourint 
-	lfs	f1,0(r5)		; just one float in the struct. 
-	b 	Lfinish 
- 
-Lfourint: 
-	srdi	r3,r3,32		; four bytes. 
-	b 	Lfinish 
- 
-Lstructend: 
-	lg	r3,LINKAGE_SIZE(r1)	; get the result type 
-	bl	STRUCT_RETVALUE_P 
-	cmpi	0,r3,0 
-	beq	Lfinish			; nope. 
-	/* Recover a pointer to the results.  */ 
-	addi	r11,r1,(SAVE_SIZE-RESULT_BYTES) 
-	lg	r3,0(r11)		; we need at least this... 
-	lg	r4,8(r11) 
-	cmpi	0,r0,16 
-	beq	Lfinish		; special case 16 bytes we don't consider floats. 
- 
-	/* OK, frustratingly, the process of saving the struct to mem might have 
-	   messed with the FPRs, so we have to re-load them :(. 
-	   We`ll use our FPRs space again - calling:  
-	   void darwin64_pass_struct_floats (ffi_type *s, char *src,  
-					     unsigned *nfpr, double **fprs)  
-	   We`ll temporarily pinch the first two slots of the param area for local 
-	   vars used by the routine.  */ 
-	xor	r6,r6,r6 
-	addi	r5,r1,PARENT_PARM_BASE		; some space 
-	sg	r6,0(r5)			; *nfpr zeroed. 
-	addi	r6,r5,8				; **fprs 
-	addi	r3,r1,FP_SAVE_BASE		; pointer to FPRs space 
-	sg	r3,0(r6) 
-	mr	r4,r11				; the struct is here... 
-	lg	r3,LINKAGE_SIZE(r1)		; ffi_type * result_type. 
-	bl	PASS_STR_FLOATS			; get struct floats into FPR save space. 
-	/* See if we used any floats  */ 
-	lwz	r0,(SAVE_SIZE-RESULT_BYTES)(r1)	 
-	cmpi	0,r0,0 
-	beq	Lstructints			; nope. 
-	/* OK load `em up... */ 
-	lfd	f1, (FP_SAVE_BASE                 )(r1) 
-	lfd	f2, (FP_SAVE_BASE +  FPR_SIZE     )(r1) 
-	lfd	f3, (FP_SAVE_BASE +  FPR_SIZE * 2 )(r1) 
-	lfd	f4, (FP_SAVE_BASE +  FPR_SIZE * 3 )(r1) 
-	lfd	f5, (FP_SAVE_BASE +  FPR_SIZE * 4 )(r1) 
-	lfd	f6, (FP_SAVE_BASE +  FPR_SIZE * 5 )(r1) 
-	lfd	f7, (FP_SAVE_BASE +  FPR_SIZE * 6 )(r1) 
-	lfd	f8, (FP_SAVE_BASE +  FPR_SIZE * 7 )(r1) 
-	lfd	f9, (FP_SAVE_BASE +  FPR_SIZE * 8 )(r1) 
-	lfd	f10,(FP_SAVE_BASE +  FPR_SIZE * 9 )(r1) 
-	lfd	f11,(FP_SAVE_BASE +  FPR_SIZE * 10)(r1) 
-	lfd	f12,(FP_SAVE_BASE +  FPR_SIZE * 11)(r1) 
-	lfd	f13,(FP_SAVE_BASE +  FPR_SIZE * 12)(r1) 
- 
-	/* point back at our saved struct.  */ 
-Lstructints: 
-	addi	r11,r1,(SAVE_SIZE-RESULT_BYTES) 
-	lg	r3,0(r11)			; we end up picking the 
-	lg	r4,8(r11)			; first two again. 
-	lg	r5,16(r11) 
-	lg	r6,24(r11) 
-	lg	r7,32(r11) 
-	lg	r8,40(r11) 
-	lg	r9,48(r11) 
-	lg	r10,56(r11) 
-#endif 
- 
-/* case done  */ 
-Lfinish: 
-	addi	r1,r1,SAVE_SIZE		/* Restore stack pointer.  */ 
-	lg	r0,SAVED_LR_OFFSET(r1)	/* Get return address.  */ 
-	mtlr	r0			/* Reset link register.  */ 
-	blr 
-Lendcode: 
-	.align 1 
-	 
-/* END(ffi_closure_ASM)  */ 
- 
-/* EH frame stuff.  */ 
-#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78) 
-/* 176, 400 */ 
-#define EH_FRAME_OFFSETA MODE_CHOICE(176,0x90) 
-#define EH_FRAME_OFFSETB MODE_CHOICE(1,3) 
- 
-	.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support 
-EH_frame1: 
-	.set	L$set$0,LECIE1-LSCIE1 
-	.long	L$set$0	; Length of Common Information Entry 
-LSCIE1: 
-	.long	0x0	; CIE Identifier Tag 
-	.byte	0x1	; CIE Version 
-	.ascii	"zR\0"	; CIE Augmentation 
-	.byte	0x1	; uleb128 0x1; CIE Code Alignment Factor 
-	.byte	EH_DATA_ALIGN_FACT ; sleb128 -4; CIE Data Alignment Factor 
-	.byte	0x41	; CIE RA Column 
-	.byte	0x1	; uleb128 0x1; Augmentation size 
-	.byte	0x10	; FDE Encoding (pcrel) 
-	.byte	0xc	; DW_CFA_def_cfa 
-	.byte	0x1	; uleb128 0x1 
-	.byte	0x0	; uleb128 0x0 
-	.align	LOG2_GPR_BYTES 
-LECIE1: 
-	.globl _ffi_closure_ASM.eh 
-_ffi_closure_ASM.eh: 
-LSFDE1: 
-	.set	L$set$1,LEFDE1-LASFDE1 
-	.long	L$set$1	; FDE Length 
- 
-LASFDE1: 
-	.long	LASFDE1-EH_frame1	; FDE CIE offset 
-	.g_long	Lstartcode-.	; FDE initial location 
-	.set	L$set$2,LFE1-Lstartcode 
-	.g_long	L$set$2	; FDE address range 
-	.byte   0x0     ; uleb128 0x0; Augmentation size 
-	.byte	0x4	; DW_CFA_advance_loc4 
-	.set	L$set$3,LCFI1-LCFI0 
-	.long	L$set$3 
-	.byte	0xe	; DW_CFA_def_cfa_offset 
-	.byte	EH_FRAME_OFFSETA,EH_FRAME_OFFSETB	; uleb128 176,1/190,3 
-	.byte	0x4	; DW_CFA_advance_loc4 
-	.set	L$set$4,LCFI0-Lstartcode 
-	.long	L$set$4 
-	.byte   0x11    ; DW_CFA_offset_extended_sf 
-	.byte	0x41	; uleb128 0x41 
-	.byte   0x7e    ; sleb128 -2 
-	.align	LOG2_GPR_BYTES 
-LEFDE1: 
-	.align 	1 
- 
-#ifdef WANT_STUB 
-	.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 
-	.align 5 
-L_ffi_closure_helper_DARWIN$stub: 
-	.indirect_symbol _ffi_closure_helper_DARWIN 
-	mflr r0 
-	bcl 20,31,"L1$spb" 
-"L1$spb": 
-	mflr r11 
-	addis r11,r11,ha16(L_ffi_closure_helper_DARWIN$lazy_ptr-"L1$spb") 
-	mtlr r0 
-	lwzu r12,lo16(L_ffi_closure_helper_DARWIN$lazy_ptr-"L1$spb")(r11) 
-	mtctr r12 
-	bctr 
-	.lazy_symbol_pointer 
-L_ffi_closure_helper_DARWIN$lazy_ptr: 
-	.indirect_symbol _ffi_closure_helper_DARWIN 
-	.g_long	dyld_stub_binding_helper 
- 
-#if defined(__ppc64__) 
-	.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 
-	.align 5 
-L_darwin64_struct_ret_by_value_p$stub: 
-	.indirect_symbol _darwin64_struct_ret_by_value_p 
-	mflr r0 
-	bcl 20,31,"L2$spb" 
-"L2$spb": 
-	mflr r11 
-	addis r11,r11,ha16(L_darwin64_struct_ret_by_value_p$lazy_ptr-"L2$spb") 
-	mtlr r0 
-	lwzu r12,lo16(L_darwin64_struct_ret_by_value_p$lazy_ptr-"L2$spb")(r11) 
-	mtctr r12 
-	bctr 
-	.lazy_symbol_pointer 
-L_darwin64_struct_ret_by_value_p$lazy_ptr: 
-	.indirect_symbol _darwin64_struct_ret_by_value_p 
-	.g_long	dyld_stub_binding_helper 
- 
-	.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32 
-	.align 5 
-L_darwin64_pass_struct_floats$stub: 
-	.indirect_symbol _darwin64_pass_struct_floats 
-	mflr r0 
-	bcl 20,31,"L3$spb" 
-"L3$spb": 
-	mflr r11 
-	addis r11,r11,ha16(L_darwin64_pass_struct_floats$lazy_ptr-"L3$spb") 
-	mtlr r0 
-	lwzu r12,lo16(L_darwin64_pass_struct_floats$lazy_ptr-"L3$spb")(r11) 
-	mtctr r12 
-	bctr 
-	.lazy_symbol_pointer 
-L_darwin64_pass_struct_floats$lazy_ptr: 
-	.indirect_symbol _darwin64_pass_struct_floats 
-	.g_long	dyld_stub_binding_helper 
-#  endif 
-#endif 
+/* -----------------------------------------------------------------------
+   darwin_closure.S - Copyright (c) 2002, 2003, 2004, 2010, 
+   Free Software Foundation, Inc. 
+   based on ppc_closure.S
+
+   PowerPC Assembly glue.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#define L(x) x
+
+#if defined(__ppc64__)
+#define MODE_CHOICE(x, y) y
+#else
+#define MODE_CHOICE(x, y) x
+#endif
+
+#define machine_choice	MODE_CHOICE(ppc7400,ppc64)
+
+; Define some pseudo-opcodes for size-independent load & store of GPRs ...
+#define lgu		MODE_CHOICE(lwzu, ldu)
+#define lg		MODE_CHOICE(lwz,ld)
+#define sg		MODE_CHOICE(stw,std)
+#define sgu		MODE_CHOICE(stwu,stdu)
+
+; ... and the size of GPRs and their storage indicator.
+#define GPR_BYTES	MODE_CHOICE(4,8)
+#define LOG2_GPR_BYTES	MODE_CHOICE(2,3)	/* log2(GPR_BYTES) */
+#define g_long		MODE_CHOICE(long, quad)	/* usage is ".g_long" */
+
+; From the ABI doc: "Mac OS X ABI Function Call Guide" Version 2009-02-04.
+#define LINKAGE_SIZE	MODE_CHOICE(24,48)
+#define PARAM_AREA	MODE_CHOICE(32,64)
+
+#define SAVED_CR_OFFSET	MODE_CHOICE(4,8)	/* save position for CR */
+#define SAVED_LR_OFFSET	MODE_CHOICE(8,16)	/* save position for lr */
+
+/* WARNING: if ffi_type is changed... here be monsters.  
+   Offsets of items within the result type.  */
+#define FFI_TYPE_TYPE	MODE_CHOICE(6,10)
+#define FFI_TYPE_ELEM	MODE_CHOICE(8,16)
+
+#define SAVED_FPR_COUNT 13
+#define FPR_SIZE	8
+/* biggest m64 struct ret is 8GPRS + 13FPRS = 168 bytes - rounded to 16bytes = 176. */
+#define RESULT_BYTES	MODE_CHOICE(16,176)
+
+; The whole stack frame **MUST** be 16byte-aligned.
+#define SAVE_SIZE (((LINKAGE_SIZE+PARAM_AREA+SAVED_FPR_COUNT*FPR_SIZE+RESULT_BYTES)+15) & -16LL)
+#define PAD_SIZE (SAVE_SIZE-(LINKAGE_SIZE+PARAM_AREA+SAVED_FPR_COUNT*FPR_SIZE+RESULT_BYTES))
+
+#define PARENT_PARM_BASE (SAVE_SIZE+LINKAGE_SIZE)
+#define FP_SAVE_BASE (LINKAGE_SIZE+PARAM_AREA)
+
+#if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 1050
+; We no longer need the pic symbol stub for Darwin >= 9.
+#define BLCLS_HELP _ffi_closure_helper_DARWIN
+#define STRUCT_RETVALUE_P _darwin64_struct_ret_by_value_p
+#define PASS_STR_FLOATS _darwin64_pass_struct_floats
+#undef WANT_STUB
+#else
+#define BLCLS_HELP L_ffi_closure_helper_DARWIN$stub
+#define STRUCT_RETVALUE_P L_darwin64_struct_ret_by_value_p$stub
+#define PASS_STR_FLOATS L_darwin64_pass_struct_floats$stub
+#define WANT_STUB
+#endif
+
+/* m32/m64
+
+   The stack layout looks like this:
+
+   |   Additional params...			| |     Higher address
+   ~						~ ~
+   |   Parameters      (at least 8*4/8=32/64)	| | NUM_GPR_ARG_REGISTERS
+   |--------------------------------------------| |
+   |   TOC=R2 (AIX) Reserved (Darwin)   4/8	| |
+   |--------------------------------------------| |
+   |   Reserved                       2*4/8	| |
+   |--------------------------------------------| |
+   |   Space for callee`s LR		4/8	| |
+   |--------------------------------------------| |
+   |   Saved CR [low word for m64]      4/8	| |
+   |--------------------------------------------| |
+   |   Current backchain pointer	4/8	|-/ Parent`s frame.
+   |--------------------------------------------| <+ <<< on entry to
+   |   Result Bytes		       16/176	| |
+   |--------------------------------------------| |
+   ~   padding to 16-byte alignment		~ ~
+   |--------------------------------------------| |
+   |   NUM_FPR_ARG_REGISTERS slots		| |
+   |   here fp13 .. fp1		       13*8	| |
+   |--------------------------------------------| |
+   |   R3..R10			  8*4/8=32/64	| | NUM_GPR_ARG_REGISTERS
+   |--------------------------------------------| |
+   |   TOC=R2 (AIX) Reserved (Darwin)   4/8	| |
+   |--------------------------------------------| |	stack	|
+   |   Reserved [compiler,binder]     2*4/8	| |	grows	|
+   |--------------------------------------------| |	down	V
+   |   Space for callees LR		4/8	| |
+   |--------------------------------------------| |	lower addresses
+   |   Saved CR [low word for m64]      4/8	| |
+   |--------------------------------------------| |     stack pointer here
+   |   Current backchain pointer	4/8	|-/	during
+   |--------------------------------------------|   <<<	call.
+
+*/
+
+	.file	"darwin_closure.S"
+
+	.machine machine_choice
+
+	.text
+	.globl _ffi_closure_ASM
+	.align LOG2_GPR_BYTES
+_ffi_closure_ASM:
+LFB1:
+Lstartcode:
+	mflr	r0			/* extract return address  */
+	sg	r0,SAVED_LR_OFFSET(r1)	/* save the return address  */
+LCFI0:
+	sgu	r1,-SAVE_SIZE(r1)	/* skip over caller save area
+					keep stack aligned to 16.  */
+LCFI1:
+	/* We want to build up an area for the parameters passed
+	   in registers. (both floating point and integer)  */
+
+	/* Put gpr 3 to gpr 10 in the parents outgoing area...
+	   ... the remainder of any params that overflowed the regs will
+	   follow here.  */
+	sg	r3, (PARENT_PARM_BASE                )(r1)
+	sg	r4, (PARENT_PARM_BASE + GPR_BYTES    )(r1)
+	sg	r5, (PARENT_PARM_BASE + GPR_BYTES * 2)(r1)
+	sg	r6, (PARENT_PARM_BASE + GPR_BYTES * 3)(r1)
+	sg	r7, (PARENT_PARM_BASE + GPR_BYTES * 4)(r1)
+	sg	r8, (PARENT_PARM_BASE + GPR_BYTES * 5)(r1)
+	sg	r9, (PARENT_PARM_BASE + GPR_BYTES * 6)(r1)
+	sg	r10,(PARENT_PARM_BASE + GPR_BYTES * 7)(r1)
+
+	/* We save fpr 1 to fpr 13 in our own save frame.  */
+	stfd	f1, (FP_SAVE_BASE                 )(r1)
+	stfd	f2, (FP_SAVE_BASE +  FPR_SIZE     )(r1)
+	stfd	f3, (FP_SAVE_BASE +  FPR_SIZE * 2 )(r1)
+	stfd	f4, (FP_SAVE_BASE +  FPR_SIZE * 3 )(r1)
+	stfd	f5, (FP_SAVE_BASE +  FPR_SIZE * 4 )(r1)
+	stfd	f6, (FP_SAVE_BASE +  FPR_SIZE * 5 )(r1)
+	stfd	f7, (FP_SAVE_BASE +  FPR_SIZE * 6 )(r1)
+	stfd	f8, (FP_SAVE_BASE +  FPR_SIZE * 7 )(r1)
+	stfd	f9, (FP_SAVE_BASE +  FPR_SIZE * 8 )(r1)
+	stfd	f10,(FP_SAVE_BASE +  FPR_SIZE * 9 )(r1)
+	stfd	f11,(FP_SAVE_BASE +  FPR_SIZE * 10)(r1)
+	stfd	f12,(FP_SAVE_BASE +  FPR_SIZE * 11)(r1)
+	stfd	f13,(FP_SAVE_BASE +  FPR_SIZE * 12)(r1)
+
+	/* Set up registers for the routine that actually does the work
+	   get the context pointer from the trampoline.  */
+	mr	r3,r11
+
+	/* Now load up the pointer to the result storage.  */
+	addi	r4,r1,(SAVE_SIZE-RESULT_BYTES)
+
+	/* Now load up the pointer to the saved gpr registers.  */
+	addi	r5,r1,PARENT_PARM_BASE
+
+	/* Now load up the pointer to the saved fpr registers.  */
+	addi	r6,r1,FP_SAVE_BASE
+
+	/* Make the call.  */
+	bl	BLCLS_HELP
+
+	/* r3 contains the rtype pointer... save it since we will need
+	   it later.  */
+	sg	r3,LINKAGE_SIZE(r1)	; ffi_type * result_type
+	lg	r0,0(r3)		; size => r0
+	lhz	r3,FFI_TYPE_TYPE(r3)	; type => r3
+
+	/* The helper will have intercepted structure returns and inserted
+	   the caller`s destination address for structs returned by ref.  */
+
+	/* r3 contains the return type  so use it to look up in a table
+	   so we know how to deal with each type.  */
+
+	addi	r5,r1,(SAVE_SIZE-RESULT_BYTES) /* Otherwise, our return is here.  */
+	bl	Lget_ret_type0_addr	/* Get pointer to Lret_type0 into LR.  */
+	mflr	r4			/* Move to r4.  */
+	slwi	r3,r3,4			/* Now multiply return type by 16.  */
+	add	r3,r3,r4		/* Add contents of table to table address.  */
+	mtctr	r3
+	bctr			 	 /* Jump to it.  */
+LFE1:
+/* Each of the ret_typeX code fragments has to be exactly 16 bytes long
+   (4 instructions). For cache effectiveness we align to a 16 byte boundary
+   first.  */
+
+	.align 4
+
+	nop
+	nop
+	nop
+Lget_ret_type0_addr:
+	blrl
+
+/* case FFI_TYPE_VOID  */
+Lret_type0:
+	b	Lfinish
+	nop
+	nop
+	nop
+
+/* case FFI_TYPE_INT  */
+Lret_type1:
+	lg	r3,0(r5)
+	b	Lfinish
+	nop
+	nop
+
+/* case FFI_TYPE_FLOAT  */
+Lret_type2:
+	lfs	f1,0(r5)
+	b	Lfinish
+	nop
+	nop
+
+/* case FFI_TYPE_DOUBLE  */
+Lret_type3:
+	lfd	f1,0(r5)
+	b	Lfinish
+	nop
+	nop
+
+/* case FFI_TYPE_LONGDOUBLE  */
+Lret_type4:
+	lfd	f1,0(r5)
+	lfd	f2,8(r5)
+	b	Lfinish
+	nop
+
+/* case FFI_TYPE_UINT8  */
+Lret_type5:
+#if defined(__ppc64__)
+	lbz	r3,7(r5)
+#else
+	lbz	r3,3(r5)
+#endif
+	b	Lfinish
+	nop
+	nop
+
+/* case FFI_TYPE_SINT8  */
+Lret_type6:
+#if defined(__ppc64__)
+	lbz	r3,7(r5)
+#else
+	lbz	r3,3(r5)
+#endif
+	extsb	r3,r3
+	b	Lfinish
+	nop
+
+/* case FFI_TYPE_UINT16  */
+Lret_type7:
+#if defined(__ppc64__)
+	lhz	r3,6(r5)
+#else
+	lhz	r3,2(r5)
+#endif
+	b	Lfinish
+	nop
+	nop
+
+/* case FFI_TYPE_SINT16  */
+Lret_type8:
+#if defined(__ppc64__)
+	lha	r3,6(r5)
+#else
+	lha	r3,2(r5)
+#endif
+	b	Lfinish
+	nop
+	nop
+
+/* case FFI_TYPE_UINT32  */
+Lret_type9:
+#if defined(__ppc64__)
+	lwz	r3,4(r5)
+#else
+	lwz	r3,0(r5)
+#endif
+	b	Lfinish
+	nop
+	nop
+
+/* case FFI_TYPE_SINT32  */
+Lret_type10:
+#if defined(__ppc64__)
+	lwz	r3,4(r5)
+#else
+	lwz	r3,0(r5)
+#endif
+	b	Lfinish
+	nop
+	nop
+
+/* case FFI_TYPE_UINT64  */
+Lret_type11:
+#if defined(__ppc64__)
+	lg	r3,0(r5)
+	b	Lfinish
+	nop
+#else
+	lwz	r3,0(r5)
+	lwz	r4,4(r5)
+	b	Lfinish
+#endif
+	nop
+
+/* case FFI_TYPE_SINT64  */
+Lret_type12:
+#if defined(__ppc64__)
+	lg	r3,0(r5)
+	b	Lfinish
+	nop
+#else
+	lwz	r3,0(r5)
+	lwz	r4,4(r5)
+	b	Lfinish
+#endif
+	nop
+
+/* case FFI_TYPE_STRUCT  */
+Lret_type13:
+#if defined(__ppc64__)
+	lg	r3,0(r5)		; we need at least this...
+	cmpi	0,r0,4
+	bgt	Lstructend		; not a special small case
+	b	Lsmallstruct		; see if we need more.
+#else
+	cmpwi	0,r0,4
+	bgt	Lfinish		; not by value
+	lg	r3,0(r5)
+	b	Lfinish
+#endif
+/* case FFI_TYPE_POINTER  */
+Lret_type14:
+	lg	r3,0(r5)
+	b	Lfinish
+	nop
+	nop
+
+#if defined(__ppc64__)
+Lsmallstruct:
+	beq	Lfour			; continuation of Lret_type13 (CR0 still holds size vs 4).
+	cmpi	0,r0,3
+	beq	Lfinish			; don`t adjust this - can`t be any floats here...
+	srdi	r3,r3,48
+	cmpi	0,r0,2
+	beq	Lfinish			; .. or here ..
+	srdi	r3,r3,8
+	b 	Lfinish			; .. or here.
+
+Lfour:
+	lg	r6,LINKAGE_SIZE(r1)	; get the result type
+	lg	r6,FFI_TYPE_ELEM(r6)	; elements array pointer
+	lg	r6,0(r6)		; first element
+	lhz	r0,FFI_TYPE_TYPE(r6)	; OK got the type
+	cmpi	0,r0,2			; FFI_TYPE_FLOAT
+	bne	Lfourint
+	lfs	f1,0(r5)		; just one float in the struct.
+	b 	Lfinish
+
+Lfourint:
+	srdi	r3,r3,32		; four bytes.
+	b 	Lfinish
+
+Lstructend:
+	lg	r3,LINKAGE_SIZE(r1)	; get the result type
+	bl	STRUCT_RETVALUE_P
+	cmpi	0,r3,0
+	beq	Lfinish			; nope.
+	/* Recover a pointer to the results.  */
+	addi	r11,r1,(SAVE_SIZE-RESULT_BYTES)
+	lg	r3,0(r11)		; we need at least this...
+	lg	r4,8(r11)
+	cmpi	0,r0,16
+	beq	Lfinish		; special case 16 bytes we don't consider floats.
+
+	/* OK, frustratingly, the process of saving the struct to mem might have
+	   messed with the FPRs, so we have to re-load them :(.
+	   We`ll use our FPRs space again - calling: 
+	   void darwin64_pass_struct_floats (ffi_type *s, char *src, 
+					     unsigned *nfpr, double **fprs) 
+	   We`ll temporarily pinch the first two slots of the param area for local
+	   vars used by the routine.  */
+	xor	r6,r6,r6
+	addi	r5,r1,PARENT_PARM_BASE		; some space
+	sg	r6,0(r5)			; *nfpr zeroed.
+	addi	r6,r5,8				; **fprs
+	addi	r3,r1,FP_SAVE_BASE		; pointer to FPRs space
+	sg	r3,0(r6)
+	mr	r4,r11				; the struct is here...
+	lg	r3,LINKAGE_SIZE(r1)		; ffi_type * result_type.
+	bl	PASS_STR_FLOATS			; get struct floats into FPR save space.
+	/* See if we used any floats  */
+	lwz	r0,(SAVE_SIZE-RESULT_BYTES)(r1)	
+	cmpi	0,r0,0
+	beq	Lstructints			; nope.
+	/* OK load `em up... */
+	lfd	f1, (FP_SAVE_BASE                 )(r1)
+	lfd	f2, (FP_SAVE_BASE +  FPR_SIZE     )(r1)
+	lfd	f3, (FP_SAVE_BASE +  FPR_SIZE * 2 )(r1)
+	lfd	f4, (FP_SAVE_BASE +  FPR_SIZE * 3 )(r1)
+	lfd	f5, (FP_SAVE_BASE +  FPR_SIZE * 4 )(r1)
+	lfd	f6, (FP_SAVE_BASE +  FPR_SIZE * 5 )(r1)
+	lfd	f7, (FP_SAVE_BASE +  FPR_SIZE * 6 )(r1)
+	lfd	f8, (FP_SAVE_BASE +  FPR_SIZE * 7 )(r1)
+	lfd	f9, (FP_SAVE_BASE +  FPR_SIZE * 8 )(r1)
+	lfd	f10,(FP_SAVE_BASE +  FPR_SIZE * 9 )(r1)
+	lfd	f11,(FP_SAVE_BASE +  FPR_SIZE * 10)(r1)
+	lfd	f12,(FP_SAVE_BASE +  FPR_SIZE * 11)(r1)
+	lfd	f13,(FP_SAVE_BASE +  FPR_SIZE * 12)(r1)
+
+	/* point back at our saved struct.  */
+Lstructints:
+	addi	r11,r1,(SAVE_SIZE-RESULT_BYTES)
+	lg	r3,0(r11)			; we end up picking the
+	lg	r4,8(r11)			; first two again.
+	lg	r5,16(r11)
+	lg	r6,24(r11)
+	lg	r7,32(r11)
+	lg	r8,40(r11)
+	lg	r9,48(r11)
+	lg	r10,56(r11)
+#endif
+
+/* case done  */
+Lfinish:
+	addi	r1,r1,SAVE_SIZE		/* Restore stack pointer.  */
+	lg	r0,SAVED_LR_OFFSET(r1)	/* Get return address.  */
+	mtlr	r0			/* Reset link register.  */
+	blr
+Lendcode:
+	.align 1
+	
+/* END(ffi_closure_ASM)  */
+
+/* EH frame stuff.  */
+#define EH_DATA_ALIGN_FACT MODE_CHOICE(0x7c,0x78)
+/* 176, 400 */
+#define EH_FRAME_OFFSETA MODE_CHOICE(176,0x90)
+#define EH_FRAME_OFFSETB MODE_CHOICE(1,3)
+
+	.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EH_frame1:
+	.set	L$set$0,LECIE1-LSCIE1
+	.long	L$set$0	; Length of Common Information Entry
+LSCIE1:
+	.long	0x0	; CIE Identifier Tag
+	.byte	0x1	; CIE Version
+	.ascii	"zR\0"	; CIE Augmentation
+	.byte	0x1	; uleb128 0x1; CIE Code Alignment Factor
+	.byte	EH_DATA_ALIGN_FACT ; sleb128 -4 (m32) / -8 (m64); CIE Data Alignment Factor
+	.byte	0x41	; CIE RA Column
+	.byte	0x1	; uleb128 0x1; Augmentation size
+	.byte	0x10	; FDE Encoding (pcrel)
+	.byte	0xc	; DW_CFA_def_cfa
+	.byte	0x1	; uleb128 0x1
+	.byte	0x0	; uleb128 0x0
+	.align	LOG2_GPR_BYTES
+LECIE1:
+	.globl _ffi_closure_ASM.eh
+_ffi_closure_ASM.eh:
+LSFDE1:
+	.set	L$set$1,LEFDE1-LASFDE1
+	.long	L$set$1	; FDE Length
+
+LASFDE1:
+	.long	LASFDE1-EH_frame1	; FDE CIE offset
+	.g_long	Lstartcode-.	; FDE initial location
+	.set	L$set$2,LFE1-Lstartcode
+	.g_long	L$set$2	; FDE address range
+	.byte   0x0     ; uleb128 0x0; Augmentation size
+	.byte	0x4	; DW_CFA_advance_loc4
+	.set	L$set$3,LCFI1-LCFI0
+	.long	L$set$3
+	.byte	0xe	; DW_CFA_def_cfa_offset
+	.byte	EH_FRAME_OFFSETA,EH_FRAME_OFFSETB	; uleb128 176,1/0x90,3 (= 176/400)
+	.byte	0x4	; DW_CFA_advance_loc4
+	.set	L$set$4,LCFI0-Lstartcode
+	.long	L$set$4
+	.byte   0x11    ; DW_CFA_offset_extended_sf
+	.byte	0x41	; uleb128 0x41
+	.byte   0x7e    ; sleb128 -2
+	.align	LOG2_GPR_BYTES
+LEFDE1:
+	.align 	1
+
+#ifdef WANT_STUB
+	.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+	.align 5
+L_ffi_closure_helper_DARWIN$stub:
+	.indirect_symbol _ffi_closure_helper_DARWIN
+	mflr r0
+	bcl 20,31,"L1$spb"
+"L1$spb":
+	mflr r11
+	addis r11,r11,ha16(L_ffi_closure_helper_DARWIN$lazy_ptr-"L1$spb")
+	mtlr r0
+	lwzu r12,lo16(L_ffi_closure_helper_DARWIN$lazy_ptr-"L1$spb")(r11)
+	mtctr r12
+	bctr
+	.lazy_symbol_pointer
+L_ffi_closure_helper_DARWIN$lazy_ptr:
+	.indirect_symbol _ffi_closure_helper_DARWIN
+	.g_long	dyld_stub_binding_helper
+
+#if defined(__ppc64__)
+	.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+	.align 5
+L_darwin64_struct_ret_by_value_p$stub:
+	.indirect_symbol _darwin64_struct_ret_by_value_p
+	mflr r0
+	bcl 20,31,"L2$spb"
+"L2$spb":
+	mflr r11
+	addis r11,r11,ha16(L_darwin64_struct_ret_by_value_p$lazy_ptr-"L2$spb")
+	mtlr r0
+	lwzu r12,lo16(L_darwin64_struct_ret_by_value_p$lazy_ptr-"L2$spb")(r11)
+	mtctr r12
+	bctr
+	.lazy_symbol_pointer
+L_darwin64_struct_ret_by_value_p$lazy_ptr:
+	.indirect_symbol _darwin64_struct_ret_by_value_p
+	.g_long	dyld_stub_binding_helper
+
+	.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32
+	.align 5
+L_darwin64_pass_struct_floats$stub:
+	.indirect_symbol _darwin64_pass_struct_floats
+	mflr r0
+	bcl 20,31,"L3$spb"
+"L3$spb":
+	mflr r11
+	addis r11,r11,ha16(L_darwin64_pass_struct_floats$lazy_ptr-"L3$spb")
+	mtlr r0
+	lwzu r12,lo16(L_darwin64_pass_struct_floats$lazy_ptr-"L3$spb")(r11)
+	mtctr r12
+	bctr
+	.lazy_symbol_pointer
+L_darwin64_pass_struct_floats$lazy_ptr:
+	.indirect_symbol _darwin64_pass_struct_floats
+	.g_long	dyld_stub_binding_helper
+#  endif
+#endif
diff --git a/contrib/restricted/libffi/src/powerpc/ffi.c b/contrib/restricted/libffi/src/powerpc/ffi.c
index 5d618cc0dd..a19bcbbfc5 100644
--- a/contrib/restricted/libffi/src/powerpc/ffi.c
+++ b/contrib/restricted/libffi/src/powerpc/ffi.c
@@ -1,175 +1,175 @@
-/* ----------------------------------------------------------------------- 
-   ffi.c - Copyright (C) 2013 IBM 
-           Copyright (C) 2011 Anthony Green 
-           Copyright (C) 2011 Kyle Moffett 
-           Copyright (C) 2008 Red Hat, Inc 
-           Copyright (C) 2007, 2008 Free Software Foundation, Inc 
-	   Copyright (c) 1998 Geoffrey Keating 
- 
-   PowerPC Foreign Function Interface 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR 
-   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
-   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
-   OTHER DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-#include "ffi.h" 
-#include "ffi_common.h" 
-#include "ffi_powerpc.h" 
- 
-#if HAVE_LONG_DOUBLE_VARIANT 
-/* Adjust ffi_type_longdouble.  */ 
-void FFI_HIDDEN 
-ffi_prep_types (ffi_abi abi) 
-{ 
-# if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-#  ifdef POWERPC64 
-  ffi_prep_types_linux64 (abi); 
-#  else 
-  ffi_prep_types_sysv (abi); 
-#  endif 
-# endif 
-} 
-#endif 
- 
-/* Perform machine dependent cif processing */ 
-ffi_status FFI_HIDDEN 
-ffi_prep_cif_machdep (ffi_cif *cif) 
-{ 
-#ifdef POWERPC64 
-  return ffi_prep_cif_linux64 (cif); 
-#else 
-  return ffi_prep_cif_sysv (cif); 
-#endif 
-} 
- 
-ffi_status FFI_HIDDEN 
-ffi_prep_cif_machdep_var (ffi_cif *cif, 
-			  unsigned int nfixedargs MAYBE_UNUSED, 
-			  unsigned int ntotalargs MAYBE_UNUSED) 
-{ 
-#ifdef POWERPC64 
-  return ffi_prep_cif_linux64_var (cif, nfixedargs, ntotalargs); 
-#else 
-  return ffi_prep_cif_sysv (cif); 
-#endif 
-} 
- 
-static void 
-ffi_call_int (ffi_cif *cif, 
-	      void (*fn) (void), 
-	      void *rvalue, 
-	      void **avalue, 
-	      void *closure) 
-{ 
-  /* The final SYSV ABI says that structures smaller or equal 8 bytes 
-     are returned in r3/r4.  A draft ABI used by linux instead returns 
-     them in memory. 
- 
-     We bounce-buffer SYSV small struct return values so that sysv.S 
-     can write r3 and r4 to memory without worrying about struct size. 
-    
-     For ELFv2 ABI, use a bounce buffer for homogeneous structs too, 
-     for similar reasons. This bounce buffer must be aligned to 16 
-     bytes for use with homogeneous structs of vectors (float128).  */ 
-  float128 smst_buffer[8]; 
-  extended_cif ecif; 
- 
-  ecif.cif = cif; 
-  ecif.avalue = avalue; 
- 
-  ecif.rvalue = rvalue; 
-  if ((cif->flags & FLAG_RETURNS_SMST) != 0) 
-    ecif.rvalue = smst_buffer; 
-  /* Ensure that we have a valid struct return value. 
-     FIXME: Isn't this just papering over a user problem?  */ 
-  else if (!rvalue && cif->rtype->type == FFI_TYPE_STRUCT) 
-    ecif.rvalue = alloca (cif->rtype->size); 
- 
-#ifdef POWERPC64 
-  ffi_call_LINUX64 (&ecif, fn, ecif.rvalue, cif->flags, closure, 
-		    -(long) cif->bytes); 
-#else 
-  ffi_call_SYSV (&ecif, fn, ecif.rvalue, cif->flags, closure, -cif->bytes); 
-#endif 
- 
-  /* Check for a bounce-buffered return value */ 
-  if (rvalue && ecif.rvalue == smst_buffer) 
-    { 
-      unsigned int rsize = cif->rtype->size; 
-#ifndef __LITTLE_ENDIAN__ 
-      /* The SYSV ABI returns a structure of up to 4 bytes in size 
-	 left-padded in r3.  */ 
-# ifndef POWERPC64 
-      if (rsize <= 4) 
-	memcpy (rvalue, (char *) smst_buffer + 4 - rsize, rsize); 
-      else 
-# endif 
-	/* The SYSV ABI returns a structure of up to 8 bytes in size 
-	   left-padded in r3/r4, and the ELFv2 ABI similarly returns a 
-	   structure of up to 8 bytes in size left-padded in r3. But 
-	   note that a structure of a single float is not paddded.  */ 
-	if (rsize <= 8 && (cif->flags & FLAG_RETURNS_FP) == 0) 
-	  memcpy (rvalue, (char *) smst_buffer + 8 - rsize, rsize); 
-	else 
-#endif 
-	  memcpy (rvalue, smst_buffer, rsize); 
-    } 
-} 
- 
-void 
-ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue) 
-{ 
-  ffi_call_int (cif, fn, rvalue, avalue, NULL); 
-} 
- 
-void 
-ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue, 
-	     void *closure) 
-{ 
-  ffi_call_int (cif, fn, rvalue, avalue, closure); 
-} 
- 
-ffi_status 
-ffi_prep_closure_loc (ffi_closure *closure, 
-		      ffi_cif *cif, 
-		      void (*fun) (ffi_cif *, void *, void **, void *), 
-		      void *user_data, 
-		      void *codeloc) 
-{ 
-#ifdef POWERPC64 
-  return ffi_prep_closure_loc_linux64 (closure, cif, fun, user_data, codeloc); 
-#else 
-  return ffi_prep_closure_loc_sysv (closure, cif, fun, user_data, codeloc); 
-#endif 
-} 
- 
-ffi_status 
-ffi_prep_go_closure (ffi_go_closure *closure, 
-		     ffi_cif *cif, 
-		     void (*fun) (ffi_cif *, void *, void **, void *)) 
-{ 
-#ifdef POWERPC64 
-  closure->tramp = ffi_go_closure_linux64; 
-#else 
-  closure->tramp = ffi_go_closure_sysv; 
-#endif 
-  closure->cif = cif; 
-  closure->fun = fun; 
-  return FFI_OK; 
-} 
+/* -----------------------------------------------------------------------
+   ffi.c - Copyright (C) 2013 IBM
+           Copyright (C) 2011 Anthony Green
+           Copyright (C) 2011 Kyle Moffett
+           Copyright (C) 2008 Red Hat, Inc
+           Copyright (C) 2007, 2008 Free Software Foundation, Inc
+	   Copyright (c) 1998 Geoffrey Keating
+
+   PowerPC Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include "ffi.h"
+#include "ffi_common.h"
+#include "ffi_powerpc.h"
+
+#if HAVE_LONG_DOUBLE_VARIANT
+/* Adjust ffi_type_longdouble for ABI via the linux64 or sysv helper.  */
+void FFI_HIDDEN
+ffi_prep_types (ffi_abi abi)
+{
+# if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE /* otherwise the body is empty */
+#  ifdef POWERPC64
+  ffi_prep_types_linux64 (abi);
+#  else
+  ffi_prep_types_sysv (abi);
+#  endif
+# endif
+}
+#endif /* HAVE_LONG_DOUBLE_VARIANT */
+
+/* Machine-dependent cif setup; dispatches to the linux64 or sysv routine.  */
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+#ifdef POWERPC64
+  return ffi_prep_cif_linux64 (cif);
+#else /* !POWERPC64 */
+  return ffi_prep_cif_sysv (cif);
+#endif
+}
+/* Variadic cif setup; only the linux64 routine uses the argument counts.  */
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep_var (ffi_cif *cif,
+			  unsigned int nfixedargs MAYBE_UNUSED,
+			  unsigned int ntotalargs MAYBE_UNUSED)
+{
+#ifdef POWERPC64
+  return ffi_prep_cif_linux64_var (cif, nfixedargs, ntotalargs);
+#else /* !POWERPC64: same routine as the non-variadic path */
+  return ffi_prep_cif_sysv (cif);
+#endif
+}
+/* Shared body of ffi_call and ffi_call_go: set up ecif, call, copy result.  */
+static void
+ffi_call_int (ffi_cif *cif,
+	      void (*fn) (void),
+	      void *rvalue,
+	      void **avalue,
+	      void *closure)
+{
+  /* The final SYSV ABI says that structures smaller or equal 8 bytes
+     are returned in r3/r4.  A draft ABI used by linux instead returns
+     them in memory.
+
+     We bounce-buffer SYSV small struct return values so that sysv.S
+     can write r3 and r4 to memory without worrying about struct size.
+   
+     For ELFv2 ABI, use a bounce buffer for homogeneous structs too,
+     for similar reasons. This bounce buffer must be aligned to 16
+     bytes for use with homogeneous structs of vectors (float128).  */
+  float128 smst_buffer[8];
+  extended_cif ecif;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+
+  ecif.rvalue = rvalue; /* default: callee writes straight to RVALUE */
+  if ((cif->flags & FLAG_RETURNS_SMST) != 0)
+    ecif.rvalue = smst_buffer; /* redirected; copied out after the call */
+  /* Ensure that we have a valid struct return value.
+     FIXME: Isn't this just papering over a user problem?  */
+  else if (!rvalue && cif->rtype->type == FFI_TYPE_STRUCT)
+    ecif.rvalue = alloca (cif->rtype->size); /* scratch; never copied out */
+
+#ifdef POWERPC64
+  ffi_call_LINUX64 (&ecif, fn, ecif.rvalue, cif->flags, closure,
+		    -(long) cif->bytes);
+#else
+  ffi_call_SYSV (&ecif, fn, ecif.rvalue, cif->flags, closure, -cif->bytes);
+#endif
+
+  /* Copy a bounce-buffered result out; nothing to do if RVALUE is NULL.  */
+  if (rvalue && ecif.rvalue == smst_buffer)
+    {
+      unsigned int rsize = cif->rtype->size;
+#ifndef __LITTLE_ENDIAN__ /* big-endian: data sits at the end of the slot */
+      /* The SYSV ABI returns a structure of up to 4 bytes in size
+	 left-padded in r3.  */
+# ifndef POWERPC64
+      if (rsize <= 4)
+	memcpy (rvalue, (char *) smst_buffer + 4 - rsize, rsize);
+      else
+# endif
+	/* The SYSV ABI returns a structure of up to 8 bytes in size
+	   left-padded in r3/r4, and the ELFv2 ABI similarly returns a
+	   structure of up to 8 bytes in size left-padded in r3. But
+	   note that a structure of a single float is not padded.  */
+	if (rsize <= 8 && (cif->flags & FLAG_RETURNS_FP) == 0)
+	  memcpy (rvalue, (char *) smst_buffer + 8 - rsize, rsize);
+	else
+#endif /* !__LITTLE_ENDIAN__ */
+	  memcpy (rvalue, smst_buffer, rsize); /* unpadded: copy from the start */
+    }
+}
+/* Call FN as described by CIF; forwards to ffi_call_int with no closure.  */
+void
+ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+/* Call FN as described by CIF, handing CLOSURE through to ffi_call_int.  */
+void
+ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue,
+	     void *closure)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
+/* Prepare CLOSURE via the linux64 or sysv helper and return its status.  */
+ffi_status
+ffi_prep_closure_loc (ffi_closure *closure,
+		      ffi_cif *cif,
+		      void (*fun) (ffi_cif *, void *, void **, void *),
+		      void *user_data,
+		      void *codeloc)
+{
+#ifdef POWERPC64
+  return ffi_prep_closure_loc_linux64 (closure, cif, fun, user_data, codeloc);
+#else /* !POWERPC64 */
+  return ffi_prep_closure_loc_sysv (closure, cif, fun, user_data, codeloc);
+#endif
+}
+/* Fill in a Go closure: trampoline, CIF and FUN.  Always returns FFI_OK.  */
+ffi_status
+ffi_prep_go_closure (ffi_go_closure *closure,
+		     ffi_cif *cif,
+		     void (*fun) (ffi_cif *, void *, void **, void *))
+{
+#ifdef POWERPC64
+  closure->tramp = ffi_go_closure_linux64; /* trampoline chosen at build time */
+#else
+  closure->tramp = ffi_go_closure_sysv;
+#endif
+  closure->cif = cif;
+  closure->fun = fun;
+  return FFI_OK; /* no failure path in this function */
+}
diff --git a/contrib/restricted/libffi/src/powerpc/ffi_darwin.c b/contrib/restricted/libffi/src/powerpc/ffi_darwin.c
index f9f587a2eb..61a18c4911 100644
--- a/contrib/restricted/libffi/src/powerpc/ffi_darwin.c
+++ b/contrib/restricted/libffi/src/powerpc/ffi_darwin.c
@@ -1,1440 +1,1440 @@
-/* ----------------------------------------------------------------------- 
-   ffi_darwin.c 
- 
-   Copyright (C) 1998 Geoffrey Keating 
-   Copyright (C) 2001 John Hornkvist 
-   Copyright (C) 2002, 2006, 2007, 2009, 2010 Free Software Foundation, Inc. 
- 
-   FFI support for Darwin and AIX. 
-    
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR 
-   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
-   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
-   OTHER DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-#include <ffi.h> 
-#include <ffi_common.h> 
- 
-#include <stdlib.h> 
- 
-extern void ffi_closure_ASM (void); 
-extern void ffi_go_closure_ASM (void); 
- 
-enum { 
-  /* The assembly depends on these exact flags.   
-     For Darwin64 (when FLAG_RETURNS_STRUCT is set): 
-       FLAG_RETURNS_FP indicates that the structure embeds FP data. 
-       FLAG_RETURNS_128BITS signals a special struct size that is not 
-       expanded for float content.  */ 
-  FLAG_RETURNS_128BITS	= 1 << (31-31), /* These go in cr7  */ 
-  FLAG_RETURNS_NOTHING	= 1 << (31-30), 
-  FLAG_RETURNS_FP	= 1 << (31-29), 
-  FLAG_RETURNS_64BITS	= 1 << (31-28), 
- 
-  FLAG_RETURNS_STRUCT	= 1 << (31-27), /* This goes in cr6  */ 
- 
-  FLAG_ARG_NEEDS_COPY   = 1 << (31- 7), 
-  FLAG_FP_ARGUMENTS     = 1 << (31- 6), /* cr1.eq; specified by ABI  */ 
-  FLAG_4_GPR_ARGUMENTS  = 1 << (31- 5), 
-  FLAG_RETVAL_REFERENCE = 1 << (31- 4) 
-}; 
- 
-/* About the DARWIN ABI.  */ 
-enum { 
-  NUM_GPR_ARG_REGISTERS = 8, 
-  NUM_FPR_ARG_REGISTERS = 13, 
-  LINKAGE_AREA_GPRS = 6 
-}; 
- 
-enum { ASM_NEEDS_REGISTERS = 4 }; /* r28-r31 */ 
- 
-/* ffi_prep_args is called by the assembly routine once stack space 
-   has been allocated for the function's arguments. 
-    
-   m32/m64 
- 
-   The stack layout we want looks like this: 
- 
-   |   Return address from ffi_call_DARWIN      |	higher addresses 
-   |--------------------------------------------| 
-   |   Previous backchain pointer	4/8	|	stack pointer here 
-   |--------------------------------------------|<+ <<<	on entry to 
-   |   ASM_NEEDS_REGISTERS=r28-r31   4*(4/8)	| |	ffi_call_DARWIN 
-   |--------------------------------------------| | 
-   |   When we have any FP activity... the	| | 
-   |   FPRs occupy NUM_FPR_ARG_REGISTERS slots	| | 
-   |   here fp13 .. fp1 from high to low addr.	| | 
-   ~						~ ~ 
-   |   Parameters      (at least 8*4/8=32/64)	| | NUM_GPR_ARG_REGISTERS 
-   |--------------------------------------------| | 
-   |   TOC=R2 (AIX) Reserved (Darwin)   4/8	| | 
-   |--------------------------------------------| |	stack	| 
-   |   Reserved                       2*4/8	| |	grows	| 
-   |--------------------------------------------| |	down	V 
-   |   Space for callee's LR		4/8	| | 
-   |--------------------------------------------| |	lower addresses 
-   |   Saved CR [low word for m64]      4/8	| | 
-   |--------------------------------------------| |     stack pointer here 
-   |   Current backchain pointer	4/8	|-/	during 
-   |--------------------------------------------|   <<<	ffi_call_DARWIN 
- 
-   */ 
- 
-#if defined(POWERPC_DARWIN64) 
-static void 
-darwin64_pass_struct_by_value  
-  (ffi_type *, char *, unsigned, unsigned *, double **, unsigned long **); 
-#endif 
- 
-/* This depends on GPR_SIZE = sizeof (unsigned long) */ 
- 
-void 
-ffi_prep_args (extended_cif *ecif, unsigned long *const stack) 
-{ 
-  const unsigned bytes = ecif->cif->bytes; 
-  const unsigned flags = ecif->cif->flags; 
-  const unsigned nargs = ecif->cif->nargs; 
-#if !defined(POWERPC_DARWIN64)  
-  const ffi_abi abi = ecif->cif->abi; 
-#endif 
- 
-  /* 'stacktop' points at the previous backchain pointer.  */ 
-  unsigned long *const stacktop = stack + (bytes / sizeof(unsigned long)); 
- 
-  /* 'fpr_base' points at the space for fpr1, and grows upwards as 
-     we use FPR registers.  */ 
-  double *fpr_base = (double *) (stacktop - ASM_NEEDS_REGISTERS) - NUM_FPR_ARG_REGISTERS; 
-  int gp_count = 0, fparg_count = 0; 
- 
-  /* 'next_arg' grows up as we put parameters in it.  */ 
-  unsigned long *next_arg = stack + LINKAGE_AREA_GPRS; /* 6 reserved positions.  */ 
- 
-  int i; 
-  double double_tmp; 
-  void **p_argv = ecif->avalue; 
-  unsigned long gprvalue; 
-  ffi_type** ptr = ecif->cif->arg_types; 
-#if !defined(POWERPC_DARWIN64)  
-  char *dest_cpy; 
-#endif 
-  unsigned size_al = 0; 
- 
-  /* Check that everything starts aligned properly.  */ 
-  FFI_ASSERT(((unsigned) (char *) stack & 0xF) == 0); 
-  FFI_ASSERT(((unsigned) (char *) stacktop & 0xF) == 0); 
-  FFI_ASSERT((bytes & 0xF) == 0); 
- 
-  /* Deal with return values that are actually pass-by-reference. 
-     Rule: 
-     Return values are referenced by r3, so r4 is the first parameter.  */ 
- 
-  if (flags & FLAG_RETVAL_REFERENCE) 
-    *next_arg++ = (unsigned long) (char *) ecif->rvalue; 
- 
-  /* Now for the arguments.  */ 
-  for (i = nargs; i > 0; i--, ptr++, p_argv++) 
-    { 
-      switch ((*ptr)->type) 
-	{ 
-	/* If a floating-point parameter appears before all of the general- 
-	   purpose registers are filled, the corresponding GPRs that match 
-	   the size of the floating-point parameter are skipped.  */ 
-	case FFI_TYPE_FLOAT: 
-	  double_tmp = *(float *) *p_argv; 
-	  if (fparg_count < NUM_FPR_ARG_REGISTERS) 
-	    *fpr_base++ = double_tmp; 
-#if defined(POWERPC_DARWIN) 
-	  *(float *)next_arg = *(float *) *p_argv; 
-#else 
-	  *(double *)next_arg = double_tmp; 
-#endif 
-	  next_arg++; 
-	  gp_count++; 
-	  fparg_count++; 
-	  FFI_ASSERT(flags & FLAG_FP_ARGUMENTS); 
-	  break; 
- 
-	case FFI_TYPE_DOUBLE: 
-	  double_tmp = *(double *) *p_argv; 
-	  if (fparg_count < NUM_FPR_ARG_REGISTERS) 
-	    *fpr_base++ = double_tmp; 
-	  *(double *)next_arg = double_tmp; 
-#ifdef POWERPC64 
-	  next_arg++; 
-	  gp_count++; 
-#else 
-	  next_arg += 2; 
-	  gp_count += 2; 
-#endif 
-	  fparg_count++; 
-	  FFI_ASSERT(flags & FLAG_FP_ARGUMENTS); 
-	  break; 
- 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
- 
-	case FFI_TYPE_LONGDOUBLE: 
-#  if defined(POWERPC64) && !defined(POWERPC_DARWIN64) 
-	  /* ??? This will exceed the regs count when the value starts at fp13 
-	     and it will not put the extra bit on the stack.  */ 
-	  if (fparg_count < NUM_FPR_ARG_REGISTERS) 
-	    *(long double *) fpr_base++ = *(long double *) *p_argv; 
-	  else 
-	    *(long double *) next_arg = *(long double *) *p_argv; 
-	  next_arg += 2; 
-	  fparg_count += 2; 
-#  else 
-	  double_tmp = ((double *) *p_argv)[0]; 
-	  if (fparg_count < NUM_FPR_ARG_REGISTERS) 
-	    *fpr_base++ = double_tmp; 
-	  *(double *) next_arg = double_tmp; 
-#    if defined(POWERPC_DARWIN64) 
-	  next_arg++; 
-	  gp_count++; 
-#    else 
-	  next_arg += 2; 
-	  gp_count += 2; 
-#    endif 
-	  fparg_count++; 
-	  double_tmp = ((double *) *p_argv)[1]; 
-	  if (fparg_count < NUM_FPR_ARG_REGISTERS) 
-	    *fpr_base++ = double_tmp; 
-	  *(double *) next_arg = double_tmp; 
-#    if defined(POWERPC_DARWIN64) 
-	  next_arg++; 
-	  gp_count++; 
-#    else 
-	  next_arg += 2; 
-	  gp_count += 2; 
-#    endif 
-	  fparg_count++; 
-#  endif 
-	  FFI_ASSERT(flags & FLAG_FP_ARGUMENTS); 
-	  break; 
-#endif 
-	case FFI_TYPE_UINT64: 
-	case FFI_TYPE_SINT64: 
-#ifdef POWERPC64 
-	  gprvalue = *(long long *) *p_argv; 
-	  goto putgpr; 
-#else 
-	  *(long long *) next_arg = *(long long *) *p_argv; 
-	  next_arg += 2; 
-	  gp_count += 2; 
-#endif 
-	  break; 
-	case FFI_TYPE_POINTER: 
-	  gprvalue = *(unsigned long *) *p_argv; 
-	  goto putgpr; 
-	case FFI_TYPE_UINT8: 
-	  gprvalue = *(unsigned char *) *p_argv; 
-	  goto putgpr; 
-	case FFI_TYPE_SINT8: 
-	  gprvalue = *(signed char *) *p_argv; 
-	  goto putgpr; 
-	case FFI_TYPE_UINT16: 
-	  gprvalue = *(unsigned short *) *p_argv; 
-	  goto putgpr; 
-	case FFI_TYPE_SINT16: 
-	  gprvalue = *(signed short *) *p_argv; 
-	  goto putgpr; 
- 
-	case FFI_TYPE_STRUCT: 
-	  size_al = (*ptr)->size; 
-#if defined(POWERPC_DARWIN64) 
-	  next_arg = (unsigned long *)FFI_ALIGN((char *)next_arg, (*ptr)->alignment); 
-	  darwin64_pass_struct_by_value (*ptr, (char *) *p_argv,  
-					 (unsigned) size_al, 
-					 (unsigned int *) &fparg_count, 
-					 &fpr_base, &next_arg); 
-#else 
-	  dest_cpy = (char *) next_arg; 
- 
-	  /* If the first member of the struct is a double, then include enough 
-	     padding in the struct size to align it to double-word.  */ 
-	  if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE) 
-	    size_al = FFI_ALIGN((*ptr)->size, 8); 
- 
-#  if defined(POWERPC64)  
-	  FFI_ASSERT (abi != FFI_DARWIN); 
-	  memcpy ((char *) dest_cpy, (char *) *p_argv, size_al); 
-	  next_arg += (size_al + 7) / 8; 
-#  else 
-	  /* Structures that match the basic modes (QI 1 byte, HI 2 bytes, 
-	     SI 4 bytes) are aligned as if they were those modes. 
-	     Structures with 3 byte in size are padded upwards.  */ 
-	  if (size_al < 3 && abi == FFI_DARWIN) 
-	    dest_cpy += 4 - size_al; 
- 
-	  memcpy((char *) dest_cpy, (char *) *p_argv, size_al); 
-	  next_arg += (size_al + 3) / 4; 
-#  endif 
-#endif 
-	  break; 
- 
-	case FFI_TYPE_INT: 
-	case FFI_TYPE_SINT32: 
-	  gprvalue = *(signed int *) *p_argv; 
-	  goto putgpr; 
- 
-	case FFI_TYPE_UINT32: 
-	  gprvalue = *(unsigned int *) *p_argv; 
-	putgpr: 
-	  *next_arg++ = gprvalue; 
-	  gp_count++; 
-	  break; 
-	default: 
-	  break; 
-	} 
-    } 
- 
-  /* Check that we didn't overrun the stack...  */ 
-  /* FFI_ASSERT(gpr_base <= stacktop - ASM_NEEDS_REGISTERS); 
-     FFI_ASSERT((unsigned *)fpr_base 
-     	     <= stacktop - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS); 
-     FFI_ASSERT(flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4);  */ 
-} 
- 
-#if defined(POWERPC_DARWIN64) 
- 
-/* See if we can put some of the struct into fprs. 
-   This should not be called for structures of size 16 bytes, since these are not 
-   broken out this way.  */ 
-static void 
-darwin64_scan_struct_for_floats (ffi_type *s, unsigned *nfpr) 
-{ 
-  int i; 
- 
-  FFI_ASSERT (s->type == FFI_TYPE_STRUCT) 
- 
-  for (i = 0; s->elements[i] != NULL; i++) 
-    { 
-      ffi_type *p = s->elements[i]; 
-      switch (p->type) 
-	{ 
-	  case FFI_TYPE_STRUCT: 
-	    darwin64_scan_struct_for_floats (p, nfpr); 
-	    break; 
-	  case FFI_TYPE_LONGDOUBLE: 
-	    (*nfpr) += 2; 
-	    break; 
-	  case FFI_TYPE_DOUBLE: 
-	  case FFI_TYPE_FLOAT: 
-	    (*nfpr) += 1; 
-	    break; 
-	  default: 
-	    break;     
-	} 
-    } 
-} 
- 
-static int 
-darwin64_struct_size_exceeds_gprs_p (ffi_type *s, char *src, unsigned *nfpr) 
-{ 
-  unsigned struct_offset=0, i; 
- 
-  for (i = 0; s->elements[i] != NULL; i++) 
-    { 
-      char *item_base; 
-      ffi_type *p = s->elements[i]; 
-      /* Find the start of this item (0 for the first one).  */ 
-      if (i > 0) 
-        struct_offset = FFI_ALIGN(struct_offset, p->alignment); 
- 
-      item_base = src + struct_offset; 
- 
-      switch (p->type) 
-	{ 
-	  case FFI_TYPE_STRUCT: 
-	    if (darwin64_struct_size_exceeds_gprs_p (p, item_base, nfpr)) 
-	      return 1; 
-	    break; 
-	  case FFI_TYPE_LONGDOUBLE: 
-	    if (*nfpr >= NUM_FPR_ARG_REGISTERS) 
-	      return 1; 
-	    (*nfpr) += 1; 
-	    item_base += 8; 
-	  /* FALL THROUGH */ 
-	  case FFI_TYPE_DOUBLE: 
-	    if (*nfpr >= NUM_FPR_ARG_REGISTERS) 
-	      return 1; 
-	    (*nfpr) += 1; 
-	    break; 
-	  case FFI_TYPE_FLOAT: 
-	    if (*nfpr >= NUM_FPR_ARG_REGISTERS) 
-	      return 1; 
-	    (*nfpr) += 1; 
-	    break; 
-	  default: 
-	    /* If we try and place any item, that is non-float, once we've 
-	       exceeded the 8 GPR mark, then we can't fit the struct.  */ 
-	    if ((unsigned long)item_base >= 8*8)  
-	      return 1; 
-	    break;     
-	} 
-      /* now count the size of what we just used.  */ 
-      struct_offset += p->size; 
-    } 
-  return 0; 
-} 
- 
-/* Can this struct be returned by value?  */ 
-int  
-darwin64_struct_ret_by_value_p (ffi_type *s) 
-{ 
-  unsigned nfp = 0; 
- 
-  FFI_ASSERT (s && s->type == FFI_TYPE_STRUCT); 
-   
-  /* The largest structure we can return is 8long + 13 doubles.  */ 
-  if (s->size > 168) 
-    return 0; 
-   
-  /* We can't pass more than 13 floats.  */ 
-  darwin64_scan_struct_for_floats (s, &nfp); 
-  if (nfp > 13) 
-    return 0; 
-   
-  /* If there are not too many floats, and the struct is 
-     small enough to accommodate in the GPRs, then it must be OK.  */ 
-  if (s->size <= 64) 
-    return 1; 
-   
-  /* Well, we have to look harder.  */ 
-  nfp = 0; 
-  if (darwin64_struct_size_exceeds_gprs_p (s, NULL, &nfp)) 
-    return 0; 
-   
-  return 1; 
-} 
- 
-void 
-darwin64_pass_struct_floats (ffi_type *s, char *src,  
-			     unsigned *nfpr, double **fprs) 
-{ 
-  int i; 
-  double *fpr_base = *fprs; 
-  unsigned struct_offset = 0; 
- 
-  /* We don't assume anything about the alignment of the source.  */ 
-  for (i = 0; s->elements[i] != NULL; i++) 
-    { 
-      char *item_base; 
-      ffi_type *p = s->elements[i]; 
-      /* Find the start of this item (0 for the first one).  */ 
-      if (i > 0) 
-        struct_offset = FFI_ALIGN(struct_offset, p->alignment); 
-      item_base = src + struct_offset; 
- 
-      switch (p->type) 
-	{ 
-	  case FFI_TYPE_STRUCT: 
-	    darwin64_pass_struct_floats (p, item_base, nfpr, 
-					   &fpr_base); 
-	    break; 
-	  case FFI_TYPE_LONGDOUBLE: 
-	    if (*nfpr < NUM_FPR_ARG_REGISTERS) 
-	      *fpr_base++ = *(double *)item_base; 
-	    (*nfpr) += 1; 
-	    item_base += 8; 
-	  /* FALL THROUGH */ 
-	  case FFI_TYPE_DOUBLE: 
-	    if (*nfpr < NUM_FPR_ARG_REGISTERS) 
-	      *fpr_base++ = *(double *)item_base; 
-	    (*nfpr) += 1; 
-	    break; 
-	  case FFI_TYPE_FLOAT: 
-	    if (*nfpr < NUM_FPR_ARG_REGISTERS) 
-	      *fpr_base++ = (double) *(float *)item_base; 
-	    (*nfpr) += 1; 
-	    break; 
-	  default: 
-	    break;     
-	} 
-      /* now count the size of what we just used.  */ 
-      struct_offset += p->size; 
-    } 
-  /* Update the scores.  */ 
-  *fprs = fpr_base; 
-} 
- 
-/* Darwin64 special rules. 
-   Break out a struct into params and float registers.  */ 
-static void 
-darwin64_pass_struct_by_value (ffi_type *s, char *src, unsigned size, 
-			       unsigned *nfpr, double **fprs, unsigned long **arg) 
-{ 
-  unsigned long *next_arg = *arg; 
-  char *dest_cpy = (char *)next_arg; 
- 
-  FFI_ASSERT (s->type == FFI_TYPE_STRUCT) 
- 
-  if (!size) 
-    return; 
- 
-  /* First... special cases.  */ 
-  if (size < 3 
-      || (size == 4  
-	  && s->elements[0]  
-	  && s->elements[0]->type != FFI_TYPE_FLOAT)) 
-    { 
-      /* Must be at least one GPR, padding is unspecified in value,  
-	 let's make it zero.  */ 
-      *next_arg = 0UL;  
-      dest_cpy += 8 - size; 
-      memcpy ((char *) dest_cpy, src, size); 
-      next_arg++; 
-    } 
-  else if (size == 16) 
-    { 
-      memcpy ((char *) dest_cpy, src, size); 
-      next_arg += 2; 
-    } 
-  else 
-    { 
-      /* now the general case, we consider embedded floats.  */ 
-      memcpy ((char *) dest_cpy, src, size); 
-      darwin64_pass_struct_floats (s, src, nfpr, fprs); 
-      next_arg += (size+7)/8; 
-    } 
-     
-  *arg = next_arg; 
-} 
- 
-double * 
-darwin64_struct_floats_to_mem (ffi_type *s, char *dest, double *fprs, unsigned *nf) 
-{ 
-  int i; 
-  unsigned struct_offset = 0; 
- 
-  /* We don't assume anything about the alignment of the source.  */ 
-  for (i = 0; s->elements[i] != NULL; i++) 
-    { 
-      char *item_base; 
-      ffi_type *p = s->elements[i]; 
-      /* Find the start of this item (0 for the first one).  */ 
-      if (i > 0) 
-        struct_offset = FFI_ALIGN(struct_offset, p->alignment); 
-      item_base = dest + struct_offset; 
- 
-      switch (p->type) 
-	{ 
-	  case FFI_TYPE_STRUCT: 
-	    fprs = darwin64_struct_floats_to_mem (p, item_base, fprs, nf); 
-	    break; 
-	  case FFI_TYPE_LONGDOUBLE: 
-	    if (*nf < NUM_FPR_ARG_REGISTERS) 
-	      { 
-		*(double *)item_base = *fprs++ ; 
-		(*nf) += 1; 
-	      } 
-	    item_base += 8; 
-	  /* FALL THROUGH */ 
-	  case FFI_TYPE_DOUBLE: 
-	    if (*nf < NUM_FPR_ARG_REGISTERS) 
-	      { 
-		*(double *)item_base = *fprs++ ; 
-		(*nf) += 1; 
-	      } 
-	    break; 
-	  case FFI_TYPE_FLOAT: 
-	    if (*nf < NUM_FPR_ARG_REGISTERS) 
-	      { 
-		*(float *)item_base = (float) *fprs++ ; 
-		(*nf) += 1; 
-	      } 
-	    break; 
-	  default: 
-	    break;     
-	} 
-      /* now count the size of what we just used.  */ 
-      struct_offset += p->size; 
-    } 
-  return fprs; 
-} 
- 
-#endif 
- 
-/* Adjust the size of S to be correct for Darwin. 
-   On Darwin m32, the first field of a structure has natural alignment.   
-   On Darwin m64, all fields have natural alignment.  */ 
- 
-static void 
-darwin_adjust_aggregate_sizes (ffi_type *s) 
-{ 
-  int i; 
- 
-  if (s->type != FFI_TYPE_STRUCT) 
-    return; 
- 
-  s->size = 0; 
-  for (i = 0; s->elements[i] != NULL; i++) 
-    { 
-      ffi_type *p; 
-      int align; 
-       
-      p = s->elements[i]; 
-      if (p->type == FFI_TYPE_STRUCT) 
-	darwin_adjust_aggregate_sizes (p); 
-#if defined(POWERPC_DARWIN64) 
-      /* Natural alignment for all items.  */ 
-      align = p->alignment; 
-#else 
-      /* Natural alignment for the first item... */ 
-      if (i == 0) 
-	align = p->alignment; 
-      else if (p->alignment == 16 || p->alignment < 4) 
-	/* .. subsequent items with vector or align < 4 have natural align.  */ 
-	align = p->alignment; 
-      else 
-	/* .. or align is 4.  */ 
-	align = 4; 
-#endif 
-      /* Pad, if necessary, before adding the current item.  */ 
-      s->size = FFI_ALIGN(s->size, align) + p->size; 
-    } 
+/* -----------------------------------------------------------------------
+   ffi_darwin.c
+
+   Copyright (C) 1998 Geoffrey Keating
+   Copyright (C) 2001 John Hornkvist
+   Copyright (C) 2002, 2006, 2007, 2009, 2010 Free Software Foundation, Inc.
+
+   FFI support for Darwin and AIX.
    
-  s->size = FFI_ALIGN(s->size, s->alignment); 
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include <ffi.h>
+#include <ffi_common.h>
+
+#include <stdlib.h>
+
+extern void ffi_closure_ASM (void);
+extern void ffi_go_closure_ASM (void);
+
+enum { /* 1 << (31-n) selects bit n counted from the MSB of a 32-bit word */
+  /* The assembly depends on these exact flags.  
+     For Darwin64 (when FLAG_RETURNS_STRUCT is set):
+       FLAG_RETURNS_FP indicates that the structure embeds FP data.
+       FLAG_RETURNS_128BITS signals a special struct size that is not
+       expanded for float content.  */
+  FLAG_RETURNS_128BITS	= 1 << (31-31), /* These go in cr7  */
+  FLAG_RETURNS_NOTHING	= 1 << (31-30), /* 0x00000002 */
+  FLAG_RETURNS_FP	= 1 << (31-29), /* 0x00000004 */
+  FLAG_RETURNS_64BITS	= 1 << (31-28), /* 0x00000008 */
+
+  FLAG_RETURNS_STRUCT	= 1 << (31-27), /* This goes in cr6  */
+
+  FLAG_ARG_NEEDS_COPY   = 1 << (31- 7), /* 0x01000000 */
+  FLAG_FP_ARGUMENTS     = 1 << (31- 6), /* cr1.eq; specified by ABI  */
+  FLAG_4_GPR_ARGUMENTS  = 1 << (31- 5), /* 0x04000000 */
+  FLAG_RETVAL_REFERENCE = 1 << (31- 4) /* 0x08000000 */
+};
+
+/* About the DARWIN ABI.  */
+enum {
+  NUM_GPR_ARG_REGISTERS = 8,
+  NUM_FPR_ARG_REGISTERS = 13,
+  LINKAGE_AREA_GPRS = 6
+};
+
+enum { ASM_NEEDS_REGISTERS = 4 }; /* r28-r31 */
+
+/* ffi_prep_args is called by the assembly routine once stack space
+   has been allocated for the function's arguments.
    
-  /* This should not be necessary on m64, but harmless.  */ 
-  if (s->elements[0]->type == FFI_TYPE_UINT64 
-      || s->elements[0]->type == FFI_TYPE_SINT64 
-      || s->elements[0]->type == FFI_TYPE_DOUBLE 
-      || s->elements[0]->alignment == 8) 
-    s->alignment = s->alignment > 8 ? s->alignment : 8; 
-  /* Do not add additional tail padding.  */ 
-} 
- 
-/* Adjust the size of S to be correct for AIX. 
-   Word-align double unless it is the first member of a structure.  */ 
- 
-static void 
-aix_adjust_aggregate_sizes (ffi_type *s) 
-{ 
-  int i; 
- 
-  if (s->type != FFI_TYPE_STRUCT) 
-    return; 
- 
-  s->size = 0; 
-  for (i = 0; s->elements[i] != NULL; i++) 
-    { 
-      ffi_type *p; 
-      int align; 
-       
-      p = s->elements[i]; 
-      aix_adjust_aggregate_sizes (p); 
-      align = p->alignment; 
-      if (i != 0 && p->type == FFI_TYPE_DOUBLE) 
-	align = 4; 
-      s->size = FFI_ALIGN(s->size, align) + p->size; 
-    } 
-   
-  s->size = FFI_ALIGN(s->size, s->alignment); 
-   
-  if (s->elements[0]->type == FFI_TYPE_UINT64 
-      || s->elements[0]->type == FFI_TYPE_SINT64 
-      || s->elements[0]->type == FFI_TYPE_DOUBLE 
-      || s->elements[0]->alignment == 8) 
-    s->alignment = s->alignment > 8 ? s->alignment : 8; 
-  /* Do not add additional tail padding.  */ 
-} 
- 
-/* Perform machine dependent cif processing.  */ 
-ffi_status 
-ffi_prep_cif_machdep (ffi_cif *cif) 
-{ 
-  /* All this is for the DARWIN ABI.  */ 
-  unsigned i; 
-  ffi_type **ptr; 
-  unsigned bytes; 
-  unsigned fparg_count = 0, intarg_count = 0; 
-  unsigned flags = 0; 
-  unsigned size_al = 0; 
- 
-  /* All the machine-independent calculation of cif->bytes will be wrong. 
-     All the calculation of structure sizes will also be wrong. 
-     Redo the calculation for DARWIN.  */ 
- 
-  if (cif->abi == FFI_DARWIN) 
-    { 
-      darwin_adjust_aggregate_sizes (cif->rtype); 
-      for (i = 0; i < cif->nargs; i++) 
-	darwin_adjust_aggregate_sizes (cif->arg_types[i]); 
-    } 
- 
-  if (cif->abi == FFI_AIX) 
-    { 
-      aix_adjust_aggregate_sizes (cif->rtype); 
-      for (i = 0; i < cif->nargs; i++) 
-	aix_adjust_aggregate_sizes (cif->arg_types[i]); 
-    } 
- 
-  /* Space for the frame pointer, callee's LR, CR, etc, and for 
-     the asm's temp regs.  */ 
- 
-  bytes = (LINKAGE_AREA_GPRS + ASM_NEEDS_REGISTERS) * sizeof(unsigned long); 
- 
-  /* Return value handling.   
-    The rules m32 are as follows: 
-     - 32-bit (or less) integer values are returned in gpr3; 
-     - structures of size <= 4 bytes also returned in gpr3; 
-     - 64-bit integer values [??? and structures between 5 and 8 bytes] are 
-       returned in gpr3 and gpr4; 
-     - Single/double FP values are returned in fpr1; 
-     - Long double FP (if not equivalent to double) values are returned in 
-       fpr1 and fpr2; 
-     m64: 
-     - 64-bit or smaller integral values are returned in GPR3 
-     - Single/double FP values are returned in fpr1; 
-     - Long double FP values are returned in fpr1 and fpr2; 
-     m64 Structures: 
-     - If the structure could be accommodated in registers were it to be the 
-       first argument to a routine, then it is returned in those registers. 
-     m32/m64 structures otherwise: 
-     - Larger structures values are allocated space and a pointer is passed 
-       as the first argument.  */ 
-  switch (cif->rtype->type) 
-    { 
- 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-    case FFI_TYPE_LONGDOUBLE: 
-      flags |= FLAG_RETURNS_128BITS; 
-      flags |= FLAG_RETURNS_FP; 
-      break; 
-#endif 
- 
-    case FFI_TYPE_DOUBLE: 
-      flags |= FLAG_RETURNS_64BITS; 
-      /* Fall through.  */ 
-    case FFI_TYPE_FLOAT: 
-      flags |= FLAG_RETURNS_FP; 
-      break; 
- 
-    case FFI_TYPE_UINT64: 
-    case FFI_TYPE_SINT64: 
-#ifdef POWERPC64 
-    case FFI_TYPE_POINTER: 
-#endif 
-      flags |= FLAG_RETURNS_64BITS; 
-      break; 
- 
-    case FFI_TYPE_STRUCT: 
-#if defined(POWERPC_DARWIN64) 
-      { 
-	/* Can we fit the struct into regs?  */ 
-	if (darwin64_struct_ret_by_value_p (cif->rtype)) 
-	  { 
-	    unsigned nfpr = 0; 
-	    flags |= FLAG_RETURNS_STRUCT; 
-	    if (cif->rtype->size != 16) 
-	      darwin64_scan_struct_for_floats (cif->rtype, &nfpr) ; 
-	    else 
-	      flags |= FLAG_RETURNS_128BITS; 
-	    /* Will be 0 for 16byte struct.  */ 
-	    if (nfpr) 
-	      flags |= FLAG_RETURNS_FP; 
-	  } 
-	else /* By ref. */ 
-	  { 
-	    flags |= FLAG_RETVAL_REFERENCE; 
-	    flags |= FLAG_RETURNS_NOTHING; 
-	    intarg_count++; 
-	  } 
-      } 
-#elif defined(DARWIN_PPC) 
-      if (cif->rtype->size <= 4) 
-	flags |= FLAG_RETURNS_STRUCT; 
-      else /* else by reference.  */ 
-	{ 
-	  flags |= FLAG_RETVAL_REFERENCE; 
-	  flags |= FLAG_RETURNS_NOTHING; 
-	  intarg_count++; 
-	} 
-#else /* assume we pass by ref.  */ 
-      flags |= FLAG_RETVAL_REFERENCE; 
-      flags |= FLAG_RETURNS_NOTHING; 
-      intarg_count++; 
-#endif 
-      break; 
-    case FFI_TYPE_VOID: 
-      flags |= FLAG_RETURNS_NOTHING; 
-      break; 
- 
-    default: 
-      /* Returns 32-bit integer, or similar.  Nothing to do here.  */ 
-      break; 
-    } 
- 
-  /* The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the 
-     first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest 
-     goes on the stack.   
-     ??? Structures are passed as a pointer to a copy of the structure.  
-     Stuff on the stack needs to keep proper alignment.   
-     For m64 the count is effectively of half-GPRs.  */ 
-  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) 
-    { 
-      unsigned align_words; 
-      switch ((*ptr)->type) 
-	{ 
-	case FFI_TYPE_FLOAT: 
-	case FFI_TYPE_DOUBLE: 
-	  fparg_count++; 
+   m32/m64
+
+   The stack layout we want looks like this:
+
+   |   Return address from ffi_call_DARWIN      |	higher addresses
+   |--------------------------------------------|
+   |   Previous backchain pointer	4/8	|	stack pointer here
+   |--------------------------------------------|<+ <<<	on entry to
+   |   ASM_NEEDS_REGISTERS=r28-r31   4*(4/8)	| |	ffi_call_DARWIN
+   |--------------------------------------------| |
+   |   When we have any FP activity... the	| |
+   |   FPRs occupy NUM_FPR_ARG_REGISTERS slots	| |
+   |   here fp13 .. fp1 from high to low addr.	| |
+   ~						~ ~
+   |   Parameters      (at least 8*4/8=32/64)	| | NUM_GPR_ARG_REGISTERS
+   |--------------------------------------------| |
+   |   TOC=R2 (AIX) Reserved (Darwin)   4/8	| |
+   |--------------------------------------------| |	stack	|
+   |   Reserved                       2*4/8	| |	grows	|
+   |--------------------------------------------| |	down	V
+   |   Space for callee's LR		4/8	| |
+   |--------------------------------------------| |	lower addresses
+   |   Saved CR [low word for m64]      4/8	| |
+   |--------------------------------------------| |     stack pointer here
+   |   Current backchain pointer	4/8	|-/	during
+   |--------------------------------------------|   <<<	ffi_call_DARWIN
+
+   */
+
+#if defined(POWERPC_DARWIN64)
+static void
+darwin64_pass_struct_by_value 
+  (ffi_type *, char *, unsigned, unsigned *, double **, unsigned long **);
+#endif
+
+/* This depends on GPR_SIZE = sizeof (unsigned long) */
+
+void
+ffi_prep_args (extended_cif *ecif, unsigned long *const stack)
+{
+  const unsigned bytes = ecif->cif->bytes;
+  const unsigned flags = ecif->cif->flags;
+  const unsigned nargs = ecif->cif->nargs;
 #if !defined(POWERPC_DARWIN64) 
-	  /* If this FP arg is going on the stack, it must be 
-	     8-byte-aligned.  */ 
-	  if (fparg_count > NUM_FPR_ARG_REGISTERS 
-	      && (intarg_count & 0x01) != 0) 
-	    intarg_count++; 
-#endif 
-	  break; 
- 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-	case FFI_TYPE_LONGDOUBLE: 
-	  fparg_count += 2; 
-	  /* If this FP arg is going on the stack, it must be 
-	     16-byte-aligned.  */ 
-	  if (fparg_count >= NUM_FPR_ARG_REGISTERS) 
-#if defined (POWERPC64) 
-	    intarg_count = FFI_ALIGN(intarg_count, 2); 
-#else 
-	    intarg_count = FFI_ALIGN(intarg_count, 4); 
-#endif 
-	  break; 
-#endif 
- 
-	case FFI_TYPE_UINT64: 
-	case FFI_TYPE_SINT64: 
-#if defined(POWERPC64) 
-	  intarg_count++; 
-#else 
-	  /* 'long long' arguments are passed as two words, but 
-	     either both words must fit in registers or both go 
-	     on the stack.  If they go on the stack, they must 
-	     be 8-byte-aligned.  */ 
-	  if (intarg_count == NUM_GPR_ARG_REGISTERS-1 
-	      || (intarg_count >= NUM_GPR_ARG_REGISTERS  
-	          && (intarg_count & 0x01) != 0)) 
-	    intarg_count++; 
-	  intarg_count += 2; 
-#endif 
-	  break; 
- 
-	case FFI_TYPE_STRUCT: 
-	  size_al = (*ptr)->size; 
-#if defined(POWERPC_DARWIN64) 
-	  align_words = (*ptr)->alignment >> 3; 
-	  if (align_words) 
-	    intarg_count = FFI_ALIGN(intarg_count, align_words); 
-	  /* Base size of the struct.  */ 
-	  intarg_count += (size_al + 7) / 8; 
-	  /* If 16 bytes then don't worry about floats.  */ 
-	  if (size_al != 16) 
-	    /* Scan through for floats to be placed in regs.  */ 
-	    darwin64_scan_struct_for_floats (*ptr, &fparg_count) ; 
-#else 
-	  align_words = (*ptr)->alignment >> 2; 
-	  if (align_words) 
-	    intarg_count = FFI_ALIGN(intarg_count, align_words); 
-	  /* If the first member of the struct is a double, then align 
-	     the struct to double-word.  
-	  if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE) 
-	    size_al = FFI_ALIGN((*ptr)->size, 8); */ 
-#  ifdef POWERPC64 
-	  intarg_count += (size_al + 7) / 8; 
-#  else 
-	  intarg_count += (size_al + 3) / 4; 
-#  endif 
-#endif 
-	  break; 
- 
-	default: 
-	  /* Everything else is passed as a 4-byte word in a GPR, either 
-	     the object itself or a pointer to it.  */ 
-	  intarg_count++; 
-	  break; 
-	} 
-    } 
- 
-  if (fparg_count != 0) 
-    flags |= FLAG_FP_ARGUMENTS; 
- 
-#if defined(POWERPC_DARWIN64) 
-  /* Space to image the FPR registers, if needed - which includes when they might be 
-     used in a struct return.  */ 
-  if (fparg_count != 0  
-      || ((flags & FLAG_RETURNS_STRUCT) 
-	   && (flags & FLAG_RETURNS_FP))) 
-    bytes += NUM_FPR_ARG_REGISTERS * sizeof(double); 
-#else 
-  /* Space for the FPR registers, if needed.  */ 
-  if (fparg_count != 0) 
-    bytes += NUM_FPR_ARG_REGISTERS * sizeof(double); 
-#endif 
- 
-  /* Stack space.  */ 
-#ifdef POWERPC64 
-  if ((intarg_count + fparg_count) > NUM_GPR_ARG_REGISTERS) 
-    bytes += (intarg_count + fparg_count) * sizeof(long); 
-#else 
-  if ((intarg_count + 2 * fparg_count) > NUM_GPR_ARG_REGISTERS) 
-    bytes += (intarg_count + 2 * fparg_count) * sizeof(long); 
-#endif 
-  else 
-    bytes += NUM_GPR_ARG_REGISTERS * sizeof(long); 
- 
-  /* The stack space allocated needs to be a multiple of 16 bytes.  */ 
-  bytes = FFI_ALIGN(bytes, 16) ; 
- 
-  cif->flags = flags; 
-  cif->bytes = bytes; 
- 
-  return FFI_OK; 
-} 
- 
-extern void ffi_call_AIX(extended_cif *, long, unsigned, unsigned *, 
-			 void (*fn)(void), void (*fn2)(void)); 
- 
-extern void ffi_call_go_AIX(extended_cif *, long, unsigned, unsigned *, 
-			    void (*fn)(void), void (*fn2)(void), void *closure); 
- 
-extern void ffi_call_DARWIN(extended_cif *, long, unsigned, unsigned *, 
-			    void (*fn)(void), void (*fn2)(void), ffi_type*); 
- 
-void 
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) 
-{ 
-  extended_cif ecif; 
- 
-  ecif.cif = cif; 
-  ecif.avalue = avalue; 
- 
-  /* If the return value is a struct and we don't have a return 
-     value address then we need to make one.  */ 
- 
-  if ((rvalue == NULL) && 
-      (cif->rtype->type == FFI_TYPE_STRUCT)) 
-    { 
-      ecif.rvalue = alloca (cif->rtype->size); 
-    } 
-  else 
-    ecif.rvalue = rvalue; 
- 
-  switch (cif->abi) 
-    { 
-    case FFI_AIX: 
-      ffi_call_AIX(&ecif, -(long)cif->bytes, cif->flags, ecif.rvalue, fn, 
-		   FFI_FN(ffi_prep_args)); 
-      break; 
-    case FFI_DARWIN: 
-      ffi_call_DARWIN(&ecif, -(long)cif->bytes, cif->flags, ecif.rvalue, fn, 
-		      FFI_FN(ffi_prep_args), cif->rtype); 
-      break; 
-    default: 
-      FFI_ASSERT(0); 
-      break; 
-    } 
-} 
- 
-void 
-ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue, 
-	     void *closure) 
-{ 
-  extended_cif ecif; 
- 
-  ecif.cif = cif; 
-  ecif.avalue = avalue; 
- 
-  /* If the return value is a struct and we don't have a return 
-     value address then we need to make one.  */ 
- 
-  if ((rvalue == NULL) && 
-      (cif->rtype->type == FFI_TYPE_STRUCT)) 
-    { 
-      ecif.rvalue = alloca (cif->rtype->size); 
-    } 
-  else 
-    ecif.rvalue = rvalue; 
- 
-  switch (cif->abi) 
-    { 
-    case FFI_AIX: 
-      ffi_call_go_AIX(&ecif, -(long)cif->bytes, cif->flags, ecif.rvalue, fn, 
-		      FFI_FN(ffi_prep_args), closure); 
-      break; 
-    default: 
-      FFI_ASSERT(0); 
-      break; 
-    } 
-} 
- 
-static void flush_icache(char *); 
-static void flush_range(char *, int); 
- 
-/* The layout of a function descriptor.  A C function pointer really 
-   points to one of these.  */ 
- 
-typedef struct aix_fd_struct { 
-  void *code_pointer; 
-  void *toc; 
-} aix_fd; 
- 
-/* here I'd like to add the stack frame layout we use in darwin_closure.S 
-   and aix_closure.S 
- 
-   m32/m64 
- 
-   The stack layout looks like this: 
- 
-   |   Additional params...			| |     Higher address 
-   ~						~ ~ 
-   |   Parameters      (at least 8*4/8=32/64)	| | NUM_GPR_ARG_REGISTERS 
-   |--------------------------------------------| | 
-   |   TOC=R2 (AIX) Reserved (Darwin)   4/8	| | 
-   |--------------------------------------------| | 
-   |   Reserved                       2*4/8	| | 
-   |--------------------------------------------| | 
-   |   Space for callee's LR		4/8	| | 
-   |--------------------------------------------| | 
-   |   Saved CR [low word for m64]      4/8	| | 
-   |--------------------------------------------| | 
-   |   Current backchain pointer	4/8	|-/ Parent's frame. 
-   |--------------------------------------------| <+ <<< on entry to ffi_closure_ASM 
-   |   Result Bytes			16	| | 
-   |--------------------------------------------| | 
-   ~   padding to 16-byte alignment		~ ~ 
-   |--------------------------------------------| | 
-   |   NUM_FPR_ARG_REGISTERS slots		| | 
-   |   here fp13 .. fp1		       13*8	| | 
-   |--------------------------------------------| | 
-   |   R3..R10			  8*4/8=32/64	| | NUM_GPR_ARG_REGISTERS 
-   |--------------------------------------------| | 
-   |   TOC=R2 (AIX) Reserved (Darwin)   4/8	| | 
-   |--------------------------------------------| |	stack	| 
-   |   Reserved [compiler,binder]     2*4/8	| |	grows	| 
-   |--------------------------------------------| |	down	V 
-   |   Space for callee's LR		4/8	| | 
-   |--------------------------------------------| |	lower addresses 
-   |   Saved CR [low word for m64]      4/8	| | 
-   |--------------------------------------------| |     stack pointer here 
-   |   Current backchain pointer	4/8	|-/	during 
-   |--------------------------------------------|   <<<	ffi_closure_ASM. 
- 
-*/ 
- 
-ffi_status 
-ffi_prep_closure_loc (ffi_closure* closure, 
-		      ffi_cif* cif, 
-		      void (*fun)(ffi_cif*, void*, void**, void*), 
-		      void *user_data, 
-		      void *codeloc) 
-{ 
-  unsigned int *tramp; 
-  struct ffi_aix_trampoline_struct *tramp_aix; 
-  aix_fd *fd; 
- 
-  switch (cif->abi) 
-    { 
-      case FFI_DARWIN: 
- 
-	FFI_ASSERT (cif->abi == FFI_DARWIN); 
- 
-	tramp = (unsigned int *) &closure->tramp[0]; 
-#if defined(POWERPC_DARWIN64) 
-	tramp[0] = 0x7c0802a6;  /*   mflr    r0  */ 
-	tramp[1] = 0x429f0015;  /*   bcl-    20,4*cr7+so,  +0x18 (L1)  */ 
-	/* We put the addresses here.  */ 
-	tramp[6] = 0x7d6802a6;  /*L1:   mflr    r11  */ 
-	tramp[7] = 0xe98b0000;  /*   ld     r12,0(r11) function address  */ 
-	tramp[8] = 0x7c0803a6;  /*   mtlr    r0   */ 
-	tramp[9] = 0x7d8903a6;  /*   mtctr   r12  */ 
-	tramp[10] = 0xe96b0008;  /*   lwz     r11,8(r11) static chain  */ 
-	tramp[11] = 0x4e800420;  /*   bctr  */ 
- 
-	*((unsigned long *)&tramp[2]) = (unsigned long) ffi_closure_ASM; /* function  */ 
-	*((unsigned long *)&tramp[4]) = (unsigned long) codeloc; /* context  */ 
-#else 
-	tramp[0] = 0x7c0802a6;  /*   mflr    r0  */ 
-	tramp[1] = 0x429f000d;  /*   bcl-    20,4*cr7+so,0x10  */ 
-	tramp[4] = 0x7d6802a6;  /*   mflr    r11  */ 
-	tramp[5] = 0x818b0000;  /*   lwz     r12,0(r11) function address  */ 
-	tramp[6] = 0x7c0803a6;  /*   mtlr    r0   */ 
-	tramp[7] = 0x7d8903a6;  /*   mtctr   r12  */ 
-	tramp[8] = 0x816b0004;  /*   lwz     r11,4(r11) static chain  */ 
-	tramp[9] = 0x4e800420;  /*   bctr  */ 
-	tramp[2] = (unsigned long) ffi_closure_ASM; /* function  */ 
-	tramp[3] = (unsigned long) codeloc; /* context  */ 
-#endif 
-	closure->cif = cif; 
-	closure->fun = fun; 
-	closure->user_data = user_data; 
- 
-	/* Flush the icache. Only necessary on Darwin.  */ 
-	flush_range(codeloc, FFI_TRAMPOLINE_SIZE); 
- 
-	break; 
- 
-    case FFI_AIX: 
- 
-      tramp_aix = (struct ffi_aix_trampoline_struct *) (closure->tramp); 
-      fd = (aix_fd *)(void *)ffi_closure_ASM; 
- 
-      FFI_ASSERT (cif->abi == FFI_AIX); 
- 
-      tramp_aix->code_pointer = fd->code_pointer; 
-      tramp_aix->toc = fd->toc; 
-      tramp_aix->static_chain = codeloc; 
-      closure->cif = cif; 
-      closure->fun = fun; 
-      closure->user_data = user_data; 
-      break; 
- 
-    default: 
-      return FFI_BAD_ABI; 
-      break; 
-    } 
-  return FFI_OK; 
-} 
- 
-ffi_status 
-ffi_prep_go_closure (ffi_go_closure* closure, 
-		     ffi_cif* cif, 
-		     void (*fun)(ffi_cif*, void*, void**, void*)) 
-{ 
-  switch (cif->abi) 
-    { 
-      case FFI_AIX: 
- 
-        FFI_ASSERT (cif->abi == FFI_AIX); 
- 
-        closure->tramp = (void *)ffi_go_closure_ASM; 
-        closure->cif = cif; 
-        closure->fun = fun; 
-        return FFI_OK; 
-       
-      // For now, ffi_prep_go_closure is only implemented for AIX, not for Darwin 
-      default: 
-        return FFI_BAD_ABI; 
-        break; 
-    } 
-  return FFI_OK; 
-} 
- 
-static void 
-flush_icache(char *addr) 
-{ 
-#ifndef _AIX 
-  __asm__ volatile ( 
-		"dcbf 0,%0\n" 
-		"\tsync\n" 
-		"\ticbi 0,%0\n" 
-		"\tsync\n" 
-		"\tisync" 
-		: : "r"(addr) : "memory"); 
-#endif 
-} 
- 
-static void 
-flush_range(char * addr1, int size) 
-{ 
-#define MIN_LINE_SIZE 32 
-  int i; 
-  for (i = 0; i < size; i += MIN_LINE_SIZE) 
-    flush_icache(addr1+i); 
-  flush_icache(addr1+size-1); 
-} 
- 
-typedef union 
-{ 
-  float f; 
-  double d; 
-} ffi_dblfl; 
- 
-ffi_type * 
-ffi_closure_helper_DARWIN (ffi_closure *, void *, 
-			   unsigned long *, ffi_dblfl *); 
- 
-ffi_type * 
-ffi_go_closure_helper_DARWIN (ffi_go_closure*, void *, 
-			      unsigned long *, ffi_dblfl *); 
- 
-/* Basically the trampoline invokes ffi_closure_ASM, and on 
-   entry, r11 holds the address of the closure. 
-   After storing the registers that could possibly contain 
-   parameters to be passed into the stack frame and setting 
-   up space for a return value, ffi_closure_ASM invokes the 
-   following helper function to do most of the work.  */ 
- 
-static ffi_type * 
-ffi_closure_helper_common (ffi_cif* cif, 
-			   void (*fun)(ffi_cif*, void*, void**, void*), 
-			   void *user_data, void *rvalue, 
-			   unsigned long *pgr, ffi_dblfl *pfr) 
-{ 
-  /* rvalue is the pointer to space for return value in closure assembly 
-     pgr is the pointer to where r3-r10 are stored in ffi_closure_ASM 
-     pfr is the pointer to where f1-f13 are stored in ffi_closure_ASM.  */ 
- 
-  typedef double ldbits[2]; 
- 
-  union ldu 
-  { 
-    ldbits lb; 
-    long double ld; 
-  }; 
- 
-  void **          avalue; 
-  ffi_type **      arg_types; 
-  long             i, avn; 
-  ffi_dblfl *      end_pfr = pfr + NUM_FPR_ARG_REGISTERS; 
-  unsigned         size_al; 
-#if defined(POWERPC_DARWIN64) 
-  unsigned 	   fpsused = 0; 
-#endif 
- 
-  avalue = alloca (cif->nargs * sizeof(void *)); 
- 
-  if (cif->rtype->type == FFI_TYPE_STRUCT) 
-    { 
-#if defined(POWERPC_DARWIN64) 
-      if (!darwin64_struct_ret_by_value_p (cif->rtype)) 
-	{ 
-    	  /* Won't fit into the regs - return by ref.  */ 
-	  rvalue = (void *) *pgr; 
-	  pgr++; 
-	} 
-#elif defined(DARWIN_PPC) 
-      if (cif->rtype->size > 4) 
-	{ 
-	  rvalue = (void *) *pgr; 
-	  pgr++; 
-	} 
-#else /* assume we return by ref.  */ 
-      rvalue = (void *) *pgr; 
-      pgr++; 
-#endif 
-    } 
- 
-  i = 0; 
-  avn = cif->nargs; 
-  arg_types = cif->arg_types; 
- 
-  /* Grab the addresses of the arguments from the stack frame.  */ 
-  while (i < avn) 
-    { 
-      switch (arg_types[i]->type) 
-	{ 
-	case FFI_TYPE_SINT8: 
-	case FFI_TYPE_UINT8: 
-#if  defined(POWERPC64) 
-	  avalue[i] = (char *) pgr + 7; 
-#else 
-	  avalue[i] = (char *) pgr + 3; 
-#endif 
-	  pgr++; 
-	  break; 
- 
-	case FFI_TYPE_SINT16: 
-	case FFI_TYPE_UINT16: 
-#if  defined(POWERPC64) 
-	  avalue[i] = (char *) pgr + 6; 
-#else 
-	  avalue[i] = (char *) pgr + 2; 
-#endif 
-	  pgr++; 
-	  break; 
- 
-	case FFI_TYPE_SINT32: 
-	case FFI_TYPE_UINT32: 
-#if  defined(POWERPC64) 
-	  avalue[i] = (char *) pgr + 4; 
-#else 
-	case FFI_TYPE_POINTER: 
-	  avalue[i] = pgr; 
-#endif 
-	  pgr++; 
-	  break; 
- 
-	case FFI_TYPE_STRUCT: 
-	  size_al = arg_types[i]->size; 
-#if defined(POWERPC_DARWIN64) 
-	  pgr = (unsigned long *)FFI_ALIGN((char *)pgr, arg_types[i]->alignment); 
-	  if (size_al < 3 || size_al == 4) 
-	    { 
-	      avalue[i] = ((char *)pgr)+8-size_al; 
-	      if (arg_types[i]->elements[0]->type == FFI_TYPE_FLOAT 
-		  && fpsused < NUM_FPR_ARG_REGISTERS) 
-		{ 
-		  *(float *)pgr = (float) *(double *)pfr; 
-		  pfr++; 
-		  fpsused++; 
-		} 
-	    } 
-	  else  
-	    { 
-	      if (size_al != 16) 
-		pfr = (ffi_dblfl *)  
-		    darwin64_struct_floats_to_mem (arg_types[i], (char *)pgr, 
-						   (double *)pfr, &fpsused); 
-	      avalue[i] = pgr; 
-	    } 
-	  pgr += (size_al + 7) / 8; 
-#else 
-	  /* If the first member of the struct is a double, then align 
-	     the struct to double-word.  */ 
-	  if (arg_types[i]->elements[0]->type == FFI_TYPE_DOUBLE) 
-	    size_al = FFI_ALIGN(arg_types[i]->size, 8); 
+  const ffi_abi abi = ecif->cif->abi;
+#endif
+
+  /* 'stacktop' points at the previous backchain pointer.  */
+  unsigned long *const stacktop = stack + (bytes / sizeof(unsigned long));
+
+  /* 'fpr_base' points at the space for fpr1, and grows upwards as
+     we use FPR registers.  */
+  double *fpr_base = (double *) (stacktop - ASM_NEEDS_REGISTERS) - NUM_FPR_ARG_REGISTERS;
+  int gp_count = 0, fparg_count = 0;
+
+  /* 'next_arg' grows up as we put parameters in it.  */
+  unsigned long *next_arg = stack + LINKAGE_AREA_GPRS; /* 6 reserved positions.  */
+
+  int i;
+  double double_tmp;
+  void **p_argv = ecif->avalue;
+  unsigned long gprvalue;
+  ffi_type** ptr = ecif->cif->arg_types;
+#if !defined(POWERPC_DARWIN64) 
+  char *dest_cpy;
+#endif
+  unsigned size_al = 0;
+
+  /* Check that everything starts aligned properly.  */
+  FFI_ASSERT(((unsigned) (char *) stack & 0xF) == 0);
+  FFI_ASSERT(((unsigned) (char *) stacktop & 0xF) == 0);
+  FFI_ASSERT((bytes & 0xF) == 0);
+
+  /* Deal with return values that are actually pass-by-reference.
+     Rule:
+     Return values are referenced by r3, so r4 is the first parameter.  */
+
+  if (flags & FLAG_RETVAL_REFERENCE)
+    *next_arg++ = (unsigned long) (char *) ecif->rvalue;
+
+  /* Now for the arguments.  */
+  for (i = nargs; i > 0; i--, ptr++, p_argv++)
+    {
+      switch ((*ptr)->type)
+	{
+	/* If a floating-point parameter appears before all of the general-
+	   purpose registers are filled, the corresponding GPRs that match
+	   the size of the floating-point parameter are skipped.  */
+	case FFI_TYPE_FLOAT:
+	  double_tmp = *(float *) *p_argv;
+	  if (fparg_count < NUM_FPR_ARG_REGISTERS)
+	    *fpr_base++ = double_tmp;
+#if defined(POWERPC_DARWIN)
+	  *(float *)next_arg = *(float *) *p_argv;
+#else
+	  *(double *)next_arg = double_tmp;
+#endif
+	  next_arg++;
+	  gp_count++;
+	  fparg_count++;
+	  FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
+	  break;
+
+	case FFI_TYPE_DOUBLE:
+	  double_tmp = *(double *) *p_argv;
+	  if (fparg_count < NUM_FPR_ARG_REGISTERS)
+	    *fpr_base++ = double_tmp;
+	  *(double *)next_arg = double_tmp;
+#ifdef POWERPC64
+	  next_arg++;
+	  gp_count++;
+#else
+	  next_arg += 2;
+	  gp_count += 2;
+#endif
+	  fparg_count++;
+	  FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
+	  break;
+
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+
+	case FFI_TYPE_LONGDOUBLE:
+#  if defined(POWERPC64) && !defined(POWERPC_DARWIN64)
+	  /* ??? This will exceed the regs count when the value starts at fp13
+	     and it will not put the extra bit on the stack.  */
+	  if (fparg_count < NUM_FPR_ARG_REGISTERS)
+	    *(long double *) fpr_base++ = *(long double *) *p_argv;
+	  else
+	    *(long double *) next_arg = *(long double *) *p_argv;
+	  next_arg += 2;
+	  fparg_count += 2;
+#  else
+	  double_tmp = ((double *) *p_argv)[0];
+	  if (fparg_count < NUM_FPR_ARG_REGISTERS)
+	    *fpr_base++ = double_tmp;
+	  *(double *) next_arg = double_tmp;
+#    if defined(POWERPC_DARWIN64)
+	  next_arg++;
+	  gp_count++;
+#    else
+	  next_arg += 2;
+	  gp_count += 2;
+#    endif
+	  fparg_count++;
+	  double_tmp = ((double *) *p_argv)[1];
+	  if (fparg_count < NUM_FPR_ARG_REGISTERS)
+	    *fpr_base++ = double_tmp;
+	  *(double *) next_arg = double_tmp;
+#    if defined(POWERPC_DARWIN64)
+	  next_arg++;
+	  gp_count++;
+#    else
+	  next_arg += 2;
+	  gp_count += 2;
+#    endif
+	  fparg_count++;
+#  endif
+	  FFI_ASSERT(flags & FLAG_FP_ARGUMENTS);
+	  break;
+#endif
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+#ifdef POWERPC64
+	  gprvalue = *(long long *) *p_argv;
+	  goto putgpr;
+#else
+	  *(long long *) next_arg = *(long long *) *p_argv;
+	  next_arg += 2;
+	  gp_count += 2;
+#endif
+	  break;
+	case FFI_TYPE_POINTER:
+	  gprvalue = *(unsigned long *) *p_argv;
+	  goto putgpr;
+	case FFI_TYPE_UINT8:
+	  gprvalue = *(unsigned char *) *p_argv;
+	  goto putgpr;
+	case FFI_TYPE_SINT8:
+	  gprvalue = *(signed char *) *p_argv;
+	  goto putgpr;
+	case FFI_TYPE_UINT16:
+	  gprvalue = *(unsigned short *) *p_argv;
+	  goto putgpr;
+	case FFI_TYPE_SINT16:
+	  gprvalue = *(signed short *) *p_argv;
+	  goto putgpr;
+
+	case FFI_TYPE_STRUCT:
+	  size_al = (*ptr)->size;
+#if defined(POWERPC_DARWIN64)
+	  next_arg = (unsigned long *)FFI_ALIGN((char *)next_arg, (*ptr)->alignment);
+	  darwin64_pass_struct_by_value (*ptr, (char *) *p_argv, 
+					 (unsigned) size_al,
+					 (unsigned int *) &fparg_count,
+					 &fpr_base, &next_arg);
+#else
+	  dest_cpy = (char *) next_arg;
+
+	  /* If the first member of the struct is a double, then include enough
+	     padding in the struct size to align it to double-word.  */
+	  if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
+	    size_al = FFI_ALIGN((*ptr)->size, 8);
+
 #  if defined(POWERPC64) 
-	  FFI_ASSERT (cif->abi != FFI_DARWIN); 
-	  avalue[i] = pgr; 
-	  pgr += (size_al + 7) / 8; 
-#  else 
-	  /* Structures that match the basic modes (QI 1 byte, HI 2 bytes, 
-	     SI 4 bytes) are aligned as if they were those modes.  */ 
-	  if (size_al < 3 && cif->abi == FFI_DARWIN) 
-	    avalue[i] = (char*) pgr + 4 - size_al; 
-	  else 
-	    avalue[i] = pgr; 
-	  pgr += (size_al + 3) / 4; 
-#  endif 
-#endif 
-	  break; 
- 
-	case FFI_TYPE_SINT64: 
-	case FFI_TYPE_UINT64: 
-#if  defined(POWERPC64) 
-	case FFI_TYPE_POINTER: 
-	  avalue[i] = pgr; 
-	  pgr++; 
-	  break; 
-#else 
-	  /* Long long ints are passed in two gpr's.  */ 
-	  avalue[i] = pgr; 
-	  pgr += 2; 
-	  break; 
-#endif 
- 
-	case FFI_TYPE_FLOAT: 
-	  /* A float value consumes a GPR. 
-	     There are 13 64bit floating point registers.  */ 
-	  if (pfr < end_pfr) 
-	    { 
-	      double temp = pfr->d; 
-	      pfr->f = (float) temp; 
-	      avalue[i] = pfr; 
-	      pfr++; 
-	    } 
-	  else 
-	    { 
-	      avalue[i] = pgr; 
-	    } 
-	  pgr++; 
-	  break; 
- 
-	case FFI_TYPE_DOUBLE: 
-	  /* A double value consumes two GPRs. 
-	     There are 13 64bit floating point registers.  */ 
-	  if (pfr < end_pfr) 
-	    { 
-	      avalue[i] = pfr; 
-	      pfr++; 
-	    } 
-	  else 
-	    { 
-	      avalue[i] = pgr; 
-	    } 
-#ifdef POWERPC64 
-	  pgr++; 
-#else 
-	  pgr += 2; 
-#endif 
-	  break; 
- 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
- 
-	case FFI_TYPE_LONGDOUBLE: 
-#ifdef POWERPC64 
-	  if (pfr + 1 < end_pfr) 
-	    { 
-	      avalue[i] = pfr; 
-	      pfr += 2; 
-	    } 
-	  else 
-	    { 
-	      if (pfr < end_pfr) 
-		{ 
-		  *pgr = *(unsigned long *) pfr; 
-		  pfr++; 
-		} 
-	      avalue[i] = pgr; 
-	    } 
-	  pgr += 2; 
-#else  /* POWERPC64 */ 
-	  /* A long double value consumes four GPRs and two FPRs. 
-	     There are 13 64bit floating point registers.  */ 
-	  if (pfr + 1 < end_pfr) 
-	    { 
-	      avalue[i] = pfr; 
-	      pfr += 2; 
-	    } 
-	  /* Here we have the situation where one part of the long double 
-	     is stored in fpr13 and the other part is already on the stack. 
-	     We use a union to pass the long double to avalue[i].  */ 
-	  else if (pfr + 1 == end_pfr) 
-	    { 
-	      union ldu temp_ld; 
-	      memcpy (&temp_ld.lb[0], pfr, sizeof(ldbits)); 
-	      memcpy (&temp_ld.lb[1], pgr + 2, sizeof(ldbits)); 
-	      avalue[i] = &temp_ld.ld; 
-	      pfr++; 
-	    } 
+	  FFI_ASSERT (abi != FFI_DARWIN);
+	  memcpy ((char *) dest_cpy, (char *) *p_argv, size_al);
+	  next_arg += (size_al + 7) / 8;
+#  else
+	  /* Structures that match the basic modes (QI 1 byte, HI 2 bytes,
+	     SI 4 bytes) are aligned as if they were those modes.
+	     Structures with 3 byte in size are padded upwards.  */
+	  if (size_al < 3 && abi == FFI_DARWIN)
+	    dest_cpy += 4 - size_al;
+
+	  memcpy((char *) dest_cpy, (char *) *p_argv, size_al);
+	  next_arg += (size_al + 3) / 4;
+#  endif
+#endif
+	  break;
+
+	case FFI_TYPE_INT:
+	case FFI_TYPE_SINT32:
+	  gprvalue = *(signed int *) *p_argv;
+	  goto putgpr;
+
+	case FFI_TYPE_UINT32:
+	  gprvalue = *(unsigned int *) *p_argv;
+	putgpr:
+	  *next_arg++ = gprvalue;
+	  gp_count++;
+	  break;
+	default:
+	  break;
+	}
+    }
+
+  /* Check that we didn't overrun the stack...  */
+  /* FFI_ASSERT(gpr_base <= stacktop - ASM_NEEDS_REGISTERS);
+     FFI_ASSERT((unsigned *)fpr_base
+     	     <= stacktop - ASM_NEEDS_REGISTERS - NUM_GPR_ARG_REGISTERS);
+     FFI_ASSERT(flags & FLAG_4_GPR_ARGUMENTS || intarg_count <= 4);  */
+}
+
+#if defined(POWERPC_DARWIN64)
+
+/* See if we can put some of the struct into fprs.
+   This should not be called for structures of size 16 bytes, since these are not
+   broken out this way.  */
+static void
+darwin64_scan_struct_for_floats (ffi_type *s, unsigned *nfpr)
+{
+  int i;
+
+  FFI_ASSERT (s->type == FFI_TYPE_STRUCT)
+
+  for (i = 0; s->elements[i] != NULL; i++)
+    {
+      ffi_type *p = s->elements[i];
+      switch (p->type)
+	{
+	  case FFI_TYPE_STRUCT:
+	    darwin64_scan_struct_for_floats (p, nfpr);
+	    break;
+	  case FFI_TYPE_LONGDOUBLE:
+	    (*nfpr) += 2;
+	    break;
+	  case FFI_TYPE_DOUBLE:
+	  case FFI_TYPE_FLOAT:
+	    (*nfpr) += 1;
+	    break;
+	  default:
+	    break;    
+	}
+    }
+}
+
+static int
+darwin64_struct_size_exceeds_gprs_p (ffi_type *s, char *src, unsigned *nfpr)
+{
+  unsigned struct_offset=0, i;
+
+  for (i = 0; s->elements[i] != NULL; i++)
+    {
+      char *item_base;
+      ffi_type *p = s->elements[i];
+      /* Find the start of this item (0 for the first one).  */
+      if (i > 0)
+        struct_offset = FFI_ALIGN(struct_offset, p->alignment);
+
+      item_base = src + struct_offset;
+
+      switch (p->type)
+	{
+	  case FFI_TYPE_STRUCT:
+	    if (darwin64_struct_size_exceeds_gprs_p (p, item_base, nfpr))
+	      return 1;
+	    break;
+	  case FFI_TYPE_LONGDOUBLE:
+	    if (*nfpr >= NUM_FPR_ARG_REGISTERS)
+	      return 1;
+	    (*nfpr) += 1;
+	    item_base += 8;
+	  /* FALL THROUGH */
+	  case FFI_TYPE_DOUBLE:
+	    if (*nfpr >= NUM_FPR_ARG_REGISTERS)
+	      return 1;
+	    (*nfpr) += 1;
+	    break;
+	  case FFI_TYPE_FLOAT:
+	    if (*nfpr >= NUM_FPR_ARG_REGISTERS)
+	      return 1;
+	    (*nfpr) += 1;
+	    break;
+	  default:
+	    /* If we try and place any item, that is non-float, once we've
+	       exceeded the 8 GPR mark, then we can't fit the struct.  */
+	    if ((unsigned long)item_base >= 8*8) 
+	      return 1;
+	    break;    
+	}
+      /* now count the size of what we just used.  */
+      struct_offset += p->size;
+    }
+  return 0;
+}
+
+/* Can this struct be returned by value?  */
+int 
+darwin64_struct_ret_by_value_p (ffi_type *s)
+{
+  unsigned nfp = 0;
+
+  FFI_ASSERT (s && s->type == FFI_TYPE_STRUCT);
+  
+  /* The largest structure we can return is 8 longs + 13 doubles.  */
+  if (s->size > 168)
+    return 0;
+  
+  /* We can't pass more than 13 floats.  */
+  darwin64_scan_struct_for_floats (s, &nfp);
+  if (nfp > 13)
+    return 0;
+  
+  /* If there are not too many floats, and the struct is
+     small enough to accommodate in the GPRs, then it must be OK.  */
+  if (s->size <= 64)
+    return 1;
+  
+  /* Well, we have to look harder.  */
+  nfp = 0;
+  if (darwin64_struct_size_exceeds_gprs_p (s, NULL, &nfp))
+    return 0;
+  
+  return 1;
+}
+
+void
+darwin64_pass_struct_floats (ffi_type *s, char *src, 
+			     unsigned *nfpr, double **fprs)
+{
+  int i;
+  double *fpr_base = *fprs;
+  unsigned struct_offset = 0;
+
+  /* We don't assume anything about the alignment of the source.  */
+  for (i = 0; s->elements[i] != NULL; i++)
+    {
+      char *item_base;
+      ffi_type *p = s->elements[i];
+      /* Find the start of this item (0 for the first one).  */
+      if (i > 0)
+        struct_offset = FFI_ALIGN(struct_offset, p->alignment);
+      item_base = src + struct_offset;
+
+      switch (p->type)
+	{
+	  case FFI_TYPE_STRUCT:
+	    darwin64_pass_struct_floats (p, item_base, nfpr,
+					   &fpr_base);
+	    break;
+	  case FFI_TYPE_LONGDOUBLE:
+	    if (*nfpr < NUM_FPR_ARG_REGISTERS)
+	      *fpr_base++ = *(double *)item_base;
+	    (*nfpr) += 1;
+	    item_base += 8;
+	  /* FALL THROUGH */
+	  case FFI_TYPE_DOUBLE:
+	    if (*nfpr < NUM_FPR_ARG_REGISTERS)
+	      *fpr_base++ = *(double *)item_base;
+	    (*nfpr) += 1;
+	    break;
+	  case FFI_TYPE_FLOAT:
+	    if (*nfpr < NUM_FPR_ARG_REGISTERS)
+	      *fpr_base++ = (double) *(float *)item_base;
+	    (*nfpr) += 1;
+	    break;
+	  default:
+	    break;    
+	}
+      /* now count the size of what we just used.  */
+      struct_offset += p->size;
+    }
+  /* Update the scores.  */
+  *fprs = fpr_base;
+}
+
+/* Darwin64 special rules.
+   Break out a struct into params and float registers.  */
+static void
+darwin64_pass_struct_by_value (ffi_type *s, char *src, unsigned size,
+			       unsigned *nfpr, double **fprs, unsigned long **arg)
+{
+  unsigned long *next_arg = *arg;
+  char *dest_cpy = (char *)next_arg;
+
+  FFI_ASSERT (s->type == FFI_TYPE_STRUCT)
+
+  if (!size)
+    return;
+
+  /* First... special cases.  */
+  if (size < 3
+      || (size == 4 
+	  && s->elements[0] 
+	  && s->elements[0]->type != FFI_TYPE_FLOAT))
+    {
+      /* Must be at least one GPR, padding is unspecified in value, 
+	 let's make it zero.  */
+      *next_arg = 0UL; 
+      dest_cpy += 8 - size;
+      memcpy ((char *) dest_cpy, src, size);
+      next_arg++;
+    }
+  else if (size == 16)
+    {
+      memcpy ((char *) dest_cpy, src, size);
+      next_arg += 2;
+    }
+  else
+    {
+      /* now the general case, we consider embedded floats.  */
+      memcpy ((char *) dest_cpy, src, size);
+      darwin64_pass_struct_floats (s, src, nfpr, fprs);
+      next_arg += (size+7)/8;
+    }
+    
+  *arg = next_arg;
+}
+
+double *
+darwin64_struct_floats_to_mem (ffi_type *s, char *dest, double *fprs, unsigned *nf)
+{
+  int i;
+  unsigned struct_offset = 0;
+
+  /* We don't assume anything about the alignment of the source.  */
+  for (i = 0; s->elements[i] != NULL; i++)
+    {
+      char *item_base;
+      ffi_type *p = s->elements[i];
+      /* Find the start of this item (0 for the first one).  */
+      if (i > 0)
+        struct_offset = FFI_ALIGN(struct_offset, p->alignment);
+      item_base = dest + struct_offset;
+
+      switch (p->type)
+	{
+	  case FFI_TYPE_STRUCT:
+	    fprs = darwin64_struct_floats_to_mem (p, item_base, fprs, nf);
+	    break;
+	  case FFI_TYPE_LONGDOUBLE:
+	    if (*nf < NUM_FPR_ARG_REGISTERS)
+	      {
+		*(double *)item_base = *fprs++ ;
+		(*nf) += 1;
+	      }
+	    item_base += 8;
+	  /* FALL THROUGH */
+	  case FFI_TYPE_DOUBLE:
+	    if (*nf < NUM_FPR_ARG_REGISTERS)
+	      {
+		*(double *)item_base = *fprs++ ;
+		(*nf) += 1;
+	      }
+	    break;
+	  case FFI_TYPE_FLOAT:
+	    if (*nf < NUM_FPR_ARG_REGISTERS)
+	      {
+		*(float *)item_base = (float) *fprs++ ;
+		(*nf) += 1;
+	      }
+	    break;
+	  default:
+	    break;    
+	}
+      /* now count the size of what we just used.  */
+      struct_offset += p->size;
+    }
+  return fprs;
+}
+
+#endif
+
+/* Adjust the size of S to be correct for Darwin.
+   On Darwin m32, the first field of a structure has natural alignment.  
+   On Darwin m64, all fields have natural alignment.  */
+
+static void
+darwin_adjust_aggregate_sizes (ffi_type *s)
+{
+  int i;
+
+  if (s->type != FFI_TYPE_STRUCT)
+    return;
+
+  s->size = 0;
+  for (i = 0; s->elements[i] != NULL; i++)
+    {
+      ffi_type *p;
+      int align;
+      
+      p = s->elements[i];
+      if (p->type == FFI_TYPE_STRUCT)
+	darwin_adjust_aggregate_sizes (p);
+#if defined(POWERPC_DARWIN64)
+      /* Natural alignment for all items.  */
+      align = p->alignment;
+#else
+      /* Natural alignment for the first item... */
+      if (i == 0)
+	align = p->alignment;
+      else if (p->alignment == 16 || p->alignment < 4)
+	/* .. subsequent items with vector or align < 4 have natural align.  */
+	align = p->alignment;
+      else
+	/* .. or align is 4.  */
+	align = 4;
+#endif
+      /* Pad, if necessary, before adding the current item.  */
+      s->size = FFI_ALIGN(s->size, align) + p->size;
+    }
+  
+  s->size = FFI_ALIGN(s->size, s->alignment);
+  
+  /* This should not be necessary on m64, but harmless.  */
+  if (s->elements[0]->type == FFI_TYPE_UINT64
+      || s->elements[0]->type == FFI_TYPE_SINT64
+      || s->elements[0]->type == FFI_TYPE_DOUBLE
+      || s->elements[0]->alignment == 8)
+    s->alignment = s->alignment > 8 ? s->alignment : 8;
+  /* Do not add additional tail padding.  */
+}
+
+/* Adjust the size of S to be correct for AIX.
+   Word-align double unless it is the first member of a structure.  */
+
+static void
+aix_adjust_aggregate_sizes (ffi_type *s)
+{
+  int i;
+
+  if (s->type != FFI_TYPE_STRUCT)
+    return;
+
+  s->size = 0;
+  for (i = 0; s->elements[i] != NULL; i++)
+    {
+      ffi_type *p;
+      int align;
+      
+      p = s->elements[i];
+      aix_adjust_aggregate_sizes (p);
+      align = p->alignment;
+      if (i != 0 && p->type == FFI_TYPE_DOUBLE)
+	align = 4;
+      s->size = FFI_ALIGN(s->size, align) + p->size;
+    }
+  
+  s->size = FFI_ALIGN(s->size, s->alignment);
+  
+  if (s->elements[0]->type == FFI_TYPE_UINT64
+      || s->elements[0]->type == FFI_TYPE_SINT64
+      || s->elements[0]->type == FFI_TYPE_DOUBLE
+      || s->elements[0]->alignment == 8)
+    s->alignment = s->alignment > 8 ? s->alignment : 8;
+  /* Do not add additional tail padding.  */
+}
+
+/* Perform machine dependent cif processing.  */
+ffi_status
+ffi_prep_cif_machdep (ffi_cif *cif)
+{
+  /* All this is for the DARWIN ABI.  */
+  unsigned i;
+  ffi_type **ptr;
+  unsigned bytes;
+  unsigned fparg_count = 0, intarg_count = 0;
+  unsigned flags = 0;
+  unsigned size_al = 0;
+
+  /* All the machine-independent calculation of cif->bytes will be wrong.
+     All the calculation of structure sizes will also be wrong.
+     Redo the calculation for DARWIN.  */
+
+  if (cif->abi == FFI_DARWIN)
+    {
+      darwin_adjust_aggregate_sizes (cif->rtype);
+      for (i = 0; i < cif->nargs; i++)
+	darwin_adjust_aggregate_sizes (cif->arg_types[i]);
+    }
+
+  if (cif->abi == FFI_AIX)
+    {
+      aix_adjust_aggregate_sizes (cif->rtype);
+      for (i = 0; i < cif->nargs; i++)
+	aix_adjust_aggregate_sizes (cif->arg_types[i]);
+    }
+
+  /* Space for the frame pointer, callee's LR, CR, etc, and for
+     the asm's temp regs.  */
+
+  bytes = (LINKAGE_AREA_GPRS + ASM_NEEDS_REGISTERS) * sizeof(unsigned long);
+
+  /* Return value handling.  
+    The rules for m32 are as follows:
+     - 32-bit (or less) integer values are returned in gpr3;
+     - structures of size <= 4 bytes also returned in gpr3;
+     - 64-bit integer values [??? and structures between 5 and 8 bytes] are
+       returned in gpr3 and gpr4;
+     - Single/double FP values are returned in fpr1;
+     - Long double FP (if not equivalent to double) values are returned in
+       fpr1 and fpr2;
+     m64:
+     - 64-bit or smaller integral values are returned in GPR3
+     - Single/double FP values are returned in fpr1;
+     - Long double FP values are returned in fpr1 and fpr2;
+     m64 Structures:
+     - If the structure could be accommodated in registers were it to be the
+       first argument to a routine, then it is returned in those registers.
+     m32/m64 structures otherwise:
+     - Larger structure values are allocated space and a pointer is passed
+       as the first argument.  */
+  switch (cif->rtype->type)
+    {
+
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+    case FFI_TYPE_LONGDOUBLE:
+      flags |= FLAG_RETURNS_128BITS;
+      flags |= FLAG_RETURNS_FP;
+      break;
+#endif
+
+    case FFI_TYPE_DOUBLE:
+      flags |= FLAG_RETURNS_64BITS;
+      /* Fall through.  */
+    case FFI_TYPE_FLOAT:
+      flags |= FLAG_RETURNS_FP;
+      break;
+
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+#ifdef POWERPC64
+    case FFI_TYPE_POINTER:
+#endif
+      flags |= FLAG_RETURNS_64BITS;
+      break;
+
+    case FFI_TYPE_STRUCT:
+#if defined(POWERPC_DARWIN64)
+      {
+	/* Can we fit the struct into regs?  */
+	if (darwin64_struct_ret_by_value_p (cif->rtype))
+	  {
+	    unsigned nfpr = 0;
+	    flags |= FLAG_RETURNS_STRUCT;
+	    if (cif->rtype->size != 16)
+	      darwin64_scan_struct_for_floats (cif->rtype, &nfpr) ;
+	    else
+	      flags |= FLAG_RETURNS_128BITS;
+	    /* Will be 0 for 16byte struct.  */
+	    if (nfpr)
+	      flags |= FLAG_RETURNS_FP;
+	  }
+	else /* By ref. */
+	  {
+	    flags |= FLAG_RETVAL_REFERENCE;
+	    flags |= FLAG_RETURNS_NOTHING;
+	    intarg_count++;
+	  }
+      }
+#elif defined(DARWIN_PPC)
+      if (cif->rtype->size <= 4)
+	flags |= FLAG_RETURNS_STRUCT;
+      else /* else by reference.  */
+	{
+	  flags |= FLAG_RETVAL_REFERENCE;
+	  flags |= FLAG_RETURNS_NOTHING;
+	  intarg_count++;
+	}
+#else /* assume we pass by ref.  */
+      flags |= FLAG_RETVAL_REFERENCE;
+      flags |= FLAG_RETURNS_NOTHING;
+      intarg_count++;
+#endif
+      break;
+    case FFI_TYPE_VOID:
+      flags |= FLAG_RETURNS_NOTHING;
+      break;
+
+    default:
+      /* Returns 32-bit integer, or similar.  Nothing to do here.  */
+      break;
+    }
+
+  /* The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the
+     first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest
+     goes on the stack.  
+     ??? Structures are passed as a pointer to a copy of the structure. 
+     Stuff on the stack needs to keep proper alignment.  
+     For m64 the count is effectively of half-GPRs.  */
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+    {
+      unsigned align_words;
+      switch ((*ptr)->type)
+	{
+	case FFI_TYPE_FLOAT:
+	case FFI_TYPE_DOUBLE:
+	  fparg_count++;
+#if !defined(POWERPC_DARWIN64)
+	  /* If this FP arg is going on the stack, it must be
+	     8-byte-aligned.  */
+	  if (fparg_count > NUM_FPR_ARG_REGISTERS
+	      && (intarg_count & 0x01) != 0)
+	    intarg_count++;
+#endif
+	  break;
+
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+	case FFI_TYPE_LONGDOUBLE:
+	  fparg_count += 2;
+	  /* If this FP arg is going on the stack, it must be
+	     16-byte-aligned.  */
+	  if (fparg_count >= NUM_FPR_ARG_REGISTERS)
+#if defined (POWERPC64)
+	    intarg_count = FFI_ALIGN(intarg_count, 2);
+#else
+	    intarg_count = FFI_ALIGN(intarg_count, 4);
+#endif
+	  break;
+#endif
+
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+#if defined(POWERPC64)
+	  intarg_count++;
+#else
+	  /* 'long long' arguments are passed as two words, but
+	     either both words must fit in registers or both go
+	     on the stack.  If they go on the stack, they must
+	     be 8-byte-aligned.  */
+	  if (intarg_count == NUM_GPR_ARG_REGISTERS-1
+	      || (intarg_count >= NUM_GPR_ARG_REGISTERS 
+	          && (intarg_count & 0x01) != 0))
+	    intarg_count++;
+	  intarg_count += 2;
+#endif
+	  break;
+
+	case FFI_TYPE_STRUCT:
+	  size_al = (*ptr)->size;
+#if defined(POWERPC_DARWIN64)
+	  align_words = (*ptr)->alignment >> 3;
+	  if (align_words)
+	    intarg_count = FFI_ALIGN(intarg_count, align_words);
+	  /* Base size of the struct.  */
+	  intarg_count += (size_al + 7) / 8;
+	  /* If 16 bytes then don't worry about floats.  */
+	  if (size_al != 16)
+	    /* Scan through for floats to be placed in regs.  */
+	    darwin64_scan_struct_for_floats (*ptr, &fparg_count) ;
+#else
+	  align_words = (*ptr)->alignment >> 2;
+	  if (align_words)
+	    intarg_count = FFI_ALIGN(intarg_count, align_words);
+	  /* If the first member of the struct is a double, then align
+	     the struct to double-word. 
+	  if ((*ptr)->elements[0]->type == FFI_TYPE_DOUBLE)
+	    size_al = FFI_ALIGN((*ptr)->size, 8); */
+#  ifdef POWERPC64
+	  intarg_count += (size_al + 7) / 8;
+#  else
+	  intarg_count += (size_al + 3) / 4;
+#  endif
+#endif
+	  break;
+
+	default:
+	  /* Everything else is passed as a 4-byte word in a GPR, either
+	     the object itself or a pointer to it.  */
+	  intarg_count++;
+	  break;
+	}
+    }
+
+  if (fparg_count != 0)
+    flags |= FLAG_FP_ARGUMENTS;
+
+#if defined(POWERPC_DARWIN64)
+  /* Space to image the FPR registers, if needed - which includes when they might be
+     used in a struct return.  */
+  if (fparg_count != 0 
+      || ((flags & FLAG_RETURNS_STRUCT)
+	   && (flags & FLAG_RETURNS_FP)))
+    bytes += NUM_FPR_ARG_REGISTERS * sizeof(double);
+#else
+  /* Space for the FPR registers, if needed.  */
+  if (fparg_count != 0)
+    bytes += NUM_FPR_ARG_REGISTERS * sizeof(double);
+#endif
+
+  /* Stack space.  */
+#ifdef POWERPC64
+  if ((intarg_count + fparg_count) > NUM_GPR_ARG_REGISTERS)
+    bytes += (intarg_count + fparg_count) * sizeof(long);
+#else
+  if ((intarg_count + 2 * fparg_count) > NUM_GPR_ARG_REGISTERS)
+    bytes += (intarg_count + 2 * fparg_count) * sizeof(long);
+#endif
+  else
+    bytes += NUM_GPR_ARG_REGISTERS * sizeof(long);
+
+  /* The stack space allocated needs to be a multiple of 16 bytes.  */
+  bytes = FFI_ALIGN(bytes, 16) ;
+
+  cif->flags = flags;
+  cif->bytes = bytes;
+
+  return FFI_OK;
+}
+
+extern void ffi_call_AIX(extended_cif *, long, unsigned, unsigned *,
+			 void (*fn)(void), void (*fn2)(void));
+
+extern void ffi_call_go_AIX(extended_cif *, long, unsigned, unsigned *,
+			    void (*fn)(void), void (*fn2)(void), void *closure);
+
+extern void ffi_call_DARWIN(extended_cif *, long, unsigned, unsigned *,
+			    void (*fn)(void), void (*fn2)(void), ffi_type*);
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  extended_cif ecif;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+
+  /* If the return value is a struct and we don't have a return
+     value address then we need to make one.  */
+
+  if ((rvalue == NULL) &&
+      (cif->rtype->type == FFI_TYPE_STRUCT))
+    {
+      ecif.rvalue = alloca (cif->rtype->size);
+    }
+  else
+    ecif.rvalue = rvalue;
+
+  switch (cif->abi)
+    {
+    case FFI_AIX:
+      ffi_call_AIX(&ecif, -(long)cif->bytes, cif->flags, ecif.rvalue, fn,
+		   FFI_FN(ffi_prep_args));
+      break;
+    case FFI_DARWIN:
+      ffi_call_DARWIN(&ecif, -(long)cif->bytes, cif->flags, ecif.rvalue, fn,
+		      FFI_FN(ffi_prep_args), cif->rtype);
+      break;
+    default:
+      FFI_ASSERT(0);
+      break;
+    }
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue,
+	     void *closure)
+{
+  extended_cif ecif;
+
+  ecif.cif = cif;
+  ecif.avalue = avalue;
+
+  /* If the return value is a struct and we don't have a return
+     value address then we need to make one.  */
+
+  if ((rvalue == NULL) &&
+      (cif->rtype->type == FFI_TYPE_STRUCT))
+    {
+      ecif.rvalue = alloca (cif->rtype->size);
+    }
+  else
+    ecif.rvalue = rvalue;
+
+  switch (cif->abi)
+    {
+    case FFI_AIX:
+      ffi_call_go_AIX(&ecif, -(long)cif->bytes, cif->flags, ecif.rvalue, fn,
+		      FFI_FN(ffi_prep_args), closure);
+      break;
+    default:
+      FFI_ASSERT(0);
+      break;
+    }
+}
+
+static void flush_icache(char *);
+static void flush_range(char *, int);
+
+/* The layout of a function descriptor.  A C function pointer really
+   points to one of these.  */
+
+typedef struct aix_fd_struct {
+  void *code_pointer;
+  void *toc;
+} aix_fd;
+
+/* here I'd like to add the stack frame layout we use in darwin_closure.S
+   and aix_closure.S
+
+   m32/m64
+
+   The stack layout looks like this:
+
+   |   Additional params...			| |     Higher address
+   ~						~ ~
+   |   Parameters      (at least 8*4/8=32/64)	| | NUM_GPR_ARG_REGISTERS
+   |--------------------------------------------| |
+   |   TOC=R2 (AIX) Reserved (Darwin)   4/8	| |
+   |--------------------------------------------| |
+   |   Reserved                       2*4/8	| |
+   |--------------------------------------------| |
+   |   Space for callee's LR		4/8	| |
+   |--------------------------------------------| |
+   |   Saved CR [low word for m64]      4/8	| |
+   |--------------------------------------------| |
+   |   Current backchain pointer	4/8	|-/ Parent's frame.
+   |--------------------------------------------| <+ <<< on entry to ffi_closure_ASM
+   |   Result Bytes			16	| |
+   |--------------------------------------------| |
+   ~   padding to 16-byte alignment		~ ~
+   |--------------------------------------------| |
+   |   NUM_FPR_ARG_REGISTERS slots		| |
+   |   here fp13 .. fp1		       13*8	| |
+   |--------------------------------------------| |
+   |   R3..R10			  8*4/8=32/64	| | NUM_GPR_ARG_REGISTERS
+   |--------------------------------------------| |
+   |   TOC=R2 (AIX) Reserved (Darwin)   4/8	| |
+   |--------------------------------------------| |	stack	|
+   |   Reserved [compiler,binder]     2*4/8	| |	grows	|
+   |--------------------------------------------| |	down	V
+   |   Space for callee's LR		4/8	| |
+   |--------------------------------------------| |	lower addresses
+   |   Saved CR [low word for m64]      4/8	| |
+   |--------------------------------------------| |     stack pointer here
+   |   Current backchain pointer	4/8	|-/	during
+   |--------------------------------------------|   <<<	ffi_closure_ASM.
+
+*/
+
+ffi_status
+ffi_prep_closure_loc (ffi_closure* closure,
+		      ffi_cif* cif,
+		      void (*fun)(ffi_cif*, void*, void**, void*),
+		      void *user_data,
+		      void *codeloc)
+{
+  unsigned int *tramp;
+  struct ffi_aix_trampoline_struct *tramp_aix;
+  aix_fd *fd;
+
+  switch (cif->abi)
+    {
+      case FFI_DARWIN:
+
+	FFI_ASSERT (cif->abi == FFI_DARWIN);
+
+	tramp = (unsigned int *) &closure->tramp[0];
+#if defined(POWERPC_DARWIN64)
+	tramp[0] = 0x7c0802a6;  /*   mflr    r0  */
+	tramp[1] = 0x429f0015;  /*   bcl-    20,4*cr7+so,  +0x18 (L1)  */
+	/* We put the addresses here.  */
+	tramp[6] = 0x7d6802a6;  /*L1:   mflr    r11  */
+	tramp[7] = 0xe98b0000;  /*   ld     r12,0(r11) function address  */
+	tramp[8] = 0x7c0803a6;  /*   mtlr    r0   */
+	tramp[9] = 0x7d8903a6;  /*   mtctr   r12  */
+	tramp[10] = 0xe96b0008;  /*   ld     r11,8(r11) static chain  */
+	tramp[11] = 0x4e800420;  /*   bctr  */
+
+	*((unsigned long *)&tramp[2]) = (unsigned long) ffi_closure_ASM; /* function  */
+	*((unsigned long *)&tramp[4]) = (unsigned long) codeloc; /* context  */
+#else
+	tramp[0] = 0x7c0802a6;  /*   mflr    r0  */
+	tramp[1] = 0x429f000d;  /*   bcl-    20,4*cr7+so,0x10  */
+	tramp[4] = 0x7d6802a6;  /*   mflr    r11  */
+	tramp[5] = 0x818b0000;  /*   lwz     r12,0(r11) function address  */
+	tramp[6] = 0x7c0803a6;  /*   mtlr    r0   */
+	tramp[7] = 0x7d8903a6;  /*   mtctr   r12  */
+	tramp[8] = 0x816b0004;  /*   lwz     r11,4(r11) static chain  */
+	tramp[9] = 0x4e800420;  /*   bctr  */
+	tramp[2] = (unsigned long) ffi_closure_ASM; /* function  */
+	tramp[3] = (unsigned long) codeloc; /* context  */
+#endif
+	closure->cif = cif;
+	closure->fun = fun;
+	closure->user_data = user_data;
+
+	/* Flush the icache. Only necessary on Darwin.  */
+	flush_range(codeloc, FFI_TRAMPOLINE_SIZE);
+
+	break;
+
+    case FFI_AIX:
+
+      tramp_aix = (struct ffi_aix_trampoline_struct *) (closure->tramp);
+      fd = (aix_fd *)(void *)ffi_closure_ASM;
+
+      FFI_ASSERT (cif->abi == FFI_AIX);
+
+      tramp_aix->code_pointer = fd->code_pointer;
+      tramp_aix->toc = fd->toc;
+      tramp_aix->static_chain = codeloc;
+      closure->cif = cif;
+      closure->fun = fun;
+      closure->user_data = user_data;
+      break;
+
+    default:
+      return FFI_BAD_ABI;
+      break;
+    }
+  return FFI_OK;
+}
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure,
+		     ffi_cif* cif,
+		     void (*fun)(ffi_cif*, void*, void**, void*))
+{
+  switch (cif->abi)
+    {
+      case FFI_AIX:
+
+        FFI_ASSERT (cif->abi == FFI_AIX);
+
+        closure->tramp = (void *)ffi_go_closure_ASM;
+        closure->cif = cif;
+        closure->fun = fun;
+        return FFI_OK;
+      
+      // For now, ffi_prep_go_closure is only implemented for AIX, not for Darwin
+      default:
+        return FFI_BAD_ABI;
+        break;
+    }
+  return FFI_OK;
+}
+
+static void
+flush_icache(char *addr)
+{
+#ifndef _AIX
+  __asm__ volatile (
+		"dcbf 0,%0\n"
+		"\tsync\n"
+		"\ticbi 0,%0\n"
+		"\tsync\n"
+		"\tisync"
+		: : "r"(addr) : "memory");
+#endif
+}
+
+static void
+flush_range(char * addr1, int size)
+{
+#define MIN_LINE_SIZE 32
+  int i;
+  for (i = 0; i < size; i += MIN_LINE_SIZE)
+    flush_icache(addr1+i);
+  flush_icache(addr1+size-1);
+}
+
+typedef union
+{
+  float f;
+  double d;
+} ffi_dblfl;
+
+ffi_type *
+ffi_closure_helper_DARWIN (ffi_closure *, void *,
+			   unsigned long *, ffi_dblfl *);
+
+ffi_type *
+ffi_go_closure_helper_DARWIN (ffi_go_closure*, void *,
+			      unsigned long *, ffi_dblfl *);
+
+/* Basically the trampoline invokes ffi_closure_ASM, and on
+   entry, r11 holds the address of the closure.
+   After storing the registers that could possibly contain
+   parameters to be passed into the stack frame and setting
+   up space for a return value, ffi_closure_ASM invokes the
+   following helper function to do most of the work.  */
+
+static ffi_type *
+ffi_closure_helper_common (ffi_cif* cif,
+			   void (*fun)(ffi_cif*, void*, void**, void*),
+			   void *user_data, void *rvalue,
+			   unsigned long *pgr, ffi_dblfl *pfr)
+{
+  /* rvalue is the pointer to space for return value in closure assembly
+     pgr is the pointer to where r3-r10 are stored in ffi_closure_ASM
+     pfr is the pointer to where f1-f13 are stored in ffi_closure_ASM.  */
+
+  typedef double ldbits[2];
+
+  union ldu
+  {
+    ldbits lb;
+    long double ld;
+  };
+
+  void **          avalue;
+  ffi_type **      arg_types;
+  long             i, avn;
+  ffi_dblfl *      end_pfr = pfr + NUM_FPR_ARG_REGISTERS;
+  unsigned         size_al;
+#if defined(POWERPC_DARWIN64)
+  unsigned 	   fpsused = 0;
+#endif
+
+  avalue = alloca (cif->nargs * sizeof(void *));
+
+  if (cif->rtype->type == FFI_TYPE_STRUCT)
+    {
+#if defined(POWERPC_DARWIN64)
+      if (!darwin64_struct_ret_by_value_p (cif->rtype))
+	{
+    	  /* Won't fit into the regs - return by ref.  */
+	  rvalue = (void *) *pgr;
+	  pgr++;
+	}
+#elif defined(DARWIN_PPC)
+      if (cif->rtype->size > 4)
+	{
+	  rvalue = (void *) *pgr;
+	  pgr++;
+	}
+#else /* assume we return by ref.  */
+      rvalue = (void *) *pgr;
+      pgr++;
+#endif
+    }
+
+  i = 0;
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+
+  /* Grab the addresses of the arguments from the stack frame.  */
+  while (i < avn)
+    {
+      switch (arg_types[i]->type)
+	{
+	case FFI_TYPE_SINT8:
+	case FFI_TYPE_UINT8:
+#if  defined(POWERPC64)
+	  avalue[i] = (char *) pgr + 7;
+#else
+	  avalue[i] = (char *) pgr + 3;
+#endif
+	  pgr++;
+	  break;
+
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_UINT16:
+#if  defined(POWERPC64)
+	  avalue[i] = (char *) pgr + 6;
+#else
+	  avalue[i] = (char *) pgr + 2;
+#endif
+	  pgr++;
+	  break;
+
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT32:
+#if  defined(POWERPC64)
+	  avalue[i] = (char *) pgr + 4;
+#else
+	case FFI_TYPE_POINTER:
+	  avalue[i] = pgr;
+#endif
+	  pgr++;
+	  break;
+
+	case FFI_TYPE_STRUCT:
+	  size_al = arg_types[i]->size;
+#if defined(POWERPC_DARWIN64)
+	  pgr = (unsigned long *)FFI_ALIGN((char *)pgr, arg_types[i]->alignment);
+	  if (size_al < 3 || size_al == 4)
+	    {
+	      avalue[i] = ((char *)pgr)+8-size_al;
+	      if (arg_types[i]->elements[0]->type == FFI_TYPE_FLOAT
+		  && fpsused < NUM_FPR_ARG_REGISTERS)
+		{
+		  *(float *)pgr = (float) *(double *)pfr;
+		  pfr++;
+		  fpsused++;
+		}
+	    }
 	  else 
-	    { 
-	      avalue[i] = pgr; 
-	    } 
-	  pgr += 4; 
-#endif  /* POWERPC64 */ 
-	  break; 
-#endif 
-	default: 
-	  FFI_ASSERT(0); 
-	} 
-      i++; 
-    } 
- 
-  (fun) (cif, rvalue, avalue, user_data); 
- 
-  /* Tell ffi_closure_ASM to perform return type promotions.  */ 
-  return cif->rtype; 
-} 
- 
-ffi_type * 
-ffi_closure_helper_DARWIN (ffi_closure *closure, void *rvalue, 
-			   unsigned long *pgr, ffi_dblfl *pfr) 
-{ 
-  return ffi_closure_helper_common (closure->cif, closure->fun, 
-				    closure->user_data, rvalue, pgr, pfr); 
-} 
- 
-ffi_type * 
-ffi_go_closure_helper_DARWIN (ffi_go_closure *closure, void *rvalue, 
-			      unsigned long *pgr, ffi_dblfl *pfr) 
-{ 
-  return ffi_closure_helper_common (closure->cif, closure->fun, 
-				    closure, rvalue, pgr, pfr); 
-} 
- 
+	    {
+	      if (size_al != 16)
+		pfr = (ffi_dblfl *) 
+		    darwin64_struct_floats_to_mem (arg_types[i], (char *)pgr,
+						   (double *)pfr, &fpsused);
+	      avalue[i] = pgr;
+	    }
+	  pgr += (size_al + 7) / 8;
+#else
+	  /* If the first member of the struct is a double, then align
+	     the struct to double-word.  */
+	  if (arg_types[i]->elements[0]->type == FFI_TYPE_DOUBLE)
+	    size_al = FFI_ALIGN(arg_types[i]->size, 8);
+#  if defined(POWERPC64)
+	  FFI_ASSERT (cif->abi != FFI_DARWIN);
+	  avalue[i] = pgr;
+	  pgr += (size_al + 7) / 8;
+#  else
+	  /* Structures that match the basic modes (QI 1 byte, HI 2 bytes,
+	     SI 4 bytes) are aligned as if they were those modes.  */
+	  if (size_al < 3 && cif->abi == FFI_DARWIN)
+	    avalue[i] = (char*) pgr + 4 - size_al;
+	  else
+	    avalue[i] = pgr;
+	  pgr += (size_al + 3) / 4;
+#  endif
+#endif
+	  break;
+
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_UINT64:
+#if  defined(POWERPC64)
+	case FFI_TYPE_POINTER:
+	  avalue[i] = pgr;
+	  pgr++;
+	  break;
+#else
+	  /* Long long ints are passed in two gpr's.  */
+	  avalue[i] = pgr;
+	  pgr += 2;
+	  break;
+#endif
+
+	case FFI_TYPE_FLOAT:
+	  /* A float value consumes a GPR.
+	     There are 13 64bit floating point registers.  */
+	  if (pfr < end_pfr)
+	    {
+	      double temp = pfr->d;
+	      pfr->f = (float) temp;
+	      avalue[i] = pfr;
+	      pfr++;
+	    }
+	  else
+	    {
+	      avalue[i] = pgr;
+	    }
+	  pgr++;
+	  break;
+
+	case FFI_TYPE_DOUBLE:
+	  /* A double value consumes two GPRs (one GPR on POWERPC64).
+	     There are 13 64bit floating point registers.  */
+	  if (pfr < end_pfr)
+	    {
+	      avalue[i] = pfr;
+	      pfr++;
+	    }
+	  else
+	    {
+	      avalue[i] = pgr;
+	    }
+#ifdef POWERPC64
+	  pgr++;
+#else
+	  pgr += 2;
+#endif
+	  break;
+
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+
+	case FFI_TYPE_LONGDOUBLE:
+#ifdef POWERPC64
+	  if (pfr + 1 < end_pfr)
+	    {
+	      avalue[i] = pfr;
+	      pfr += 2;
+	    }
+	  else
+	    {
+	      if (pfr < end_pfr)
+		{
+		  *pgr = *(unsigned long *) pfr;
+		  pfr++;
+		}
+	      avalue[i] = pgr;
+	    }
+	  pgr += 2;
+#else  /* POWERPC64 */
+	  /* A long double value consumes four GPRs and two FPRs.
+	     There are 13 64bit floating point registers.  */
+	  if (pfr + 1 < end_pfr)
+	    {
+	      avalue[i] = pfr;
+	      pfr += 2;
+	    }
+	  /* Here we have the situation where one part of the long double
+	     is stored in fpr13 and the other part is already on the stack.
+	     We use a union to pass the long double to avalue[i].  */
+	  else if (pfr + 1 == end_pfr)
+	    {
+	      union ldu temp_ld;
+	      memcpy (&temp_ld.lb[0], pfr, sizeof(ldbits));
+	      memcpy (&temp_ld.lb[1], pgr + 2, sizeof(ldbits));
+	      avalue[i] = &temp_ld.ld;
+	      pfr++;
+	    }
+	  else
+	    {
+	      avalue[i] = pgr;
+	    }
+	  pgr += 4;
+#endif  /* POWERPC64 */
+	  break;
+#endif
+	default:
+	  FFI_ASSERT(0);
+	}
+      i++;
+    }
+
+  (fun) (cif, rvalue, avalue, user_data);
+
+  /* Tell ffi_closure_ASM to perform return type promotions.  */
+  return cif->rtype;
+}
+
+ffi_type *
+ffi_closure_helper_DARWIN (ffi_closure *closure, void *rvalue,
+			   unsigned long *pgr, ffi_dblfl *pfr)
+{
+  return ffi_closure_helper_common (closure->cif, closure->fun,
+				    closure->user_data, rvalue, pgr, pfr);
+}
+
+ffi_type *
+ffi_go_closure_helper_DARWIN (ffi_go_closure *closure, void *rvalue,
+			      unsigned long *pgr, ffi_dblfl *pfr)
+{
+  return ffi_closure_helper_common (closure->cif, closure->fun,
+				    closure, rvalue, pgr, pfr);
+}
+
diff --git a/contrib/restricted/libffi/src/powerpc/ffi_linux64.c b/contrib/restricted/libffi/src/powerpc/ffi_linux64.c
index f4c5014a86..de0d033769 100644
--- a/contrib/restricted/libffi/src/powerpc/ffi_linux64.c
+++ b/contrib/restricted/libffi/src/powerpc/ffi_linux64.c
@@ -1,1153 +1,1153 @@
-/* ----------------------------------------------------------------------- 
-   ffi_linux64.c - Copyright (C) 2013 IBM 
-                   Copyright (C) 2011 Anthony Green 
-                   Copyright (C) 2011 Kyle Moffett 
-                   Copyright (C) 2008 Red Hat, Inc 
-                   Copyright (C) 2007, 2008 Free Software Foundation, Inc 
-                   Copyright (c) 1998 Geoffrey Keating 
- 
-   PowerPC Foreign Function Interface 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR 
-   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
-   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
-   OTHER DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-#include "ffi.h" 
- 
-#ifdef POWERPC64 
-#include "ffi_common.h" 
-#include "ffi_powerpc.h" 
- 
- 
-/* About the LINUX64 ABI.  */ 
-enum { 
-  NUM_GPR_ARG_REGISTERS64 = 8, 
-  NUM_FPR_ARG_REGISTERS64 = 13, 
-  NUM_VEC_ARG_REGISTERS64 = 12, 
-}; 
-enum { ASM_NEEDS_REGISTERS64 = 4 }; 
- 
- 
-#if HAVE_LONG_DOUBLE_VARIANT && FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-/* Adjust size of ffi_type_longdouble.  */ 
-void FFI_HIDDEN 
-ffi_prep_types_linux64 (ffi_abi abi) 
-{ 
-  if ((abi & (FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128)) == FFI_LINUX) 
-    { 
-      ffi_type_longdouble.size = 8; 
-      ffi_type_longdouble.alignment = 8; 
-    } 
-  else 
-    { 
-      ffi_type_longdouble.size = 16; 
-      ffi_type_longdouble.alignment = 16; 
-    } 
-} 
-#endif 
- 
- 
-static unsigned int 
-discover_homogeneous_aggregate (ffi_abi abi, 
-                                const ffi_type *t, 
-                                unsigned int *elnum) 
-{ 
-  switch (t->type) 
-    { 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-    case FFI_TYPE_LONGDOUBLE: 
-      /* 64-bit long doubles are equivalent to doubles. */ 
-      if ((abi & FFI_LINUX_LONG_DOUBLE_128) == 0) 
-        { 
-          *elnum = 1; 
-          return FFI_TYPE_DOUBLE; 
-        } 
-      /* IBM extended precision values use unaligned pairs 
-         of FPRs, but according to the ABI must be considered 
-         distinct from doubles. They are also limited to a 
-         maximum of four members in a homogeneous aggregate. */ 
-      else if ((abi & FFI_LINUX_LONG_DOUBLE_IEEE128) == 0) 
-        { 
-          *elnum = 2; 
-          return FFI_TYPE_LONGDOUBLE; 
-        } 
-      /* Fall through. */ 
-#endif 
-    case FFI_TYPE_FLOAT: 
-    case FFI_TYPE_DOUBLE: 
-      *elnum = 1; 
-      return (int) t->type; 
- 
-    case FFI_TYPE_STRUCT:; 
-      { 
-	unsigned int base_elt = 0, total_elnum = 0; 
-	ffi_type **el = t->elements; 
-	while (*el) 
-	  { 
-	    unsigned int el_elt, el_elnum = 0; 
-	    el_elt = discover_homogeneous_aggregate (abi, *el, &el_elnum); 
-	    if (el_elt == 0 
-		|| (base_elt && base_elt != el_elt)) 
-	      return 0; 
-	    base_elt = el_elt; 
-	    total_elnum += el_elnum; 
-#if _CALL_ELF == 2 
-	    if (total_elnum > 8) 
-	      return 0; 
-#else 
-	    if (total_elnum > 1) 
-	      return 0; 
-#endif 
-	    el++; 
-	  } 
-	*elnum = total_elnum; 
-	return base_elt; 
-      } 
- 
-    default: 
-      return 0; 
-    } 
-} 
- 
- 
-/* Perform machine dependent cif processing */ 
-static ffi_status 
-ffi_prep_cif_linux64_core (ffi_cif *cif) 
-{ 
-  ffi_type **ptr; 
-  unsigned bytes; 
-  unsigned i, fparg_count = 0, intarg_count = 0, vecarg_count = 0; 
-  unsigned flags = cif->flags; 
-  unsigned elt, elnum, rtype; 
- 
-#if FFI_TYPE_LONGDOUBLE == FFI_TYPE_DOUBLE 
-  /* If compiled without long double support... */ 
-  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0 || 
-      (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0) 
-    return FFI_BAD_ABI; 
-#elif !defined(__VEC__) 
-  /* If compiled without vector register support (used by assembly)... */ 
-  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0) 
-    return FFI_BAD_ABI; 
-#else 
-  /* If the IEEE128 flag is set, but long double is only 64 bits wide... */ 
-  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) == 0 && 
-      (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0) 
-    return FFI_BAD_ABI; 
-#endif 
- 
-  /* The machine-independent calculation of cif->bytes doesn't work 
-     for us.  Redo the calculation.  */ 
-#if _CALL_ELF == 2 
-  /* Space for backchain, CR, LR, TOC and the asm's temp regs.  */ 
-  bytes = (4 + ASM_NEEDS_REGISTERS64) * sizeof (long); 
- 
-  /* Space for the general registers.  */ 
-  bytes += NUM_GPR_ARG_REGISTERS64 * sizeof (long); 
-#else 
-  /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp 
-     regs.  */ 
-  bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long); 
- 
-  /* Space for the mandatory parm save area and general registers.  */ 
-  bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long); 
-#endif 
- 
-  /* Return value handling.  */ 
-  rtype = cif->rtype->type; 
-#if _CALL_ELF == 2 
-homogeneous: 
-#endif 
-  switch (rtype) 
-    { 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-    case FFI_TYPE_LONGDOUBLE: 
-      if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0) 
-        { 
-          flags |= FLAG_RETURNS_VEC; 
-          break; 
-        } 
-      if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0) 
-	flags |= FLAG_RETURNS_128BITS; 
-      /* Fall through.  */ 
-#endif 
-    case FFI_TYPE_DOUBLE: 
-      flags |= FLAG_RETURNS_64BITS; 
-      /* Fall through.  */ 
-    case FFI_TYPE_FLOAT: 
-      flags |= FLAG_RETURNS_FP; 
-      break; 
- 
-    case FFI_TYPE_UINT128: 
-      flags |= FLAG_RETURNS_128BITS; 
-      /* Fall through.  */ 
-    case FFI_TYPE_UINT64: 
-    case FFI_TYPE_SINT64: 
-    case FFI_TYPE_POINTER: 
-      flags |= FLAG_RETURNS_64BITS; 
-      break; 
- 
-    case FFI_TYPE_STRUCT: 
-#if _CALL_ELF == 2 
-      elt = discover_homogeneous_aggregate (cif->abi, cif->rtype, &elnum); 
-      if (elt) 
-        { 
-          flags |= FLAG_RETURNS_SMST; 
-          rtype = elt; 
-          goto homogeneous; 
-        } 
-      if (cif->rtype->size <= 16) 
-        { 
-          flags |= FLAG_RETURNS_SMST; 
-          break; 
-        } 
-#endif 
-      intarg_count++; 
-      flags |= FLAG_RETVAL_REFERENCE; 
-      /* Fall through.  */ 
-    case FFI_TYPE_VOID: 
-      flags |= FLAG_RETURNS_NOTHING; 
-      break; 
- 
-    default: 
-      /* Returns 32-bit integer, or similar.  Nothing to do here.  */ 
-      break; 
-    } 
- 
-  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) 
-    { 
-      unsigned int align; 
- 
-      switch ((*ptr)->type) 
-	{ 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-	case FFI_TYPE_LONGDOUBLE: 
-          if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0) 
-            { 
-              vecarg_count++; 
-              /* Align to 16 bytes, plus the 16-byte argument. */ 
-              intarg_count = (intarg_count + 3) & ~0x1; 
-              if (vecarg_count > NUM_VEC_ARG_REGISTERS64) 
-                flags |= FLAG_ARG_NEEDS_PSAVE; 
-              break; 
-            } 
-	  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0) 
-	    { 
-	      fparg_count++; 
-	      intarg_count++; 
-	    } 
-	  /* Fall through.  */ 
-#endif 
-	case FFI_TYPE_DOUBLE: 
-	case FFI_TYPE_FLOAT: 
-	  fparg_count++; 
-	  intarg_count++; 
-	  if (fparg_count > NUM_FPR_ARG_REGISTERS64) 
-	    flags |= FLAG_ARG_NEEDS_PSAVE; 
-	  break; 
- 
-	case FFI_TYPE_STRUCT: 
-	  if ((cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0) 
-	    { 
-	      align = (*ptr)->alignment; 
-	      if (align > 16) 
-		align = 16; 
-	      align = align / 8; 
-	      if (align > 1) 
-		intarg_count = FFI_ALIGN (intarg_count, align); 
-	    } 
-	  intarg_count += ((*ptr)->size + 7) / 8; 
-	  elt = discover_homogeneous_aggregate (cif->abi, *ptr, &elnum); 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-          if (elt == FFI_TYPE_LONGDOUBLE && 
-              (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0) 
-            { 
-              vecarg_count += elnum; 
-              if (vecarg_count > NUM_VEC_ARG_REGISTERS64) 
-                flags |= FLAG_ARG_NEEDS_PSAVE; 
-              break; 
-            } 
-	  else 
-#endif 
-	  if (elt) 
-	    { 
-	      fparg_count += elnum; 
-	      if (fparg_count > NUM_FPR_ARG_REGISTERS64) 
-		flags |= FLAG_ARG_NEEDS_PSAVE; 
-	    } 
-	  else 
-	    { 
-	      if (intarg_count > NUM_GPR_ARG_REGISTERS64) 
-		flags |= FLAG_ARG_NEEDS_PSAVE; 
-	    } 
-	  break; 
- 
-	case FFI_TYPE_POINTER: 
-	case FFI_TYPE_UINT64: 
-	case FFI_TYPE_SINT64: 
-	case FFI_TYPE_INT: 
-	case FFI_TYPE_UINT32: 
-	case FFI_TYPE_SINT32: 
-	case FFI_TYPE_UINT16: 
-	case FFI_TYPE_SINT16: 
-	case FFI_TYPE_UINT8: 
-	case FFI_TYPE_SINT8: 
-	  /* Everything else is passed as a 8-byte word in a GPR, either 
-	     the object itself or a pointer to it.  */ 
-	  intarg_count++; 
-	  if (intarg_count > NUM_GPR_ARG_REGISTERS64) 
-	    flags |= FLAG_ARG_NEEDS_PSAVE; 
-	  break; 
-	default: 
-	  FFI_ASSERT (0); 
-	} 
-    } 
- 
-  if (fparg_count != 0) 
-    flags |= FLAG_FP_ARGUMENTS; 
-  if (intarg_count > 4) 
-    flags |= FLAG_4_GPR_ARGUMENTS; 
-  if (vecarg_count != 0) 
-    flags |= FLAG_VEC_ARGUMENTS; 
- 
-  /* Space for the FPR registers, if needed.  */ 
-  if (fparg_count != 0) 
-    bytes += NUM_FPR_ARG_REGISTERS64 * sizeof (double); 
-  /* Space for the vector registers, if needed, aligned to 16 bytes. */ 
-  if (vecarg_count != 0) { 
-    bytes = (bytes + 15) & ~0xF; 
-    bytes += NUM_VEC_ARG_REGISTERS64 * sizeof (float128); 
-  } 
- 
-  /* Stack space.  */ 
-#if _CALL_ELF == 2 
-  if ((flags & FLAG_ARG_NEEDS_PSAVE) != 0) 
-    bytes += intarg_count * sizeof (long); 
-#else 
-  if (intarg_count > NUM_GPR_ARG_REGISTERS64) 
-    bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long); 
-#endif 
- 
-  /* The stack space allocated needs to be a multiple of 16 bytes.  */ 
-  bytes = (bytes + 15) & ~0xF; 
- 
-  cif->flags = flags; 
-  cif->bytes = bytes; 
- 
-  return FFI_OK; 
-} 
- 
-ffi_status FFI_HIDDEN 
-ffi_prep_cif_linux64 (ffi_cif *cif) 
-{ 
-  if ((cif->abi & FFI_LINUX) != 0) 
-    cif->nfixedargs = cif->nargs; 
-#if _CALL_ELF != 2 
-  else if (cif->abi == FFI_COMPAT_LINUX64) 
-    { 
-      /* This call is from old code.  Don't touch cif->nfixedargs 
-	 since old code will be using a smaller cif.  */ 
-      cif->flags |= FLAG_COMPAT; 
-      /* Translate to new abi value.  */ 
-      cif->abi = FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128; 
-    } 
-#endif 
-  else 
-    return FFI_BAD_ABI; 
-  return ffi_prep_cif_linux64_core (cif); 
-} 
- 
-ffi_status FFI_HIDDEN 
-ffi_prep_cif_linux64_var (ffi_cif *cif, 
-			  unsigned int nfixedargs, 
-			  unsigned int ntotalargs MAYBE_UNUSED) 
-{ 
-  if ((cif->abi & FFI_LINUX) != 0) 
-    cif->nfixedargs = nfixedargs; 
-#if _CALL_ELF != 2 
-  else if (cif->abi == FFI_COMPAT_LINUX64) 
-    { 
-      /* This call is from old code.  Don't touch cif->nfixedargs 
-	 since old code will be using a smaller cif.  */ 
-      cif->flags |= FLAG_COMPAT; 
-      /* Translate to new abi value.  */ 
-      cif->abi = FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128; 
-    } 
-#endif 
-  else 
-    return FFI_BAD_ABI; 
-#if _CALL_ELF == 2 
-  cif->flags |= FLAG_ARG_NEEDS_PSAVE; 
-#endif 
-  return ffi_prep_cif_linux64_core (cif); 
-} 
- 
- 
-/* ffi_prep_args64 is called by the assembly routine once stack space 
-   has been allocated for the function's arguments. 
- 
-   The stack layout we want looks like this: 
- 
-   |   Ret addr from ffi_call_LINUX64	8bytes	|	higher addresses 
-   |--------------------------------------------| 
-   |   CR save area			8bytes	| 
-   |--------------------------------------------| 
-   |   Previous backchain pointer	8	|	stack pointer here 
-   |--------------------------------------------|<+ <<<	on entry to 
-   |   Saved r28-r31			4*8	| |	ffi_call_LINUX64 
-   |--------------------------------------------| | 
-   |   GPR registers r3-r10		8*8	| | 
-   |--------------------------------------------| | 
-   |   FPR registers f1-f13 (optional)	13*8	| | 
-   |--------------------------------------------| | 
-   |   VEC registers v2-v13 (optional)  12*16   | | 
-   |--------------------------------------------| | 
-   |   Parameter save area		        | | 
-   |--------------------------------------------| | 
-   |   TOC save area			8	| | 
-   |--------------------------------------------| |	stack	| 
-   |   Linker doubleword		8	| |	grows	| 
-   |--------------------------------------------| |	down	V 
-   |   Compiler doubleword		8	| | 
-   |--------------------------------------------| |	lower addresses 
-   |   Space for callee's LR		8	| | 
-   |--------------------------------------------| | 
-   |   CR save area			8	| | 
-   |--------------------------------------------| |	stack pointer here 
-   |   Current backchain pointer	8	|-/	during 
-   |--------------------------------------------|   <<<	ffi_call_LINUX64 
- 
-*/ 
- 
-void FFI_HIDDEN 
-ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack) 
-{ 
-  const unsigned long bytes = ecif->cif->bytes; 
-  const unsigned long flags = ecif->cif->flags; 
- 
-  typedef union 
-  { 
-    char *c; 
-    unsigned long *ul; 
-    float *f; 
-    double *d; 
-    float128 *f128; 
-    size_t p; 
-  } valp; 
- 
-  /* 'stacktop' points at the previous backchain pointer.  */ 
-  valp stacktop; 
- 
-  /* 'next_arg' points at the space for gpr3, and grows upwards as 
-     we use GPR registers, then continues at rest.  */ 
-  valp gpr_base; 
-  valp gpr_end; 
-  valp rest; 
-  valp next_arg; 
- 
-  /* 'fpr_base' points at the space for f1, and grows upwards as 
-     we use FPR registers.  */ 
-  valp fpr_base; 
-  unsigned int fparg_count; 
- 
-  /* 'vec_base' points at the space for v2, and grows upwards as 
-     we use vector registers.  */ 
-  valp vec_base; 
-  unsigned int vecarg_count; 
- 
-  unsigned int i, words, nargs, nfixedargs; 
-  ffi_type **ptr; 
-  double double_tmp; 
-  union 
-  { 
-    void **v; 
-    char **c; 
-    signed char **sc; 
-    unsigned char **uc; 
-    signed short **ss; 
-    unsigned short **us; 
-    signed int **si; 
-    unsigned int **ui; 
-    unsigned long **ul; 
-    float **f; 
-    double **d; 
-    float128 **f128; 
-  } p_argv; 
-  unsigned long gprvalue; 
-  unsigned long align; 
- 
-  stacktop.c = (char *) stack + bytes; 
-  gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64; 
-  gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64; 
-#if _CALL_ELF == 2 
-  rest.ul = stack + 4 + NUM_GPR_ARG_REGISTERS64; 
-#else 
-  rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64; 
-#endif 
-  fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64; 
-  fparg_count = 0; 
-  /* Place the vector args below the FPRs, if used, else the GPRs. */ 
-  if (ecif->cif->flags & FLAG_FP_ARGUMENTS) 
-    vec_base.p = fpr_base.p & ~0xF; 
-  else 
-    vec_base.p = gpr_base.p; 
-  vec_base.f128 -= NUM_VEC_ARG_REGISTERS64; 
-  vecarg_count = 0; 
-  next_arg.ul = gpr_base.ul; 
- 
-  /* Check that everything starts aligned properly.  */ 
-  FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0); 
-  FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0); 
-  FFI_ASSERT (((unsigned long) gpr_base.c & 0xF) == 0); 
-  FFI_ASSERT (((unsigned long) gpr_end.c  & 0xF) == 0); 
-  FFI_ASSERT (((unsigned long) vec_base.c & 0xF) == 0); 
-  FFI_ASSERT ((bytes & 0xF) == 0); 
- 
-  /* Deal with return values that are actually pass-by-reference.  */ 
-  if (flags & FLAG_RETVAL_REFERENCE) 
-    *next_arg.ul++ = (unsigned long) (char *) ecif->rvalue; 
- 
-  /* Now for the arguments.  */ 
-  p_argv.v = ecif->avalue; 
-  nargs = ecif->cif->nargs; 
-#if _CALL_ELF != 2 
-  nfixedargs = (unsigned) -1; 
-  if ((flags & FLAG_COMPAT) == 0) 
-#endif 
-    nfixedargs = ecif->cif->nfixedargs; 
-  for (ptr = ecif->cif->arg_types, i = 0; 
-       i < nargs; 
-       i++, ptr++, p_argv.v++) 
-    { 
-      unsigned int elt, elnum; 
- 
-      switch ((*ptr)->type) 
-	{ 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-	case FFI_TYPE_LONGDOUBLE: 
-          if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0) 
-            { 
-              next_arg.p = FFI_ALIGN (next_arg.p, 16); 
-              if (next_arg.ul == gpr_end.ul) 
-                next_arg.ul = rest.ul; 
-              if (vecarg_count < NUM_VEC_ARG_REGISTERS64 && i < nfixedargs) 
-                *vec_base.f128++ = **p_argv.f128; 
-              else 
-                *next_arg.f128 = **p_argv.f128; 
-              if (++next_arg.f128 == gpr_end.f128) 
-                next_arg.f128 = rest.f128; 
-              vecarg_count++; 
-              FFI_ASSERT (__LDBL_MANT_DIG__ == 113); 
-              FFI_ASSERT (flags & FLAG_VEC_ARGUMENTS); 
-              break; 
-            } 
-	  if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0) 
-	    { 
-	      double_tmp = (*p_argv.d)[0]; 
-	      if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs) 
-		{ 
-		  *fpr_base.d++ = double_tmp; 
-# if _CALL_ELF != 2 
-		  if ((flags & FLAG_COMPAT) != 0) 
-		    *next_arg.d = double_tmp; 
-# endif 
-		} 
-	      else 
-		*next_arg.d = double_tmp; 
-	      if (++next_arg.ul == gpr_end.ul) 
-		next_arg.ul = rest.ul; 
-	      fparg_count++; 
-	      double_tmp = (*p_argv.d)[1]; 
-	      if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs) 
-		{ 
-		  *fpr_base.d++ = double_tmp; 
-# if _CALL_ELF != 2 
-		  if ((flags & FLAG_COMPAT) != 0) 
-		    *next_arg.d = double_tmp; 
-# endif 
-		} 
-	      else 
-		*next_arg.d = double_tmp; 
-	      if (++next_arg.ul == gpr_end.ul) 
-		next_arg.ul = rest.ul; 
-	      fparg_count++; 
-	      FFI_ASSERT (__LDBL_MANT_DIG__ == 106); 
-	      FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); 
-	      break; 
-	    } 
-	  /* Fall through.  */ 
-#endif 
-	case FFI_TYPE_DOUBLE: 
-#if _CALL_ELF != 2 
-	do_double: 
-#endif 
-	  double_tmp = **p_argv.d; 
-	  if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs) 
-	    { 
-	      *fpr_base.d++ = double_tmp; 
-#if _CALL_ELF != 2 
-	      if ((flags & FLAG_COMPAT) != 0) 
-		*next_arg.d = double_tmp; 
-#endif 
-	    } 
-	  else 
-	    *next_arg.d = double_tmp; 
-	  if (++next_arg.ul == gpr_end.ul) 
-	    next_arg.ul = rest.ul; 
-	  fparg_count++; 
-	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); 
-	  break; 
- 
-	case FFI_TYPE_FLOAT: 
-#if _CALL_ELF != 2 
-	do_float: 
-#endif 
-	  double_tmp = **p_argv.f; 
-	  if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs) 
-	    { 
-	      *fpr_base.d++ = double_tmp; 
-#if _CALL_ELF != 2 
-	      if ((flags & FLAG_COMPAT) != 0) 
-		{ 
-# ifndef __LITTLE_ENDIAN__ 
-		  next_arg.f[1] = (float) double_tmp; 
-# else 
-		  next_arg.f[0] = (float) double_tmp; 
-# endif 
-		} 
-#endif 
-	    } 
-	  else 
-	    { 
-# ifndef __LITTLE_ENDIAN__ 
-	      next_arg.f[1] = (float) double_tmp; 
-# else 
-	      next_arg.f[0] = (float) double_tmp; 
-# endif 
-	    } 
-	  if (++next_arg.ul == gpr_end.ul) 
-	    next_arg.ul = rest.ul; 
-	  fparg_count++; 
-	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); 
-	  break; 
- 
-	case FFI_TYPE_STRUCT: 
-	  if ((ecif->cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0) 
-	    { 
-	      align = (*ptr)->alignment; 
-	      if (align > 16) 
-		align = 16; 
-	      if (align > 1) 
-                { 
-                  next_arg.p = FFI_ALIGN (next_arg.p, align); 
-                  if (next_arg.ul == gpr_end.ul) 
-                    next_arg.ul = rest.ul; 
-                } 
-	    } 
-	  elt = discover_homogeneous_aggregate (ecif->cif->abi, *ptr, &elnum); 
-	  if (elt) 
-	    { 
-#if _CALL_ELF == 2 
-	      union { 
-		void *v; 
-		float *f; 
-		double *d; 
-		float128 *f128; 
-	      } arg; 
- 
-	      arg.v = *p_argv.v; 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-              if (elt == FFI_TYPE_LONGDOUBLE && 
-                  (ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0) 
-                { 
-                  do 
-                    { 
-                      if (vecarg_count < NUM_VEC_ARG_REGISTERS64 
-                          && i < nfixedargs) 
-                        *vec_base.f128++ = *arg.f128++; 
-                      else 
-                        *next_arg.f128 = *arg.f128++; 
-                      if (++next_arg.f128 == gpr_end.f128) 
-                        next_arg.f128 = rest.f128; 
-                      vecarg_count++; 
-                    } 
-                  while (--elnum != 0); 
-                } 
-              else 
-#endif 
-	      if (elt == FFI_TYPE_FLOAT) 
-		{ 
-		  do 
-		    { 
-		      double_tmp = *arg.f++; 
-		      if (fparg_count < NUM_FPR_ARG_REGISTERS64 
-			  && i < nfixedargs) 
-			*fpr_base.d++ = double_tmp; 
-		      else 
-			*next_arg.f = (float) double_tmp; 
-		      if (++next_arg.f == gpr_end.f) 
-			next_arg.f = rest.f; 
-		      fparg_count++; 
-		    } 
-		  while (--elnum != 0); 
-		  if ((next_arg.p & 7) != 0) 
-                    if (++next_arg.f == gpr_end.f) 
-                      next_arg.f = rest.f; 
-		} 
-	      else 
-		do 
-		  { 
-		    double_tmp = *arg.d++; 
-		    if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs) 
-		      *fpr_base.d++ = double_tmp; 
-		    else 
-		      *next_arg.d = double_tmp; 
-		    if (++next_arg.d == gpr_end.d) 
-		      next_arg.d = rest.d; 
-		    fparg_count++; 
-		  } 
-		while (--elnum != 0); 
-#else 
-	      if (elt == FFI_TYPE_FLOAT) 
-		goto do_float; 
-	      else 
-		goto do_double; 
-#endif 
-	    } 
-	  else 
-	    { 
-	      words = ((*ptr)->size + 7) / 8; 
-	      if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul) 
-		{ 
-		  size_t first = gpr_end.c - next_arg.c; 
-		  memcpy (next_arg.c, *p_argv.c, first); 
-		  memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first); 
-		  next_arg.c = rest.c + words * 8 - first; 
-		} 
-	      else 
-		{ 
-		  char *where = next_arg.c; 
- 
-#ifndef __LITTLE_ENDIAN__ 
-		  /* Structures with size less than eight bytes are passed 
-		     left-padded.  */ 
-		  if ((*ptr)->size < 8) 
-		    where += 8 - (*ptr)->size; 
-#endif 
-		  memcpy (where, *p_argv.c, (*ptr)->size); 
-		  next_arg.ul += words; 
-		  if (next_arg.ul == gpr_end.ul) 
-		    next_arg.ul = rest.ul; 
-		} 
-	    } 
-	  break; 
- 
-	case FFI_TYPE_UINT8: 
-	  gprvalue = **p_argv.uc; 
-	  goto putgpr; 
-	case FFI_TYPE_SINT8: 
-	  gprvalue = **p_argv.sc; 
-	  goto putgpr; 
-	case FFI_TYPE_UINT16: 
-	  gprvalue = **p_argv.us; 
-	  goto putgpr; 
-	case FFI_TYPE_SINT16: 
-	  gprvalue = **p_argv.ss; 
-	  goto putgpr; 
-	case FFI_TYPE_UINT32: 
-	  gprvalue = **p_argv.ui; 
-	  goto putgpr; 
-	case FFI_TYPE_INT: 
-	case FFI_TYPE_SINT32: 
-	  gprvalue = **p_argv.si; 
-	  goto putgpr; 
- 
-	case FFI_TYPE_UINT64: 
-	case FFI_TYPE_SINT64: 
-	case FFI_TYPE_POINTER: 
-	  gprvalue = **p_argv.ul; 
-	putgpr: 
-	  *next_arg.ul++ = gprvalue; 
-	  if (next_arg.ul == gpr_end.ul) 
-	    next_arg.ul = rest.ul; 
-	  break; 
-	} 
-    } 
- 
-  FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS 
-	      || (next_arg.ul >= gpr_base.ul 
-		  && next_arg.ul <= gpr_base.ul + 4)); 
-} 
- 
- 
-#if _CALL_ELF == 2 
-#define MIN_CACHE_LINE_SIZE 8 
- 
-static void 
-flush_icache (char *wraddr, char *xaddr, int size) 
-{ 
-  int i; 
-  for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE) 
-    __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" 
-		      : : "r" (xaddr + i), "r" (wraddr + i) : "memory"); 
-  __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" "sync;" "isync;" 
-		    : : "r"(xaddr + size - 1), "r"(wraddr + size - 1) 
-		    : "memory"); 
-} 
-#endif 
- 
- 
-ffi_status FFI_HIDDEN 
-ffi_prep_closure_loc_linux64 (ffi_closure *closure, 
-			      ffi_cif *cif, 
-			      void (*fun) (ffi_cif *, void *, void **, void *), 
-			      void *user_data, 
-			      void *codeloc) 
-{ 
-#if _CALL_ELF == 2 
-  unsigned int *tramp = (unsigned int *) &closure->tramp[0]; 
- 
-  if (cif->abi < FFI_LINUX || cif->abi >= FFI_LAST_ABI) 
-    return FFI_BAD_ABI; 
- 
-  tramp[0] = 0xe96c0018;	/* 0:	ld	11,2f-0b(12)	*/ 
-  tramp[1] = 0xe98c0010;	/*	ld	12,1f-0b(12)	*/ 
-  tramp[2] = 0x7d8903a6;	/*	mtctr	12		*/ 
-  tramp[3] = 0x4e800420;	/*	bctr			*/ 
-				/* 1:	.quad	function_addr	*/ 
-				/* 2:	.quad	context		*/ 
-  *(void **) &tramp[4] = (void *) ffi_closure_LINUX64; 
-  *(void **) &tramp[6] = codeloc; 
-  flush_icache ((char *) tramp, (char *) codeloc, 4 * 4); 
-#else 
-  void **tramp = (void **) &closure->tramp[0]; 
- 
-  if (cif->abi < FFI_LINUX || cif->abi >= FFI_LAST_ABI) 
-    return FFI_BAD_ABI; 
- 
-  /* Copy function address and TOC from ffi_closure_LINUX64 OPD.  */ 
-  memcpy (&tramp[0], (void **) ffi_closure_LINUX64, sizeof (void *)); 
-  tramp[1] = codeloc; 
-  memcpy (&tramp[2], (void **) ffi_closure_LINUX64 + 1, sizeof (void *)); 
-#endif 
- 
-  closure->cif = cif; 
-  closure->fun = fun; 
-  closure->user_data = user_data; 
- 
-  return FFI_OK; 
-} 
- 
- 
-int FFI_HIDDEN 
-ffi_closure_helper_LINUX64 (ffi_cif *cif, 
-			    void (*fun) (ffi_cif *, void *, void **, void *), 
-			    void *user_data, 
-			    void *rvalue, 
-			    unsigned long *pst, 
-                            ffi_dblfl *pfr, 
-                            float128 *pvec) 
-{ 
-  /* rvalue is the pointer to space for return value in closure assembly */ 
-  /* pst is the pointer to parameter save area 
-     (r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */ 
-  /* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */ 
-  /* pvec is the pointer to where v2-v13 are stored in ffi_closure_LINUX64 */ 
- 
-  void **avalue; 
-  ffi_type **arg_types; 
-  unsigned long i, avn, nfixedargs; 
-  ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64; 
-  float128 *end_pvec = pvec + NUM_VEC_ARG_REGISTERS64; 
-  unsigned long align; 
- 
-  avalue = alloca (cif->nargs * sizeof (void *)); 
- 
-  /* Copy the caller's structure return value address so that the 
-     closure returns the data directly to the caller.  */ 
-  if (cif->rtype->type == FFI_TYPE_STRUCT 
-      && (cif->flags & FLAG_RETURNS_SMST) == 0) 
-    { 
-      rvalue = (void *) *pst; 
-      pst++; 
-    } 
- 
-  i = 0; 
-  avn = cif->nargs; 
-#if _CALL_ELF != 2 
-  nfixedargs = (unsigned) -1; 
-  if ((cif->flags & FLAG_COMPAT) == 0) 
-#endif 
-    nfixedargs = cif->nfixedargs; 
-  arg_types = cif->arg_types; 
- 
-  /* Grab the addresses of the arguments from the stack frame.  */ 
-  while (i < avn) 
-    { 
-      unsigned int elt, elnum; 
- 
-      switch (arg_types[i]->type) 
-	{ 
-	case FFI_TYPE_SINT8: 
-	case FFI_TYPE_UINT8: 
-#ifndef __LITTLE_ENDIAN__ 
-	  avalue[i] = (char *) pst + 7; 
-	  pst++; 
-	  break; 
-#endif 
- 
-	case FFI_TYPE_SINT16: 
-	case FFI_TYPE_UINT16: 
-#ifndef __LITTLE_ENDIAN__ 
-	  avalue[i] = (char *) pst + 6; 
-	  pst++; 
-	  break; 
-#endif 
- 
-	case FFI_TYPE_SINT32: 
-	case FFI_TYPE_UINT32: 
-#ifndef __LITTLE_ENDIAN__ 
-	  avalue[i] = (char *) pst + 4; 
-	  pst++; 
-	  break; 
-#endif 
- 
-	case FFI_TYPE_SINT64: 
-	case FFI_TYPE_UINT64: 
-	case FFI_TYPE_POINTER: 
-	  avalue[i] = pst; 
-	  pst++; 
-	  break; 
- 
-	case FFI_TYPE_STRUCT: 
-	  if ((cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0) 
-	    { 
-	      align = arg_types[i]->alignment; 
-	      if (align > 16) 
-		align = 16; 
-	      if (align > 1) 
-		pst = (unsigned long *) FFI_ALIGN ((size_t) pst, align); 
-	    } 
-	  elt = discover_homogeneous_aggregate (cif->abi, arg_types[i], &elnum); 
-	  if (elt) 
-	    { 
-#if _CALL_ELF == 2 
-	      union { 
-		void *v; 
-		unsigned long *ul; 
-		float *f; 
-		double *d; 
-		float128 *f128; 
-		size_t p; 
-	      } to, from; 
- 
-	      /* Repackage the aggregate from its parts.  The 
-		 aggregate size is not greater than the space taken by 
-		 the registers so store back to the register/parameter 
-		 save arrays.  */ 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-              if (elt == FFI_TYPE_LONGDOUBLE && 
-                  (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0) 
-                { 
-                  if (pvec + elnum <= end_pvec) 
-                    to.v = pvec; 
-                  else 
-                    to.v = pst; 
-                } 
-              else 
-#endif 
-	      if (pfr + elnum <= end_pfr) 
-		to.v = pfr; 
-	      else 
-		to.v = pst; 
- 
-	      avalue[i] = to.v; 
-	      from.ul = pst; 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-              if (elt == FFI_TYPE_LONGDOUBLE && 
-                  (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0) 
-                { 
-                  do 
-                    { 
-                      if (pvec < end_pvec && i < nfixedargs) 
-                        *to.f128 = *pvec++; 
-                      else 
-                        *to.f128 = *from.f128; 
-                      to.f128++; 
-                      from.f128++; 
-                    } 
-                  while (--elnum != 0); 
-                } 
-              else 
-#endif 
-	      if (elt == FFI_TYPE_FLOAT) 
-		{ 
-		  do 
-		    { 
-		      if (pfr < end_pfr && i < nfixedargs) 
-			{ 
-			  *to.f = (float) pfr->d; 
-			  pfr++; 
-			} 
-		      else 
-			*to.f = *from.f; 
-		      to.f++; 
-		      from.f++; 
-		    } 
-		  while (--elnum != 0); 
-		} 
-	      else 
-		{ 
-		  do 
-		    { 
-		      if (pfr < end_pfr && i < nfixedargs) 
-			{ 
-			  *to.d = pfr->d; 
-			  pfr++; 
-			} 
-		      else 
-			*to.d = *from.d; 
-		      to.d++; 
-		      from.d++; 
-		    } 
-		  while (--elnum != 0); 
-		} 
-#else 
-	      if (elt == FFI_TYPE_FLOAT) 
-		goto do_float; 
-	      else 
-		goto do_double; 
-#endif 
-	    } 
-	  else 
-	    { 
-#ifndef __LITTLE_ENDIAN__ 
-	      /* Structures with size less than eight bytes are passed 
-		 left-padded.  */ 
-	      if (arg_types[i]->size < 8) 
-		avalue[i] = (char *) pst + 8 - arg_types[i]->size; 
-	      else 
-#endif 
-		avalue[i] = pst; 
-	    } 
-	  pst += (arg_types[i]->size + 7) / 8; 
-	  break; 
- 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-	case FFI_TYPE_LONGDOUBLE: 
-          if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0) 
-            { 
-              if (((unsigned long) pst & 0xF) != 0) 
-                ++pst; 
-              if (pvec < end_pvec && i < nfixedargs) 
-                avalue[i] = pvec++; 
-              else 
-                avalue[i] = pst; 
-              pst += 2; 
-              break; 
-            } 
-          else if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0) 
-	    { 
-	      if (pfr + 1 < end_pfr && i + 1 < nfixedargs) 
-		{ 
-		  avalue[i] = pfr; 
-		  pfr += 2; 
-		} 
-	      else 
-		{ 
-		  if (pfr < end_pfr && i < nfixedargs) 
-		    { 
-		      /* Passed partly in f13 and partly on the stack. 
-			 Move it all to the stack.  */ 
-		      *pst = *(unsigned long *) pfr; 
-		      pfr++; 
-		    } 
-		  avalue[i] = pst; 
-		} 
-	      pst += 2; 
-	      break; 
-	    } 
-	  /* Fall through.  */ 
-#endif 
-	case FFI_TYPE_DOUBLE: 
-#if _CALL_ELF != 2 
-	do_double: 
-#endif 
-	  /* On the outgoing stack all values are aligned to 8 */ 
-	  /* there are 13 64bit floating point registers */ 
- 
-	  if (pfr < end_pfr && i < nfixedargs) 
-	    { 
-	      avalue[i] = pfr; 
-	      pfr++; 
-	    } 
-	  else 
-	    avalue[i] = pst; 
-	  pst++; 
-	  break; 
- 
-	case FFI_TYPE_FLOAT: 
-#if _CALL_ELF != 2 
-	do_float: 
-#endif 
-	  if (pfr < end_pfr && i < nfixedargs) 
-	    { 
-	      /* Float values are stored as doubles in the 
-		 ffi_closure_LINUX64 code.  Fix them here.  */ 
-	      pfr->f = (float) pfr->d; 
-	      avalue[i] = pfr; 
-	      pfr++; 
-	    } 
-	  else 
-	    { 
-#ifndef __LITTLE_ENDIAN__ 
-	      avalue[i] = (char *) pst + 4; 
-#else 
-	      avalue[i] = pst; 
-#endif 
-	    } 
-	  pst++; 
-	  break; 
- 
-	default: 
-	  FFI_ASSERT (0); 
-	} 
- 
-      i++; 
-    } 
- 
-  (*fun) (cif, rvalue, avalue, user_data); 
- 
-  /* Tell ffi_closure_LINUX64 how to perform return type promotions.  */ 
-  if ((cif->flags & FLAG_RETURNS_SMST) != 0) 
-    { 
-      if ((cif->flags & (FLAG_RETURNS_FP | FLAG_RETURNS_VEC)) == 0) 
-	return FFI_V2_TYPE_SMALL_STRUCT + cif->rtype->size - 1; 
-      else if ((cif->flags & FLAG_RETURNS_VEC) != 0) 
-        return FFI_V2_TYPE_VECTOR_HOMOG; 
-      else if ((cif->flags & FLAG_RETURNS_64BITS) != 0) 
-	return FFI_V2_TYPE_DOUBLE_HOMOG; 
-      else 
-	return FFI_V2_TYPE_FLOAT_HOMOG; 
-    } 
-  if ((cif->flags & FLAG_RETURNS_VEC) != 0) 
-    return FFI_V2_TYPE_VECTOR; 
-  return cif->rtype->type; 
-} 
-#endif 
+/* -----------------------------------------------------------------------
+   ffi_linux64.c - Copyright (C) 2013 IBM
+                   Copyright (C) 2011 Anthony Green
+                   Copyright (C) 2011 Kyle Moffett
+                   Copyright (C) 2008 Red Hat, Inc
+                   Copyright (C) 2007, 2008 Free Software Foundation, Inc
+                   Copyright (c) 1998 Geoffrey Keating
+
+   PowerPC Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include "ffi.h"
+
+#ifdef POWERPC64
+#include "ffi_common.h"
+#include "ffi_powerpc.h"
+
+
+/* About the LINUX64 ABI.  */
+enum {
+  NUM_GPR_ARG_REGISTERS64 = 8,
+  NUM_FPR_ARG_REGISTERS64 = 13,
+  NUM_VEC_ARG_REGISTERS64 = 12,
+};
+enum { ASM_NEEDS_REGISTERS64 = 4 };
+
+
+#if HAVE_LONG_DOUBLE_VARIANT && FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+/* Adjust size of ffi_type_longdouble.  */
+void FFI_HIDDEN
+ffi_prep_types_linux64 (ffi_abi abi)
+{
+  if ((abi & (FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128)) == FFI_LINUX)
+    {
+      ffi_type_longdouble.size = 8;
+      ffi_type_longdouble.alignment = 8;
+    }
+  else
+    {
+      ffi_type_longdouble.size = 16;
+      ffi_type_longdouble.alignment = 16;
+    }
+}
+#endif
+
+
+static unsigned int
+discover_homogeneous_aggregate (ffi_abi abi,
+                                const ffi_type *t,
+                                unsigned int *elnum)
+{
+  switch (t->type)
+    {
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+    case FFI_TYPE_LONGDOUBLE:
+      /* 64-bit long doubles are equivalent to doubles. */
+      if ((abi & FFI_LINUX_LONG_DOUBLE_128) == 0)
+        {
+          *elnum = 1;
+          return FFI_TYPE_DOUBLE;
+        }
+      /* IBM extended precision values use unaligned pairs
+         of FPRs, but according to the ABI must be considered
+         distinct from doubles. They are also limited to a
+         maximum of four members in a homogeneous aggregate. */
+      else if ((abi & FFI_LINUX_LONG_DOUBLE_IEEE128) == 0)
+        {
+          *elnum = 2;
+          return FFI_TYPE_LONGDOUBLE;
+        }
+      /* Fall through. */
+#endif
+    case FFI_TYPE_FLOAT:
+    case FFI_TYPE_DOUBLE:
+      *elnum = 1;
+      return (int) t->type;
+
+    case FFI_TYPE_STRUCT:;
+      {
+	unsigned int base_elt = 0, total_elnum = 0;
+	ffi_type **el = t->elements;
+	while (*el)
+	  {
+	    unsigned int el_elt, el_elnum = 0;
+	    el_elt = discover_homogeneous_aggregate (abi, *el, &el_elnum);
+	    if (el_elt == 0
+		|| (base_elt && base_elt != el_elt))
+	      return 0;
+	    base_elt = el_elt;
+	    total_elnum += el_elnum;
+#if _CALL_ELF == 2
+	    if (total_elnum > 8)
+	      return 0;
+#else
+	    if (total_elnum > 1)
+	      return 0;
+#endif
+	    el++;
+	  }
+	*elnum = total_elnum;
+	return base_elt;
+      }
+
+    default:
+      return 0;
+    }
+}
+
+
+/* Perform machine dependent cif processing */
+static ffi_status
+ffi_prep_cif_linux64_core (ffi_cif *cif)
+{
+  ffi_type **ptr;
+  unsigned bytes;
+  unsigned i, fparg_count = 0, intarg_count = 0, vecarg_count = 0;
+  unsigned flags = cif->flags;
+  unsigned elt, elnum, rtype;
+
+#if FFI_TYPE_LONGDOUBLE == FFI_TYPE_DOUBLE
+  /* If compiled without long double support... */
+  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0 ||
+      (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+    return FFI_BAD_ABI;
+#elif !defined(__VEC__)
+  /* If compiled without vector register support (used by assembly)... */
+  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+    return FFI_BAD_ABI;
+#else
+  /* If the IEEE128 flag is set, but long double is only 64 bits wide... */
+  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) == 0 &&
+      (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+    return FFI_BAD_ABI;
+#endif
+
+  /* The machine-independent calculation of cif->bytes doesn't work
+     for us.  Redo the calculation.  */
+#if _CALL_ELF == 2
+  /* Space for backchain, CR, LR, TOC and the asm's temp regs.  */
+  bytes = (4 + ASM_NEEDS_REGISTERS64) * sizeof (long);
+
+  /* Space for the general registers.  */
+  bytes += NUM_GPR_ARG_REGISTERS64 * sizeof (long);
+#else
+  /* Space for backchain, CR, LR, cc/ld doubleword, TOC and the asm's temp
+     regs.  */
+  bytes = (6 + ASM_NEEDS_REGISTERS64) * sizeof (long);
+
+  /* Space for the mandatory parm save area and general registers.  */
+  bytes += 2 * NUM_GPR_ARG_REGISTERS64 * sizeof (long);
+#endif
+
+  /* Return value handling.  */
+  rtype = cif->rtype->type;
+#if _CALL_ELF == 2
+homogeneous:
+#endif
+  switch (rtype)
+    {
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+    case FFI_TYPE_LONGDOUBLE:
+      if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+        {
+          flags |= FLAG_RETURNS_VEC;
+          break;
+        }
+      if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
+	flags |= FLAG_RETURNS_128BITS;
+      /* Fall through.  */
+#endif
+    case FFI_TYPE_DOUBLE:
+      flags |= FLAG_RETURNS_64BITS;
+      /* Fall through.  */
+    case FFI_TYPE_FLOAT:
+      flags |= FLAG_RETURNS_FP;
+      break;
+
+    case FFI_TYPE_UINT128:
+      flags |= FLAG_RETURNS_128BITS;
+      /* Fall through.  */
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+    case FFI_TYPE_POINTER:
+      flags |= FLAG_RETURNS_64BITS;
+      break;
+
+    case FFI_TYPE_STRUCT:
+#if _CALL_ELF == 2
+      elt = discover_homogeneous_aggregate (cif->abi, cif->rtype, &elnum);
+      if (elt)
+        {
+          flags |= FLAG_RETURNS_SMST;
+          rtype = elt;
+          goto homogeneous;
+        }
+      if (cif->rtype->size <= 16)
+        {
+          flags |= FLAG_RETURNS_SMST;
+          break;
+        }
+#endif
+      intarg_count++;
+      flags |= FLAG_RETVAL_REFERENCE;
+      /* Fall through.  */
+    case FFI_TYPE_VOID:
+      flags |= FLAG_RETURNS_NOTHING;
+      break;
+
+    default:
+      /* Returns 32-bit integer, or similar.  Nothing to do here.  */
+      break;
+    }
+
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+    {
+      unsigned int align;
+
+      switch ((*ptr)->type)
+	{
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+	case FFI_TYPE_LONGDOUBLE:
+          if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+            {
+              vecarg_count++;
+              /* Align to 16 bytes, plus the 16-byte argument. */
+              intarg_count = (intarg_count + 3) & ~0x1;
+              if (vecarg_count > NUM_VEC_ARG_REGISTERS64)
+                flags |= FLAG_ARG_NEEDS_PSAVE;
+              break;
+            }
+	  if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
+	    {
+	      fparg_count++;
+	      intarg_count++;
+	    }
+	  /* Fall through.  */
+#endif
+	case FFI_TYPE_DOUBLE:
+	case FFI_TYPE_FLOAT:
+	  fparg_count++;
+	  intarg_count++;
+	  if (fparg_count > NUM_FPR_ARG_REGISTERS64)
+	    flags |= FLAG_ARG_NEEDS_PSAVE;
+	  break;
+
+	case FFI_TYPE_STRUCT:
+	  if ((cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
+	    {
+	      align = (*ptr)->alignment;
+	      if (align > 16)
+		align = 16;
+	      align = align / 8;
+	      if (align > 1)
+		intarg_count = FFI_ALIGN (intarg_count, align);
+	    }
+	  intarg_count += ((*ptr)->size + 7) / 8;
+	  elt = discover_homogeneous_aggregate (cif->abi, *ptr, &elnum);
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+          if (elt == FFI_TYPE_LONGDOUBLE &&
+              (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+            {
+              vecarg_count += elnum;
+              if (vecarg_count > NUM_VEC_ARG_REGISTERS64)
+                flags |= FLAG_ARG_NEEDS_PSAVE;
+              break;
+            }
+	  else
+#endif
+	  if (elt)
+	    {
+	      fparg_count += elnum;
+	      if (fparg_count > NUM_FPR_ARG_REGISTERS64)
+		flags |= FLAG_ARG_NEEDS_PSAVE;
+	    }
+	  else
+	    {
+	      if (intarg_count > NUM_GPR_ARG_REGISTERS64)
+		flags |= FLAG_ARG_NEEDS_PSAVE;
+	    }
+	  break;
+
+	case FFI_TYPE_POINTER:
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_INT:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT8:
+	  /* Everything else is passed as a 8-byte word in a GPR, either
+	     the object itself or a pointer to it.  */
+	  intarg_count++;
+	  if (intarg_count > NUM_GPR_ARG_REGISTERS64)
+	    flags |= FLAG_ARG_NEEDS_PSAVE;
+	  break;
+	default:
+	  FFI_ASSERT (0);
+	}
+    }
+
+  if (fparg_count != 0)
+    flags |= FLAG_FP_ARGUMENTS;
+  if (intarg_count > 4)
+    flags |= FLAG_4_GPR_ARGUMENTS;
+  if (vecarg_count != 0)
+    flags |= FLAG_VEC_ARGUMENTS;
+
+  /* Space for the FPR registers, if needed.  */
+  if (fparg_count != 0)
+    bytes += NUM_FPR_ARG_REGISTERS64 * sizeof (double);
+  /* Space for the vector registers, if needed, aligned to 16 bytes. */
+  if (vecarg_count != 0) {
+    bytes = (bytes + 15) & ~0xF;
+    bytes += NUM_VEC_ARG_REGISTERS64 * sizeof (float128);
+  }
+
+  /* Stack space.  */
+#if _CALL_ELF == 2
+  if ((flags & FLAG_ARG_NEEDS_PSAVE) != 0)
+    bytes += intarg_count * sizeof (long);
+#else
+  if (intarg_count > NUM_GPR_ARG_REGISTERS64)
+    bytes += (intarg_count - NUM_GPR_ARG_REGISTERS64) * sizeof (long);
+#endif
+
+  /* The stack space allocated needs to be a multiple of 16 bytes.  */
+  bytes = (bytes + 15) & ~0xF;
+
+  cif->flags = flags;
+  cif->bytes = bytes;
+
+  return FFI_OK;
+}
+
+ffi_status FFI_HIDDEN
+ffi_prep_cif_linux64 (ffi_cif *cif)
+{
+  if ((cif->abi & FFI_LINUX) != 0)
+    cif->nfixedargs = cif->nargs;
+#if _CALL_ELF != 2
+  else if (cif->abi == FFI_COMPAT_LINUX64)
+    {
+      /* This call is from old code.  Don't touch cif->nfixedargs
+	 since old code will be using a smaller cif.  */
+      cif->flags |= FLAG_COMPAT;
+      /* Translate to new abi value.  */
+      cif->abi = FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128;
+    }
+#endif
+  else
+    return FFI_BAD_ABI;
+  return ffi_prep_cif_linux64_core (cif);
+}
+
+ffi_status FFI_HIDDEN
+ffi_prep_cif_linux64_var (ffi_cif *cif,
+			  unsigned int nfixedargs,
+			  unsigned int ntotalargs MAYBE_UNUSED)
+{
+  if ((cif->abi & FFI_LINUX) != 0)
+    cif->nfixedargs = nfixedargs;
+#if _CALL_ELF != 2
+  else if (cif->abi == FFI_COMPAT_LINUX64)
+    {
+      /* This call is from old code.  Don't touch cif->nfixedargs
+	 since old code will be using a smaller cif.  */
+      cif->flags |= FLAG_COMPAT;
+      /* Translate to new abi value.  */
+      cif->abi = FFI_LINUX | FFI_LINUX_LONG_DOUBLE_128;
+    }
+#endif
+  else
+    return FFI_BAD_ABI;
+#if _CALL_ELF == 2
+  cif->flags |= FLAG_ARG_NEEDS_PSAVE;
+#endif
+  return ffi_prep_cif_linux64_core (cif);
+}
+
+
+/* ffi_prep_args64 is called by the assembly routine once stack space
+   has been allocated for the function's arguments.
+
+   The stack layout we want looks like this:
+
+   |   Ret addr from ffi_call_LINUX64	8bytes	|	higher addresses
+   |--------------------------------------------|
+   |   CR save area			8bytes	|
+   |--------------------------------------------|
+   |   Previous backchain pointer	8	|	stack pointer here
+   |--------------------------------------------|<+ <<<	on entry to
+   |   Saved r28-r31			4*8	| |	ffi_call_LINUX64
+   |--------------------------------------------| |
+   |   GPR registers r3-r10		8*8	| |
+   |--------------------------------------------| |
+   |   FPR registers f1-f13 (optional)	13*8	| |
+   |--------------------------------------------| |
+   |   VEC registers v2-v13 (optional)  12*16   | |
+   |--------------------------------------------| |
+   |   Parameter save area		        | |
+   |--------------------------------------------| |
+   |   TOC save area			8	| |
+   |--------------------------------------------| |	stack	|
+   |   Linker doubleword		8	| |	grows	|
+   |--------------------------------------------| |	down	V
+   |   Compiler doubleword		8	| |
+   |--------------------------------------------| |	lower addresses
+   |   Space for callee's LR		8	| |
+   |--------------------------------------------| |
+   |   CR save area			8	| |
+   |--------------------------------------------| |	stack pointer here
+   |   Current backchain pointer	8	|-/	during
+   |--------------------------------------------|   <<<	ffi_call_LINUX64
+
+*/
+
+void FFI_HIDDEN
+ffi_prep_args64 (extended_cif *ecif, unsigned long *const stack)
+{
+  const unsigned long bytes = ecif->cif->bytes;
+  const unsigned long flags = ecif->cif->flags;
+
+  typedef union
+  {
+    char *c;
+    unsigned long *ul;
+    float *f;
+    double *d;
+    float128 *f128;
+    size_t p;
+  } valp;
+
+  /* 'stacktop' points at the previous backchain pointer.  */
+  valp stacktop;
+
+  /* 'next_arg' points at the space for gpr3, and grows upwards as
+     we use GPR registers, then continues at rest.  */
+  valp gpr_base;
+  valp gpr_end;
+  valp rest;
+  valp next_arg;
+
+  /* 'fpr_base' points at the space for f1, and grows upwards as
+     we use FPR registers.  */
+  valp fpr_base;
+  unsigned int fparg_count;
+
+  /* 'vec_base' points at the space for v2, and grows upwards as
+     we use vector registers.  */
+  valp vec_base;
+  unsigned int vecarg_count;
+
+  unsigned int i, words, nargs, nfixedargs;
+  ffi_type **ptr;
+  double double_tmp;
+  union
+  {
+    void **v;
+    char **c;
+    signed char **sc;
+    unsigned char **uc;
+    signed short **ss;
+    unsigned short **us;
+    signed int **si;
+    unsigned int **ui;
+    unsigned long **ul;
+    float **f;
+    double **d;
+    float128 **f128;
+  } p_argv;
+  unsigned long gprvalue;
+  unsigned long align;
+
+  stacktop.c = (char *) stack + bytes;
+  gpr_base.ul = stacktop.ul - ASM_NEEDS_REGISTERS64 - NUM_GPR_ARG_REGISTERS64;
+  gpr_end.ul = gpr_base.ul + NUM_GPR_ARG_REGISTERS64;
+#if _CALL_ELF == 2
+  rest.ul = stack + 4 + NUM_GPR_ARG_REGISTERS64;
+#else
+  rest.ul = stack + 6 + NUM_GPR_ARG_REGISTERS64;
+#endif
+  fpr_base.d = gpr_base.d - NUM_FPR_ARG_REGISTERS64;
+  fparg_count = 0;
+  /* Place the vector args below the FPRs, if used, else the GPRs. */
+  if (ecif->cif->flags & FLAG_FP_ARGUMENTS)
+    vec_base.p = fpr_base.p & ~0xF;
+  else
+    vec_base.p = gpr_base.p;
+  vec_base.f128 -= NUM_VEC_ARG_REGISTERS64;
+  vecarg_count = 0;
+  next_arg.ul = gpr_base.ul;
+
+  /* Check that everything starts aligned properly.  */
+  FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
+  FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
+  FFI_ASSERT (((unsigned long) gpr_base.c & 0xF) == 0);
+  FFI_ASSERT (((unsigned long) gpr_end.c  & 0xF) == 0);
+  FFI_ASSERT (((unsigned long) vec_base.c & 0xF) == 0);
+  FFI_ASSERT ((bytes & 0xF) == 0);
+
+  /* Deal with return values that are actually pass-by-reference.  */
+  if (flags & FLAG_RETVAL_REFERENCE)
+    *next_arg.ul++ = (unsigned long) (char *) ecif->rvalue;
+
+  /* Now for the arguments.  */
+  p_argv.v = ecif->avalue;
+  nargs = ecif->cif->nargs;
+#if _CALL_ELF != 2
+  nfixedargs = (unsigned) -1;
+  if ((flags & FLAG_COMPAT) == 0)
+#endif
+    nfixedargs = ecif->cif->nfixedargs;
+  for (ptr = ecif->cif->arg_types, i = 0;
+       i < nargs;
+       i++, ptr++, p_argv.v++)
+    {
+      unsigned int elt, elnum;
+
+      switch ((*ptr)->type)
+	{
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+	case FFI_TYPE_LONGDOUBLE:
+          if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+            {
+              next_arg.p = FFI_ALIGN (next_arg.p, 16);
+              if (next_arg.ul == gpr_end.ul)
+                next_arg.ul = rest.ul;
+              if (vecarg_count < NUM_VEC_ARG_REGISTERS64 && i < nfixedargs)
+                *vec_base.f128++ = **p_argv.f128;
+              else
+                *next_arg.f128 = **p_argv.f128;
+              if (++next_arg.f128 == gpr_end.f128)
+                next_arg.f128 = rest.f128;
+              vecarg_count++;
+              FFI_ASSERT (__LDBL_MANT_DIG__ == 113);
+              FFI_ASSERT (flags & FLAG_VEC_ARGUMENTS);
+              break;
+            }
+	  if ((ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
+	    {
+	      double_tmp = (*p_argv.d)[0];
+	      if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
+		{
+		  *fpr_base.d++ = double_tmp;
+# if _CALL_ELF != 2
+		  if ((flags & FLAG_COMPAT) != 0)
+		    *next_arg.d = double_tmp;
+# endif
+		}
+	      else
+		*next_arg.d = double_tmp;
+	      if (++next_arg.ul == gpr_end.ul)
+		next_arg.ul = rest.ul;
+	      fparg_count++;
+	      double_tmp = (*p_argv.d)[1];
+	      if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
+		{
+		  *fpr_base.d++ = double_tmp;
+# if _CALL_ELF != 2
+		  if ((flags & FLAG_COMPAT) != 0)
+		    *next_arg.d = double_tmp;
+# endif
+		}
+	      else
+		*next_arg.d = double_tmp;
+	      if (++next_arg.ul == gpr_end.ul)
+		next_arg.ul = rest.ul;
+	      fparg_count++;
+	      FFI_ASSERT (__LDBL_MANT_DIG__ == 106);
+	      FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
+	      break;
+	    }
+	  /* Fall through.  */
+#endif
+	case FFI_TYPE_DOUBLE:
+#if _CALL_ELF != 2
+	do_double:
+#endif
+	  double_tmp = **p_argv.d;
+	  if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
+	    {
+	      *fpr_base.d++ = double_tmp;
+#if _CALL_ELF != 2
+	      if ((flags & FLAG_COMPAT) != 0)
+		*next_arg.d = double_tmp;
+#endif
+	    }
+	  else
+	    *next_arg.d = double_tmp;
+	  if (++next_arg.ul == gpr_end.ul)
+	    next_arg.ul = rest.ul;
+	  fparg_count++;
+	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
+	  break;
+
+	case FFI_TYPE_FLOAT:
+#if _CALL_ELF != 2
+	do_float:
+#endif
+	  double_tmp = **p_argv.f;
+	  if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
+	    {
+	      *fpr_base.d++ = double_tmp;
+#if _CALL_ELF != 2
+	      if ((flags & FLAG_COMPAT) != 0)
+		{
+# ifndef __LITTLE_ENDIAN__
+		  next_arg.f[1] = (float) double_tmp;
+# else
+		  next_arg.f[0] = (float) double_tmp;
+# endif
+		}
+#endif
+	    }
+	  else
+	    {
+# ifndef __LITTLE_ENDIAN__
+	      next_arg.f[1] = (float) double_tmp;
+# else
+	      next_arg.f[0] = (float) double_tmp;
+# endif
+	    }
+	  if (++next_arg.ul == gpr_end.ul)
+	    next_arg.ul = rest.ul;
+	  fparg_count++;
+	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
+	  break;
+
+	case FFI_TYPE_STRUCT:
+	  if ((ecif->cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
+	    {
+	      align = (*ptr)->alignment;
+	      if (align > 16)
+		align = 16;
+	      if (align > 1)
+                {
+                  next_arg.p = FFI_ALIGN (next_arg.p, align);
+                  if (next_arg.ul == gpr_end.ul)
+                    next_arg.ul = rest.ul;
+                }
+	    }
+	  elt = discover_homogeneous_aggregate (ecif->cif->abi, *ptr, &elnum);
+	  if (elt)
+	    {
+#if _CALL_ELF == 2
+	      union {
+		void *v;
+		float *f;
+		double *d;
+		float128 *f128;
+	      } arg;
+
+	      arg.v = *p_argv.v;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+              if (elt == FFI_TYPE_LONGDOUBLE &&
+                  (ecif->cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+                {
+                  do
+                    {
+                      if (vecarg_count < NUM_VEC_ARG_REGISTERS64
+                          && i < nfixedargs)
+                        *vec_base.f128++ = *arg.f128++;
+                      else
+                        *next_arg.f128 = *arg.f128++;
+                      if (++next_arg.f128 == gpr_end.f128)
+                        next_arg.f128 = rest.f128;
+                      vecarg_count++;
+                    }
+                  while (--elnum != 0);
+                }
+              else
+#endif
+	      if (elt == FFI_TYPE_FLOAT)
+		{
+		  do
+		    {
+		      double_tmp = *arg.f++;
+		      if (fparg_count < NUM_FPR_ARG_REGISTERS64
+			  && i < nfixedargs)
+			*fpr_base.d++ = double_tmp;
+		      else
+			*next_arg.f = (float) double_tmp;
+		      if (++next_arg.f == gpr_end.f)
+			next_arg.f = rest.f;
+		      fparg_count++;
+		    }
+		  while (--elnum != 0);
+		  if ((next_arg.p & 7) != 0)
+                    if (++next_arg.f == gpr_end.f)
+                      next_arg.f = rest.f;
+		}
+	      else
+		do
+		  {
+		    double_tmp = *arg.d++;
+		    if (fparg_count < NUM_FPR_ARG_REGISTERS64 && i < nfixedargs)
+		      *fpr_base.d++ = double_tmp;
+		    else
+		      *next_arg.d = double_tmp;
+		    if (++next_arg.d == gpr_end.d)
+		      next_arg.d = rest.d;
+		    fparg_count++;
+		  }
+		while (--elnum != 0);
+#else
+	      if (elt == FFI_TYPE_FLOAT)
+		goto do_float;
+	      else
+		goto do_double;
+#endif
+	    }
+	  else
+	    {
+	      words = ((*ptr)->size + 7) / 8;
+	      if (next_arg.ul >= gpr_base.ul && next_arg.ul + words > gpr_end.ul)
+		{
+		  size_t first = gpr_end.c - next_arg.c;
+		  memcpy (next_arg.c, *p_argv.c, first);
+		  memcpy (rest.c, *p_argv.c + first, (*ptr)->size - first);
+		  next_arg.c = rest.c + words * 8 - first;
+		}
+	      else
+		{
+		  char *where = next_arg.c;
+
+#ifndef __LITTLE_ENDIAN__
+		  /* Structures with size less than eight bytes are passed
+		     left-padded.  */
+		  if ((*ptr)->size < 8)
+		    where += 8 - (*ptr)->size;
+#endif
+		  memcpy (where, *p_argv.c, (*ptr)->size);
+		  next_arg.ul += words;
+		  if (next_arg.ul == gpr_end.ul)
+		    next_arg.ul = rest.ul;
+		}
+	    }
+	  break;
+
+	case FFI_TYPE_UINT8:
+	  gprvalue = **p_argv.uc;
+	  goto putgpr;
+	case FFI_TYPE_SINT8:
+	  gprvalue = **p_argv.sc;
+	  goto putgpr;
+	case FFI_TYPE_UINT16:
+	  gprvalue = **p_argv.us;
+	  goto putgpr;
+	case FFI_TYPE_SINT16:
+	  gprvalue = **p_argv.ss;
+	  goto putgpr;
+	case FFI_TYPE_UINT32:
+	  gprvalue = **p_argv.ui;
+	  goto putgpr;
+	case FFI_TYPE_INT:
+	case FFI_TYPE_SINT32:
+	  gprvalue = **p_argv.si;
+	  goto putgpr;
+
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_POINTER:
+	  gprvalue = **p_argv.ul;
+	putgpr:
+	  *next_arg.ul++ = gprvalue;
+	  if (next_arg.ul == gpr_end.ul)
+	    next_arg.ul = rest.ul;
+	  break;
+	}
+    }
+
+  FFI_ASSERT (flags & FLAG_4_GPR_ARGUMENTS
+	      || (next_arg.ul >= gpr_base.ul
+		  && next_arg.ul <= gpr_base.ul + 4));
+}
+
+
+#if _CALL_ELF == 2
+#define MIN_CACHE_LINE_SIZE 8
+
+static void
+flush_icache (char *wraddr, char *xaddr, int size)
+{
+  int i;
+  for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE)
+    __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;"
+		      : : "r" (xaddr + i), "r" (wraddr + i) : "memory");
+  __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" "sync;" "isync;"
+		    : : "r"(xaddr + size - 1), "r"(wraddr + size - 1)
+		    : "memory");
+}
+#endif
+
+
+ffi_status FFI_HIDDEN
+ffi_prep_closure_loc_linux64 (ffi_closure *closure,
+			      ffi_cif *cif,
+			      void (*fun) (ffi_cif *, void *, void **, void *),
+			      void *user_data,
+			      void *codeloc)
+{
+#if _CALL_ELF == 2
+  unsigned int *tramp = (unsigned int *) &closure->tramp[0];
+
+  if (cif->abi < FFI_LINUX || cif->abi >= FFI_LAST_ABI)
+    return FFI_BAD_ABI;
+
+  tramp[0] = 0xe96c0018;	/* 0:	ld	11,2f-0b(12)	*/
+  tramp[1] = 0xe98c0010;	/*	ld	12,1f-0b(12)	*/
+  tramp[2] = 0x7d8903a6;	/*	mtctr	12		*/
+  tramp[3] = 0x4e800420;	/*	bctr			*/
+				/* 1:	.quad	function_addr	*/
+				/* 2:	.quad	context		*/
+  *(void **) &tramp[4] = (void *) ffi_closure_LINUX64;
+  *(void **) &tramp[6] = codeloc;
+  flush_icache ((char *) tramp, (char *) codeloc, 4 * 4);
+#else
+  void **tramp = (void **) &closure->tramp[0];
+
+  if (cif->abi < FFI_LINUX || cif->abi >= FFI_LAST_ABI)
+    return FFI_BAD_ABI;
+
+  /* Copy function address and TOC from ffi_closure_LINUX64 OPD.  */
+  memcpy (&tramp[0], (void **) ffi_closure_LINUX64, sizeof (void *));
+  tramp[1] = codeloc;
+  memcpy (&tramp[2], (void **) ffi_closure_LINUX64 + 1, sizeof (void *));
+#endif
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+
+  return FFI_OK;
+}
+
+
+int FFI_HIDDEN
+ffi_closure_helper_LINUX64 (ffi_cif *cif,
+			    void (*fun) (ffi_cif *, void *, void **, void *),
+			    void *user_data,
+			    void *rvalue,
+			    unsigned long *pst,
+                            ffi_dblfl *pfr,
+                            float128 *pvec)
+{
+  /* rvalue is the pointer to space for return value in closure assembly */
+  /* pst is the pointer to parameter save area
+     (r3-r10 are stored into its first 8 slots by ffi_closure_LINUX64) */
+  /* pfr is the pointer to where f1-f13 are stored in ffi_closure_LINUX64 */
+  /* pvec is the pointer to where v2-v13 are stored in ffi_closure_LINUX64 */
+
+  void **avalue;
+  ffi_type **arg_types;
+  unsigned long i, avn, nfixedargs;
+  ffi_dblfl *end_pfr = pfr + NUM_FPR_ARG_REGISTERS64;
+  float128 *end_pvec = pvec + NUM_VEC_ARG_REGISTERS64;
+  unsigned long align;
+
+  avalue = alloca (cif->nargs * sizeof (void *));
+
+  /* Copy the caller's structure return value address so that the
+     closure returns the data directly to the caller.  */
+  if (cif->rtype->type == FFI_TYPE_STRUCT
+      && (cif->flags & FLAG_RETURNS_SMST) == 0)
+    {
+      rvalue = (void *) *pst;
+      pst++;
+    }
+
+  i = 0;
+  avn = cif->nargs;
+#if _CALL_ELF != 2
+  nfixedargs = (unsigned) -1;
+  if ((cif->flags & FLAG_COMPAT) == 0)
+#endif
+    nfixedargs = cif->nfixedargs;
+  arg_types = cif->arg_types;
+
+  /* Grab the addresses of the arguments from the stack frame.  */
+  while (i < avn)
+    {
+      unsigned int elt, elnum;
+
+      switch (arg_types[i]->type)
+	{
+	case FFI_TYPE_SINT8:
+	case FFI_TYPE_UINT8:
+#ifndef __LITTLE_ENDIAN__
+	  avalue[i] = (char *) pst + 7;
+	  pst++;
+	  break;
+#endif
+
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_UINT16:
+#ifndef __LITTLE_ENDIAN__
+	  avalue[i] = (char *) pst + 6;
+	  pst++;
+	  break;
+#endif
+
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT32:
+#ifndef __LITTLE_ENDIAN__
+	  avalue[i] = (char *) pst + 4;
+	  pst++;
+	  break;
+#endif
+
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_POINTER:
+	  avalue[i] = pst;
+	  pst++;
+	  break;
+
+	case FFI_TYPE_STRUCT:
+	  if ((cif->abi & FFI_LINUX_STRUCT_ALIGN) != 0)
+	    {
+	      align = arg_types[i]->alignment;
+	      if (align > 16)
+		align = 16;
+	      if (align > 1)
+		pst = (unsigned long *) FFI_ALIGN ((size_t) pst, align);
+	    }
+	  elt = discover_homogeneous_aggregate (cif->abi, arg_types[i], &elnum);
+	  if (elt)
+	    {
+#if _CALL_ELF == 2
+	      union {
+		void *v;
+		unsigned long *ul;
+		float *f;
+		double *d;
+		float128 *f128;
+		size_t p;
+	      } to, from;
+
+	      /* Repackage the aggregate from its parts.  The
+		 aggregate size is not greater than the space taken by
+		 the registers so store back to the register/parameter
+		 save arrays.  */
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+              if (elt == FFI_TYPE_LONGDOUBLE &&
+                  (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+                {
+                  if (pvec + elnum <= end_pvec)
+                    to.v = pvec;
+                  else
+                    to.v = pst;
+                }
+              else
+#endif
+	      if (pfr + elnum <= end_pfr)
+		to.v = pfr;
+	      else
+		to.v = pst;
+
+	      avalue[i] = to.v;
+	      from.ul = pst;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+              if (elt == FFI_TYPE_LONGDOUBLE &&
+                  (cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+                {
+                  do
+                    {
+                      if (pvec < end_pvec && i < nfixedargs)
+                        *to.f128 = *pvec++;
+                      else
+                        *to.f128 = *from.f128;
+                      to.f128++;
+                      from.f128++;
+                    }
+                  while (--elnum != 0);
+                }
+              else
+#endif
+	      if (elt == FFI_TYPE_FLOAT)
+		{
+		  do
+		    {
+		      if (pfr < end_pfr && i < nfixedargs)
+			{
+			  *to.f = (float) pfr->d;
+			  pfr++;
+			}
+		      else
+			*to.f = *from.f;
+		      to.f++;
+		      from.f++;
+		    }
+		  while (--elnum != 0);
+		}
+	      else
+		{
+		  do
+		    {
+		      if (pfr < end_pfr && i < nfixedargs)
+			{
+			  *to.d = pfr->d;
+			  pfr++;
+			}
+		      else
+			*to.d = *from.d;
+		      to.d++;
+		      from.d++;
+		    }
+		  while (--elnum != 0);
+		}
+#else
+	      if (elt == FFI_TYPE_FLOAT)
+		goto do_float;
+	      else
+		goto do_double;
+#endif
+	    }
+	  else
+	    {
+#ifndef __LITTLE_ENDIAN__
+	      /* Structures with size less than eight bytes are passed
+		 left-padded.  */
+	      if (arg_types[i]->size < 8)
+		avalue[i] = (char *) pst + 8 - arg_types[i]->size;
+	      else
+#endif
+		avalue[i] = pst;
+	    }
+	  pst += (arg_types[i]->size + 7) / 8;
+	  break;
+
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+	case FFI_TYPE_LONGDOUBLE:
+          if ((cif->abi & FFI_LINUX_LONG_DOUBLE_IEEE128) != 0)
+            {
+              if (((unsigned long) pst & 0xF) != 0)
+                ++pst;
+              if (pvec < end_pvec && i < nfixedargs)
+                avalue[i] = pvec++;
+              else
+                avalue[i] = pst;
+              pst += 2;
+              break;
+            }
+          else if ((cif->abi & FFI_LINUX_LONG_DOUBLE_128) != 0)
+	    {
+	      if (pfr + 1 < end_pfr && i + 1 < nfixedargs)
+		{
+		  avalue[i] = pfr;
+		  pfr += 2;
+		}
+	      else
+		{
+		  if (pfr < end_pfr && i < nfixedargs)
+		    {
+		      /* Passed partly in f13 and partly on the stack.
+			 Move it all to the stack.  */
+		      *pst = *(unsigned long *) pfr;
+		      pfr++;
+		    }
+		  avalue[i] = pst;
+		}
+	      pst += 2;
+	      break;
+	    }
+	  /* Fall through.  */
+#endif
+	case FFI_TYPE_DOUBLE:
+#if _CALL_ELF != 2
+	do_double:
+#endif
+	  /* On the outgoing stack all values are aligned to 8 */
+	  /* there are 13 64bit floating point registers */
+
+	  if (pfr < end_pfr && i < nfixedargs)
+	    {
+	      avalue[i] = pfr;
+	      pfr++;
+	    }
+	  else
+	    avalue[i] = pst;
+	  pst++;
+	  break;
+
+	case FFI_TYPE_FLOAT:
+#if _CALL_ELF != 2
+	do_float:
+#endif
+	  if (pfr < end_pfr && i < nfixedargs)
+	    {
+	      /* Float values are stored as doubles in the
+		 ffi_closure_LINUX64 code.  Fix them here.  */
+	      pfr->f = (float) pfr->d;
+	      avalue[i] = pfr;
+	      pfr++;
+	    }
+	  else
+	    {
+#ifndef __LITTLE_ENDIAN__
+	      avalue[i] = (char *) pst + 4;
+#else
+	      avalue[i] = pst;
+#endif
+	    }
+	  pst++;
+	  break;
+
+	default:
+	  FFI_ASSERT (0);
+	}
+
+      i++;
+    }
+
+  (*fun) (cif, rvalue, avalue, user_data);
+
+  /* Tell ffi_closure_LINUX64 how to perform return type promotions.  */
+  if ((cif->flags & FLAG_RETURNS_SMST) != 0)
+    {
+      if ((cif->flags & (FLAG_RETURNS_FP | FLAG_RETURNS_VEC)) == 0)
+	return FFI_V2_TYPE_SMALL_STRUCT + cif->rtype->size - 1;
+      else if ((cif->flags & FLAG_RETURNS_VEC) != 0)
+        return FFI_V2_TYPE_VECTOR_HOMOG;
+      else if ((cif->flags & FLAG_RETURNS_64BITS) != 0)
+	return FFI_V2_TYPE_DOUBLE_HOMOG;
+      else
+	return FFI_V2_TYPE_FLOAT_HOMOG;
+    }
+  if ((cif->flags & FLAG_RETURNS_VEC) != 0)
+    return FFI_V2_TYPE_VECTOR;
+  return cif->rtype->type;
+}
+#endif
diff --git a/contrib/restricted/libffi/src/powerpc/ffi_powerpc.h b/contrib/restricted/libffi/src/powerpc/ffi_powerpc.h
index 7956cf81d0..5ee2a7095a 100644
--- a/contrib/restricted/libffi/src/powerpc/ffi_powerpc.h
+++ b/contrib/restricted/libffi/src/powerpc/ffi_powerpc.h
@@ -1,105 +1,105 @@
-/* ----------------------------------------------------------------------- 
-   ffi_powerpc.h - Copyright (C) 2013 IBM 
-                   Copyright (C) 2011 Anthony Green 
-                   Copyright (C) 2011 Kyle Moffett 
-                   Copyright (C) 2008 Red Hat, Inc 
-                   Copyright (C) 2007, 2008 Free Software Foundation, Inc 
-                   Copyright (c) 1998 Geoffrey Keating 
- 
-   PowerPC Foreign Function Interface 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR 
-   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
-   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
-   OTHER DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-enum { 
-  /* The assembly depends on these exact flags.  */ 
-  /* These go in cr7 */ 
-  FLAG_RETURNS_SMST     = 1 << (31-31), /* Used for FFI_SYSV small structs.  */ 
-  FLAG_RETURNS_NOTHING  = 1 << (31-30), 
-  FLAG_RETURNS_FP       = 1 << (31-29), 
-  FLAG_RETURNS_VEC      = 1 << (31-28), 
- 
-  /* These go in cr6 */ 
-  FLAG_RETURNS_64BITS   = 1 << (31-27), 
-  FLAG_RETURNS_128BITS  = 1 << (31-26), 
- 
-  FLAG_COMPAT           = 1 << (31- 8), /* Not used by assembly */ 
- 
-  /* These go in cr1 */ 
-  FLAG_ARG_NEEDS_COPY   = 1 << (31- 7), /* Used by sysv code */ 
-  FLAG_ARG_NEEDS_PSAVE  = FLAG_ARG_NEEDS_COPY, /* Used by linux64 code */ 
-  FLAG_FP_ARGUMENTS     = 1 << (31- 6), /* cr1.eq; specified by ABI */ 
-  FLAG_4_GPR_ARGUMENTS  = 1 << (31- 5), 
-  FLAG_RETVAL_REFERENCE = 1 << (31- 4), 
-  FLAG_VEC_ARGUMENTS    = 1 << (31- 3), 
-}; 
- 
-typedef union 
-{ 
-  float f; 
-  double d; 
-} ffi_dblfl; 
- 
-#if defined(__FLOAT128_TYPE__) 
-typedef _Float128 float128; 
-#elif defined(__FLOAT128__) 
-typedef __float128 float128; 
-#else 
-typedef __int128 float128; 
-#endif 
- 
-void FFI_HIDDEN ffi_closure_SYSV (void); 
-void FFI_HIDDEN ffi_go_closure_sysv (void); 
-void FFI_HIDDEN ffi_call_SYSV(extended_cif *, void (*)(void), void *, 
-			      unsigned, void *, int); 
- 
-void FFI_HIDDEN ffi_prep_types_sysv (ffi_abi); 
-ffi_status FFI_HIDDEN ffi_prep_cif_sysv (ffi_cif *); 
-ffi_status FFI_HIDDEN ffi_prep_closure_loc_sysv (ffi_closure *, 
-						 ffi_cif *, 
-						 void (*) (ffi_cif *, void *, 
-							   void **, void *), 
-						 void *, void *); 
-int FFI_HIDDEN ffi_closure_helper_SYSV (ffi_cif *, 
-					void (*) (ffi_cif *, void *, 
-						  void **, void *), 
-					void *, void *, unsigned long *, 
-					ffi_dblfl *, unsigned long *); 
- 
-void FFI_HIDDEN ffi_call_LINUX64(extended_cif *, void (*) (void), void *, 
-				 unsigned long, void *, long); 
-void FFI_HIDDEN ffi_closure_LINUX64 (void); 
-void FFI_HIDDEN ffi_go_closure_linux64 (void); 
- 
-void FFI_HIDDEN ffi_prep_types_linux64 (ffi_abi); 
-ffi_status FFI_HIDDEN ffi_prep_cif_linux64 (ffi_cif *); 
-ffi_status FFI_HIDDEN ffi_prep_cif_linux64_var (ffi_cif *, unsigned int, 
-						unsigned int); 
-void FFI_HIDDEN ffi_prep_args64 (extended_cif *, unsigned long *const); 
-ffi_status FFI_HIDDEN ffi_prep_closure_loc_linux64 (ffi_closure *, ffi_cif *, 
-						    void (*) (ffi_cif *, void *, 
-							      void **, void *), 
-						    void *, void *); 
-int FFI_HIDDEN ffi_closure_helper_LINUX64 (ffi_cif *, 
-					   void (*) (ffi_cif *, void *, 
-						     void **, void *), 
-					   void *, void *, 
-					   unsigned long *, ffi_dblfl *, 
-					   float128 *); 
+/* -----------------------------------------------------------------------
+   ffi_powerpc.h - Copyright (C) 2013 IBM
+                   Copyright (C) 2011 Anthony Green
+                   Copyright (C) 2011 Kyle Moffett
+                   Copyright (C) 2008 Red Hat, Inc
+                   Copyright (C) 2007, 2008 Free Software Foundation, Inc
+                   Copyright (c) 1998 Geoffrey Keating
+
+   PowerPC Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+enum {
+  /* The assembly depends on these exact flags.  */
+  /* These go in cr7 */
+  FLAG_RETURNS_SMST     = 1 << (31-31), /* Used for FFI_SYSV small structs.  */
+  FLAG_RETURNS_NOTHING  = 1 << (31-30),
+  FLAG_RETURNS_FP       = 1 << (31-29),
+  FLAG_RETURNS_VEC      = 1 << (31-28),
+
+  /* These go in cr6 */
+  FLAG_RETURNS_64BITS   = 1 << (31-27),
+  FLAG_RETURNS_128BITS  = 1 << (31-26),
+
+  FLAG_COMPAT           = 1 << (31- 8), /* Not used by assembly */
+
+  /* These go in cr1 */
+  FLAG_ARG_NEEDS_COPY   = 1 << (31- 7), /* Used by sysv code */
+  FLAG_ARG_NEEDS_PSAVE  = FLAG_ARG_NEEDS_COPY, /* Used by linux64 code */
+  FLAG_FP_ARGUMENTS     = 1 << (31- 6), /* cr1.eq; specified by ABI */
+  FLAG_4_GPR_ARGUMENTS  = 1 << (31- 5),
+  FLAG_RETVAL_REFERENCE = 1 << (31- 4),
+  FLAG_VEC_ARGUMENTS    = 1 << (31- 3),
+};
+
+typedef union
+{
+  float f;
+  double d;
+} ffi_dblfl;
+
+#if defined(__FLOAT128_TYPE__)
+typedef _Float128 float128;
+#elif defined(__FLOAT128__)
+typedef __float128 float128;
+#else
+typedef __int128 float128;
+#endif
+
+void FFI_HIDDEN ffi_closure_SYSV (void);
+void FFI_HIDDEN ffi_go_closure_sysv (void);
+void FFI_HIDDEN ffi_call_SYSV(extended_cif *, void (*)(void), void *,
+			      unsigned, void *, int);
+
+void FFI_HIDDEN ffi_prep_types_sysv (ffi_abi);
+ffi_status FFI_HIDDEN ffi_prep_cif_sysv (ffi_cif *);
+ffi_status FFI_HIDDEN ffi_prep_closure_loc_sysv (ffi_closure *,
+						 ffi_cif *,
+						 void (*) (ffi_cif *, void *,
+							   void **, void *),
+						 void *, void *);
+int FFI_HIDDEN ffi_closure_helper_SYSV (ffi_cif *,
+					void (*) (ffi_cif *, void *,
+						  void **, void *),
+					void *, void *, unsigned long *,
+					ffi_dblfl *, unsigned long *);
+
+void FFI_HIDDEN ffi_call_LINUX64(extended_cif *, void (*) (void), void *,
+				 unsigned long, void *, long);
+void FFI_HIDDEN ffi_closure_LINUX64 (void);
+void FFI_HIDDEN ffi_go_closure_linux64 (void);
+
+void FFI_HIDDEN ffi_prep_types_linux64 (ffi_abi);
+ffi_status FFI_HIDDEN ffi_prep_cif_linux64 (ffi_cif *);
+ffi_status FFI_HIDDEN ffi_prep_cif_linux64_var (ffi_cif *, unsigned int,
+						unsigned int);
+void FFI_HIDDEN ffi_prep_args64 (extended_cif *, unsigned long *const);
+ffi_status FFI_HIDDEN ffi_prep_closure_loc_linux64 (ffi_closure *, ffi_cif *,
+						    void (*) (ffi_cif *, void *,
+							      void **, void *),
+						    void *, void *);
+int FFI_HIDDEN ffi_closure_helper_LINUX64 (ffi_cif *,
+					   void (*) (ffi_cif *, void *,
+						     void **, void *),
+					   void *, void *,
+					   unsigned long *, ffi_dblfl *,
+					   float128 *);
diff --git a/contrib/restricted/libffi/src/powerpc/ffi_sysv.c b/contrib/restricted/libffi/src/powerpc/ffi_sysv.c
index d641d929e3..4078e75116 100644
--- a/contrib/restricted/libffi/src/powerpc/ffi_sysv.c
+++ b/contrib/restricted/libffi/src/powerpc/ffi_sysv.c
@@ -1,923 +1,923 @@
-/* ----------------------------------------------------------------------- 
-   ffi_sysv.c - Copyright (C) 2013 IBM 
-                Copyright (C) 2011 Anthony Green 
-                Copyright (C) 2011 Kyle Moffett 
-                Copyright (C) 2008 Red Hat, Inc 
-                Copyright (C) 2007, 2008 Free Software Foundation, Inc 
-                Copyright (c) 1998 Geoffrey Keating 
- 
-   PowerPC Foreign Function Interface 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS 
-   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
-   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR 
-   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 
-   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 
-   OTHER DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-#include "ffi.h" 
- 
-#ifndef POWERPC64 
-#include "ffi_common.h" 
-#include "ffi_powerpc.h" 
- 
- 
-/* About the SYSV ABI.  */ 
-#define ASM_NEEDS_REGISTERS 6 
-#define NUM_GPR_ARG_REGISTERS 8 
-#define NUM_FPR_ARG_REGISTERS 8 
- 
- 
-#if HAVE_LONG_DOUBLE_VARIANT && FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-/* Adjust size of ffi_type_longdouble.  */ 
-void FFI_HIDDEN 
-ffi_prep_types_sysv (ffi_abi abi) 
-{ 
-  if ((abi & (FFI_SYSV | FFI_SYSV_LONG_DOUBLE_128)) == FFI_SYSV) 
-    { 
-      ffi_type_longdouble.size = 8; 
-      ffi_type_longdouble.alignment = 8; 
-    } 
-  else 
-    { 
-      ffi_type_longdouble.size = 16; 
-      ffi_type_longdouble.alignment = 16; 
-    } 
-} 
-#endif 
- 
-/* Transform long double, double and float to other types as per abi.  */ 
-static int 
-translate_float (int abi, int type) 
-{ 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-  if (type == FFI_TYPE_LONGDOUBLE 
-      && (abi & FFI_SYSV_LONG_DOUBLE_128) == 0) 
-    type = FFI_TYPE_DOUBLE; 
-#endif 
-  if ((abi & FFI_SYSV_SOFT_FLOAT) != 0) 
-    { 
-      if (type == FFI_TYPE_FLOAT) 
-	type = FFI_TYPE_UINT32; 
-      else if (type == FFI_TYPE_DOUBLE) 
-	type = FFI_TYPE_UINT64; 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-      else if (type == FFI_TYPE_LONGDOUBLE) 
-	type = FFI_TYPE_UINT128; 
-    } 
-  else if ((abi & FFI_SYSV_IBM_LONG_DOUBLE) == 0) 
-    { 
-      if (type == FFI_TYPE_LONGDOUBLE) 
-	type = FFI_TYPE_STRUCT; 
-#endif 
-    } 
-  return type; 
-} 
- 
-/* Perform machine dependent cif processing */ 
-static ffi_status 
-ffi_prep_cif_sysv_core (ffi_cif *cif) 
-{ 
-  ffi_type **ptr; 
-  unsigned bytes; 
-  unsigned i, fpr_count = 0, gpr_count = 0, stack_count = 0; 
-  unsigned flags = cif->flags; 
-  unsigned struct_copy_size = 0; 
-  unsigned type = cif->rtype->type; 
-  unsigned size = cif->rtype->size; 
- 
-  /* The machine-independent calculation of cif->bytes doesn't work 
-     for us.  Redo the calculation.  */ 
- 
-  /* Space for the frame pointer, callee's LR, and the asm's temp regs.  */ 
-  bytes = (2 + ASM_NEEDS_REGISTERS) * sizeof (int); 
- 
-  /* Space for the GPR registers.  */ 
-  bytes += NUM_GPR_ARG_REGISTERS * sizeof (int); 
- 
-  /* Return value handling.  The rules for SYSV are as follows: 
-     - 32-bit (or less) integer values are returned in gpr3; 
-     - Structures of size <= 4 bytes also returned in gpr3; 
-     - 64-bit integer values and structures between 5 and 8 bytes are returned 
-     in gpr3 and gpr4; 
-     - Larger structures are allocated space and a pointer is passed as 
-     the first argument. 
-     - Single/double FP values are returned in fpr1; 
-     - long doubles (if not equivalent to double) are returned in 
-     fpr1,fpr2 for Linux and as for large structs for SysV.  */ 
- 
-  type = translate_float (cif->abi, type); 
- 
-  switch (type) 
-    { 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-    case FFI_TYPE_LONGDOUBLE: 
-      flags |= FLAG_RETURNS_128BITS; 
-      /* Fall through.  */ 
-#endif 
-    case FFI_TYPE_DOUBLE: 
-      flags |= FLAG_RETURNS_64BITS; 
-      /* Fall through.  */ 
-    case FFI_TYPE_FLOAT: 
-      flags |= FLAG_RETURNS_FP; 
-#ifdef __NO_FPRS__ 
-      return FFI_BAD_ABI; 
-#endif 
-      break; 
- 
-    case FFI_TYPE_UINT128: 
-      flags |= FLAG_RETURNS_128BITS; 
-      /* Fall through.  */ 
-    case FFI_TYPE_UINT64: 
-    case FFI_TYPE_SINT64: 
-      flags |= FLAG_RETURNS_64BITS; 
-      break; 
- 
-    case FFI_TYPE_STRUCT: 
-      /* The final SYSV ABI says that structures smaller or equal 8 bytes 
-	 are returned in r3/r4.  A draft ABI used by linux instead 
-	 returns them in memory.  */ 
-      if ((cif->abi & FFI_SYSV_STRUCT_RET) != 0 && size <= 8) 
-	{ 
-	  flags |= FLAG_RETURNS_SMST; 
-	  break; 
-	} 
-      gpr_count++; 
-      flags |= FLAG_RETVAL_REFERENCE; 
-      /* Fall through.  */ 
-    case FFI_TYPE_VOID: 
-      flags |= FLAG_RETURNS_NOTHING; 
-      break; 
- 
-    default: 
-      /* Returns 32-bit integer, or similar.  Nothing to do here.  */ 
-      break; 
-    } 
- 
-  /* The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the 
-     first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest 
-     goes on the stack.  Structures and long doubles (if not equivalent 
-     to double) are passed as a pointer to a copy of the structure. 
-     Stuff on the stack needs to keep proper alignment.  */ 
-  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++) 
-    { 
-      unsigned short typenum = (*ptr)->type; 
- 
-      typenum = translate_float (cif->abi, typenum); 
- 
-      switch (typenum) 
-	{ 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-	case FFI_TYPE_LONGDOUBLE: 
-	  if (fpr_count >= NUM_FPR_ARG_REGISTERS - 1) 
-	    { 
-	      fpr_count = NUM_FPR_ARG_REGISTERS; 
-	      /* 8-byte align long doubles.  */ 
-	      stack_count += stack_count & 1; 
-	      stack_count += 4; 
-	    } 
-	  else 
-	    fpr_count += 2; 
-#ifdef __NO_FPRS__ 
-	  return FFI_BAD_ABI; 
-#endif 
-	  break; 
-#endif 
- 
-	case FFI_TYPE_DOUBLE: 
-	  if (fpr_count >= NUM_FPR_ARG_REGISTERS) 
-	    { 
-	      /* 8-byte align doubles.  */ 
-	      stack_count += stack_count & 1; 
-	      stack_count += 2; 
-	    } 
-	  else 
-	    fpr_count += 1; 
-#ifdef __NO_FPRS__ 
-	  return FFI_BAD_ABI; 
-#endif 
-	  break; 
- 
-	case FFI_TYPE_FLOAT: 
-	  if (fpr_count >= NUM_FPR_ARG_REGISTERS) 
-	    /* Yes, we don't follow the ABI, but neither does gcc.  */ 
-	    stack_count += 1; 
-	  else 
-	    fpr_count += 1; 
-#ifdef __NO_FPRS__ 
-	  return FFI_BAD_ABI; 
-#endif 
-	  break; 
- 
-	case FFI_TYPE_UINT128: 
-	  /* A long double in FFI_LINUX_SOFT_FLOAT can use only a set 
-	     of four consecutive gprs. If we do not have enough, we 
-	     have to adjust the gpr_count value.  */ 
-	  if (gpr_count >= NUM_GPR_ARG_REGISTERS - 3) 
-	    gpr_count = NUM_GPR_ARG_REGISTERS; 
-	  if (gpr_count >= NUM_GPR_ARG_REGISTERS) 
-	    stack_count += 4; 
-	  else 
-	    gpr_count += 4; 
-	  break; 
- 
-	case FFI_TYPE_UINT64: 
-	case FFI_TYPE_SINT64: 
-	  /* 'long long' arguments are passed as two words, but 
-	     either both words must fit in registers or both go 
-	     on the stack.  If they go on the stack, they must 
-	     be 8-byte-aligned. 
- 
-	     Also, only certain register pairs can be used for 
-	     passing long long int -- specifically (r3,r4), (r5,r6), 
-	     (r7,r8), (r9,r10).  */ 
-	  gpr_count += gpr_count & 1; 
-	  if (gpr_count >= NUM_GPR_ARG_REGISTERS) 
-	    { 
-	      stack_count += stack_count & 1; 
-	      stack_count += 2; 
-	    } 
-	  else 
-	    gpr_count += 2; 
-	  break; 
- 
-	case FFI_TYPE_STRUCT: 
-	  /* We must allocate space for a copy of these to enforce 
-	     pass-by-value.  Pad the space up to a multiple of 16 
-	     bytes (the maximum alignment required for anything under 
-	     the SYSV ABI).  */ 
-	  struct_copy_size += ((*ptr)->size + 15) & ~0xF; 
-	  /* Fall through (allocate space for the pointer).  */ 
- 
-	case FFI_TYPE_POINTER: 
-	case FFI_TYPE_INT: 
-	case FFI_TYPE_UINT32: 
-	case FFI_TYPE_SINT32: 
-	case FFI_TYPE_UINT16: 
-	case FFI_TYPE_SINT16: 
-	case FFI_TYPE_UINT8: 
-	case FFI_TYPE_SINT8: 
-	  /* Everything else is passed as a 4-byte word in a GPR, either 
-	     the object itself or a pointer to it.  */ 
-	  if (gpr_count >= NUM_GPR_ARG_REGISTERS) 
-	    stack_count += 1; 
-	  else 
-	    gpr_count += 1; 
-	  break; 
- 
-	default: 
-	  FFI_ASSERT (0); 
-	} 
-    } 
- 
-  if (fpr_count != 0) 
-    flags |= FLAG_FP_ARGUMENTS; 
-  if (gpr_count > 4) 
-    flags |= FLAG_4_GPR_ARGUMENTS; 
-  if (struct_copy_size != 0) 
-    flags |= FLAG_ARG_NEEDS_COPY; 
- 
-  /* Space for the FPR registers, if needed.  */ 
-  if (fpr_count != 0) 
-    bytes += NUM_FPR_ARG_REGISTERS * sizeof (double); 
- 
-  /* Stack space.  */ 
-  bytes += stack_count * sizeof (int); 
- 
-  /* The stack space allocated needs to be a multiple of 16 bytes.  */ 
-  bytes = (bytes + 15) & ~0xF; 
- 
-  /* Add in the space for the copied structures.  */ 
-  bytes += struct_copy_size; 
- 
-  cif->flags = flags; 
-  cif->bytes = bytes; 
- 
-  return FFI_OK; 
-} 
- 
-ffi_status FFI_HIDDEN 
-ffi_prep_cif_sysv (ffi_cif *cif) 
-{ 
-  if ((cif->abi & FFI_SYSV) == 0) 
-    { 
-      /* This call is from old code.  Translate to new ABI values.  */ 
-      cif->flags |= FLAG_COMPAT; 
-      switch (cif->abi) 
-	{ 
-	default: 
-	  return FFI_BAD_ABI; 
- 
-	case FFI_COMPAT_SYSV: 
-	  cif->abi = FFI_SYSV | FFI_SYSV_STRUCT_RET | FFI_SYSV_LONG_DOUBLE_128; 
-	  break; 
- 
-	case FFI_COMPAT_GCC_SYSV: 
-	  cif->abi = FFI_SYSV | FFI_SYSV_LONG_DOUBLE_128; 
-	  break; 
- 
-	case FFI_COMPAT_LINUX: 
-	  cif->abi = (FFI_SYSV | FFI_SYSV_IBM_LONG_DOUBLE 
-		      | FFI_SYSV_LONG_DOUBLE_128); 
-	  break; 
- 
-	case FFI_COMPAT_LINUX_SOFT_FLOAT: 
-	  cif->abi = (FFI_SYSV | FFI_SYSV_SOFT_FLOAT | FFI_SYSV_IBM_LONG_DOUBLE 
-		      | FFI_SYSV_LONG_DOUBLE_128); 
-	  break; 
-	} 
-    } 
-  return ffi_prep_cif_sysv_core (cif); 
-} 
- 
-/* ffi_prep_args_SYSV is called by the assembly routine once stack space 
-   has been allocated for the function's arguments. 
- 
-   The stack layout we want looks like this: 
- 
-   |   Return address from ffi_call_SYSV 4bytes	|	higher addresses 
-   |--------------------------------------------| 
-   |   Previous backchain pointer	4	|       stack pointer here 
-   |--------------------------------------------|<+ <<<	on entry to 
-   |   Saved r28-r31			4*4	| |	ffi_call_SYSV 
-   |--------------------------------------------| | 
-   |   GPR registers r3-r10		8*4	| |	ffi_call_SYSV 
-   |--------------------------------------------| | 
-   |   FPR registers f1-f8 (optional)	8*8	| | 
-   |--------------------------------------------| |	stack	| 
-   |   Space for copied structures		| |	grows	| 
-   |--------------------------------------------| |	down    V 
-   |   Parameters that didn't fit in registers  | | 
-   |--------------------------------------------| |	lower addresses 
-   |   Space for callee's LR		4	| | 
-   |--------------------------------------------| |	stack pointer here 
-   |   Current backchain pointer	4	|-/	during 
-   |--------------------------------------------|   <<<	ffi_call_SYSV 
- 
-*/ 
- 
-void FFI_HIDDEN 
-ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack) 
-{ 
-  const unsigned bytes = ecif->cif->bytes; 
-  const unsigned flags = ecif->cif->flags; 
- 
-  typedef union 
-  { 
-    char *c; 
-    unsigned *u; 
-    long long *ll; 
-    float *f; 
-    double *d; 
-  } valp; 
- 
-  /* 'stacktop' points at the previous backchain pointer.  */ 
-  valp stacktop; 
- 
-  /* 'gpr_base' points at the space for gpr3, and grows upwards as 
-     we use GPR registers.  */ 
-  valp gpr_base; 
-  valp gpr_end; 
- 
-#ifndef __NO_FPRS__ 
-  /* 'fpr_base' points at the space for fpr1, and grows upwards as 
-     we use FPR registers.  */ 
-  valp fpr_base; 
-  valp fpr_end; 
-#endif 
- 
-  /* 'copy_space' grows down as we put structures in it.  It should 
-     stay 16-byte aligned.  */ 
-  valp copy_space; 
- 
-  /* 'next_arg' grows up as we put parameters in it.  */ 
-  valp next_arg; 
- 
-  int i; 
-  ffi_type **ptr; 
-#ifndef __NO_FPRS__ 
-  double double_tmp; 
-#endif 
-  union 
-  { 
-    void **v; 
-    char **c; 
-    signed char **sc; 
-    unsigned char **uc; 
-    signed short **ss; 
-    unsigned short **us; 
-    unsigned int **ui; 
-    long long **ll; 
-    float **f; 
-    double **d; 
-  } p_argv; 
-  size_t struct_copy_size; 
-  unsigned gprvalue; 
- 
-  stacktop.c = (char *) stack + bytes; 
-  gpr_end.u = stacktop.u - ASM_NEEDS_REGISTERS; 
-  gpr_base.u = gpr_end.u - NUM_GPR_ARG_REGISTERS; 
-#ifndef __NO_FPRS__ 
-  fpr_end.d = gpr_base.d; 
-  fpr_base.d = fpr_end.d - NUM_FPR_ARG_REGISTERS; 
-  copy_space.c = ((flags & FLAG_FP_ARGUMENTS) ? fpr_base.c : gpr_base.c); 
-#else 
-  copy_space.c = gpr_base.c; 
-#endif 
-  next_arg.u = stack + 2; 
- 
-  /* Check that everything starts aligned properly.  */ 
-  FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0); 
-  FFI_ASSERT (((unsigned long) copy_space.c & 0xF) == 0); 
-  FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0); 
-  FFI_ASSERT ((bytes & 0xF) == 0); 
-  FFI_ASSERT (copy_space.c >= next_arg.c); 
- 
-  /* Deal with return values that are actually pass-by-reference.  */ 
-  if (flags & FLAG_RETVAL_REFERENCE) 
-    *gpr_base.u++ = (unsigned) (char *) ecif->rvalue; 
- 
-  /* Now for the arguments.  */ 
-  p_argv.v = ecif->avalue; 
-  for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs; 
-       i > 0; 
-       i--, ptr++, p_argv.v++) 
-    { 
-      unsigned int typenum = (*ptr)->type; 
- 
-      typenum = translate_float (ecif->cif->abi, typenum); 
- 
-      /* Now test the translated value */ 
-      switch (typenum) 
-	{ 
-#ifndef __NO_FPRS__ 
-# if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-	case FFI_TYPE_LONGDOUBLE: 
-	  double_tmp = (*p_argv.d)[0]; 
- 
-	  if (fpr_base.d >= fpr_end.d - 1) 
-	    { 
-	      fpr_base.d = fpr_end.d; 
-	      if (((next_arg.u - stack) & 1) != 0) 
-		next_arg.u += 1; 
-	      *next_arg.d = double_tmp; 
-	      next_arg.u += 2; 
-	      double_tmp = (*p_argv.d)[1]; 
-	      *next_arg.d = double_tmp; 
-	      next_arg.u += 2; 
-	    } 
-	  else 
-	    { 
-	      *fpr_base.d++ = double_tmp; 
-	      double_tmp = (*p_argv.d)[1]; 
-	      *fpr_base.d++ = double_tmp; 
-	    } 
-	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); 
-	  break; 
-# endif 
-	case FFI_TYPE_DOUBLE: 
-	  double_tmp = **p_argv.d; 
- 
-	  if (fpr_base.d >= fpr_end.d) 
-	    { 
-	      if (((next_arg.u - stack) & 1) != 0) 
-		next_arg.u += 1; 
-	      *next_arg.d = double_tmp; 
-	      next_arg.u += 2; 
-	    } 
-	  else 
-	    *fpr_base.d++ = double_tmp; 
-	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); 
-	  break; 
- 
-	case FFI_TYPE_FLOAT: 
-	  double_tmp = **p_argv.f; 
-	  if (fpr_base.d >= fpr_end.d) 
-	    { 
-	      *next_arg.f = (float) double_tmp; 
-	      next_arg.u += 1; 
-	    } 
-	  else 
-	    *fpr_base.d++ = double_tmp; 
-	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS); 
-	  break; 
-#endif /* have FPRs */ 
- 
-	case FFI_TYPE_UINT128: 
-	  /* The soft float ABI for long doubles works like this, a long double 
-	     is passed in four consecutive GPRs if available.  A maximum of 2 
-	     long doubles can be passed in gprs.  If we do not have 4 GPRs 
-	     left, the long double is passed on the stack, 4-byte aligned.  */ 
-	  if (gpr_base.u >= gpr_end.u - 3) 
-	    { 
-	      unsigned int ii; 
-	      gpr_base.u = gpr_end.u; 
-	      for (ii = 0; ii < 4; ii++) 
-		{ 
-		  unsigned int int_tmp = (*p_argv.ui)[ii]; 
-		  *next_arg.u++ = int_tmp; 
-		} 
-	    } 
-	  else 
-	    { 
-	      unsigned int ii; 
-	      for (ii = 0; ii < 4; ii++) 
-		{ 
-		  unsigned int int_tmp = (*p_argv.ui)[ii]; 
-		  *gpr_base.u++ = int_tmp; 
-		} 
-	    } 
-	  break; 
- 
-	case FFI_TYPE_UINT64: 
-	case FFI_TYPE_SINT64: 
-	  if (gpr_base.u >= gpr_end.u - 1) 
-	    { 
-	      gpr_base.u = gpr_end.u; 
-	      if (((next_arg.u - stack) & 1) != 0) 
-		next_arg.u++; 
-	      *next_arg.ll = **p_argv.ll; 
-	      next_arg.u += 2; 
-	    } 
-	  else 
-	    { 
-	      /* The abi states only certain register pairs can be 
-		 used for passing long long int specifically (r3,r4), 
-		 (r5,r6), (r7,r8), (r9,r10).  If next arg is long long 
-		 but not correct starting register of pair then skip 
-		 until the proper starting register.  */ 
-	      if (((gpr_end.u - gpr_base.u) & 1) != 0) 
-		gpr_base.u++; 
-	      *gpr_base.ll++ = **p_argv.ll; 
-	    } 
-	  break; 
- 
-	case FFI_TYPE_STRUCT: 
-	  struct_copy_size = ((*ptr)->size + 15) & ~0xF; 
-	  copy_space.c -= struct_copy_size; 
-	  memcpy (copy_space.c, *p_argv.c, (*ptr)->size); 
- 
-	  gprvalue = (unsigned long) copy_space.c; 
- 
-	  FFI_ASSERT (copy_space.c > next_arg.c); 
-	  FFI_ASSERT (flags & FLAG_ARG_NEEDS_COPY); 
-	  goto putgpr; 
- 
-	case FFI_TYPE_UINT8: 
-	  gprvalue = **p_argv.uc; 
-	  goto putgpr; 
-	case FFI_TYPE_SINT8: 
-	  gprvalue = **p_argv.sc; 
-	  goto putgpr; 
-	case FFI_TYPE_UINT16: 
-	  gprvalue = **p_argv.us; 
-	  goto putgpr; 
-	case FFI_TYPE_SINT16: 
-	  gprvalue = **p_argv.ss; 
-	  goto putgpr; 
- 
-	case FFI_TYPE_INT: 
-	case FFI_TYPE_UINT32: 
-	case FFI_TYPE_SINT32: 
-	case FFI_TYPE_POINTER: 
- 
-	  gprvalue = **p_argv.ui; 
- 
-	putgpr: 
-	  if (gpr_base.u >= gpr_end.u) 
-	    *next_arg.u++ = gprvalue; 
-	  else 
-	    *gpr_base.u++ = gprvalue; 
-	  break; 
-	} 
-    } 
- 
-  /* Check that we didn't overrun the stack...  */ 
-  FFI_ASSERT (copy_space.c >= next_arg.c); 
-  FFI_ASSERT (gpr_base.u <= gpr_end.u); 
-#ifndef __NO_FPRS__ 
-  FFI_ASSERT (fpr_base.u <= fpr_end.u); 
-#endif 
-  FFI_ASSERT (((flags & FLAG_4_GPR_ARGUMENTS) != 0) 
-	      == (gpr_end.u - gpr_base.u < 4)); 
-} 
- 
-#define MIN_CACHE_LINE_SIZE 8 
- 
-static void 
-flush_icache (char *wraddr, char *xaddr, int size) 
-{ 
-  int i; 
-  for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE) 
-    __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" 
-		      : : "r" (xaddr + i), "r" (wraddr + i) : "memory"); 
-  __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" "sync;" "isync;" 
-		    : : "r"(xaddr + size - 1), "r"(wraddr + size - 1) 
-		    : "memory"); 
-} 
- 
-ffi_status FFI_HIDDEN 
-ffi_prep_closure_loc_sysv (ffi_closure *closure, 
-			   ffi_cif *cif, 
-			   void (*fun) (ffi_cif *, void *, void **, void *), 
-			   void *user_data, 
-			   void *codeloc) 
-{ 
-  unsigned int *tramp; 
- 
-  if (cif->abi < FFI_SYSV || cif->abi >= FFI_LAST_ABI) 
-    return FFI_BAD_ABI; 
- 
-  tramp = (unsigned int *) &closure->tramp[0]; 
-  tramp[0] = 0x7c0802a6;  /*   mflr    r0 */ 
-  tramp[1] = 0x429f0005;  /*   bcl     20,31,.+4 */ 
-  tramp[2] = 0x7d6802a6;  /*   mflr    r11 */ 
-  tramp[3] = 0x7c0803a6;  /*   mtlr    r0 */ 
-  tramp[4] = 0x800b0018;  /*   lwz     r0,24(r11) */ 
-  tramp[5] = 0x816b001c;  /*   lwz     r11,28(r11) */ 
-  tramp[6] = 0x7c0903a6;  /*   mtctr   r0 */ 
-  tramp[7] = 0x4e800420;  /*   bctr */ 
-  *(void **) &tramp[8] = (void *) ffi_closure_SYSV; /* function */ 
-  *(void **) &tramp[9] = codeloc;                   /* context */ 
- 
-  /* Flush the icache.  */ 
-  flush_icache ((char *)tramp, (char *)codeloc, 8 * 4); 
- 
-  closure->cif = cif; 
-  closure->fun = fun; 
-  closure->user_data = user_data; 
- 
-  return FFI_OK; 
-} 
- 
-/* Basically the trampoline invokes ffi_closure_SYSV, and on 
-   entry, r11 holds the address of the closure. 
-   After storing the registers that could possibly contain 
-   parameters to be passed into the stack frame and setting 
-   up space for a return value, ffi_closure_SYSV invokes the 
-   following helper function to do most of the work.  */ 
- 
-int 
-ffi_closure_helper_SYSV (ffi_cif *cif, 
-			 void (*fun) (ffi_cif *, void *, void **, void *), 
-			 void *user_data, 
-			 void *rvalue, 
-			 unsigned long *pgr, 
-			 ffi_dblfl *pfr, 
-			 unsigned long *pst) 
-{ 
-  /* rvalue is the pointer to space for return value in closure assembly */ 
-  /* pgr is the pointer to where r3-r10 are stored in ffi_closure_SYSV */ 
-  /* pfr is the pointer to where f1-f8 are stored in ffi_closure_SYSV  */ 
-  /* pst is the pointer to outgoing parameter stack in original caller */ 
- 
-  void **          avalue; 
-  ffi_type **      arg_types; 
-  long             i, avn; 
-#ifndef __NO_FPRS__ 
-  long             nf = 0;   /* number of floating registers already used */ 
-#endif 
-  long             ng = 0;   /* number of general registers already used */ 
- 
-  unsigned       size     = cif->rtype->size; 
-  unsigned short rtypenum = cif->rtype->type; 
- 
-  avalue = alloca (cif->nargs * sizeof (void *)); 
- 
-  /* First translate for softfloat/nonlinux */ 
-  rtypenum = translate_float (cif->abi, rtypenum); 
- 
-  /* Copy the caller's structure return value address so that the closure 
-     returns the data directly to the caller. 
-     For FFI_SYSV the result is passed in r3/r4 if the struct size is less 
-     or equal 8 bytes.  */ 
-  if (rtypenum == FFI_TYPE_STRUCT 
-      && !((cif->abi & FFI_SYSV_STRUCT_RET) != 0 && size <= 8)) 
-    { 
-      rvalue = (void *) *pgr; 
-      ng++; 
-      pgr++; 
-    } 
- 
-  i = 0; 
-  avn = cif->nargs; 
-  arg_types = cif->arg_types; 
- 
-  /* Grab the addresses of the arguments from the stack frame.  */ 
-  while (i < avn) { 
-    unsigned short typenum = arg_types[i]->type; 
- 
-    /* We may need to handle some values depending on ABI.  */ 
-    typenum = translate_float (cif->abi, typenum); 
- 
-    switch (typenum) 
-      { 
-#ifndef __NO_FPRS__ 
-      case FFI_TYPE_FLOAT: 
-	/* Unfortunately float values are stored as doubles 
-	   in the ffi_closure_SYSV code (since we don't check 
-	   the type in that routine).  */ 
-	if (nf < NUM_FPR_ARG_REGISTERS) 
-	  { 
-	    /* FIXME? here we are really changing the values 
-	       stored in the original calling routines outgoing 
-	       parameter stack.  This is probably a really 
-	       naughty thing to do but...  */ 
-	    double temp = pfr->d; 
-	    pfr->f = (float) temp; 
-	    avalue[i] = pfr; 
-	    nf++; 
-	    pfr++; 
-	  } 
-	else 
-	  { 
-	    avalue[i] = pst; 
-	    pst += 1; 
-	  } 
-	break; 
- 
-      case FFI_TYPE_DOUBLE: 
-	if (nf < NUM_FPR_ARG_REGISTERS) 
-	  { 
-	    avalue[i] = pfr; 
-	    nf++; 
-	    pfr++; 
-	  } 
-	else 
-	  { 
-	    if (((long) pst) & 4) 
-	      pst++; 
-	    avalue[i] = pst; 
-	    pst += 2; 
-	  } 
-	break; 
- 
-# if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-      case FFI_TYPE_LONGDOUBLE: 
-	if (nf < NUM_FPR_ARG_REGISTERS - 1) 
-	  { 
-	    avalue[i] = pfr; 
-	    pfr += 2; 
-	    nf += 2; 
-	  } 
-	else 
-	  { 
-	    if (((long) pst) & 4) 
-	      pst++; 
-	    avalue[i] = pst; 
-	    pst += 4; 
-	    nf = 8; 
-	  } 
-	break; 
-# endif 
-#endif 
- 
-      case FFI_TYPE_UINT128: 
-	/* Test if for the whole long double, 4 gprs are available. 
-	   otherwise the stuff ends up on the stack.  */ 
-	if (ng < NUM_GPR_ARG_REGISTERS - 3) 
-	  { 
-	    avalue[i] = pgr; 
-	    pgr += 4; 
-	    ng += 4; 
-	  } 
-	else 
-	  { 
-	    avalue[i] = pst; 
-	    pst += 4; 
-	    ng = 8+4; 
-	  } 
-	break; 
- 
-      case FFI_TYPE_SINT8: 
-      case FFI_TYPE_UINT8: 
-#ifndef __LITTLE_ENDIAN__ 
-	if (ng < NUM_GPR_ARG_REGISTERS) 
-	  { 
-	    avalue[i] = (char *) pgr + 3; 
-	    ng++; 
-	    pgr++; 
-	  } 
-	else 
-	  { 
-	    avalue[i] = (char *) pst + 3; 
-	    pst++; 
-	  } 
-	break; 
-#endif 
- 
-      case FFI_TYPE_SINT16: 
-      case FFI_TYPE_UINT16: 
-#ifndef __LITTLE_ENDIAN__ 
-	if (ng < NUM_GPR_ARG_REGISTERS) 
-	  { 
-	    avalue[i] = (char *) pgr + 2; 
-	    ng++; 
-	    pgr++; 
-	  } 
-	else 
-	  { 
-	    avalue[i] = (char *) pst + 2; 
-	    pst++; 
-	  } 
-	break; 
-#endif 
- 
-      case FFI_TYPE_SINT32: 
-      case FFI_TYPE_UINT32: 
-      case FFI_TYPE_POINTER: 
-	if (ng < NUM_GPR_ARG_REGISTERS) 
-	  { 
-	    avalue[i] = pgr; 
-	    ng++; 
-	    pgr++; 
-	  } 
-	else 
-	  { 
-	    avalue[i] = pst; 
-	    pst++; 
-	  } 
-	break; 
- 
-      case FFI_TYPE_STRUCT: 
-	/* Structs are passed by reference. The address will appear in a 
-	   gpr if it is one of the first 8 arguments.  */ 
-	if (ng < NUM_GPR_ARG_REGISTERS) 
-	  { 
-	    avalue[i] = (void *) *pgr; 
-	    ng++; 
-	    pgr++; 
-	  } 
-	else 
-	  { 
-	    avalue[i] = (void *) *pst; 
-	    pst++; 
-	  } 
-	break; 
- 
-      case FFI_TYPE_SINT64: 
-      case FFI_TYPE_UINT64: 
-	/* Passing long long ints are complex, they must 
-	   be passed in suitable register pairs such as 
-	   (r3,r4) or (r5,r6) or (r6,r7), or (r7,r8) or (r9,r10) 
-	   and if the entire pair aren't available then the outgoing 
-	   parameter stack is used for both but an alignment of 8 
-	   must will be kept.  So we must either look in pgr 
-	   or pst to find the correct address for this type 
-	   of parameter.  */ 
-	if (ng < NUM_GPR_ARG_REGISTERS - 1) 
-	  { 
-	    if (ng & 1) 
-	      { 
-		/* skip r4, r6, r8 as starting points */ 
-		ng++; 
-		pgr++; 
-	      } 
-	    avalue[i] = pgr; 
-	    ng += 2; 
-	    pgr += 2; 
-	  } 
-	else 
-	  { 
-	    if (((long) pst) & 4) 
-	      pst++; 
-	    avalue[i] = pst; 
-	    pst += 2; 
-	    ng = NUM_GPR_ARG_REGISTERS; 
-	  } 
-	break; 
- 
-      default: 
-	FFI_ASSERT (0); 
-      } 
- 
-    i++; 
-  } 
- 
-  (*fun) (cif, rvalue, avalue, user_data); 
- 
-  /* Tell ffi_closure_SYSV how to perform return type promotions. 
-     Because the FFI_SYSV ABI returns the structures <= 8 bytes in 
-     r3/r4 we have to tell ffi_closure_SYSV how to treat them.  We 
-     combine the base type FFI_SYSV_TYPE_SMALL_STRUCT with the size of 
-     the struct less one.  We never have a struct with size zero. 
-     See the comment in ffitarget.h about ordering.  */ 
-  if (rtypenum == FFI_TYPE_STRUCT 
-      && (cif->abi & FFI_SYSV_STRUCT_RET) != 0 && size <= 8) 
-    return FFI_SYSV_TYPE_SMALL_STRUCT - 1 + size; 
-  return rtypenum; 
-} 
-#endif 
+/* -----------------------------------------------------------------------
+   ffi_sysv.c - Copyright (C) 2013 IBM
+                Copyright (C) 2011 Anthony Green
+                Copyright (C) 2011 Kyle Moffett
+                Copyright (C) 2008 Red Hat, Inc
+                Copyright (C) 2007, 2008 Free Software Foundation, Inc
+                Copyright (c) 1998 Geoffrey Keating
+
+   PowerPC Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, EXPRESS
+   OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+   IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#include "ffi.h"
+
+#ifndef POWERPC64
+#include "ffi_common.h"
+#include "ffi_powerpc.h"
+
+
+/* About the SYSV ABI.  */
+#define ASM_NEEDS_REGISTERS 6
+#define NUM_GPR_ARG_REGISTERS 8
+#define NUM_FPR_ARG_REGISTERS 8
+
+
+#if HAVE_LONG_DOUBLE_VARIANT && FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+/* Set the size and alignment of the global ffi_type_longdouble for ABI.  */
+void FFI_HIDDEN
+ffi_prep_types_sysv (ffi_abi abi)
+{
+  if ((abi & (FFI_SYSV | FFI_SYSV_LONG_DOUBLE_128)) == FFI_SYSV)
+    {  /* FFI_SYSV set, FFI_SYSV_LONG_DOUBLE_128 clear.  */
+      ffi_type_longdouble.size = 8;
+      ffi_type_longdouble.alignment = 8;
+    }
+  else
+    {  /* Every other combination of those bits.  */
+      ffi_type_longdouble.size = 16;
+      ffi_type_longdouble.alignment = 16;
+    }
+}
+#endif
+
+/* Map FP TYPE to the type ABI passes it as; other types pass through.  */
+static int
+translate_float (int abi, int type)
+{
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+  if (type == FFI_TYPE_LONGDOUBLE
+      && (abi & FFI_SYSV_LONG_DOUBLE_128) == 0)
+    type = FFI_TYPE_DOUBLE;  /* No 128-bit flag: treat as double.  */
+#endif
+  if ((abi & FFI_SYSV_SOFT_FLOAT) != 0)
+    {  /* Soft float: FP types map to integer types.  */
+      if (type == FFI_TYPE_FLOAT)
+	type = FFI_TYPE_UINT32;
+      else if (type == FFI_TYPE_DOUBLE)
+	type = FFI_TYPE_UINT64;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+      else if (type == FFI_TYPE_LONGDOUBLE)
+	type = FFI_TYPE_UINT128;
+    }
+  else if ((abi & FFI_SYSV_IBM_LONG_DOUBLE) == 0)
+    {  /* Hard float without the IBM long double flag.  */
+      if (type == FFI_TYPE_LONGDOUBLE)
+	type = FFI_TYPE_STRUCT;
+#endif
+    }  /* Closes the soft-float block if the #if above is false.  */
+  return type;
+}
+
+/* Machine-dependent cif setup: compute cif->flags and cif->bytes.  */
+static ffi_status
+ffi_prep_cif_sysv_core (ffi_cif *cif)
+{
+  ffi_type **ptr;
+  unsigned bytes;
+  unsigned i, fpr_count = 0, gpr_count = 0, stack_count = 0;
+  unsigned flags = cif->flags;  /* Keep bits that are already set.  */
+  unsigned struct_copy_size = 0;
+  unsigned type = cif->rtype->type;
+  unsigned size = cif->rtype->size;
+
+  /* The machine-independent calculation of cif->bytes doesn't work
+     for us.  Redo the calculation.  */
+
+  /* Space for the frame pointer, callee's LR, and the asm's temp regs.  */
+  bytes = (2 + ASM_NEEDS_REGISTERS) * sizeof (int);
+
+  /* Space for the GPR registers.  */
+  bytes += NUM_GPR_ARG_REGISTERS * sizeof (int);
+
+  /* Return value handling.  The rules for SYSV are as follows:
+     - 32-bit (or less) integer values are returned in gpr3;
+     - Structures of size <= 4 bytes also returned in gpr3;
+     - 64-bit integer values and structures between 5 and 8 bytes are returned
+     in gpr3 and gpr4;
+     - Larger structures are allocated space and a pointer is passed as
+     the first argument.
+     - Single/double FP values are returned in fpr1;
+     - long doubles (if not equivalent to double) are returned in
+     fpr1,fpr2 for Linux and as for large structs for SysV.  */
+
+  type = translate_float (cif->abi, type);
+
+  switch (type)
+    {
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+    case FFI_TYPE_LONGDOUBLE:
+      flags |= FLAG_RETURNS_128BITS;
+      /* Fall through.  */
+#endif
+    case FFI_TYPE_DOUBLE:
+      flags |= FLAG_RETURNS_64BITS;
+      /* Fall through.  */
+    case FFI_TYPE_FLOAT:
+      flags |= FLAG_RETURNS_FP;
+#ifdef __NO_FPRS__
+      return FFI_BAD_ABI;  /* FP return type but no FPRs.  */
+#endif
+      break;
+
+    case FFI_TYPE_UINT128:
+      flags |= FLAG_RETURNS_128BITS;
+      /* Fall through.  */
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+      flags |= FLAG_RETURNS_64BITS;
+      break;
+
+    case FFI_TYPE_STRUCT:
+      /* The final SYSV ABI says that structures smaller or equal 8 bytes
+	 are returned in r3/r4.  A draft ABI used by linux instead
+	 returns them in memory.  */
+      if ((cif->abi & FFI_SYSV_STRUCT_RET) != 0 && size <= 8)
+	{
+	  flags |= FLAG_RETURNS_SMST;
+	  break;
+	}
+      gpr_count++;  /* The return-area pointer takes the first GPR.  */
+      flags |= FLAG_RETVAL_REFERENCE;
+      /* Fall through.  */
+    case FFI_TYPE_VOID:
+      flags |= FLAG_RETURNS_NOTHING;
+      break;
+
+    default:
+      /* Returns 32-bit integer, or similar.  Nothing to do here.  */
+      break;
+    }
+
+  /* The first NUM_GPR_ARG_REGISTERS words of integer arguments, and the
+     first NUM_FPR_ARG_REGISTERS fp arguments, go in registers; the rest
+     goes on the stack.  Structures and long doubles (if not equivalent
+     to double) are passed as a pointer to a copy of the structure.
+     Stuff on the stack needs to keep proper alignment.  */
+  for (ptr = cif->arg_types, i = cif->nargs; i > 0; i--, ptr++)
+    {
+      unsigned short typenum = (*ptr)->type;
+
+      typenum = translate_float (cif->abi, typenum);
+
+      switch (typenum)
+	{
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+	case FFI_TYPE_LONGDOUBLE:
+	  if (fpr_count >= NUM_FPR_ARG_REGISTERS - 1)
+	    {
+	      fpr_count = NUM_FPR_ARG_REGISTERS;
+	      /* 8-byte align long doubles.  */
+	      stack_count += stack_count & 1;
+	      stack_count += 4;
+	    }
+	  else
+	    fpr_count += 2;
+#ifdef __NO_FPRS__
+	  return FFI_BAD_ABI;
+#endif
+	  break;
+#endif
+
+	case FFI_TYPE_DOUBLE:
+	  if (fpr_count >= NUM_FPR_ARG_REGISTERS)
+	    {
+	      /* 8-byte align doubles.  */
+	      stack_count += stack_count & 1;
+	      stack_count += 2;
+	    }
+	  else
+	    fpr_count += 1;
+#ifdef __NO_FPRS__
+	  return FFI_BAD_ABI;
+#endif
+	  break;
+
+	case FFI_TYPE_FLOAT:
+	  if (fpr_count >= NUM_FPR_ARG_REGISTERS)
+	    /* Yes, we don't follow the ABI, but neither does gcc.  */
+	    stack_count += 1;
+	  else
+	    fpr_count += 1;
+#ifdef __NO_FPRS__
+	  return FFI_BAD_ABI;
+#endif
+	  break;
+
+	case FFI_TYPE_UINT128:
+	  /* A soft-float (FFI_SYSV_SOFT_FLOAT) long double can use only a
+	     set of four consecutive gprs. If we do not have enough, we
+	     have to adjust the gpr_count value.  */
+	  if (gpr_count >= NUM_GPR_ARG_REGISTERS - 3)
+	    gpr_count = NUM_GPR_ARG_REGISTERS;
+	  if (gpr_count >= NUM_GPR_ARG_REGISTERS)
+	    stack_count += 4;
+	  else
+	    gpr_count += 4;
+	  break;
+
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	  /* 'long long' arguments are passed as two words, but
+	     either both words must fit in registers or both go
+	     on the stack.  If they go on the stack, they must
+	     be 8-byte-aligned.
+
+	     Also, only certain register pairs can be used for
+	     passing long long int -- specifically (r3,r4), (r5,r6),
+	     (r7,r8), (r9,r10).  */
+	  gpr_count += gpr_count & 1;
+	  if (gpr_count >= NUM_GPR_ARG_REGISTERS)
+	    {
+	      stack_count += stack_count & 1;
+	      stack_count += 2;
+	    }
+	  else
+	    gpr_count += 2;
+	  break;
+
+	case FFI_TYPE_STRUCT:
+	  /* We must allocate space for a copy of these to enforce
+	     pass-by-value.  Pad the space up to a multiple of 16
+	     bytes (the maximum alignment required for anything under
+	     the SYSV ABI).  */
+	  struct_copy_size += ((*ptr)->size + 15) & ~0xF;
+	  /* Fall through (allocate space for the pointer).  */
+
+	case FFI_TYPE_POINTER:
+	case FFI_TYPE_INT:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT8:
+	  /* Everything else is passed as a 4-byte word in a GPR, either
+	     the object itself or a pointer to it.  */
+	  if (gpr_count >= NUM_GPR_ARG_REGISTERS)
+	    stack_count += 1;
+	  else
+	    gpr_count += 1;
+	  break;
+
+	default:
+	  FFI_ASSERT (0);
+	}
+    }
+
+  if (fpr_count != 0)
+    flags |= FLAG_FP_ARGUMENTS;
+  if (gpr_count > 4)  /* More than four GPR words are in use.  */
+    flags |= FLAG_4_GPR_ARGUMENTS;
+  if (struct_copy_size != 0)
+    flags |= FLAG_ARG_NEEDS_COPY;
+
+  /* Space for the FPR registers, if needed.  */
+  if (fpr_count != 0)
+    bytes += NUM_FPR_ARG_REGISTERS * sizeof (double);
+
+  /* Stack space; stack_count is a count of int-sized words.  */
+  bytes += stack_count * sizeof (int);
+
+  /* The stack space allocated needs to be a multiple of 16 bytes.  */
+  bytes = (bytes + 15) & ~0xF;
+
+  /* Add in the space for the copied structures.  */
+  bytes += struct_copy_size;
+
+  cif->flags = flags;
+  cif->bytes = bytes;
+
+  return FFI_OK;
+}
+/* Translate legacy FFI_COMPAT_* ABIs, then call ffi_prep_cif_sysv_core.  */
+ffi_status FFI_HIDDEN
+ffi_prep_cif_sysv (ffi_cif *cif)
+{
+  if ((cif->abi & FFI_SYSV) == 0)
+    {
+      /* This call is from old code.  Translate to new ABI values.  */
+      cif->flags |= FLAG_COMPAT;
+      switch (cif->abi)
+	{
+	default:
+	  return FFI_BAD_ABI;  /* Not one of the known legacy values.  */
+
+	case FFI_COMPAT_SYSV:
+	  cif->abi = FFI_SYSV | FFI_SYSV_STRUCT_RET | FFI_SYSV_LONG_DOUBLE_128;
+	  break;
+
+	case FFI_COMPAT_GCC_SYSV:
+	  cif->abi = FFI_SYSV | FFI_SYSV_LONG_DOUBLE_128;
+	  break;
+
+	case FFI_COMPAT_LINUX:
+	  cif->abi = (FFI_SYSV | FFI_SYSV_IBM_LONG_DOUBLE
+		      | FFI_SYSV_LONG_DOUBLE_128);
+	  break;
+
+	case FFI_COMPAT_LINUX_SOFT_FLOAT:
+	  cif->abi = (FFI_SYSV | FFI_SYSV_SOFT_FLOAT | FFI_SYSV_IBM_LONG_DOUBLE
+		      | FFI_SYSV_LONG_DOUBLE_128);
+	  break;
+	}
+    }
+  return ffi_prep_cif_sysv_core (cif);
+}
+
+/* ffi_prep_args_SYSV is called by the assembly routine once stack space
+   has been allocated for the function's arguments.  It copies the values
+   in ecif->avalue into the GPR/FPR save areas and the parameter area.
+   The stack layout we want looks like this:
+
+   |   Return address from ffi_call_SYSV 4bytes	|	higher addresses
+   |--------------------------------------------|
+   |   Previous backchain pointer	4	|       stack pointer here
+   |--------------------------------------------|<+ <<<	on entry to
+   |   Saved r28-r31			4*4	| |	ffi_call_SYSV
+   |--------------------------------------------| |
+   |   GPR registers r3-r10		8*4	| |	ffi_call_SYSV
+   |--------------------------------------------| |
+   |   FPR registers f1-f8 (optional)	8*8	| |
+   |--------------------------------------------| |	stack	|
+   |   Space for copied structures		| |	grows	|
+   |--------------------------------------------| |	down    V
+   |   Parameters that didn't fit in registers  | |
+   |--------------------------------------------| |	lower addresses
+   |   Space for callee's LR		4	| |
+   |--------------------------------------------| |	stack pointer here
+   |   Current backchain pointer	4	|-/	during
+   |--------------------------------------------|   <<<	ffi_call_SYSV
+
+*/
+
+void FFI_HIDDEN
+ffi_prep_args_SYSV (extended_cif *ecif, unsigned *const stack)
+{
+  const unsigned bytes = ecif->cif->bytes;
+  const unsigned flags = ecif->cif->flags;
+
+  typedef union
+  {
+    char *c;
+    unsigned *u;
+    long long *ll;
+    float *f;
+    double *d;
+  } valp;
+
+  /* 'stacktop' points at the previous backchain pointer.  */
+  valp stacktop;
+
+  /* 'gpr_base' points at the space for gpr3, and grows upwards as
+     we use GPR registers.  */
+  valp gpr_base;
+  valp gpr_end;
+
+#ifndef __NO_FPRS__
+  /* 'fpr_base' points at the space for fpr1, and grows upwards as
+     we use FPR registers.  */
+  valp fpr_base;
+  valp fpr_end;
+#endif
+
+  /* 'copy_space' grows down as we put structures in it.  It should
+     stay 16-byte aligned.  */
+  valp copy_space;
+
+  /* 'next_arg' grows up as we put parameters in it.  */
+  valp next_arg;
+
+  int i;
+  ffi_type **ptr;
+#ifndef __NO_FPRS__
+  double double_tmp;
+#endif
+  union
+  {
+    void **v;
+    char **c;
+    signed char **sc;
+    unsigned char **uc;
+    signed short **ss;
+    unsigned short **us;
+    unsigned int **ui;
+    long long **ll;
+    float **f;
+    double **d;
+  } p_argv;  /* Walks ecif->avalue, viewed per argument type.  */
+  size_t struct_copy_size;
+  unsigned gprvalue;
+
+  stacktop.c = (char *) stack + bytes;
+  gpr_end.u = stacktop.u - ASM_NEEDS_REGISTERS;
+  gpr_base.u = gpr_end.u - NUM_GPR_ARG_REGISTERS;
+#ifndef __NO_FPRS__
+  fpr_end.d = gpr_base.d;
+  fpr_base.d = fpr_end.d - NUM_FPR_ARG_REGISTERS;
+  copy_space.c = ((flags & FLAG_FP_ARGUMENTS) ? fpr_base.c : gpr_base.c);
+#else
+  copy_space.c = gpr_base.c;
+#endif
+  next_arg.u = stack + 2;  /* Skip the backchain and LR words.  */
+
+  /* Check that everything starts aligned properly.  */
+  FFI_ASSERT (((unsigned long) (char *) stack & 0xF) == 0);
+  FFI_ASSERT (((unsigned long) copy_space.c & 0xF) == 0);
+  FFI_ASSERT (((unsigned long) stacktop.c & 0xF) == 0);
+  FFI_ASSERT ((bytes & 0xF) == 0);
+  FFI_ASSERT (copy_space.c >= next_arg.c);
+
+  /* Deal with return values that are actually pass-by-reference.  */
+  if (flags & FLAG_RETVAL_REFERENCE)
+    *gpr_base.u++ = (unsigned) (char *) ecif->rvalue;
+
+  /* Now for the arguments.  */
+  p_argv.v = ecif->avalue;
+  for (ptr = ecif->cif->arg_types, i = ecif->cif->nargs;
+       i > 0;
+       i--, ptr++, p_argv.v++)
+    {
+      unsigned int typenum = (*ptr)->type;
+
+      typenum = translate_float (ecif->cif->abi, typenum);
+
+      /* Now test the translated value */
+      switch (typenum)
+	{
+#ifndef __NO_FPRS__
+# if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+	case FFI_TYPE_LONGDOUBLE:
+	  double_tmp = (*p_argv.d)[0];
+
+	  if (fpr_base.d >= fpr_end.d - 1)
+	    {
+	      fpr_base.d = fpr_end.d;
+	      if (((next_arg.u - stack) & 1) != 0)
+		next_arg.u += 1;
+	      *next_arg.d = double_tmp;
+	      next_arg.u += 2;
+	      double_tmp = (*p_argv.d)[1];
+	      *next_arg.d = double_tmp;
+	      next_arg.u += 2;
+	    }
+	  else
+	    {
+	      *fpr_base.d++ = double_tmp;
+	      double_tmp = (*p_argv.d)[1];
+	      *fpr_base.d++ = double_tmp;
+	    }
+	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
+	  break;
+# endif
+	case FFI_TYPE_DOUBLE:
+	  double_tmp = **p_argv.d;
+
+	  if (fpr_base.d >= fpr_end.d)
+	    {
+	      if (((next_arg.u - stack) & 1) != 0)
+		next_arg.u += 1;
+	      *next_arg.d = double_tmp;
+	      next_arg.u += 2;
+	    }
+	  else
+	    *fpr_base.d++ = double_tmp;
+	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
+	  break;
+
+	case FFI_TYPE_FLOAT:
+	  double_tmp = **p_argv.f;
+	  if (fpr_base.d >= fpr_end.d)
+	    {
+	      *next_arg.f = (float) double_tmp;
+	      next_arg.u += 1;
+	    }
+	  else
+	    *fpr_base.d++ = double_tmp;
+	  FFI_ASSERT (flags & FLAG_FP_ARGUMENTS);
+	  break;
+#endif /* have FPRs */
+
+	case FFI_TYPE_UINT128:
+	  /* The soft float ABI for long doubles works like this, a long double
+	     is passed in four consecutive GPRs if available.  A maximum of 2
+	     long doubles can be passed in gprs.  If we do not have 4 GPRs
+	     left, the long double is passed on the stack, 4-byte aligned.  */
+	  if (gpr_base.u >= gpr_end.u - 3)
+	    {
+	      unsigned int ii;
+	      gpr_base.u = gpr_end.u;
+	      for (ii = 0; ii < 4; ii++)
+		{
+		  unsigned int int_tmp = (*p_argv.ui)[ii];
+		  *next_arg.u++ = int_tmp;
+		}
+	    }
+	  else
+	    {
+	      unsigned int ii;
+	      for (ii = 0; ii < 4; ii++)
+		{
+		  unsigned int int_tmp = (*p_argv.ui)[ii];
+		  *gpr_base.u++ = int_tmp;
+		}
+	    }
+	  break;
+
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	  if (gpr_base.u >= gpr_end.u - 1)
+	    {
+	      gpr_base.u = gpr_end.u;
+	      if (((next_arg.u - stack) & 1) != 0)
+		next_arg.u++;
+	      *next_arg.ll = **p_argv.ll;
+	      next_arg.u += 2;
+	    }
+	  else
+	    {
+	      /* The abi states only certain register pairs can be
+		 used for passing long long int specifically (r3,r4),
+		 (r5,r6), (r7,r8), (r9,r10).  If next arg is long long
+		 but not correct starting register of pair then skip
+		 until the proper starting register.  */
+	      if (((gpr_end.u - gpr_base.u) & 1) != 0)
+		gpr_base.u++;
+	      *gpr_base.ll++ = **p_argv.ll;
+	    }
+	  break;
+
+	case FFI_TYPE_STRUCT:
+	  struct_copy_size = ((*ptr)->size + 15) & ~0xF;
+	  copy_space.c -= struct_copy_size;
+	  memcpy (copy_space.c, *p_argv.c, (*ptr)->size);
+
+	  gprvalue = (unsigned long) copy_space.c;
+
+	  FFI_ASSERT (copy_space.c > next_arg.c);
+	  FFI_ASSERT (flags & FLAG_ARG_NEEDS_COPY);
+	  goto putgpr;  /* Pass the address of the copy as one word.  */
+
+	case FFI_TYPE_UINT8:
+	  gprvalue = **p_argv.uc;
+	  goto putgpr;
+	case FFI_TYPE_SINT8:
+	  gprvalue = **p_argv.sc;
+	  goto putgpr;
+	case FFI_TYPE_UINT16:
+	  gprvalue = **p_argv.us;
+	  goto putgpr;
+	case FFI_TYPE_SINT16:
+	  gprvalue = **p_argv.ss;
+	  goto putgpr;
+
+	case FFI_TYPE_INT:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_POINTER:
+
+	  gprvalue = **p_argv.ui;
+
+	putgpr:  /* Store gprvalue in the next GPR slot, else on the stack.  */
+	  if (gpr_base.u >= gpr_end.u)
+	    *next_arg.u++ = gprvalue;
+	  else
+	    *gpr_base.u++ = gprvalue;
+	  break;
+	}  /* No default case: any other type is silently skipped.  */
+    }
+
+  /* Check that we didn't overrun the stack...  */
+  FFI_ASSERT (copy_space.c >= next_arg.c);
+  FFI_ASSERT (gpr_base.u <= gpr_end.u);
+#ifndef __NO_FPRS__
+  FFI_ASSERT (fpr_base.u <= fpr_end.u);
+#endif
+  FFI_ASSERT (((flags & FLAG_4_GPR_ARGUMENTS) != 0)
+	      == (gpr_end.u - gpr_base.u < 4));
+}
+
+#define MIN_CACHE_LINE_SIZE 8
+/* Issue icbi on XADDR and dcbf on WRADDR across SIZE bytes, then sync.  */
+static void
+flush_icache (char *wraddr, char *xaddr, int size)
+{
+  int i;
+  for (i = 0; i < size; i += MIN_CACHE_LINE_SIZE)
+    __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;"
+		      : : "r" (xaddr + i), "r" (wraddr + i) : "memory");
+  __asm__ volatile ("icbi 0,%0;" "dcbf 0,%1;" "sync;" "isync;"
+		    : : "r"(xaddr + size - 1), "r"(wraddr + size - 1)
+		    : "memory");  /* Last byte of the range, then sync/isync.  */
+}
+/* Write the trampoline into CLOSURE and record CIF, FUN and USER_DATA.  */
+ffi_status FFI_HIDDEN
+ffi_prep_closure_loc_sysv (ffi_closure *closure,
+			   ffi_cif *cif,
+			   void (*fun) (ffi_cif *, void *, void **, void *),
+			   void *user_data,
+			   void *codeloc)
+{
+  unsigned int *tramp;
+
+  if (cif->abi < FFI_SYSV || cif->abi >= FFI_LAST_ABI)
+    return FFI_BAD_ABI;  /* Nothing has been written to CLOSURE yet.  */
+
+  tramp = (unsigned int *) &closure->tramp[0];
+  tramp[0] = 0x7c0802a6;  /*   mflr    r0 */
+  tramp[1] = 0x429f0005;  /*   bcl     20,31,.+4 */
+  tramp[2] = 0x7d6802a6;  /*   mflr    r11 */
+  tramp[3] = 0x7c0803a6;  /*   mtlr    r0 */
+  tramp[4] = 0x800b0018;  /*   lwz     r0,24(r11) */
+  tramp[5] = 0x816b001c;  /*   lwz     r11,28(r11) */
+  tramp[6] = 0x7c0903a6;  /*   mtctr   r0 */
+  tramp[7] = 0x4e800420;  /*   bctr */
+  *(void **) &tramp[8] = (void *) ffi_closure_SYSV; /* function */
+  *(void **) &tramp[9] = codeloc;                   /* context */
+
+  /* Flush only the 8 instruction words; words 8 and 9 hold pointers.  */
+  flush_icache ((char *)tramp, (char *)codeloc, 8 * 4);
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+
+  return FFI_OK;
+}
+
+/* The trampoline invokes ffi_closure_SYSV with the closure address in
+   r11.  After storing the registers that could possibly contain
+   parameters into the stack frame and setting up space for a return
+   value, ffi_closure_SYSV invokes this helper.  It builds avalue[]
+   from the saved registers and the caller's stack, calls FUN, and
+   returns a type code describing how to load the result.  */
+
+int
+ffi_closure_helper_SYSV (ffi_cif *cif,
+			 void (*fun) (ffi_cif *, void *, void **, void *),
+			 void *user_data,
+			 void *rvalue,
+			 unsigned long *pgr,
+			 ffi_dblfl *pfr,
+			 unsigned long *pst)
+{
+  /* rvalue is the pointer to space for return value in closure assembly */
+  /* pgr is the pointer to where r3-r10 are stored in ffi_closure_SYSV */
+  /* pfr is the pointer to where f1-f8 are stored in ffi_closure_SYSV  */
+  /* pst is the pointer to outgoing parameter stack in original caller */
+
+  void **          avalue;
+  ffi_type **      arg_types;
+  long             i, avn;
+#ifndef __NO_FPRS__
+  long             nf = 0;   /* number of floating registers already used */
+#endif
+  long             ng = 0;   /* number of general registers already used */
+
+  unsigned       size     = cif->rtype->size;
+  unsigned short rtypenum = cif->rtype->type;
+
+  avalue = alloca (cif->nargs * sizeof (void *));
+
+  /* First translate for softfloat/nonlinux */
+  rtypenum = translate_float (cif->abi, rtypenum);
+
+  /* Copy the caller's structure return value address so that the closure
+     returns the data directly to the caller.
+     For FFI_SYSV the result is passed in r3/r4 if the struct size is less
+     or equal 8 bytes.  */
+  if (rtypenum == FFI_TYPE_STRUCT
+      && !((cif->abi & FFI_SYSV_STRUCT_RET) != 0 && size <= 8))
+    {
+      rvalue = (void *) *pgr;
+      ng++;
+      pgr++;
+    }
+
+  i = 0;
+  avn = cif->nargs;
+  arg_types = cif->arg_types;
+
+  /* Grab the addresses of the arguments from the stack frame.  */
+  while (i < avn) {
+    unsigned short typenum = arg_types[i]->type;
+
+    /* We may need to handle some values depending on ABI.  */
+    typenum = translate_float (cif->abi, typenum);
+
+    switch (typenum)
+      {
+#ifndef __NO_FPRS__
+      case FFI_TYPE_FLOAT:
+	/* Unfortunately float values are stored as doubles
+	   in the ffi_closure_SYSV code (since we don't check
+	   the type in that routine).  */
+	if (nf < NUM_FPR_ARG_REGISTERS)
+	  {
+	    /* Narrow the saved double to a float in place so that
+	       avalue[i] can point at it.  FIXME? this overwrites
+	       the register save slot that PFR points at, which is
+	       probably a really naughty thing to do but...  */
+	    double temp = pfr->d;
+	    pfr->f = (float) temp;
+	    avalue[i] = pfr;
+	    nf++;
+	    pfr++;
+	  }
+	else
+	  {
+	    avalue[i] = pst;
+	    pst += 1;
+	  }
+	break;
+
+      case FFI_TYPE_DOUBLE:
+	if (nf < NUM_FPR_ARG_REGISTERS)
+	  {
+	    avalue[i] = pfr;
+	    nf++;
+	    pfr++;
+	  }
+	else
+	  {
+	    if (((long) pst) & 4)
+	      pst++;
+	    avalue[i] = pst;
+	    pst += 2;
+	  }
+	break;
+
+# if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+      case FFI_TYPE_LONGDOUBLE:
+	if (nf < NUM_FPR_ARG_REGISTERS - 1)
+	  {
+	    avalue[i] = pfr;
+	    pfr += 2;
+	    nf += 2;
+	  }
+	else
+	  {
+	    if (((long) pst) & 4)
+	      pst++;
+	    avalue[i] = pst;
+	    pst += 4;
+	    nf = 8;
+	  }
+	break;
+# endif
+#endif
+
+      case FFI_TYPE_UINT128:
+	/* Check whether 4 gprs are available for the whole long double;
+	   otherwise it ends up on the stack.  */
+	if (ng < NUM_GPR_ARG_REGISTERS - 3)
+	  {
+	    avalue[i] = pgr;
+	    pgr += 4;
+	    ng += 4;
+	  }
+	else
+	  {
+	    avalue[i] = pst;
+	    pst += 4;
+	    ng = 8+4;  /* Fails every later "ng < NUM_GPR_ARG_REGISTERS" test.  */
+	  }
+	break;
+
+      case FFI_TYPE_SINT8:
+      case FFI_TYPE_UINT8:
+#ifndef __LITTLE_ENDIAN__
+	if (ng < NUM_GPR_ARG_REGISTERS)
+	  {
+	    avalue[i] = (char *) pgr + 3;  /* Byte 3 of the saved word.  */
+	    ng++;
+	    pgr++;
+	  }
+	else
+	  {
+	    avalue[i] = (char *) pst + 3;
+	    pst++;
+	  }
+	break;
+#endif /* With __LITTLE_ENDIAN__, fall through to the word-sized case.  */
+
+      case FFI_TYPE_SINT16:
+      case FFI_TYPE_UINT16:
+#ifndef __LITTLE_ENDIAN__
+	if (ng < NUM_GPR_ARG_REGISTERS)
+	  {
+	    avalue[i] = (char *) pgr + 2;  /* Bytes 2-3 of the saved word.  */
+	    ng++;
+	    pgr++;
+	  }
+	else
+	  {
+	    avalue[i] = (char *) pst + 2;
+	    pst++;
+	  }
+	break;
+#endif /* With __LITTLE_ENDIAN__, fall through to the word-sized case.  */
+
+      case FFI_TYPE_SINT32:
+      case FFI_TYPE_UINT32:
+      case FFI_TYPE_POINTER:
+	if (ng < NUM_GPR_ARG_REGISTERS)
+	  {
+	    avalue[i] = pgr;
+	    ng++;
+	    pgr++;
+	  }
+	else
+	  {
+	    avalue[i] = pst;
+	    pst++;
+	  }
+	break;
+
+      case FFI_TYPE_STRUCT:
+	/* Structs are passed by reference. The address will appear in a
+	   gpr if it is one of the first 8 arguments.  */
+	if (ng < NUM_GPR_ARG_REGISTERS)
+	  {
+	    avalue[i] = (void *) *pgr;
+	    ng++;
+	    pgr++;
+	  }
+	else
+	  {
+	    avalue[i] = (void *) *pst;
+	    pst++;
+	  }
+	break;
+
+      case FFI_TYPE_SINT64:
+      case FFI_TYPE_UINT64:
+	/* Passing long long ints is complex: they must
+	   be passed in suitable register pairs, namely
+	   (r3,r4), (r5,r6), (r7,r8) or (r9,r10),
+	   and if an entire pair isn't available then the outgoing
+	   parameter stack is used for both words, kept at an
+	   alignment of 8.  So we must either look in pgr
+	   or pst to find the correct address for this type
+	   of parameter.  */
+	if (ng < NUM_GPR_ARG_REGISTERS - 1)
+	  {
+	    if (ng & 1)
+	      {
+		/* skip r4, r6, r8 as starting points */
+		ng++;
+		pgr++;
+	      }
+	    avalue[i] = pgr;
+	    ng += 2;
+	    pgr += 2;
+	  }
+	else
+	  {
+	    if (((long) pst) & 4)
+	      pst++;
+	    avalue[i] = pst;
+	    pst += 2;
+	    ng = NUM_GPR_ARG_REGISTERS;
+	  }
+	break;
+
+      default:
+	FFI_ASSERT (0);
+      }
+
+    i++;
+  }
+
+  (*fun) (cif, rvalue, avalue, user_data);
+
+  /* Tell ffi_closure_SYSV how to perform return type promotions.
+     Because the FFI_SYSV ABI returns the structures <= 8 bytes in
+     r3/r4 we have to tell ffi_closure_SYSV how to treat them.  We
+     combine the base type FFI_SYSV_TYPE_SMALL_STRUCT with the size of
+     the struct less one.  We never have a struct with size zero.
+     See the comment in ffitarget.h about ordering.  */
+  if (rtypenum == FFI_TYPE_STRUCT
+      && (cif->abi & FFI_SYSV_STRUCT_RET) != 0 && size <= 8)
+    return FFI_SYSV_TYPE_SMALL_STRUCT - 1 + size;
+  return rtypenum;
+}
+#endif
diff --git a/contrib/restricted/libffi/src/powerpc/ffitarget.h b/contrib/restricted/libffi/src/powerpc/ffitarget.h
index 1a25a43141..7fb9a93908 100644
--- a/contrib/restricted/libffi/src/powerpc/ffitarget.h
+++ b/contrib/restricted/libffi/src/powerpc/ffitarget.h
@@ -1,204 +1,204 @@
-/* -----------------------------------------------------------------*-C-*- 
-   ffitarget.h - Copyright (c) 2012  Anthony Green 
-                 Copyright (C) 2007, 2008, 2010 Free Software Foundation, Inc 
-                 Copyright (c) 1996-2003  Red Hat, Inc. 
- 
-   Target configuration macros for PowerPC. 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
-   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
-   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
-   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
-   DEALINGS IN THE SOFTWARE. 
- 
-   ----------------------------------------------------------------------- */ 
- 
-#ifndef LIBFFI_TARGET_H 
-#define LIBFFI_TARGET_H 
- 
-#ifndef LIBFFI_H 
-#error "Please do not include ffitarget.h directly into your source.  Use ffi.h instead." 
-#endif 
- 
-/* ---- System specific configurations ----------------------------------- */ 
- 
-#if defined (POWERPC) && defined (__powerpc64__)	/* linux64 */ 
-#ifndef POWERPC64 
-#define POWERPC64 
-#endif 
-#elif defined (POWERPC_DARWIN) && defined (__ppc64__)	/* Darwin64 */ 
-#ifndef POWERPC64 
-#define POWERPC64 
-#endif 
-#ifndef POWERPC_DARWIN64 
-#define POWERPC_DARWIN64 
-#endif 
-#elif defined (POWERPC_AIX) && defined (__64BIT__)	/* AIX64 */ 
-#ifndef POWERPC64 
-#define POWERPC64 
-#endif 
-#endif 
- 
-#ifndef LIBFFI_ASM 
-typedef unsigned long          ffi_arg; 
-typedef signed long            ffi_sarg; 
- 
-typedef enum ffi_abi { 
-  FFI_FIRST_ABI = 0, 
- 
-#if defined (POWERPC_AIX) 
-  FFI_AIX, 
-  FFI_DARWIN, 
-  FFI_DEFAULT_ABI = FFI_AIX, 
-  FFI_LAST_ABI 
- 
-#elif defined (POWERPC_DARWIN) 
-  FFI_AIX, 
-  FFI_DARWIN, 
-  FFI_DEFAULT_ABI = FFI_DARWIN, 
-  FFI_LAST_ABI 
- 
-#else 
-  /* The FFI_COMPAT values are used by old code.  Since libffi may be 
-     a shared library we have to support old values for backwards 
-     compatibility.  */ 
-  FFI_COMPAT_SYSV, 
-  FFI_COMPAT_GCC_SYSV, 
-  FFI_COMPAT_LINUX64, 
-  FFI_COMPAT_LINUX, 
-  FFI_COMPAT_LINUX_SOFT_FLOAT, 
- 
-# if defined (POWERPC64) 
-  /* This bit, always set in new code, must not be set in any of the 
-     old FFI_COMPAT values that might be used for 64-bit linux.  We 
-     only need worry about FFI_COMPAT_LINUX64, but to be safe avoid 
-     all old values.  */ 
-  FFI_LINUX = 8, 
-  /* This and following bits can reuse FFI_COMPAT values.  */ 
-  FFI_LINUX_STRUCT_ALIGN = 1, 
-  FFI_LINUX_LONG_DOUBLE_128 = 2, 
-  FFI_LINUX_LONG_DOUBLE_IEEE128 = 4, 
-  FFI_DEFAULT_ABI = (FFI_LINUX 
-#  ifdef __STRUCT_PARM_ALIGN__ 
-		     | FFI_LINUX_STRUCT_ALIGN 
-#  endif 
-#  ifdef __LONG_DOUBLE_128__ 
-		     | FFI_LINUX_LONG_DOUBLE_128 
-#   ifdef __LONG_DOUBLE_IEEE128__ 
-		     | FFI_LINUX_LONG_DOUBLE_IEEE128 
-#   endif 
-#  endif 
-		     ), 
-  FFI_LAST_ABI = 16 
- 
-# else 
-  /* This bit, always set in new code, must not be set in any of the 
-     old FFI_COMPAT values that might be used for 32-bit linux/sysv/bsd.  */ 
-  FFI_SYSV = 8, 
-  /* This and following bits can reuse FFI_COMPAT values.  */ 
-  FFI_SYSV_SOFT_FLOAT = 1, 
-  FFI_SYSV_STRUCT_RET = 2, 
-  FFI_SYSV_IBM_LONG_DOUBLE = 4, 
-  FFI_SYSV_LONG_DOUBLE_128 = 16, 
- 
-  FFI_DEFAULT_ABI = (FFI_SYSV 
-#  ifdef __NO_FPRS__ 
-		     | FFI_SYSV_SOFT_FLOAT 
-#  endif 
-#  if (defined (__SVR4_STRUCT_RETURN)					\ 
-       || defined (POWERPC_FREEBSD) && !defined (__AIX_STRUCT_RETURN)) 
-		     | FFI_SYSV_STRUCT_RET 
-#  endif 
-#  if __LDBL_MANT_DIG__ == 106 
-		     | FFI_SYSV_IBM_LONG_DOUBLE 
-#  endif 
-#  ifdef __LONG_DOUBLE_128__ 
-		     | FFI_SYSV_LONG_DOUBLE_128 
-#  endif 
-		     ), 
-  FFI_LAST_ABI = 32 
-# endif 
-#endif 
- 
-} ffi_abi; 
-#endif 
- 
-/* ---- Definitions for closures ----------------------------------------- */ 
- 
-#define FFI_CLOSURES 1 
-#define FFI_NATIVE_RAW_API 0 
-#if defined (POWERPC) || defined (POWERPC_FREEBSD) 
-# define FFI_GO_CLOSURES 1 
-# define FFI_TARGET_SPECIFIC_VARIADIC 1 
-# define FFI_EXTRA_CIF_FIELDS unsigned nfixedargs 
-#endif 
-#if defined (POWERPC_AIX) 
-# define FFI_GO_CLOSURES 1 
-#endif 
- 
-/* ppc_closure.S and linux64_closure.S expect this.  */ 
-#define FFI_PPC_TYPE_LAST FFI_TYPE_POINTER 
- 
-/* We define additional types below.  If generic types are added that 
-   must be supported by powerpc libffi then it is likely that 
-   FFI_PPC_TYPE_LAST needs increasing *and* the jump tables in 
-   ppc_closure.S and linux64_closure.S be extended.  */ 
- 
-#if !(FFI_TYPE_LAST == FFI_PPC_TYPE_LAST		\ 
-      || (FFI_TYPE_LAST == FFI_TYPE_COMPLEX		\ 
-	  && !defined FFI_TARGET_HAS_COMPLEX_TYPE)) 
-# error "You likely have a broken powerpc libffi" 
-#endif 
- 
-/* Needed for soft-float long-double-128 support.  */ 
-#define FFI_TYPE_UINT128 (FFI_PPC_TYPE_LAST + 1) 
- 
-/* Needed for FFI_SYSV small structure returns.  */ 
-#define FFI_SYSV_TYPE_SMALL_STRUCT (FFI_PPC_TYPE_LAST + 2) 
- 
-/* Used by ELFv2 for homogenous structure returns.  */ 
-#define FFI_V2_TYPE_VECTOR		(FFI_PPC_TYPE_LAST + 1) 
-#define FFI_V2_TYPE_VECTOR_HOMOG	(FFI_PPC_TYPE_LAST + 2) 
-#define FFI_V2_TYPE_FLOAT_HOMOG		(FFI_PPC_TYPE_LAST + 3) 
-#define FFI_V2_TYPE_DOUBLE_HOMOG	(FFI_PPC_TYPE_LAST + 4) 
-#define FFI_V2_TYPE_SMALL_STRUCT	(FFI_PPC_TYPE_LAST + 5) 
- 
-#if _CALL_ELF == 2 
-# define FFI_TRAMPOLINE_SIZE 32 
-#else 
-# if defined(POWERPC64) || defined(POWERPC_AIX) 
-#  if defined(POWERPC_DARWIN64) 
-#    define FFI_TRAMPOLINE_SIZE 48 
-#  else 
-#    define FFI_TRAMPOLINE_SIZE 24 
-#  endif 
-# else /* POWERPC || POWERPC_AIX */ 
-#  define FFI_TRAMPOLINE_SIZE 40 
-# endif 
-#endif 
- 
-#ifndef LIBFFI_ASM 
-#if defined(POWERPC_DARWIN) || defined(POWERPC_AIX) 
-struct ffi_aix_trampoline_struct { 
-    void * code_pointer;	/* Pointer to ffi_closure_ASM */ 
-    void * toc;			/* TOC */ 
-    void * static_chain;	/* Pointer to closure */ 
-}; 
-#endif 
-#endif 
- 
-#endif 
+/* -----------------------------------------------------------------*-C-*-
+   ffitarget.h - Copyright (c) 2012  Anthony Green
+                 Copyright (C) 2007, 2008, 2010 Free Software Foundation, Inc
+                 Copyright (c) 1996-2003  Red Hat, Inc.
+
+   Target configuration macros for PowerPC.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+
+   ----------------------------------------------------------------------- */
+
+#ifndef LIBFFI_TARGET_H
+#define LIBFFI_TARGET_H
+
+#ifndef LIBFFI_H
+#error "Please do not include ffitarget.h directly into your source.  Use ffi.h instead."
+#endif
+
+/* ---- System specific configurations ----------------------------------- */
+
+#if defined (POWERPC) && defined (__powerpc64__)	/* linux64 */
+#ifndef POWERPC64
+#define POWERPC64
+#endif
+#elif defined (POWERPC_DARWIN) && defined (__ppc64__)	/* Darwin64 */
+#ifndef POWERPC64
+#define POWERPC64
+#endif
+#ifndef POWERPC_DARWIN64
+#define POWERPC_DARWIN64
+#endif
+#elif defined (POWERPC_AIX) && defined (__64BIT__)	/* AIX64 */
+#ifndef POWERPC64
+#define POWERPC64
+#endif
+#endif
+
+#ifndef LIBFFI_ASM
+typedef unsigned long          ffi_arg;
+typedef signed long            ffi_sarg;
+
+typedef enum ffi_abi {
+  FFI_FIRST_ABI = 0,
+
+#if defined (POWERPC_AIX)
+  FFI_AIX,
+  FFI_DARWIN,
+  FFI_DEFAULT_ABI = FFI_AIX,
+  FFI_LAST_ABI
+
+#elif defined (POWERPC_DARWIN)
+  FFI_AIX,
+  FFI_DARWIN,
+  FFI_DEFAULT_ABI = FFI_DARWIN,
+  FFI_LAST_ABI
+
+#else
+  /* The FFI_COMPAT values are used by old code.  Since libffi may be
+     a shared library we have to support old values for backwards
+     compatibility.  */
+  FFI_COMPAT_SYSV,
+  FFI_COMPAT_GCC_SYSV,
+  FFI_COMPAT_LINUX64,
+  FFI_COMPAT_LINUX,
+  FFI_COMPAT_LINUX_SOFT_FLOAT,
+
+# if defined (POWERPC64)
+  /* This bit, always set in new code, must not be set in any of the
+     old FFI_COMPAT values that might be used for 64-bit linux.  We
+     only need worry about FFI_COMPAT_LINUX64, but to be safe avoid
+     all old values.  */
+  FFI_LINUX = 8,
+  /* This and following bits can reuse FFI_COMPAT values.  */
+  FFI_LINUX_STRUCT_ALIGN = 1,
+  FFI_LINUX_LONG_DOUBLE_128 = 2,
+  FFI_LINUX_LONG_DOUBLE_IEEE128 = 4,
+  FFI_DEFAULT_ABI = (FFI_LINUX
+#  ifdef __STRUCT_PARM_ALIGN__
+		     | FFI_LINUX_STRUCT_ALIGN
+#  endif
+#  ifdef __LONG_DOUBLE_128__
+		     | FFI_LINUX_LONG_DOUBLE_128
+#   ifdef __LONG_DOUBLE_IEEE128__
+		     | FFI_LINUX_LONG_DOUBLE_IEEE128
+#   endif
+#  endif
+		     ),
+  FFI_LAST_ABI = 16
+
+# else
+  /* This bit, always set in new code, must not be set in any of the
+     old FFI_COMPAT values that might be used for 32-bit linux/sysv/bsd.  */
+  FFI_SYSV = 8,
+  /* This and following bits can reuse FFI_COMPAT values.  */
+  FFI_SYSV_SOFT_FLOAT = 1,
+  FFI_SYSV_STRUCT_RET = 2,
+  FFI_SYSV_IBM_LONG_DOUBLE = 4,
+  FFI_SYSV_LONG_DOUBLE_128 = 16,
+
+  FFI_DEFAULT_ABI = (FFI_SYSV
+#  ifdef __NO_FPRS__
+		     | FFI_SYSV_SOFT_FLOAT
+#  endif
+#  if (defined (__SVR4_STRUCT_RETURN)					\
+       || defined (POWERPC_FREEBSD) && !defined (__AIX_STRUCT_RETURN))
+		     | FFI_SYSV_STRUCT_RET
+#  endif
+#  if __LDBL_MANT_DIG__ == 106
+		     | FFI_SYSV_IBM_LONG_DOUBLE
+#  endif
+#  ifdef __LONG_DOUBLE_128__
+		     | FFI_SYSV_LONG_DOUBLE_128
+#  endif
+		     ),
+  FFI_LAST_ABI = 32
+# endif
+#endif
+
+} ffi_abi;
+#endif
+
+/* ---- Definitions for closures ----------------------------------------- */
+
+#define FFI_CLOSURES 1
+#define FFI_NATIVE_RAW_API 0
+#if defined (POWERPC) || defined (POWERPC_FREEBSD)
+# define FFI_GO_CLOSURES 1
+# define FFI_TARGET_SPECIFIC_VARIADIC 1
+# define FFI_EXTRA_CIF_FIELDS unsigned nfixedargs
+#endif
+#if defined (POWERPC_AIX)
+# define FFI_GO_CLOSURES 1
+#endif
+
+/* ppc_closure.S and linux64_closure.S expect this.  */
+#define FFI_PPC_TYPE_LAST FFI_TYPE_POINTER
+
+/* We define additional types below.  If generic types are added that
+   must be supported by powerpc libffi then it is likely that
+   FFI_PPC_TYPE_LAST needs increasing *and* the jump tables in
+   ppc_closure.S and linux64_closure.S be extended.  */
+
+#if !(FFI_TYPE_LAST == FFI_PPC_TYPE_LAST		\
+      || (FFI_TYPE_LAST == FFI_TYPE_COMPLEX		\
+	  && !defined FFI_TARGET_HAS_COMPLEX_TYPE))
+# error "You likely have a broken powerpc libffi"
+#endif
+
+/* Needed for soft-float long-double-128 support.  */
+#define FFI_TYPE_UINT128 (FFI_PPC_TYPE_LAST + 1)
+
+/* Needed for FFI_SYSV small structure returns.  */
+#define FFI_SYSV_TYPE_SMALL_STRUCT (FFI_PPC_TYPE_LAST + 2)
+
+/* Used by ELFv2 for homogeneous structure returns.  */
+#define FFI_V2_TYPE_VECTOR		(FFI_PPC_TYPE_LAST + 1)
+#define FFI_V2_TYPE_VECTOR_HOMOG	(FFI_PPC_TYPE_LAST + 2)
+#define FFI_V2_TYPE_FLOAT_HOMOG		(FFI_PPC_TYPE_LAST + 3)
+#define FFI_V2_TYPE_DOUBLE_HOMOG	(FFI_PPC_TYPE_LAST + 4)
+#define FFI_V2_TYPE_SMALL_STRUCT	(FFI_PPC_TYPE_LAST + 5)
+
+#if _CALL_ELF == 2
+# define FFI_TRAMPOLINE_SIZE 32
+#else
+# if defined(POWERPC64) || defined(POWERPC_AIX)
+#  if defined(POWERPC_DARWIN64)
+#    define FFI_TRAMPOLINE_SIZE 48
+#  else
+#    define FFI_TRAMPOLINE_SIZE 24
+#  endif
+# else /* !(POWERPC64 || POWERPC_AIX) */
+#  define FFI_TRAMPOLINE_SIZE 40
+# endif
+#endif
+
+#ifndef LIBFFI_ASM
+#if defined(POWERPC_DARWIN) || defined(POWERPC_AIX)
+struct ffi_aix_trampoline_struct {
+    void * code_pointer;	/* Pointer to ffi_closure_ASM */
+    void * toc;			/* TOC */
+    void * static_chain;	/* Pointer to closure */
+};
+#endif
+#endif
+
+#endif
diff --git a/contrib/restricted/libffi/src/powerpc/linux64.S b/contrib/restricted/libffi/src/powerpc/linux64.S
index ec1a9c160c..c99889c1c6 100644
--- a/contrib/restricted/libffi/src/powerpc/linux64.S
+++ b/contrib/restricted/libffi/src/powerpc/linux64.S
@@ -1,283 +1,283 @@
-/* ----------------------------------------------------------------------- 
-   sysv.h - Copyright (c) 2003 Jakub Jelinek <jakub@redhat.com> 
-	    Copyright (c) 2008 Red Hat, Inc. 
- 
-   PowerPC64 Assembly glue. 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
-   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
-   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
-   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
-   DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-#define LIBFFI_ASM 
-#include <fficonfig.h> 
-#include <ffi.h> 
- 
-#ifdef POWERPC64 
-	.hidden	ffi_call_LINUX64 
-	.globl	ffi_call_LINUX64 
-	.text 
-	.cfi_startproc 
-# if _CALL_ELF == 2 
-ffi_call_LINUX64: 
-	addis	%r2, %r12, .TOC.-ffi_call_LINUX64@ha 
-	addi	%r2, %r2, .TOC.-ffi_call_LINUX64@l 
-	.localentry ffi_call_LINUX64, . - ffi_call_LINUX64 
-# else 
-	.section	".opd","aw" 
-	.align	3 
-ffi_call_LINUX64: 
-#  ifdef _CALL_LINUX 
-	.quad	.L.ffi_call_LINUX64,.TOC.@tocbase,0 
-	.type	ffi_call_LINUX64,@function 
-	.text 
-.L.ffi_call_LINUX64: 
-#  else 
-	.hidden	.ffi_call_LINUX64 
-	.globl	.ffi_call_LINUX64 
-	.quad	.ffi_call_LINUX64,.TOC.@tocbase,0 
-	.size	ffi_call_LINUX64,24 
-	.type	.ffi_call_LINUX64,@function 
-	.text 
-.ffi_call_LINUX64: 
-#  endif 
-# endif 
-	mflr	%r0 
-	std	%r28, -32(%r1) 
-	std	%r29, -24(%r1) 
-	std	%r30, -16(%r1) 
-	std	%r31, -8(%r1) 
-	std	%r7, 8(%r1)	/* closure, saved in cr field.  */ 
-	std	%r0, 16(%r1) 
- 
-	mr	%r28, %r1	/* our AP.  */ 
-	.cfi_def_cfa_register 28 
-	.cfi_offset 65, 16 
-	.cfi_offset 31, -8 
-	.cfi_offset 30, -16 
-	.cfi_offset 29, -24 
-	.cfi_offset 28, -32 
- 
-	stdux	%r1, %r1, %r8 
-	mr	%r31, %r6	/* flags, */ 
-	mr	%r30, %r5	/* rvalue, */ 
-	mr	%r29, %r4	/* function address.  */ 
-/* Save toc pointer, not for the ffi_prep_args64 call, but for the later 
-   bctrl function call.  */ 
-# if _CALL_ELF == 2 
-	std	%r2, 24(%r1) 
-# else 
-	std	%r2, 40(%r1) 
-# endif 
- 
-	/* Call ffi_prep_args64.  */ 
-	mr	%r4, %r1 
-# if defined _CALL_LINUX || _CALL_ELF == 2 
-	bl	ffi_prep_args64 
-# else 
-	bl	.ffi_prep_args64 
-# endif 
- 
-# if _CALL_ELF == 2 
-	mr	%r12, %r29 
-# else 
-	ld	%r12, 0(%r29) 
-	ld	%r2, 8(%r29) 
-# endif 
-	/* Now do the call.  */ 
-	/* Set up cr1 with bits 3-7 of the flags.  */ 
-	mtcrf	0xc0, %r31 
- 
-	/* Get the address to call into CTR.  */ 
-	mtctr	%r12 
-	/* Load all those argument registers.  */ 
-	addi	%r29, %r28, -32-(8*8) 
-	ld	%r3,  (0*8)(%r29) 
-	ld	%r4,  (1*8)(%r29) 
-	ld	%r5,  (2*8)(%r29) 
-	ld	%r6,  (3*8)(%r29) 
-	bf-	5, 1f 
-	ld	%r7,  (4*8)(%r29) 
-	ld	%r8,  (5*8)(%r29) 
-	ld	%r9,  (6*8)(%r29) 
-	ld	%r10, (7*8)(%r29) 
-1: 
- 
-	/* Load all the FP registers.  */ 
-	bf-	6, 2f 
-	addi	%r29, %r29, -(14*8) 
-	lfd	%f1,  ( 1*8)(%r29) 
-	lfd	%f2,  ( 2*8)(%r29) 
-	lfd	%f3,  ( 3*8)(%r29) 
-	lfd	%f4,  ( 4*8)(%r29) 
-	lfd	%f5,  ( 5*8)(%r29) 
-	lfd	%f6,  ( 6*8)(%r29) 
-	lfd	%f7,  ( 7*8)(%r29) 
-	lfd	%f8,  ( 8*8)(%r29) 
-	lfd	%f9,  ( 9*8)(%r29) 
-	lfd	%f10, (10*8)(%r29) 
-	lfd	%f11, (11*8)(%r29) 
-	lfd	%f12, (12*8)(%r29) 
-	lfd	%f13, (13*8)(%r29) 
-2: 
- 
-	/* Load all the vector registers.  */ 
-	bf-	3, 3f 
-	addi	%r29, %r29, -16 
-	lvx	%v13, 0, %r29 
-	addi	%r29, %r29, -16 
-	lvx	%v12, 0, %r29 
-	addi	%r29, %r29, -16 
-	lvx	%v11, 0, %r29 
-	addi	%r29, %r29, -16 
-	lvx	%v10, 0, %r29 
-	addi	%r29, %r29, -16 
-	lvx	%v9,  0, %r29 
-	addi	%r29, %r29, -16 
-	lvx	%v8,  0, %r29 
-	addi	%r29, %r29, -16 
-	lvx	%v7,  0, %r29 
-	addi	%r29, %r29, -16 
-	lvx	%v6,  0, %r29 
-	addi	%r29, %r29, -16 
-	lvx	%v5,  0, %r29 
-	addi	%r29, %r29, -16 
-	lvx	%v4,  0, %r29 
-	addi	%r29, %r29, -16 
-	lvx	%v3,  0, %r29 
-	addi	%r29, %r29, -16 
-	lvx	%v2,  0, %r29 
-3: 
- 
-	/* Make the call.  */ 
-	ld	%r11, 8(%r28) 
-	bctrl 
- 
-	/* This must follow the call immediately, the unwinder 
-	   uses this to find out if r2 has been saved or not.  */ 
-# if _CALL_ELF == 2 
-	ld	%r2, 24(%r1) 
-# else 
-	ld	%r2, 40(%r1) 
-# endif 
- 
-	/* Now, deal with the return value.  */ 
-	mtcrf	0x01, %r31 
-	bt	31, .Lstruct_return_value 
-	bt	30, .Ldone_return_value 
-	bt	29, .Lfp_return_value 
-	bt	28, .Lvec_return_value 
-	std	%r3, 0(%r30) 
-	/* Fall through...  */ 
- 
-.Ldone_return_value: 
-	/* Restore the registers we used and return.  */ 
-	mr	%r1, %r28 
-	.cfi_def_cfa_register 1 
-	ld	%r0, 16(%r28) 
-	ld	%r28, -32(%r28) 
-	mtlr	%r0 
-	ld	%r29, -24(%r1) 
-	ld	%r30, -16(%r1) 
-	ld	%r31, -8(%r1) 
-	blr 
- 
-.Lvec_return_value: 
-	stvx	%v2, 0, %r30 
-	b	.Ldone_return_value 
- 
-.Lfp_return_value: 
-	.cfi_def_cfa_register 28 
-	mtcrf	0x02, %r31 /* cr6  */ 
-	bf	27, .Lfloat_return_value 
-	stfd	%f1, 0(%r30) 
-	bf	26, .Ldone_return_value 
-	stfd	%f2, 8(%r30) 
-	b	.Ldone_return_value 
-.Lfloat_return_value: 
-	stfs	%f1, 0(%r30) 
-	b	.Ldone_return_value 
- 
-.Lstruct_return_value: 
-	bf	29, .Lvec_homog_or_small_struct 
-	mtcrf	0x02, %r31 /* cr6  */ 
-	bf	27, .Lfloat_homog_return_value 
-	stfd	%f1, 0(%r30) 
-	stfd	%f2, 8(%r30) 
-	stfd	%f3, 16(%r30) 
-	stfd	%f4, 24(%r30) 
-	stfd	%f5, 32(%r30) 
-	stfd	%f6, 40(%r30) 
-	stfd	%f7, 48(%r30) 
-	stfd	%f8, 56(%r30) 
-	b	.Ldone_return_value 
- 
-.Lfloat_homog_return_value: 
-	stfs	%f1, 0(%r30) 
-	stfs	%f2, 4(%r30) 
-	stfs	%f3, 8(%r30) 
-	stfs	%f4, 12(%r30) 
-	stfs	%f5, 16(%r30) 
-	stfs	%f6, 20(%r30) 
-	stfs	%f7, 24(%r30) 
-	stfs	%f8, 28(%r30) 
-	b	.Ldone_return_value 
- 
-.Lvec_homog_or_small_struct: 
-	bf	28, .Lsmall_struct 
-	stvx	%v2, 0, %r30 
-	addi	%r30, %r30, 16 
-	stvx	%v3, 0, %r30 
-	addi	%r30, %r30, 16 
-	stvx	%v4, 0, %r30 
-	addi	%r30, %r30, 16 
-	stvx	%v5, 0, %r30 
-	addi	%r30, %r30, 16 
-	stvx	%v6, 0, %r30 
-	addi	%r30, %r30, 16 
-	stvx	%v7, 0, %r30 
-	addi	%r30, %r30, 16 
-	stvx	%v8, 0, %r30 
-	addi	%r30, %r30, 16 
-	stvx	%v9, 0, %r30 
-	b	.Ldone_return_value 
- 
-.Lsmall_struct: 
-	std	%r3, 0(%r30) 
-	std	%r4, 8(%r30) 
-	b	.Ldone_return_value 
- 
-	.cfi_endproc 
-# if _CALL_ELF == 2 
-	.size	ffi_call_LINUX64,.-ffi_call_LINUX64 
-# else 
-#  ifdef _CALL_LINUX 
-	.size	ffi_call_LINUX64,.-.L.ffi_call_LINUX64 
-#  else 
-	.long	0 
-	.byte	0,12,0,1,128,4,0,0 
-	.size	.ffi_call_LINUX64,.-.ffi_call_LINUX64 
-#  endif 
-# endif 
- 
-#endif 
- 
-#if (defined __ELF__ && defined __linux__) || _CALL_ELF == 2 
-	.section	.note.GNU-stack,"",@progbits 
-#endif 
+/* -----------------------------------------------------------------------
+   linux64.S - Copyright (c) 2003 Jakub Jelinek <jakub@redhat.com>
+	       Copyright (c) 2008 Red Hat, Inc.
+
+   PowerPC64 Assembly glue.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+#ifdef POWERPC64
+	.hidden	ffi_call_LINUX64
+	.globl	ffi_call_LINUX64
+	.text
+	.cfi_startproc
+# if _CALL_ELF == 2
+ffi_call_LINUX64:
+	addis	%r2, %r12, .TOC.-ffi_call_LINUX64@ha
+	addi	%r2, %r2, .TOC.-ffi_call_LINUX64@l
+	.localentry ffi_call_LINUX64, . - ffi_call_LINUX64
+# else
+	.section	".opd","aw"
+	.align	3
+ffi_call_LINUX64:
+#  ifdef _CALL_LINUX
+	.quad	.L.ffi_call_LINUX64,.TOC.@tocbase,0
+	.type	ffi_call_LINUX64,@function
+	.text
+.L.ffi_call_LINUX64:
+#  else
+	.hidden	.ffi_call_LINUX64
+	.globl	.ffi_call_LINUX64
+	.quad	.ffi_call_LINUX64,.TOC.@tocbase,0
+	.size	ffi_call_LINUX64,24
+	.type	.ffi_call_LINUX64,@function
+	.text
+.ffi_call_LINUX64:
+#  endif
+# endif
+	mflr	%r0
+	std	%r28, -32(%r1)
+	std	%r29, -24(%r1)
+	std	%r30, -16(%r1)
+	std	%r31, -8(%r1)
+	std	%r7, 8(%r1)	/* closure, saved in cr field.  */
+	std	%r0, 16(%r1)
+
+	mr	%r28, %r1	/* our AP.  */
+	.cfi_def_cfa_register 28
+	.cfi_offset 65, 16
+	.cfi_offset 31, -8
+	.cfi_offset 30, -16
+	.cfi_offset 29, -24
+	.cfi_offset 28, -32
+
+	stdux	%r1, %r1, %r8
+	mr	%r31, %r6	/* flags, */
+	mr	%r30, %r5	/* rvalue, */
+	mr	%r29, %r4	/* function address.  */
+/* Save toc pointer, not for the ffi_prep_args64 call, but for the later
+   bctrl function call.  */
+# if _CALL_ELF == 2
+	std	%r2, 24(%r1)
+# else
+	std	%r2, 40(%r1)
+# endif
+
+	/* Call ffi_prep_args64.  */
+	mr	%r4, %r1
+# if defined _CALL_LINUX || _CALL_ELF == 2
+	bl	ffi_prep_args64
+# else
+	bl	.ffi_prep_args64
+# endif
+
+# if _CALL_ELF == 2
+	mr	%r12, %r29
+# else
+	ld	%r12, 0(%r29)
+	ld	%r2, 8(%r29)
+# endif
+	/* Now do the call.  */
+	/* Load flag bits 0-7 into cr0 and cr1; bits 3, 5 and 6 are tested below.  */
+	mtcrf	0xc0, %r31
+
+	/* Get the address to call into CTR.  */
+	mtctr	%r12
+	/* Load all those argument registers.  */
+	addi	%r29, %r28, -32-(8*8)
+	ld	%r3,  (0*8)(%r29)
+	ld	%r4,  (1*8)(%r29)
+	ld	%r5,  (2*8)(%r29)
+	ld	%r6,  (3*8)(%r29)
+	bf-	5, 1f
+	ld	%r7,  (4*8)(%r29)
+	ld	%r8,  (5*8)(%r29)
+	ld	%r9,  (6*8)(%r29)
+	ld	%r10, (7*8)(%r29)
+1:
+
+	/* Load all the FP registers.  */
+	bf-	6, 2f
+	addi	%r29, %r29, -(14*8)
+	lfd	%f1,  ( 1*8)(%r29)
+	lfd	%f2,  ( 2*8)(%r29)
+	lfd	%f3,  ( 3*8)(%r29)
+	lfd	%f4,  ( 4*8)(%r29)
+	lfd	%f5,  ( 5*8)(%r29)
+	lfd	%f6,  ( 6*8)(%r29)
+	lfd	%f7,  ( 7*8)(%r29)
+	lfd	%f8,  ( 8*8)(%r29)
+	lfd	%f9,  ( 9*8)(%r29)
+	lfd	%f10, (10*8)(%r29)
+	lfd	%f11, (11*8)(%r29)
+	lfd	%f12, (12*8)(%r29)
+	lfd	%f13, (13*8)(%r29)
+2:
+
+	/* Load all the vector registers.  */
+	bf-	3, 3f
+	addi	%r29, %r29, -16
+	lvx	%v13, 0, %r29
+	addi	%r29, %r29, -16
+	lvx	%v12, 0, %r29
+	addi	%r29, %r29, -16
+	lvx	%v11, 0, %r29
+	addi	%r29, %r29, -16
+	lvx	%v10, 0, %r29
+	addi	%r29, %r29, -16
+	lvx	%v9,  0, %r29
+	addi	%r29, %r29, -16
+	lvx	%v8,  0, %r29
+	addi	%r29, %r29, -16
+	lvx	%v7,  0, %r29
+	addi	%r29, %r29, -16
+	lvx	%v6,  0, %r29
+	addi	%r29, %r29, -16
+	lvx	%v5,  0, %r29
+	addi	%r29, %r29, -16
+	lvx	%v4,  0, %r29
+	addi	%r29, %r29, -16
+	lvx	%v3,  0, %r29
+	addi	%r29, %r29, -16
+	lvx	%v2,  0, %r29
+3:
+
+	/* Make the call.  */
+	ld	%r11, 8(%r28)
+	bctrl
+
+	/* This must follow the call immediately, the unwinder
+	   uses this to find out if r2 has been saved or not.  */
+# if _CALL_ELF == 2
+	ld	%r2, 24(%r1)
+# else
+	ld	%r2, 40(%r1)
+# endif
+
+	/* Now, deal with the return value.  */
+	mtcrf	0x01, %r31
+	bt	31, .Lstruct_return_value
+	bt	30, .Ldone_return_value
+	bt	29, .Lfp_return_value
+	bt	28, .Lvec_return_value
+	std	%r3, 0(%r30)
+	/* Fall through...  */
+
+.Ldone_return_value:
+	/* Restore the registers we used and return.  */
+	mr	%r1, %r28
+	.cfi_def_cfa_register 1
+	ld	%r0, 16(%r28)
+	ld	%r28, -32(%r28)
+	mtlr	%r0
+	ld	%r29, -24(%r1)
+	ld	%r30, -16(%r1)
+	ld	%r31, -8(%r1)
+	blr
+
+.Lvec_return_value:
+	stvx	%v2, 0, %r30
+	b	.Ldone_return_value
+
+.Lfp_return_value:
+	.cfi_def_cfa_register 28
+	mtcrf	0x02, %r31 /* cr6  */
+	bf	27, .Lfloat_return_value
+	stfd	%f1, 0(%r30)
+	bf	26, .Ldone_return_value
+	stfd	%f2, 8(%r30)
+	b	.Ldone_return_value
+.Lfloat_return_value:
+	stfs	%f1, 0(%r30)
+	b	.Ldone_return_value
+
+.Lstruct_return_value:
+	bf	29, .Lvec_homog_or_small_struct
+	mtcrf	0x02, %r31 /* cr6  */
+	bf	27, .Lfloat_homog_return_value
+	stfd	%f1, 0(%r30)
+	stfd	%f2, 8(%r30)
+	stfd	%f3, 16(%r30)
+	stfd	%f4, 24(%r30)
+	stfd	%f5, 32(%r30)
+	stfd	%f6, 40(%r30)
+	stfd	%f7, 48(%r30)
+	stfd	%f8, 56(%r30)
+	b	.Ldone_return_value
+
+.Lfloat_homog_return_value:
+	stfs	%f1, 0(%r30)
+	stfs	%f2, 4(%r30)
+	stfs	%f3, 8(%r30)
+	stfs	%f4, 12(%r30)
+	stfs	%f5, 16(%r30)
+	stfs	%f6, 20(%r30)
+	stfs	%f7, 24(%r30)
+	stfs	%f8, 28(%r30)
+	b	.Ldone_return_value
+
+.Lvec_homog_or_small_struct:
+	bf	28, .Lsmall_struct
+	stvx	%v2, 0, %r30
+	addi	%r30, %r30, 16
+	stvx	%v3, 0, %r30
+	addi	%r30, %r30, 16
+	stvx	%v4, 0, %r30
+	addi	%r30, %r30, 16
+	stvx	%v5, 0, %r30
+	addi	%r30, %r30, 16
+	stvx	%v6, 0, %r30
+	addi	%r30, %r30, 16
+	stvx	%v7, 0, %r30
+	addi	%r30, %r30, 16
+	stvx	%v8, 0, %r30
+	addi	%r30, %r30, 16
+	stvx	%v9, 0, %r30
+	b	.Ldone_return_value
+
+.Lsmall_struct:
+	std	%r3, 0(%r30)
+	std	%r4, 8(%r30)
+	b	.Ldone_return_value
+
+	.cfi_endproc
+# if _CALL_ELF == 2
+	.size	ffi_call_LINUX64,.-ffi_call_LINUX64
+# else
+#  ifdef _CALL_LINUX
+	.size	ffi_call_LINUX64,.-.L.ffi_call_LINUX64
+#  else
+	.long	0
+	.byte	0,12,0,1,128,4,0,0
+	.size	.ffi_call_LINUX64,.-.ffi_call_LINUX64
+#  endif
+# endif
+
+#endif
+
+#if (defined __ELF__ && defined __linux__) || _CALL_ELF == 2
+	.section	.note.GNU-stack,"",@progbits
+#endif
diff --git a/contrib/restricted/libffi/src/powerpc/linux64_closure.S b/contrib/restricted/libffi/src/powerpc/linux64_closure.S
index 051d5410b8..d67e4bbbd1 100644
--- a/contrib/restricted/libffi/src/powerpc/linux64_closure.S
+++ b/contrib/restricted/libffi/src/powerpc/linux64_closure.S
@@ -1,552 +1,552 @@
-/* ----------------------------------------------------------------------- 
-   sysv.h - Copyright (c) 2003 Jakub Jelinek <jakub@redhat.com> 
-	    Copyright (c) 2008 Red Hat, Inc. 
- 
-   PowerPC64 Assembly glue. 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
-   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
-   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
-   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
-   DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
-#define LIBFFI_ASM 
-#include <fficonfig.h> 
-#include <ffi.h> 
- 
-	.file	"linux64_closure.S" 
- 
-#ifdef POWERPC64 
-	FFI_HIDDEN (ffi_closure_LINUX64) 
-	.globl  ffi_closure_LINUX64 
-	.text 
-	.cfi_startproc 
-# if _CALL_ELF == 2 
-ffi_closure_LINUX64: 
-	addis	%r2, %r12, .TOC.-ffi_closure_LINUX64@ha 
-	addi	%r2, %r2, .TOC.-ffi_closure_LINUX64@l 
-	.localentry ffi_closure_LINUX64, . - ffi_closure_LINUX64 
-# else 
-	.section        ".opd","aw" 
-	.align  3 
-ffi_closure_LINUX64: 
-#  ifdef _CALL_LINUX 
-	.quad   .L.ffi_closure_LINUX64,.TOC.@tocbase,0 
-	.type   ffi_closure_LINUX64,@function 
-	.text 
-.L.ffi_closure_LINUX64: 
-#  else 
-	FFI_HIDDEN (.ffi_closure_LINUX64) 
-	.globl  .ffi_closure_LINUX64 
-	.quad   .ffi_closure_LINUX64,.TOC.@tocbase,0 
-	.size   ffi_closure_LINUX64,24 
-	.type   .ffi_closure_LINUX64,@function 
-	.text 
-.ffi_closure_LINUX64: 
-#  endif 
-# endif 
- 
-# if _CALL_ELF == 2 
-#  ifdef __VEC__ 
-#   32 byte special reg save area + 64 byte parm save area 
-#   + 128 byte retval area + 13*8 fpr save area + 12*16 vec save area + round to 16 
-#   define STACKFRAME 528 
-#  else 
-#   32 byte special reg save area + 64 byte parm save area 
-#   + 64 byte retval area + 13*8 fpr save area + round to 16 
-#   define STACKFRAME 272 
-#  endif 
-#  define PARMSAVE 32 
-#  define RETVAL PARMSAVE+64 
-# else 
-#  48 bytes special reg save area + 64 bytes parm save area 
-#  + 16 bytes retval area + 13*8 bytes fpr save area + round to 16 
-#  define STACKFRAME 240 
-#  define PARMSAVE 48 
-#  define RETVAL PARMSAVE+64 
-# endif 
- 
-# if _CALL_ELF == 2 
-	ld	%r12, FFI_TRAMPOLINE_SIZE(%r11)		# closure->cif 
-	mflr	%r0 
-	lwz	%r12, 28(%r12)				# cif->flags 
-	mtcrf	0x40, %r12 
-	addi	%r12, %r1, PARMSAVE 
-	bt	7, 0f 
-	# Our caller has not allocated a parameter save area. 
-	# We need to allocate one here and use it to pass gprs to 
-	# ffi_closure_helper_LINUX64. 
-	addi	%r12, %r1, -STACKFRAME+PARMSAVE 
-0: 
-	# Save general regs into parm save area 
-	std	%r3, 0(%r12) 
-	std	%r4, 8(%r12) 
-	std	%r5, 16(%r12) 
-	std	%r6, 24(%r12) 
-	std	%r7, 32(%r12) 
-	std	%r8, 40(%r12) 
-	std	%r9, 48(%r12) 
-	std	%r10, 56(%r12) 
- 
-	# load up the pointer to the parm save area 
-	mr	%r7, %r12 
-# else 
-	# copy r2 to r11 and load TOC into r2 
-	mr	%r11, %r2 
-	ld	%r2, 16(%r2) 
- 
-	mflr	%r0 
-	# Save general regs into parm save area 
-	# This is the parameter save area set up by our caller. 
-	std	%r3, PARMSAVE+0(%r1) 
-	std	%r4, PARMSAVE+8(%r1) 
-	std	%r5, PARMSAVE+16(%r1) 
-	std	%r6, PARMSAVE+24(%r1) 
-	std	%r7, PARMSAVE+32(%r1) 
-	std	%r8, PARMSAVE+40(%r1) 
-	std	%r9, PARMSAVE+48(%r1) 
-	std	%r10, PARMSAVE+56(%r1) 
- 
-	# load up the pointer to the parm save area 
-	addi	%r7, %r1, PARMSAVE 
-# endif 
-	std	%r0, 16(%r1) 
- 
-	# closure->cif 
-	ld	%r3, FFI_TRAMPOLINE_SIZE(%r11) 
-	# closure->fun 
-	ld	%r4, FFI_TRAMPOLINE_SIZE+8(%r11) 
-	# closure->user_data 
-	ld	%r5, FFI_TRAMPOLINE_SIZE+16(%r11) 
- 
-.Ldoclosure: 
-	# next save fpr 1 to fpr 13 
-	stfd	%f1, -104+(0*8)(%r1) 
-	stfd	%f2, -104+(1*8)(%r1) 
-	stfd	%f3, -104+(2*8)(%r1) 
-	stfd	%f4, -104+(3*8)(%r1) 
-	stfd	%f5, -104+(4*8)(%r1) 
-	stfd	%f6, -104+(5*8)(%r1) 
-	stfd	%f7, -104+(6*8)(%r1) 
-	stfd	%f8, -104+(7*8)(%r1) 
-	stfd	%f9, -104+(8*8)(%r1) 
-	stfd	%f10, -104+(9*8)(%r1) 
-	stfd	%f11, -104+(10*8)(%r1) 
-	stfd	%f12, -104+(11*8)(%r1) 
-	stfd	%f13, -104+(12*8)(%r1) 
- 
-	# load up the pointer to the saved fpr registers 
-	addi	%r8, %r1, -104 
- 
-# ifdef __VEC__ 
-	# load up the pointer to the saved vector registers 
-	# 8 bytes padding for 16-byte alignment at -112(%r1) 
-	addi	%r9, %r8, -24 
-	stvx	%v13, 0, %r9 
-	addi	%r9, %r9, -16 
-	stvx	%v12, 0, %r9 
-	addi	%r9, %r9, -16 
-	stvx	%v11, 0, %r9 
-	addi	%r9, %r9, -16 
-	stvx	%v10, 0, %r9 
-	addi	%r9, %r9, -16 
-	stvx	%v9, 0, %r9 
-	addi	%r9, %r9, -16 
-	stvx	%v8, 0, %r9 
-	addi	%r9, %r9, -16 
-	stvx	%v7, 0, %r9 
-	addi	%r9, %r9, -16 
-	stvx	%v6, 0, %r9 
-	addi	%r9, %r9, -16 
-	stvx	%v5, 0, %r9 
-	addi	%r9, %r9, -16 
-	stvx	%v4, 0, %r9 
-	addi	%r9, %r9, -16 
-	stvx	%v3, 0, %r9 
-	addi	%r9, %r9, -16 
-	stvx	%v2, 0, %r9 
-# endif 
- 
-	# load up the pointer to the result storage 
-	addi	%r6, %r1, -STACKFRAME+RETVAL 
- 
-	stdu	%r1, -STACKFRAME(%r1) 
-	.cfi_def_cfa_offset STACKFRAME 
-	.cfi_offset 65, 16 
- 
-	# make the call 
-# if defined _CALL_LINUX || _CALL_ELF == 2 
-	bl ffi_closure_helper_LINUX64 
-# else 
-	bl .ffi_closure_helper_LINUX64 
-# endif 
-.Lret: 
- 
-	# now r3 contains the return type 
-	# so use it to look up in a table 
-	# so we know how to deal with each type 
- 
-	# look up the proper starting point in table 
-	# by using return type as offset 
-	ld %r0, STACKFRAME+16(%r1) 
-	cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT 
-	bge .Lsmall 
-	mflr %r4		# move address of .Lret to r4 
-	sldi %r3, %r3, 4	# now multiply return type by 16 
-	addi %r4, %r4, .Lret_type0 - .Lret 
-	add %r3, %r3, %r4	# add contents of table to table address 
-	mtctr %r3 
-	bctr			# jump to it 
- 
-# Each of the ret_typeX code fragments has to be exactly 16 bytes long 
-# (4 instructions). For cache effectiveness we align to a 16 byte boundary 
-# first. 
-	.align 4 
- 
-.Lret_type0: 
-# case FFI_TYPE_VOID 
-	mtlr %r0 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-	nop 
-# case FFI_TYPE_INT 
-# ifdef __LITTLE_ENDIAN__ 
-	lwa %r3, RETVAL+0(%r1) 
-# else 
-	lwa %r3, RETVAL+4(%r1) 
-# endif 
-	mtlr %r0 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-# case FFI_TYPE_FLOAT 
-	lfs %f1, RETVAL+0(%r1) 
-	mtlr %r0 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-# case FFI_TYPE_DOUBLE 
-	lfd %f1, RETVAL+0(%r1) 
-	mtlr %r0 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-# case FFI_TYPE_LONGDOUBLE 
-	lfd %f1, RETVAL+0(%r1) 
-	mtlr %r0 
-	lfd %f2, RETVAL+8(%r1) 
-	b .Lfinish 
-# case FFI_TYPE_UINT8 
-# ifdef __LITTLE_ENDIAN__ 
-	lbz %r3, RETVAL+0(%r1) 
-# else 
-	lbz %r3, RETVAL+7(%r1) 
-# endif 
-	mtlr %r0 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-# case FFI_TYPE_SINT8 
-# ifdef __LITTLE_ENDIAN__ 
-	lbz %r3, RETVAL+0(%r1) 
-# else 
-	lbz %r3, RETVAL+7(%r1) 
-# endif 
-	extsb %r3,%r3 
-	mtlr %r0 
-	b .Lfinish 
-# case FFI_TYPE_UINT16 
-# ifdef __LITTLE_ENDIAN__ 
-	lhz %r3, RETVAL+0(%r1) 
-# else 
-	lhz %r3, RETVAL+6(%r1) 
-# endif 
-	mtlr %r0 
-.Lfinish: 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-# case FFI_TYPE_SINT16 
-# ifdef __LITTLE_ENDIAN__ 
-	lha %r3, RETVAL+0(%r1) 
-# else 
-	lha %r3, RETVAL+6(%r1) 
-# endif 
-	mtlr %r0 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-# case FFI_TYPE_UINT32 
-# ifdef __LITTLE_ENDIAN__ 
-	lwz %r3, RETVAL+0(%r1) 
-# else 
-	lwz %r3, RETVAL+4(%r1) 
-# endif 
-	mtlr %r0 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-# case FFI_TYPE_SINT32 
-# ifdef __LITTLE_ENDIAN__ 
-	lwa %r3, RETVAL+0(%r1) 
-# else 
-	lwa %r3, RETVAL+4(%r1) 
-# endif 
-	mtlr %r0 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-# case FFI_TYPE_UINT64 
-	ld %r3, RETVAL+0(%r1) 
-	mtlr %r0 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-# case FFI_TYPE_SINT64 
-	ld %r3, RETVAL+0(%r1) 
-	mtlr %r0 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-# case FFI_TYPE_STRUCT 
-	mtlr %r0 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-	nop 
-# case FFI_TYPE_POINTER 
-	ld %r3, RETVAL+0(%r1) 
-	mtlr %r0 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-# case FFI_V2_TYPE_VECTOR 
-	addi %r3, %r1, RETVAL 
-	lvx %v2, 0, %r3 
-	mtlr %r0 
-	b .Lfinish 
-# case FFI_V2_TYPE_VECTOR_HOMOG 
-	addi %r3, %r1, RETVAL 
-	lvx %v2, 0, %r3 
-	addi %r3, %r3, 16 
-	b .Lmorevector 
-# case FFI_V2_TYPE_FLOAT_HOMOG 
-	lfs %f1, RETVAL+0(%r1) 
-	lfs %f2, RETVAL+4(%r1) 
-	lfs %f3, RETVAL+8(%r1) 
-	b .Lmorefloat 
-# case FFI_V2_TYPE_DOUBLE_HOMOG 
-	lfd %f1, RETVAL+0(%r1) 
-	lfd %f2, RETVAL+8(%r1) 
-	lfd %f3, RETVAL+16(%r1) 
-	lfd %f4, RETVAL+24(%r1) 
-	mtlr %r0 
-	lfd %f5, RETVAL+32(%r1) 
-	lfd %f6, RETVAL+40(%r1) 
-	lfd %f7, RETVAL+48(%r1) 
-	lfd %f8, RETVAL+56(%r1) 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-.Lmorevector: 
-	lvx %v3, 0, %r3 
-	addi %r3, %r3, 16 
-	lvx %v4, 0, %r3 
-	addi %r3, %r3, 16 
-	lvx %v5, 0, %r3 
-	mtlr %r0 
-	addi %r3, %r3, 16 
-	lvx %v6, 0, %r3 
-	addi %r3, %r3, 16 
-	lvx %v7, 0, %r3 
-	addi %r3, %r3, 16 
-	lvx %v8, 0, %r3 
-	addi %r3, %r3, 16 
-	lvx %v9, 0, %r3 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-.Lmorefloat: 
-	lfs %f4, RETVAL+12(%r1) 
-	mtlr %r0 
-	lfs %f5, RETVAL+16(%r1) 
-	lfs %f6, RETVAL+20(%r1) 
-	lfs %f7, RETVAL+24(%r1) 
-	lfs %f8, RETVAL+28(%r1) 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-.Lsmall: 
-# ifdef __LITTLE_ENDIAN__ 
-	ld %r3,RETVAL+0(%r1) 
-	mtlr %r0 
-	ld %r4,RETVAL+8(%r1) 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-# else 
-	# A struct smaller than a dword is returned in the low bits of r3 
-	# ie. right justified.  Larger structs are passed left justified 
-	# in r3 and r4.  The return value area on the stack will have 
-	# the structs as they are usually stored in memory. 
-	cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT + 7 # size 8 bytes? 
-	neg %r5, %r3 
-	ld %r3,RETVAL+0(%r1) 
-	blt .Lsmalldown 
-	mtlr %r0 
-	ld %r4,RETVAL+8(%r1) 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset STACKFRAME 
-.Lsmalldown: 
-	addi %r5, %r5, FFI_V2_TYPE_SMALL_STRUCT + 7 
-	mtlr %r0 
-	sldi %r5, %r5, 3 
-	addi %r1, %r1, STACKFRAME 
-	.cfi_def_cfa_offset 0 
-	srd %r3, %r3, %r5 
-	blr 
-# endif 
- 
-	.cfi_endproc 
-# if _CALL_ELF == 2 
-	.size	ffi_closure_LINUX64,.-ffi_closure_LINUX64 
-# else 
-#  ifdef _CALL_LINUX 
-	.size	ffi_closure_LINUX64,.-.L.ffi_closure_LINUX64 
-#  else 
-	.long	0 
-	.byte	0,12,0,1,128,0,0,0 
-	.size	.ffi_closure_LINUX64,.-.ffi_closure_LINUX64 
-#  endif 
-# endif 
- 
- 
-	FFI_HIDDEN (ffi_go_closure_linux64) 
-	.globl  ffi_go_closure_linux64 
-	.text 
-	.cfi_startproc 
-# if _CALL_ELF == 2 
-ffi_go_closure_linux64: 
-	addis	%r2, %r12, .TOC.-ffi_go_closure_linux64@ha 
-	addi	%r2, %r2, .TOC.-ffi_go_closure_linux64@l 
-	.localentry ffi_go_closure_linux64, . - ffi_go_closure_linux64 
-# else 
-	.section        ".opd","aw" 
-	.align  3 
-ffi_go_closure_linux64: 
-#  ifdef _CALL_LINUX 
-	.quad   .L.ffi_go_closure_linux64,.TOC.@tocbase,0 
-	.type   ffi_go_closure_linux64,@function 
-	.text 
-.L.ffi_go_closure_linux64: 
-#  else 
-	FFI_HIDDEN (.ffi_go_closure_linux64) 
-	.globl  .ffi_go_closure_linux64 
-	.quad   .ffi_go_closure_linux64,.TOC.@tocbase,0 
-	.size   ffi_go_closure_linux64,24 
-	.type   .ffi_go_closure_linux64,@function 
-	.text 
-.ffi_go_closure_linux64: 
-#  endif 
-# endif 
- 
-# if _CALL_ELF == 2 
-	ld	%r12, 8(%r11)				# closure->cif 
-	mflr	%r0 
-	lwz	%r12, 28(%r12)				# cif->flags 
-	mtcrf	0x40, %r12 
-	addi	%r12, %r1, PARMSAVE 
-	bt	7, 0f 
-	# Our caller has not allocated a parameter save area. 
-	# We need to allocate one here and use it to pass gprs to 
-	# ffi_closure_helper_LINUX64. 
-	addi	%r12, %r1, -STACKFRAME+PARMSAVE 
-0: 
-	# Save general regs into parm save area 
-	std	%r3, 0(%r12) 
-	std	%r4, 8(%r12) 
-	std	%r5, 16(%r12) 
-	std	%r6, 24(%r12) 
-	std	%r7, 32(%r12) 
-	std	%r8, 40(%r12) 
-	std	%r9, 48(%r12) 
-	std	%r10, 56(%r12) 
- 
-	# load up the pointer to the parm save area 
-	mr	%r7, %r12 
-# else 
-	mflr	%r0 
-	# Save general regs into parm save area 
-	# This is the parameter save area set up by our caller. 
-	std	%r3, PARMSAVE+0(%r1) 
-	std	%r4, PARMSAVE+8(%r1) 
-	std	%r5, PARMSAVE+16(%r1) 
-	std	%r6, PARMSAVE+24(%r1) 
-	std	%r7, PARMSAVE+32(%r1) 
-	std	%r8, PARMSAVE+40(%r1) 
-	std	%r9, PARMSAVE+48(%r1) 
-	std	%r10, PARMSAVE+56(%r1) 
- 
-	# load up the pointer to the parm save area 
-	addi	%r7, %r1, PARMSAVE 
-# endif 
-	std	%r0, 16(%r1) 
- 
-	# closure->cif 
-	ld	%r3, 8(%r11) 
-	# closure->fun 
-	ld	%r4, 16(%r11) 
-	# user_data 
-	mr	%r5, %r11 
-	b	.Ldoclosure 
- 
-	.cfi_endproc 
-# if _CALL_ELF == 2 
-	.size	ffi_go_closure_linux64,.-ffi_go_closure_linux64 
-# else 
-#  ifdef _CALL_LINUX 
-	.size	ffi_go_closure_linux64,.-.L.ffi_go_closure_linux64 
-#  else 
-	.long	0 
-	.byte	0,12,0,1,128,0,0,0 
-	.size	.ffi_go_closure_linux64,.-.ffi_go_closure_linux64 
-#  endif 
-# endif 
-#endif 
- 
-#if (defined __ELF__ && defined __linux__) || _CALL_ELF == 2 
-	.section	.note.GNU-stack,"",@progbits 
-#endif 
+/* -----------------------------------------------------------------------
+   sysv.h - Copyright (c) 2003 Jakub Jelinek <jakub@redhat.com>
+	    Copyright (c) 2008 Red Hat, Inc.
+
+   PowerPC64 Assembly glue.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+
+	.file	"linux64_closure.S"
+
+#ifdef POWERPC64
+	FFI_HIDDEN (ffi_closure_LINUX64)
+	.globl  ffi_closure_LINUX64
+	.text
+	.cfi_startproc
+# if _CALL_ELF == 2
+ffi_closure_LINUX64:
+	addis	%r2, %r12, .TOC.-ffi_closure_LINUX64@ha	# r2 = r12 + (.TOC. - entry), high part
+	addi	%r2, %r2, .TOC.-ffi_closure_LINUX64@l	# plus low part; assumes r12 holds this entry address - TODO confirm against the ELFv2 ABI
+	.localentry ffi_closure_LINUX64, . - ffi_closure_LINUX64
+# else
+	.section        ".opd","aw"
+	.align  3
+ffi_closure_LINUX64:
+#  ifdef _CALL_LINUX
+	.quad   .L.ffi_closure_LINUX64,.TOC.@tocbase,0	# descriptor placed in .opd: code address, TOC base, 0
+	.type   ffi_closure_LINUX64,@function
+	.text
+.L.ffi_closure_LINUX64:
+#  else
+	FFI_HIDDEN (.ffi_closure_LINUX64)
+	.globl  .ffi_closure_LINUX64
+	.quad   .ffi_closure_LINUX64,.TOC.@tocbase,0	# descriptor placed in .opd: code address, TOC base, 0
+	.size   ffi_closure_LINUX64,24
+	.type   .ffi_closure_LINUX64,@function
+	.text
+.ffi_closure_LINUX64:
+#  endif
+# endif
+
+# if _CALL_ELF == 2
+#  ifdef __VEC__
+#   32 byte special reg save area + 64 byte parm save area
+#   + 128 byte retval area + 13*8 fpr save area + 12*16 vec save area + round to 16
+#   define STACKFRAME 528
+#  else
+#   32 byte special reg save area + 64 byte parm save area
+#   + 64 byte retval area + 13*8 fpr save area + round to 16
+#   define STACKFRAME 272
+#  endif
+#  define PARMSAVE 32
+#  define RETVAL PARMSAVE+64
+# else
+#  48 bytes special reg save area + 64 bytes parm save area
+#  + 16 bytes retval area + 13*8 bytes fpr save area + round to 16
+#  define STACKFRAME 240
+#  define PARMSAVE 48
+#  define RETVAL PARMSAVE+64
+# endif
+
+# if _CALL_ELF == 2
+	ld	%r12, FFI_TRAMPOLINE_SIZE(%r11)		# closure->cif
+	mflr	%r0	# r0 = return address, stored to 16(r1) after this conditional section
+	lwz	%r12, 28(%r12)				# cif->flags
+	mtcrf	0x40, %r12	# load CR field 1 (mask 0x40, CR bits 4-7) from the flags word
+	addi	%r12, %r1, PARMSAVE	# default choice: the parm save area at PARMSAVE above the incoming r1
+	bt	7, 0f	# CR bit 7 set: keep the default and skip the fallback
+	# Our caller has not allocated a parameter save area.
+	# We need to allocate one here and use it to pass gprs to
+	# ffi_closure_helper_LINUX64.
+	addi	%r12, %r1, -STACKFRAME+PARMSAVE	# fallback: same offset inside the frame that stdu creates at .Ldoclosure (below the current r1)
+0:
+	# Save general regs r3-r10 into the parm save area selected in r12
+	std	%r3, 0(%r12)
+	std	%r4, 8(%r12)
+	std	%r5, 16(%r12)
+	std	%r6, 24(%r12)
+	std	%r7, 32(%r12)
+	std	%r8, 40(%r12)
+	std	%r9, 48(%r12)
+	std	%r10, 56(%r12)
+
+	# load up the pointer to the parm save area
+	mr	%r7, %r12
+# else
+	# copy r2 to r11 and load TOC into r2
+	mr	%r11, %r2	# r11 = incoming r2; the closure fields are read through r11 below
+	ld	%r2, 16(%r2)	# new r2 comes from offset 16 of the block the old r2 pointed at
+
+	mflr	%r0	# r0 = return address, stored to 16(r1) after this conditional section
+	# Save general regs into parm save area
+	# This is the parameter save area set up by our caller.
+	std	%r3, PARMSAVE+0(%r1)
+	std	%r4, PARMSAVE+8(%r1)
+	std	%r5, PARMSAVE+16(%r1)
+	std	%r6, PARMSAVE+24(%r1)
+	std	%r7, PARMSAVE+32(%r1)
+	std	%r8, PARMSAVE+40(%r1)
+	std	%r9, PARMSAVE+48(%r1)
+	std	%r10, PARMSAVE+56(%r1)
+
+	# load up the pointer to the parm save area
+	addi	%r7, %r1, PARMSAVE
+# endif
+	std	%r0, 16(%r1)	# save the return address; reloaded from STACKFRAME+16(r1) at .Lret
+
+	# closure->cif
+	ld	%r3, FFI_TRAMPOLINE_SIZE(%r11)
+	# closure->fun
+	ld	%r4, FFI_TRAMPOLINE_SIZE+8(%r11)
+	# closure->user_data
+	ld	%r5, FFI_TRAMPOLINE_SIZE+16(%r11)
+
+.Ldoclosure:	# shared body: ffi_go_closure_linux64 also branches here with r3, r4, r5, r7 and 16(r1) already set
+	# next save fpr 1 to fpr 13 in the 13*8 = 104 bytes just below r1; after the stdu further down they sit at the top of the new frame
+	stfd	%f1, -104+(0*8)(%r1)
+	stfd	%f2, -104+(1*8)(%r1)
+	stfd	%f3, -104+(2*8)(%r1)
+	stfd	%f4, -104+(3*8)(%r1)
+	stfd	%f5, -104+(4*8)(%r1)
+	stfd	%f6, -104+(5*8)(%r1)
+	stfd	%f7, -104+(6*8)(%r1)
+	stfd	%f8, -104+(7*8)(%r1)
+	stfd	%f9, -104+(8*8)(%r1)
+	stfd	%f10, -104+(9*8)(%r1)
+	stfd	%f11, -104+(10*8)(%r1)
+	stfd	%f12, -104+(11*8)(%r1)
+	stfd	%f13, -104+(12*8)(%r1)
+
+	# load up the pointer to the saved fpr registers
+	addi	%r8, %r1, -104
+
+# ifdef __VEC__
+	# load up the pointer to the saved vector registers
+	# 8 bytes padding for 16-byte alignment at -112(%r1)
+	addi	%r9, %r8, -24	# r9 = r1 - 128; v13 goes here, each lower-numbered register 16 bytes further down
+	stvx	%v13, 0, %r9
+	addi	%r9, %r9, -16
+	stvx	%v12, 0, %r9
+	addi	%r9, %r9, -16
+	stvx	%v11, 0, %r9
+	addi	%r9, %r9, -16
+	stvx	%v10, 0, %r9
+	addi	%r9, %r9, -16
+	stvx	%v9, 0, %r9
+	addi	%r9, %r9, -16
+	stvx	%v8, 0, %r9
+	addi	%r9, %r9, -16
+	stvx	%v7, 0, %r9
+	addi	%r9, %r9, -16
+	stvx	%v6, 0, %r9
+	addi	%r9, %r9, -16
+	stvx	%v5, 0, %r9
+	addi	%r9, %r9, -16
+	stvx	%v4, 0, %r9
+	addi	%r9, %r9, -16
+	stvx	%v3, 0, %r9
+	addi	%r9, %r9, -16
+	stvx	%v2, 0, %r9	# r9 is left at r1 - 304, the v2 slot and lowest address of the vector save area
+# endif
+
+	# load up the pointer to the result storage
+	addi	%r6, %r1, -STACKFRAME+RETVAL	# same address as RETVAL(r1) once the stdu below has lowered r1
+
+	stdu	%r1, -STACKFRAME(%r1)	# allocate the frame: old r1 is stored at the new 0(r1)
+	.cfi_def_cfa_offset STACKFRAME
+	.cfi_offset 65, 16	# DWARF register 65 (presumably LR) is at CFA+16, matching the earlier std %r0, 16(%r1)
+
+	# make the call; registers set up so far: r3 cif, r4 fun, r5 user_data, r6 result storage, r7 parm save area, r8 saved fprs, r9 saved vector regs (vector builds only)
+# if defined _CALL_LINUX || _CALL_ELF == 2
+	bl ffi_closure_helper_LINUX64
+# else
+	bl .ffi_closure_helper_LINUX64
+# endif
+.Lret:
+
+	# now r3 contains the return type
+	# so use it to look up in a table
+	# so we know how to deal with each type
+
+	# look up the proper starting point in table
+	# by using return type as offset
+	ld %r0, STACKFRAME+16(%r1)	# reload the return address saved at 16(r1) before the frame was allocated
+	cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT
+	bge .Lsmall	# codes >= FFI_V2_TYPE_SMALL_STRUCT are handled outside the table
+	mflr %r4		# move address of .Lret to r4 (LR still holds the return address of the bl above)
+	sldi %r3, %r3, 4	# now multiply return type by 16
+	addi %r4, %r4, .Lret_type0 - .Lret	# r4 = address of .Lret_type0
+	add %r3, %r3, %r4	# r3 = table base + 16 * return type
+	mtctr %r3
+	bctr			# jump to it
+
+# Each of the ret_typeX code fragments has to be exactly 16 bytes long
+# (4 instructions), because .Lret jumps to .Lret_type0 + 16 * return type.
+# For cache effectiveness we align to a 16 byte boundary first.
+	.align 4
+
+.Lret_type0:
+# case FFI_TYPE_VOID
+	mtlr %r0
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+	nop	# pad this slot to 4 instructions
+# case FFI_TYPE_INT
+# ifdef __LITTLE_ENDIAN__
+	lwa %r3, RETVAL+0(%r1)
+# else
+	lwa %r3, RETVAL+4(%r1)	# big-endian: the word is read from the last 4 bytes of the first 8 bytes of the result area
+# endif
+	mtlr %r0
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+# case FFI_TYPE_FLOAT
+	lfs %f1, RETVAL+0(%r1)
+	mtlr %r0
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+# case FFI_TYPE_DOUBLE
+	lfd %f1, RETVAL+0(%r1)
+	mtlr %r0
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+# case FFI_TYPE_LONGDOUBLE
+	lfd %f1, RETVAL+0(%r1)
+	mtlr %r0
+	lfd %f2, RETVAL+8(%r1)
+	b .Lfinish	# three instructions already used, so the frame pop and blr are borrowed from .Lfinish
+# case FFI_TYPE_UINT8
+# ifdef __LITTLE_ENDIAN__
+	lbz %r3, RETVAL+0(%r1)
+# else
+	lbz %r3, RETVAL+7(%r1)	# big-endian: the byte is read from the last byte of the first 8 bytes of the result area
+# endif
+	mtlr %r0
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+# case FFI_TYPE_SINT8
+# ifdef __LITTLE_ENDIAN__
+	lbz %r3, RETVAL+0(%r1)
+# else
+	lbz %r3, RETVAL+7(%r1)
+# endif
+	extsb %r3,%r3	# sign-extend the byte that lbz zero-extended
+	mtlr %r0
+	b .Lfinish
+# case FFI_TYPE_UINT16
+# ifdef __LITTLE_ENDIAN__
+	lhz %r3, RETVAL+0(%r1)
+# else
+	lhz %r3, RETVAL+6(%r1)
+# endif
+	mtlr %r0
+.Lfinish:	# shared frame pop and return, living inside the UINT16 slot; every branch to it has already done mtlr
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+# case FFI_TYPE_SINT16
+# ifdef __LITTLE_ENDIAN__
+	lha %r3, RETVAL+0(%r1)
+# else
+	lha %r3, RETVAL+6(%r1)
+# endif
+	mtlr %r0
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+# case FFI_TYPE_UINT32
+# ifdef __LITTLE_ENDIAN__
+	lwz %r3, RETVAL+0(%r1)
+# else
+	lwz %r3, RETVAL+4(%r1)
+# endif
+	mtlr %r0
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+# case FFI_TYPE_SINT32
+# ifdef __LITTLE_ENDIAN__
+	lwa %r3, RETVAL+0(%r1)
+# else
+	lwa %r3, RETVAL+4(%r1)
+# endif
+	mtlr %r0
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+# case FFI_TYPE_UINT64
+	ld %r3, RETVAL+0(%r1)
+	mtlr %r0
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+# case FFI_TYPE_SINT64
+	ld %r3, RETVAL+0(%r1)
+	mtlr %r0
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+# case FFI_TYPE_STRUCT (this slot loads nothing from the result area into registers)
+	mtlr %r0
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+	nop	# pad this slot to 4 instructions
+# case FFI_TYPE_POINTER
+	ld %r3, RETVAL+0(%r1)
+	mtlr %r0
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+# case FFI_V2_TYPE_VECTOR
+	addi %r3, %r1, RETVAL	# lvx has no displacement form here, so the address is built in r3 first
+	lvx %v2, 0, %r3
+	mtlr %r0
+	b .Lfinish
+# case FFI_V2_TYPE_VECTOR_HOMOG
+	addi %r3, %r1, RETVAL
+	lvx %v2, 0, %r3
+	addi %r3, %r3, 16
+	b .Lmorevector	# v3-v9 are loaded out of line at .Lmorevector
+# case FFI_V2_TYPE_FLOAT_HOMOG
+	lfs %f1, RETVAL+0(%r1)
+	lfs %f2, RETVAL+4(%r1)
+	lfs %f3, RETVAL+8(%r1)
+	b .Lmorefloat	# f4-f8 are loaded out of line at .Lmorefloat
+# case FFI_V2_TYPE_DOUBLE_HOMOG (longer than 4 instructions; presumably the last table slot - TODO confirm against ffi.h)
+	lfd %f1, RETVAL+0(%r1)
+	lfd %f2, RETVAL+8(%r1)
+	lfd %f3, RETVAL+16(%r1)
+	lfd %f4, RETVAL+24(%r1)
+	mtlr %r0
+	lfd %f5, RETVAL+32(%r1)
+	lfd %f6, RETVAL+40(%r1)
+	lfd %f7, RETVAL+48(%r1)
+	lfd %f8, RETVAL+56(%r1)
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+.Lmorevector:	# continuation of the FFI_V2_TYPE_VECTOR_HOMOG slot: v2 is loaded and r3 = result area + 16
+	lvx %v3, 0, %r3
+	addi %r3, %r3, 16
+	lvx %v4, 0, %r3
+	addi %r3, %r3, 16
+	lvx %v5, 0, %r3
+	mtlr %r0	# restore the return address (r0 was reloaded at .Lret)
+	addi %r3, %r3, 16
+	lvx %v6, 0, %r3
+	addi %r3, %r3, 16
+	lvx %v7, 0, %r3
+	addi %r3, %r3, 16
+	lvx %v8, 0, %r3
+	addi %r3, %r3, 16
+	lvx %v9, 0, %r3	# eighth and last 16-byte element, at result area + 112
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+.Lmorefloat:	# continuation of the FFI_V2_TYPE_FLOAT_HOMOG slot: f1-f3 are already loaded
+	lfs %f4, RETVAL+12(%r1)
+	mtlr %r0	# restore the return address (r0 was reloaded at .Lret)
+	lfs %f5, RETVAL+16(%r1)
+	lfs %f6, RETVAL+20(%r1)
+	lfs %f7, RETVAL+24(%r1)
+	lfs %f8, RETVAL+28(%r1)	# eighth and last 4-byte float
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+.Lsmall:	# reached from .Lret when the return type code is >= FFI_V2_TYPE_SMALL_STRUCT
+# ifdef __LITTLE_ENDIAN__
+	ld %r3,RETVAL+0(%r1)	# little-endian: the first 16 bytes of the result area go to r3 and r4 unchanged
+	mtlr %r0
+	ld %r4,RETVAL+8(%r1)
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+# else
+	# A struct smaller than a dword is returned in the low bits of r3
+	# ie. right justified.  Larger structs are passed left justified
+	# in r3 and r4.  The return value area on the stack will have
+	# the structs as they are usually stored in memory.
+	cmpldi %r3, FFI_V2_TYPE_SMALL_STRUCT + 7 # size 8 bytes?
+	neg %r5, %r3	# r5 = -type, turned into a shift count at .Lsmalldown; done before r3 is overwritten
+	ld %r3,RETVAL+0(%r1)
+	blt .Lsmalldown	# uses the cmpldi result: type below FFI_V2_TYPE_SMALL_STRUCT + 7 needs the right shift
+	mtlr %r0
+	ld %r4,RETVAL+8(%r1)
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset STACKFRAME
+.Lsmalldown:
+	addi %r5, %r5, FFI_V2_TYPE_SMALL_STRUCT + 7	# r5 = FFI_V2_TYPE_SMALL_STRUCT + 7 - type
+	mtlr %r0
+	sldi %r5, %r5, 3	# times 8: presumably converts missing bytes to a bit count - TODO confirm type encoding in ffi.h
+	addi %r1, %r1, STACKFRAME
+	.cfi_def_cfa_offset 0
+	srd %r3, %r3, %r5	# shift the loaded dword right so the struct ends up in the low bits of r3
+	blr
+# endif
+
+	.cfi_endproc
+# if _CALL_ELF == 2
+	.size	ffi_closure_LINUX64,.-ffi_closure_LINUX64
+# else
+#  ifdef _CALL_LINUX
+	.size	ffi_closure_LINUX64,.-.L.ffi_closure_LINUX64
+#  else
+	.long	0
+	.byte	0,12,0,1,128,0,0,0
+	.size	.ffi_closure_LINUX64,.-.ffi_closure_LINUX64
+#  endif
+# endif
+
+
+	FFI_HIDDEN (ffi_go_closure_linux64)
+	.globl  ffi_go_closure_linux64
+	.text
+	.cfi_startproc
+# if _CALL_ELF == 2
+ffi_go_closure_linux64:
+	addis	%r2, %r12, .TOC.-ffi_go_closure_linux64@ha	# r2 = r12 + (.TOC. - entry), high part
+	addi	%r2, %r2, .TOC.-ffi_go_closure_linux64@l	# plus low part; assumes r12 holds this entry address - TODO confirm against the ELFv2 ABI
+	.localentry ffi_go_closure_linux64, . - ffi_go_closure_linux64
+# else
+	.section        ".opd","aw"
+	.align  3
+ffi_go_closure_linux64:
+#  ifdef _CALL_LINUX
+	.quad   .L.ffi_go_closure_linux64,.TOC.@tocbase,0	# descriptor placed in .opd: code address, TOC base, 0
+	.type   ffi_go_closure_linux64,@function
+	.text
+.L.ffi_go_closure_linux64:
+#  else
+	FFI_HIDDEN (.ffi_go_closure_linux64)
+	.globl  .ffi_go_closure_linux64
+	.quad   .ffi_go_closure_linux64,.TOC.@tocbase,0	# descriptor placed in .opd: code address, TOC base, 0
+	.size   ffi_go_closure_linux64,24
+	.type   .ffi_go_closure_linux64,@function
+	.text
+.ffi_go_closure_linux64:
+#  endif
+# endif
+
+# if _CALL_ELF == 2
+	ld	%r12, 8(%r11)				# closure->cif
+	mflr	%r0	# r0 = return address, stored to 16(r1) after this conditional section
+	lwz	%r12, 28(%r12)				# cif->flags
+	mtcrf	0x40, %r12	# load CR field 1 (mask 0x40, CR bits 4-7) from the flags word
+	addi	%r12, %r1, PARMSAVE	# default choice: the parm save area at PARMSAVE above the incoming r1
+	bt	7, 0f	# CR bit 7 set: keep the default and skip the fallback
+	# Our caller has not allocated a parameter save area.
+	# We need to allocate one here and use it to pass gprs to
+	# ffi_closure_helper_LINUX64.
+	addi	%r12, %r1, -STACKFRAME+PARMSAVE	# fallback: same offset inside the frame that stdu creates at .Ldoclosure (below the current r1)
+0:
+	# Save general regs r3-r10 into the parm save area selected in r12
+	std	%r3, 0(%r12)
+	std	%r4, 8(%r12)
+	std	%r5, 16(%r12)
+	std	%r6, 24(%r12)
+	std	%r7, 32(%r12)
+	std	%r8, 40(%r12)
+	std	%r9, 48(%r12)
+	std	%r10, 56(%r12)
+
+	# load up the pointer to the parm save area
+	mr	%r7, %r12
+# else
+	mflr	%r0	# this path leaves r2 alone and uses r11 exactly as passed in
+	# Save general regs into parm save area
+	# This is the parameter save area set up by our caller.
+	std	%r3, PARMSAVE+0(%r1)
+	std	%r4, PARMSAVE+8(%r1)
+	std	%r5, PARMSAVE+16(%r1)
+	std	%r6, PARMSAVE+24(%r1)
+	std	%r7, PARMSAVE+32(%r1)
+	std	%r8, PARMSAVE+40(%r1)
+	std	%r9, PARMSAVE+48(%r1)
+	std	%r10, PARMSAVE+56(%r1)
+
+	# load up the pointer to the parm save area
+	addi	%r7, %r1, PARMSAVE
+# endif
+	std	%r0, 16(%r1)	# save the return address; reloaded from STACKFRAME+16(r1) at .Lret
+
+	# closure->cif
+	ld	%r3, 8(%r11)
+	# closure->fun
+	ld	%r4, 16(%r11)
+	# user_data is the closure pointer itself (r11)
+	mr	%r5, %r11
+	b	.Ldoclosure	# continue in the shared body inside ffi_closure_LINUX64 (fpr save, frame setup, helper call, return dispatch)
+
+	.cfi_endproc
+# if _CALL_ELF == 2
+	.size	ffi_go_closure_linux64,.-ffi_go_closure_linux64
+# else
+#  ifdef _CALL_LINUX
+	.size	ffi_go_closure_linux64,.-.L.ffi_go_closure_linux64
+#  else
+	.long	0
+	.byte	0,12,0,1,128,0,0,0
+	.size	.ffi_go_closure_linux64,.-.ffi_go_closure_linux64
+#  endif
+# endif
+#endif
+
+#if (defined __ELF__ && defined __linux__) || _CALL_ELF == 2
+	.section	.note.GNU-stack,"",@progbits
+#endif
diff --git a/contrib/restricted/libffi/src/powerpc/ppc_closure.S b/contrib/restricted/libffi/src/powerpc/ppc_closure.S
index a88b732281..b6d209de86 100644
--- a/contrib/restricted/libffi/src/powerpc/ppc_closure.S
+++ b/contrib/restricted/libffi/src/powerpc/ppc_closure.S
@@ -1,397 +1,397 @@
-/* ----------------------------------------------------------------------- 
-   sysv.h - Copyright (c) 2003 Jakub Jelinek <jakub@redhat.com> 
-	    Copyright (c) 2008 Red Hat, Inc. 
- 
-   PowerPC Assembly glue. 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
-   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
-   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
-   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
-   DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
-#define LIBFFI_ASM 
-#include <fficonfig.h> 
-#include <ffi.h> 
-#include <powerpc/asm.h> 
- 
-	.file   "ppc_closure.S" 
- 
-#ifndef POWERPC64 
- 
-FFI_HIDDEN(ffi_closure_SYSV) 
-ENTRY(ffi_closure_SYSV) 
-	.cfi_startproc 
-	stwu %r1,-144(%r1) 
-	.cfi_def_cfa_offset 144 
-	mflr %r0 
-	stw %r0,148(%r1) 
-	.cfi_offset 65, 4 
- 
-# we want to build up an areas for the parameters passed 
-# in registers (both floating point and integer) 
- 
-	# so first save gpr 3 to gpr 10 (aligned to 4) 
-	stw   %r3, 16(%r1) 
-	stw   %r4, 20(%r1) 
-	stw   %r5, 24(%r1) 
- 
-	# set up registers for the routine that does the work 
- 
-	# closure->cif 
-	lwz %r3,FFI_TRAMPOLINE_SIZE(%r11) 
-	# closure->fun 
-	lwz %r4,FFI_TRAMPOLINE_SIZE+4(%r11) 
-	# closure->user_data 
-	lwz %r5,FFI_TRAMPOLINE_SIZE+8(%r11) 
- 
-.Ldoclosure: 
-	stw   %r6, 28(%r1) 
-	stw   %r7, 32(%r1) 
-	stw   %r8, 36(%r1) 
-	stw   %r9, 40(%r1) 
-	stw   %r10,44(%r1) 
- 
-#ifndef __NO_FPRS__ 
-	# next save fpr 1 to fpr 8 (aligned to 8) 
-	stfd  %f1, 48(%r1) 
-	stfd  %f2, 56(%r1) 
-	stfd  %f3, 64(%r1) 
-	stfd  %f4, 72(%r1) 
-	stfd  %f5, 80(%r1) 
-	stfd  %f6, 88(%r1) 
-	stfd  %f7, 96(%r1) 
-	stfd  %f8, 104(%r1) 
-#endif 
- 
-	# pointer to the result storage 
-	addi %r6,%r1,112 
- 
-	# pointer to the saved gpr registers 
-	addi %r7,%r1,16 
- 
-	# pointer to the saved fpr registers 
-	addi %r8,%r1,48 
- 
-	# pointer to the outgoing parameter save area in the previous frame 
-	# i.e. the previous frame pointer + 8 
-	addi %r9,%r1,152 
- 
-	# make the call 
-	bl ffi_closure_helper_SYSV@local 
-.Lret: 
-	# now r3 contains the return type 
-	# so use it to look up in a table 
-	# so we know how to deal with each type 
- 
-	# look up the proper starting point in table 
-	# by using return type as offset 
- 
-	mflr %r4		# move address of .Lret to r4 
-	slwi %r3,%r3,4		# now multiply return type by 16 
-	addi %r4, %r4, .Lret_type0 - .Lret 
-	lwz %r0,148(%r1) 
-	add %r3,%r3,%r4		# add contents of table to table address 
-	mtctr %r3 
-	bctr			# jump to it 
- 
-# Each of the ret_typeX code fragments has to be exactly 16 bytes long 
-# (4 instructions). For cache effectiveness we align to a 16 byte boundary 
-# first. 
-	.align 4 
-# case FFI_TYPE_VOID 
-.Lret_type0: 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
-	nop 
- 
-# case FFI_TYPE_INT 
-	lwz %r3,112+0(%r1) 
-	mtlr %r0 
-.Lfinish: 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
- 
-# case FFI_TYPE_FLOAT 
-#ifndef __NO_FPRS__ 
-	lfs %f1,112+0(%r1) 
-#else 
-	nop 
-#endif 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
- 
-# case FFI_TYPE_DOUBLE 
-#ifndef __NO_FPRS__ 
-	lfd %f1,112+0(%r1) 
-#else 
-	nop 
-#endif 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
- 
-# case FFI_TYPE_LONGDOUBLE 
-#ifndef __NO_FPRS__ 
-	lfd %f1,112+0(%r1) 
-	lfd %f2,112+8(%r1) 
-	mtlr %r0 
-	b .Lfinish 
-#else 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
-	nop 
-#endif 
- 
-# case FFI_TYPE_UINT8 
-#ifdef __LITTLE_ENDIAN__ 
-	lbz %r3,112+0(%r1) 
-#else 
-	lbz %r3,112+3(%r1) 
-#endif 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
- 
-# case FFI_TYPE_SINT8 
-#ifdef __LITTLE_ENDIAN__ 
-	lbz %r3,112+0(%r1) 
-#else 
-	lbz %r3,112+3(%r1) 
-#endif 
-	extsb %r3,%r3 
-	mtlr %r0 
-	b .Lfinish 
- 
-# case FFI_TYPE_UINT16 
-#ifdef __LITTLE_ENDIAN__ 
-	lhz %r3,112+0(%r1) 
-#else 
-	lhz %r3,112+2(%r1) 
-#endif 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
- 
-# case FFI_TYPE_SINT16 
-#ifdef __LITTLE_ENDIAN__ 
-	lha %r3,112+0(%r1) 
-#else 
-	lha %r3,112+2(%r1) 
-#endif 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
- 
-# case FFI_TYPE_UINT32 
-	lwz %r3,112+0(%r1) 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
- 
-# case FFI_TYPE_SINT32 
-	lwz %r3,112+0(%r1) 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
- 
-# case FFI_TYPE_UINT64 
-	lwz %r3,112+0(%r1) 
-	lwz %r4,112+4(%r1) 
-	mtlr %r0 
-	b .Lfinish 
- 
-# case FFI_TYPE_SINT64 
-	lwz %r3,112+0(%r1) 
-	lwz %r4,112+4(%r1) 
-	mtlr %r0 
-	b .Lfinish 
- 
-# case FFI_TYPE_STRUCT 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
-	nop 
- 
-# case FFI_TYPE_POINTER 
-	lwz %r3,112+0(%r1) 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
- 
-# case FFI_TYPE_UINT128 
-	lwz %r3,112+0(%r1) 
-	lwz %r4,112+4(%r1) 
-	lwz %r5,112+8(%r1) 
-	b .Luint128 
- 
-# The return types below are only used when the ABI type is FFI_SYSV. 
-# case FFI_SYSV_TYPE_SMALL_STRUCT + 1. One byte struct. 
-	lbz %r3,112+0(%r1) 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
- 
-# case FFI_SYSV_TYPE_SMALL_STRUCT + 2. Two byte struct. 
-	lhz %r3,112+0(%r1) 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
- 
-# case FFI_SYSV_TYPE_SMALL_STRUCT + 3. Three byte struct. 
-	lwz %r3,112+0(%r1) 
-#ifdef __LITTLE_ENDIAN__ 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
-#else 
-	srwi %r3,%r3,8 
-	mtlr %r0 
-	b .Lfinish 
-#endif 
- 
-# case FFI_SYSV_TYPE_SMALL_STRUCT + 4. Four byte struct. 
-	lwz %r3,112+0(%r1) 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
- 
-# case FFI_SYSV_TYPE_SMALL_STRUCT + 5. Five byte struct. 
-	lwz %r3,112+0(%r1) 
-	lwz %r4,112+4(%r1) 
-#ifdef __LITTLE_ENDIAN__ 
-	mtlr %r0 
-	b .Lfinish 
-#else 
-	li %r5,24 
-	b .Lstruct567 
-#endif 
- 
-# case FFI_SYSV_TYPE_SMALL_STRUCT + 6. Six byte struct. 
-	lwz %r3,112+0(%r1) 
-	lwz %r4,112+4(%r1) 
-#ifdef __LITTLE_ENDIAN__ 
-	mtlr %r0 
-	b .Lfinish 
-#else 
-	li %r5,16 
-	b .Lstruct567 
-#endif 
- 
-# case FFI_SYSV_TYPE_SMALL_STRUCT + 7. Seven byte struct. 
-	lwz %r3,112+0(%r1) 
-	lwz %r4,112+4(%r1) 
-#ifdef __LITTLE_ENDIAN__ 
-	mtlr %r0 
-	b .Lfinish 
-#else 
-	li %r5,8 
-	b .Lstruct567 
-#endif 
- 
-# case FFI_SYSV_TYPE_SMALL_STRUCT + 8. Eight byte struct. 
-	lwz %r3,112+0(%r1) 
-	lwz %r4,112+4(%r1) 
-	mtlr %r0 
-	b .Lfinish 
- 
-#ifndef __LITTLE_ENDIAN__ 
-.Lstruct567: 
-	subfic %r6,%r5,32 
-	srw %r4,%r4,%r5 
-	slw %r6,%r3,%r6 
-	srw %r3,%r3,%r5 
-	or %r4,%r6,%r4 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_def_cfa_offset 144 
-#endif 
- 
-.Luint128: 
-	lwz %r6,112+12(%r1) 
-	mtlr %r0 
-	addi %r1,%r1,144 
-	.cfi_def_cfa_offset 0 
-	blr 
-	.cfi_endproc 
-END(ffi_closure_SYSV) 
- 
- 
-FFI_HIDDEN(ffi_go_closure_sysv) 
-ENTRY(ffi_go_closure_sysv) 
-	.cfi_startproc 
-	stwu %r1,-144(%r1) 
-	.cfi_def_cfa_offset 144 
-	mflr %r0 
-	stw %r0,148(%r1) 
-	.cfi_offset 65, 4 
- 
-	stw   %r3, 16(%r1) 
-	stw   %r4, 20(%r1) 
-	stw   %r5, 24(%r1) 
- 
-	# closure->cif 
-	lwz %r3,4(%r11) 
-	# closure->fun 
-	lwz %r4,8(%r11) 
-	# user_data 
-	mr %r5,%r11 
-	b .Ldoclosure 
-	.cfi_endproc 
-END(ffi_go_closure_sysv) 
- 
-#if defined __ELF__ && defined __linux__ 
-	.section	.note.GNU-stack,"",@progbits 
-#endif 
-#endif 
+/* -----------------------------------------------------------------------
+   sysv.h - Copyright (c) 2003 Jakub Jelinek <jakub@redhat.com>
+	    Copyright (c) 2008 Red Hat, Inc.
+
+   PowerPC Assembly glue.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <powerpc/asm.h>
+
+	.file   "ppc_closure.S"
+
+#ifndef POWERPC64
+
+FFI_HIDDEN(ffi_closure_SYSV)
+ENTRY(ffi_closure_SYSV)
+	.cfi_startproc
+	stwu %r1,-144(%r1)
+	.cfi_def_cfa_offset 144
+	mflr %r0
+	stw %r0,148(%r1)
+	.cfi_offset 65, 4
+
+# we want to build up an areas for the parameters passed
+# in registers (both floating point and integer)
+
+	# so first save gpr 3 to gpr 10 (aligned to 4)
+	stw   %r3, 16(%r1)
+	stw   %r4, 20(%r1)
+	stw   %r5, 24(%r1)
+
+	# set up registers for the routine that does the work
+
+	# closure->cif
+	lwz %r3,FFI_TRAMPOLINE_SIZE(%r11)
+	# closure->fun
+	lwz %r4,FFI_TRAMPOLINE_SIZE+4(%r11)
+	# closure->user_data
+	lwz %r5,FFI_TRAMPOLINE_SIZE+8(%r11)
+
+.Ldoclosure:
+	stw   %r6, 28(%r1)
+	stw   %r7, 32(%r1)
+	stw   %r8, 36(%r1)
+	stw   %r9, 40(%r1)
+	stw   %r10,44(%r1)
+
+#ifndef __NO_FPRS__
+	# next save fpr 1 to fpr 8 (aligned to 8)
+	stfd  %f1, 48(%r1)
+	stfd  %f2, 56(%r1)
+	stfd  %f3, 64(%r1)
+	stfd  %f4, 72(%r1)
+	stfd  %f5, 80(%r1)
+	stfd  %f6, 88(%r1)
+	stfd  %f7, 96(%r1)
+	stfd  %f8, 104(%r1)
+#endif
+
+	# pointer to the result storage
+	addi %r6,%r1,112
+
+	# pointer to the saved gpr registers
+	addi %r7,%r1,16
+
+	# pointer to the saved fpr registers
+	addi %r8,%r1,48
+
+	# pointer to the outgoing parameter save area in the previous frame
+	# i.e. the previous frame pointer + 8
+	addi %r9,%r1,152
+
+	# make the call
+	bl ffi_closure_helper_SYSV@local
+.Lret:
+	# now r3 contains the return type
+	# so use it to look up in a table
+	# so we know how to deal with each type
+
+	# look up the proper starting point in table
+	# by using return type as offset
+
+	mflr %r4		# move address of .Lret to r4
+	slwi %r3,%r3,4		# now multiply return type by 16
+	addi %r4, %r4, .Lret_type0 - .Lret
+	lwz %r0,148(%r1)
+	add %r3,%r3,%r4		# add contents of table to table address
+	mtctr %r3
+	bctr			# jump to it
+
+# Each of the ret_typeX code fragments has to be exactly 16 bytes long
+# (4 instructions). For cache effectiveness we align to a 16 byte boundary
+# first.
+	.align 4
+# case FFI_TYPE_VOID
+.Lret_type0:
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+	nop
+
+# case FFI_TYPE_INT
+	lwz %r3,112+0(%r1)
+	mtlr %r0
+.Lfinish:
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+
+# case FFI_TYPE_FLOAT
+#ifndef __NO_FPRS__
+	lfs %f1,112+0(%r1)
+#else
+	nop
+#endif
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+
+# case FFI_TYPE_DOUBLE
+#ifndef __NO_FPRS__
+	lfd %f1,112+0(%r1)
+#else
+	nop
+#endif
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+
+# case FFI_TYPE_LONGDOUBLE
+#ifndef __NO_FPRS__
+	lfd %f1,112+0(%r1)
+	lfd %f2,112+8(%r1)
+	mtlr %r0
+	b .Lfinish
+#else
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+	nop
+#endif
+
+# case FFI_TYPE_UINT8
+#ifdef __LITTLE_ENDIAN__
+	lbz %r3,112+0(%r1)
+#else
+	lbz %r3,112+3(%r1)
+#endif
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+
+# case FFI_TYPE_SINT8
+#ifdef __LITTLE_ENDIAN__
+	lbz %r3,112+0(%r1)
+#else
+	lbz %r3,112+3(%r1)
+#endif
+	extsb %r3,%r3
+	mtlr %r0
+	b .Lfinish
+
+# case FFI_TYPE_UINT16
+#ifdef __LITTLE_ENDIAN__
+	lhz %r3,112+0(%r1)
+#else
+	lhz %r3,112+2(%r1)
+#endif
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+
+# case FFI_TYPE_SINT16
+#ifdef __LITTLE_ENDIAN__
+	lha %r3,112+0(%r1)
+#else
+	lha %r3,112+2(%r1)
+#endif
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+
+# case FFI_TYPE_UINT32
+	lwz %r3,112+0(%r1)
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+
+# case FFI_TYPE_SINT32
+	lwz %r3,112+0(%r1)
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+
+# case FFI_TYPE_UINT64
+	lwz %r3,112+0(%r1)
+	lwz %r4,112+4(%r1)
+	mtlr %r0
+	b .Lfinish
+
+# case FFI_TYPE_SINT64
+	lwz %r3,112+0(%r1)
+	lwz %r4,112+4(%r1)
+	mtlr %r0
+	b .Lfinish
+
+# case FFI_TYPE_STRUCT
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+	nop
+
+# case FFI_TYPE_POINTER
+	lwz %r3,112+0(%r1)
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+
+# case FFI_TYPE_UINT128
+	lwz %r3,112+0(%r1)
+	lwz %r4,112+4(%r1)
+	lwz %r5,112+8(%r1)
+	b .Luint128
+
+# The return types below are only used when the ABI type is FFI_SYSV.
+# case FFI_SYSV_TYPE_SMALL_STRUCT + 1. One byte struct.
+	lbz %r3,112+0(%r1)
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+
+# case FFI_SYSV_TYPE_SMALL_STRUCT + 2. Two byte struct.
+	lhz %r3,112+0(%r1)
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+
+# case FFI_SYSV_TYPE_SMALL_STRUCT + 3. Three byte struct.
+	lwz %r3,112+0(%r1)
+#ifdef __LITTLE_ENDIAN__
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+#else
+	srwi %r3,%r3,8
+	mtlr %r0
+	b .Lfinish
+#endif
+
+# case FFI_SYSV_TYPE_SMALL_STRUCT + 4. Four byte struct.
+	lwz %r3,112+0(%r1)
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+
+# case FFI_SYSV_TYPE_SMALL_STRUCT + 5. Five byte struct.
+	lwz %r3,112+0(%r1)
+	lwz %r4,112+4(%r1)
+#ifdef __LITTLE_ENDIAN__
+	mtlr %r0
+	b .Lfinish
+#else
+	li %r5,24
+	b .Lstruct567
+#endif
+
+# case FFI_SYSV_TYPE_SMALL_STRUCT + 6. Six byte struct.
+	lwz %r3,112+0(%r1)
+	lwz %r4,112+4(%r1)
+#ifdef __LITTLE_ENDIAN__
+	mtlr %r0
+	b .Lfinish
+#else
+	li %r5,16
+	b .Lstruct567
+#endif
+
+# case FFI_SYSV_TYPE_SMALL_STRUCT + 7. Seven byte struct.
+	lwz %r3,112+0(%r1)
+	lwz %r4,112+4(%r1)
+#ifdef __LITTLE_ENDIAN__
+	mtlr %r0
+	b .Lfinish
+#else
+	li %r5,8
+	b .Lstruct567
+#endif
+
+# case FFI_SYSV_TYPE_SMALL_STRUCT + 8. Eight byte struct.
+	lwz %r3,112+0(%r1)
+	lwz %r4,112+4(%r1)
+	mtlr %r0
+	b .Lfinish
+
+#ifndef __LITTLE_ENDIAN__
+.Lstruct567:
+	subfic %r6,%r5,32
+	srw %r4,%r4,%r5
+	slw %r6,%r3,%r6
+	srw %r3,%r3,%r5
+	or %r4,%r6,%r4
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_def_cfa_offset 144
+#endif
+
+.Luint128:
+	lwz %r6,112+12(%r1)
+	mtlr %r0
+	addi %r1,%r1,144
+	.cfi_def_cfa_offset 0
+	blr
+	.cfi_endproc
+END(ffi_closure_SYSV)
+
+
+FFI_HIDDEN(ffi_go_closure_sysv)
+ENTRY(ffi_go_closure_sysv)
+	.cfi_startproc
+	stwu %r1,-144(%r1)
+	.cfi_def_cfa_offset 144
+	mflr %r0
+	stw %r0,148(%r1)
+	.cfi_offset 65, 4
+
+	stw   %r3, 16(%r1)
+	stw   %r4, 20(%r1)
+	stw   %r5, 24(%r1)
+
+	# closure->cif
+	lwz %r3,4(%r11)
+	# closure->fun
+	lwz %r4,8(%r11)
+	# user_data
+	mr %r5,%r11
+	b .Ldoclosure
+	.cfi_endproc
+END(ffi_go_closure_sysv)
+
+#if defined __ELF__ && defined __linux__
+	.section	.note.GNU-stack,"",@progbits
+#endif
+#endif
diff --git a/contrib/restricted/libffi/src/powerpc/sysv.S b/contrib/restricted/libffi/src/powerpc/sysv.S
index 47e0d509fa..1474ce702b 100644
--- a/contrib/restricted/libffi/src/powerpc/sysv.S
+++ b/contrib/restricted/libffi/src/powerpc/sysv.S
@@ -1,175 +1,175 @@
-/* ----------------------------------------------------------------------- 
-   sysv.S - Copyright (c) 1998 Geoffrey Keating 
-   Copyright (C) 2007 Free Software Foundation, Inc 
- 
-   PowerPC Assembly glue. 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
-   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
-   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
-   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
-   DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-#define LIBFFI_ASM 
-#include <fficonfig.h> 
-#include <ffi.h> 
-#include <powerpc/asm.h> 
- 
-#ifndef POWERPC64 
-FFI_HIDDEN(ffi_call_SYSV) 
-ENTRY(ffi_call_SYSV) 
-	.cfi_startproc 
-	/* Save the old stack pointer as AP.  */ 
-	mr	%r10,%r1 
-	.cfi_def_cfa_register 10 
- 
-	/* Allocate the stack space we need.  */ 
-	stwux	%r1,%r1,%r8 
-	/* Save registers we use.  */ 
-	mflr	%r9 
-	stw	%r28,-16(%r10) 
-	stw	%r29,-12(%r10) 
-	stw	%r30, -8(%r10) 
-	stw	%r31, -4(%r10) 
-	stw	%r9,   4(%r10) 
-	.cfi_offset 65, 4 
-	.cfi_offset 31, -4 
-	.cfi_offset 30, -8 
-	.cfi_offset 29, -12 
-	.cfi_offset 28, -16 
- 
-	/* Save arguments over call...  */ 
-	stw	%r7,   -20(%r10)	/* closure, */ 
-	mr	%r31,%r6		/* flags, */ 
-	mr	%r30,%r5		/* rvalue, */ 
-	mr	%r29,%r4		/* function address, */ 
-	mr	%r28,%r10		/* our AP. */ 
-	.cfi_def_cfa_register 28 
- 
-	/* Call ffi_prep_args_SYSV.  */ 
-	mr	%r4,%r1 
-	bl	ffi_prep_args_SYSV@local 
- 
-	/* Now do the call.  */ 
-	/* Set up cr1 with bits 4-7 of the flags.  */ 
-	mtcrf	0x40,%r31 
-	/* Get the address to call into CTR.  */ 
-	mtctr	%r29 
-	/* Load all those argument registers.  */ 
-	lwz	%r3,-24-(8*4)(%r28) 
-	lwz	%r4,-24-(7*4)(%r28) 
-	lwz	%r5,-24-(6*4)(%r28) 
-	lwz	%r6,-24-(5*4)(%r28) 
-	bf-	5,1f 
-	nop 
-	lwz	%r7,-24-(4*4)(%r28) 
-	lwz	%r8,-24-(3*4)(%r28) 
-	lwz	%r9,-24-(2*4)(%r28) 
-	lwz	%r10,-24-(1*4)(%r28) 
-	nop 
-1: 
- 
-#ifndef __NO_FPRS__ 
-	/* Load all the FP registers.  */ 
-	bf-	6,2f 
-	lfd	%f1,-24-(8*4)-(8*8)(%r28) 
-	lfd	%f2,-24-(8*4)-(7*8)(%r28) 
-	lfd	%f3,-24-(8*4)-(6*8)(%r28) 
-	lfd	%f4,-24-(8*4)-(5*8)(%r28) 
-	nop 
-	lfd	%f5,-24-(8*4)-(4*8)(%r28) 
-	lfd	%f6,-24-(8*4)-(3*8)(%r28) 
-	lfd	%f7,-24-(8*4)-(2*8)(%r28) 
-	lfd	%f8,-24-(8*4)-(1*8)(%r28) 
-#endif 
-2: 
- 
-	/* Make the call.  */ 
-	lwz	%r11, -20(%r28) 
-	bctrl 
- 
-	/* Now, deal with the return value.  */ 
-	mtcrf	0x01,%r31 /* cr7  */ 
-	bt-	31,L(small_struct_return_value) 
-	bt-	30,L(done_return_value) 
-#ifndef __NO_FPRS__ 
-	bt-	29,L(fp_return_value) 
-#endif 
-	stw	%r3,0(%r30) 
-	bf+	28,L(done_return_value) 
-	stw	%r4,4(%r30) 
-	mtcrf	0x02,%r31 /* cr6  */ 
-	bf	27,L(done_return_value) 
-	stw     %r5,8(%r30) 
-	stw	%r6,12(%r30) 
-	/* Fall through...  */ 
- 
-L(done_return_value): 
-	/* Restore the registers we used and return.  */ 
-	lwz	%r9,   4(%r28) 
-	lwz	%r31, -4(%r28) 
-	mtlr	%r9 
-	lwz	%r30, -8(%r28) 
-	lwz	%r29,-12(%r28) 
-	lwz	%r28,-16(%r28) 
-	.cfi_remember_state 
-	/* At this point we don't have a cfa register.  Say all our 
-	   saved regs have been restored.  */ 
-	.cfi_same_value 65 
-	.cfi_same_value 31 
-	.cfi_same_value 30 
-	.cfi_same_value 29 
-	.cfi_same_value 28 
-	/* Hopefully this works..  */ 
-	.cfi_def_cfa_register 1 
-	.cfi_offset 1, 0 
-	lwz	%r1,0(%r1) 
-	.cfi_same_value 1 
-	blr 
- 
-#ifndef __NO_FPRS__ 
-L(fp_return_value): 
-	.cfi_restore_state 
-	bf	28,L(float_return_value) 
-	stfd	%f1,0(%r30) 
-	mtcrf   0x02,%r31 /* cr6  */ 
-	bf	27,L(done_return_value) 
-	stfd	%f2,8(%r30) 
-	b	L(done_return_value) 
-L(float_return_value): 
-	stfs	%f1,0(%r30) 
-	b	L(done_return_value) 
-#endif 
- 
-L(small_struct_return_value): 
-	/* 
-	 * The C code always allocates a properly-aligned 8-byte bounce 
-	 * buffer to make this assembly code very simple.  Just write out 
-	 * r3 and r4 to the buffer to allow the C code to handle the rest. 
-	 */ 
-	stw %r3, 0(%r30) 
-	stw %r4, 4(%r30) 
-	b L(done_return_value) 
-	.cfi_endproc 
- 
-END(ffi_call_SYSV) 
- 
-#if defined __ELF__ && defined __linux__ 
-	.section	.note.GNU-stack,"",@progbits 
-#endif 
-#endif 
+/* -----------------------------------------------------------------------
+   sysv.S - Copyright (c) 1998 Geoffrey Keating
+   Copyright (C) 2007 Free Software Foundation, Inc
+
+   PowerPC Assembly glue.
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <powerpc/asm.h>
+
+#ifndef POWERPC64
+FFI_HIDDEN(ffi_call_SYSV)
+ENTRY(ffi_call_SYSV)
+	.cfi_startproc
+	/* Save the old stack pointer as AP.  */
+	mr	%r10,%r1
+	.cfi_def_cfa_register 10
+
+	/* Allocate the stack space we need.  */
+	stwux	%r1,%r1,%r8
+	/* Save registers we use.  */
+	mflr	%r9
+	stw	%r28,-16(%r10)
+	stw	%r29,-12(%r10)
+	stw	%r30, -8(%r10)
+	stw	%r31, -4(%r10)
+	stw	%r9,   4(%r10)
+	.cfi_offset 65, 4
+	.cfi_offset 31, -4
+	.cfi_offset 30, -8
+	.cfi_offset 29, -12
+	.cfi_offset 28, -16
+
+	/* Save arguments over call...  */
+	stw	%r7,   -20(%r10)	/* closure, */
+	mr	%r31,%r6		/* flags, */
+	mr	%r30,%r5		/* rvalue, */
+	mr	%r29,%r4		/* function address, */
+	mr	%r28,%r10		/* our AP. */
+	.cfi_def_cfa_register 28
+
+	/* Call ffi_prep_args_SYSV.  */
+	mr	%r4,%r1
+	bl	ffi_prep_args_SYSV@local
+
+	/* Now do the call.  */
+	/* Set up cr1 with bits 4-7 of the flags.  */
+	mtcrf	0x40,%r31
+	/* Get the address to call into CTR.  */
+	mtctr	%r29
+	/* Load all those argument registers.  */
+	lwz	%r3,-24-(8*4)(%r28)
+	lwz	%r4,-24-(7*4)(%r28)
+	lwz	%r5,-24-(6*4)(%r28)
+	lwz	%r6,-24-(5*4)(%r28)
+	bf-	5,1f
+	nop
+	lwz	%r7,-24-(4*4)(%r28)
+	lwz	%r8,-24-(3*4)(%r28)
+	lwz	%r9,-24-(2*4)(%r28)
+	lwz	%r10,-24-(1*4)(%r28)
+	nop
+1:
+
+#ifndef __NO_FPRS__
+	/* Load all the FP registers.  */
+	bf-	6,2f
+	lfd	%f1,-24-(8*4)-(8*8)(%r28)
+	lfd	%f2,-24-(8*4)-(7*8)(%r28)
+	lfd	%f3,-24-(8*4)-(6*8)(%r28)
+	lfd	%f4,-24-(8*4)-(5*8)(%r28)
+	nop
+	lfd	%f5,-24-(8*4)-(4*8)(%r28)
+	lfd	%f6,-24-(8*4)-(3*8)(%r28)
+	lfd	%f7,-24-(8*4)-(2*8)(%r28)
+	lfd	%f8,-24-(8*4)-(1*8)(%r28)
+#endif
+2:
+
+	/* Make the call.  */
+	lwz	%r11, -20(%r28)
+	bctrl
+
+	/* Now, deal with the return value.  */
+	mtcrf	0x01,%r31 /* cr7  */
+	bt-	31,L(small_struct_return_value)
+	bt-	30,L(done_return_value)
+#ifndef __NO_FPRS__
+	bt-	29,L(fp_return_value)
+#endif
+	stw	%r3,0(%r30)
+	bf+	28,L(done_return_value)
+	stw	%r4,4(%r30)
+	mtcrf	0x02,%r31 /* cr6  */
+	bf	27,L(done_return_value)
+	stw     %r5,8(%r30)
+	stw	%r6,12(%r30)
+	/* Fall through...  */
+
+L(done_return_value):
+	/* Restore the registers we used and return.  */
+	lwz	%r9,   4(%r28)
+	lwz	%r31, -4(%r28)
+	mtlr	%r9
+	lwz	%r30, -8(%r28)
+	lwz	%r29,-12(%r28)
+	lwz	%r28,-16(%r28)
+	.cfi_remember_state
+	/* At this point we don't have a cfa register.  Say all our
+	   saved regs have been restored.  */
+	.cfi_same_value 65
+	.cfi_same_value 31
+	.cfi_same_value 30
+	.cfi_same_value 29
+	.cfi_same_value 28
+	/* Hopefully this works..  */
+	.cfi_def_cfa_register 1
+	.cfi_offset 1, 0
+	lwz	%r1,0(%r1)
+	.cfi_same_value 1
+	blr
+
+#ifndef __NO_FPRS__
+L(fp_return_value):
+	.cfi_restore_state
+	bf	28,L(float_return_value)
+	stfd	%f1,0(%r30)
+	mtcrf   0x02,%r31 /* cr6  */
+	bf	27,L(done_return_value)
+	stfd	%f2,8(%r30)
+	b	L(done_return_value)
+L(float_return_value):
+	stfs	%f1,0(%r30)
+	b	L(done_return_value)
+#endif
+
+L(small_struct_return_value):
+	/*
+	 * The C code always allocates a properly-aligned 8-byte bounce
+	 * buffer to make this assembly code very simple.  Just write out
+	 * r3 and r4 to the buffer to allow the C code to handle the rest.
+	 */
+	stw %r3, 0(%r30)
+	stw %r4, 4(%r30)
+	b L(done_return_value)
+	.cfi_endproc
+
+END(ffi_call_SYSV)
+
+#if defined __ELF__ && defined __linux__
+	.section	.note.GNU-stack,"",@progbits
+#endif
+#endif
diff --git a/contrib/restricted/libffi/src/prep_cif.c b/contrib/restricted/libffi/src/prep_cif.c
index 0aece251b9..06c6544036 100644
--- a/contrib/restricted/libffi/src/prep_cif.c
+++ b/contrib/restricted/libffi/src/prep_cif.c
@@ -29,12 +29,12 @@
 
 /* Round up to FFI_SIZEOF_ARG. */
 
-#define STACK_ARG_SIZE(x) FFI_ALIGN(x, FFI_SIZEOF_ARG) 
+#define STACK_ARG_SIZE(x) FFI_ALIGN(x, FFI_SIZEOF_ARG)
 
 /* Perform machine independent initialization of aggregate type
    specifications. */
 
-static ffi_status initialize_aggregate(ffi_type *arg, size_t *offsets) 
+static ffi_status initialize_aggregate(ffi_type *arg, size_t *offsets)
 {
   ffi_type **ptr;
 
@@ -52,15 +52,15 @@ static ffi_status initialize_aggregate(ffi_type *arg, size_t *offsets)
   while ((*ptr) != NULL)
     {
       if (UNLIKELY(((*ptr)->size == 0)
-		    && (initialize_aggregate((*ptr), NULL) != FFI_OK))) 
+		    && (initialize_aggregate((*ptr), NULL) != FFI_OK)))
 	return FFI_BAD_TYPEDEF;
 
       /* Perform a sanity check on the argument type */
       FFI_ASSERT_VALID_TYPE(*ptr);
 
-      arg->size = FFI_ALIGN(arg->size, (*ptr)->alignment); 
-      if (offsets) 
-	*offsets++ = arg->size; 
+      arg->size = FFI_ALIGN(arg->size, (*ptr)->alignment);
+      if (offsets)
+	*offsets++ = arg->size;
       arg->size += (*ptr)->size;
 
       arg->alignment = (arg->alignment > (*ptr)->alignment) ?
@@ -76,15 +76,15 @@ static ffi_status initialize_aggregate(ffi_type *arg, size_t *offsets)
      struct A { long a; char b; }; struct B { struct A x; char y; };
      should find y at an offset of 2*sizeof(long) and result in a
      total size of 3*sizeof(long).  */
-  arg->size = FFI_ALIGN (arg->size, arg->alignment); 
-
-  /* On some targets, the ABI defines that structures have an additional 
-     alignment beyond the "natural" one based on their elements.  */ 
-#ifdef FFI_AGGREGATE_ALIGNMENT 
-  if (FFI_AGGREGATE_ALIGNMENT > arg->alignment) 
-    arg->alignment = FFI_AGGREGATE_ALIGNMENT; 
-#endif 
- 
+  arg->size = FFI_ALIGN (arg->size, arg->alignment);
+
+  /* On some targets, the ABI defines that structures have an additional
+     alignment beyond the "natural" one based on their elements.  */
+#ifdef FFI_AGGREGATE_ALIGNMENT
+  if (FFI_AGGREGATE_ALIGNMENT > arg->alignment)
+    arg->alignment = FFI_AGGREGATE_ALIGNMENT;
+#endif
+
   if (arg->size == 0)
     return FFI_BAD_TYPEDEF;
   else
@@ -129,27 +129,27 @@ ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
   cif->rtype = rtype;
 
   cif->flags = 0;
-#ifdef _M_ARM64 
-  cif->is_variadic = isvariadic; 
-#endif 
-#if HAVE_LONG_DOUBLE_VARIANT 
-  ffi_prep_types (abi); 
-#endif 
+#ifdef _M_ARM64
+  cif->is_variadic = isvariadic;
+#endif
+#if HAVE_LONG_DOUBLE_VARIANT
+  ffi_prep_types (abi);
+#endif
 
   /* Initialize the return type if necessary */
-  if ((cif->rtype->size == 0) 
-      && (initialize_aggregate(cif->rtype, NULL) != FFI_OK)) 
+  if ((cif->rtype->size == 0)
+      && (initialize_aggregate(cif->rtype, NULL) != FFI_OK))
     return FFI_BAD_TYPEDEF;
 
-#ifndef FFI_TARGET_HAS_COMPLEX_TYPE 
-  if (rtype->type == FFI_TYPE_COMPLEX) 
-    abort(); 
-#endif 
+#ifndef FFI_TARGET_HAS_COMPLEX_TYPE
+  if (rtype->type == FFI_TYPE_COMPLEX)
+    abort();
+#endif
   /* Perform a sanity check on the return type */
   FFI_ASSERT_VALID_TYPE(cif->rtype);
 
   /* x86, x86-64 and s390 stack space allocation is handled in prep_machdep. */
-#if !defined FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION 
+#if !defined FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION
   /* Make space for the return structure pointer */
   if (cif->rtype->type == FFI_TYPE_STRUCT
 #ifdef TILE
@@ -158,9 +158,9 @@ ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
 #ifdef XTENSA
       && (cif->rtype->size > 16)
 #endif
-#ifdef NIOS2 
-      && (cif->rtype->size > 8) 
-#endif 
+#ifdef NIOS2
+      && (cif->rtype->size > 8)
+#endif
      )
     bytes = STACK_ARG_SIZE(sizeof(void*));
 #endif
@@ -169,23 +169,23 @@ ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
     {
 
       /* Initialize any uninitialized aggregate type definitions */
-      if (((*ptr)->size == 0) 
-	  && (initialize_aggregate((*ptr), NULL) != FFI_OK)) 
+      if (((*ptr)->size == 0)
+	  && (initialize_aggregate((*ptr), NULL) != FFI_OK))
 	return FFI_BAD_TYPEDEF;
 
-#ifndef FFI_TARGET_HAS_COMPLEX_TYPE 
-      if ((*ptr)->type == FFI_TYPE_COMPLEX) 
-	abort(); 
-#endif 
+#ifndef FFI_TARGET_HAS_COMPLEX_TYPE
+      if ((*ptr)->type == FFI_TYPE_COMPLEX)
+	abort();
+#endif
       /* Perform a sanity check on the argument type, do this
 	 check after the initialization.  */
       FFI_ASSERT_VALID_TYPE(*ptr);
 
-#if !defined FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION 
+#if !defined FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION
 	{
 	  /* Add any padding if necessary */
 	  if (((*ptr)->alignment - 1) & bytes)
-	    bytes = (unsigned)FFI_ALIGN(bytes, (*ptr)->alignment); 
+	    bytes = (unsigned)FFI_ALIGN(bytes, (*ptr)->alignment);
 
 #ifdef TILE
 	  if (bytes < 10 * FFI_SIZEOF_ARG &&
@@ -201,7 +201,7 @@ ffi_status FFI_HIDDEN ffi_prep_cif_core(ffi_cif *cif, ffi_abi abi,
 	    bytes = 6*4;
 #endif
 
-	  bytes += (unsigned int)STACK_ARG_SIZE((*ptr)->size); 
+	  bytes += (unsigned int)STACK_ARG_SIZE((*ptr)->size);
 	}
 #endif
     }
@@ -246,18 +246,18 @@ ffi_prep_closure (ffi_closure* closure,
 }
 
 #endif
- 
-ffi_status 
-ffi_get_struct_offsets (ffi_abi abi, ffi_type *struct_type, size_t *offsets) 
-{ 
-  if (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI)) 
-    return FFI_BAD_ABI; 
-  if (struct_type->type != FFI_TYPE_STRUCT) 
-    return FFI_BAD_TYPEDEF; 
- 
-#if HAVE_LONG_DOUBLE_VARIANT 
-  ffi_prep_types (abi); 
-#endif 
- 
-  return initialize_aggregate(struct_type, offsets); 
-} 
+
+ffi_status
+ffi_get_struct_offsets (ffi_abi abi, ffi_type *struct_type, size_t *offsets)
+{
+  if (! (abi > FFI_FIRST_ABI && abi < FFI_LAST_ABI))
+    return FFI_BAD_ABI;
+  if (struct_type->type != FFI_TYPE_STRUCT)
+    return FFI_BAD_TYPEDEF;
+
+#if HAVE_LONG_DOUBLE_VARIANT
+  ffi_prep_types (abi);
+#endif
+
+  return initialize_aggregate(struct_type, offsets);
+}
diff --git a/contrib/restricted/libffi/src/raw_api.c b/contrib/restricted/libffi/src/raw_api.c
index c121c24f50..be156116cb 100644
--- a/contrib/restricted/libffi/src/raw_api.c
+++ b/contrib/restricted/libffi/src/raw_api.c
@@ -43,10 +43,10 @@ ffi_raw_size (ffi_cif *cif)
     {
 #if !FFI_NO_STRUCTS
       if ((*at)->type == FFI_TYPE_STRUCT)
-	result += FFI_ALIGN (sizeof (void*), FFI_SIZEOF_ARG); 
+	result += FFI_ALIGN (sizeof (void*), FFI_SIZEOF_ARG);
       else
 #endif
-	result += FFI_ALIGN ((*at)->size, FFI_SIZEOF_ARG); 
+	result += FFI_ALIGN ((*at)->size, FFI_SIZEOF_ARG);
     }
 
   return result;
@@ -88,17 +88,17 @@ ffi_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args)
 	  break;
 #endif
 
-	case FFI_TYPE_COMPLEX: 
-	  *args = (raw++)->ptr; 
-	  break; 
- 
+	case FFI_TYPE_COMPLEX:
+	  *args = (raw++)->ptr;
+	  break;
+
 	case FFI_TYPE_POINTER:
 	  *args = (void*) &(raw++)->ptr;
 	  break;
 	  
 	default:
 	  *args = raw;
-	  raw += FFI_ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; 
+	  raw += FFI_ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
 	}
     }
 
@@ -116,14 +116,14 @@ ffi_raw_to_ptrarray (ffi_cif *cif, ffi_raw *raw, void **args)
 	}
       else
 #endif
-      if ((*tp)->type == FFI_TYPE_COMPLEX) 
+      if ((*tp)->type == FFI_TYPE_COMPLEX)
+	{
+	  *args = (raw++)->ptr;
+	}
+      else
 	{
-	  *args = (raw++)->ptr; 
-	} 
-      else 
-	{ 
 	  *args = (void*) raw;
-	  raw += FFI_ALIGN ((*tp)->size, sizeof (void*)) / sizeof (void*); 
+	  raw += FFI_ALIGN ((*tp)->size, sizeof (void*)) / sizeof (void*);
 	}
     }
 
@@ -176,17 +176,17 @@ ffi_ptrarray_to_raw (ffi_cif *cif, void **args, ffi_raw *raw)
 	  break;
 #endif
 
-	case FFI_TYPE_COMPLEX: 
-	  (raw++)->ptr = *args; 
-	  break; 
- 
+	case FFI_TYPE_COMPLEX:
+	  (raw++)->ptr = *args;
+	  break;
+
 	case FFI_TYPE_POINTER:
 	  (raw++)->ptr = **(void***) args;
 	  break;
 
 	default:
 	  memcpy ((void*) raw->data, (void*)*args, (*tp)->size);
-	  raw += FFI_ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; 
+	  raw += FFI_ALIGN ((*tp)->size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG;
 	}
     }
 }
diff --git a/contrib/restricted/libffi/src/types.c b/contrib/restricted/libffi/src/types.c
index 341252ee60..9ec27f6cf3 100644
--- a/contrib/restricted/libffi/src/types.c
+++ b/contrib/restricted/libffi/src/types.c
@@ -33,59 +33,59 @@
 
 /* Type definitions */
 
-#define FFI_TYPEDEF(name, type, id, maybe_const)\ 
+#define FFI_TYPEDEF(name, type, id, maybe_const)\
 struct struct_align_##name {			\
   char c;					\
   type x;					\
 };						\
-FFI_EXTERN					\ 
-maybe_const ffi_type ffi_type_##name = {	\ 
+FFI_EXTERN					\
+maybe_const ffi_type ffi_type_##name = {	\
   sizeof(type),					\
   offsetof(struct struct_align_##name, x),	\
   id, NULL					\
 }
 
-#define FFI_COMPLEX_TYPEDEF(name, type, maybe_const)	\ 
-static ffi_type *ffi_elements_complex_##name [2] = {	\ 
-	(ffi_type *)(&ffi_type_##name), NULL		\ 
-};							\ 
-struct struct_align_complex_##name {			\ 
-  char c;						\ 
-  _Complex type x;					\ 
-};							\ 
-FFI_EXTERN						\ 
-maybe_const ffi_type ffi_type_complex_##name = {	\ 
-  sizeof(_Complex type),				\ 
-  offsetof(struct struct_align_complex_##name, x),	\ 
-  FFI_TYPE_COMPLEX,					\ 
-  (ffi_type **)ffi_elements_complex_##name		\ 
-} 
- 
+#define FFI_COMPLEX_TYPEDEF(name, type, maybe_const)	\
+static ffi_type *ffi_elements_complex_##name [2] = {	\
+	(ffi_type *)(&ffi_type_##name), NULL		\
+};							\
+struct struct_align_complex_##name {			\
+  char c;						\
+  _Complex type x;					\
+};							\
+FFI_EXTERN						\
+maybe_const ffi_type ffi_type_complex_##name = {	\
+  sizeof(_Complex type),				\
+  offsetof(struct struct_align_complex_##name, x),	\
+  FFI_TYPE_COMPLEX,					\
+  (ffi_type **)ffi_elements_complex_##name		\
+}
+
 /* Size and alignment are fake here. They must not be 0. */
-FFI_EXTERN const ffi_type ffi_type_void = { 
+FFI_EXTERN const ffi_type ffi_type_void = {
   1, 1, FFI_TYPE_VOID, NULL
 };
 
-FFI_TYPEDEF(uint8, UINT8, FFI_TYPE_UINT8, const); 
-FFI_TYPEDEF(sint8, SINT8, FFI_TYPE_SINT8, const); 
-FFI_TYPEDEF(uint16, UINT16, FFI_TYPE_UINT16, const); 
-FFI_TYPEDEF(sint16, SINT16, FFI_TYPE_SINT16, const); 
-FFI_TYPEDEF(uint32, UINT32, FFI_TYPE_UINT32, const); 
-FFI_TYPEDEF(sint32, SINT32, FFI_TYPE_SINT32, const); 
-FFI_TYPEDEF(uint64, UINT64, FFI_TYPE_UINT64, const); 
-FFI_TYPEDEF(sint64, SINT64, FFI_TYPE_SINT64, const); 
+FFI_TYPEDEF(uint8, UINT8, FFI_TYPE_UINT8, const);
+FFI_TYPEDEF(sint8, SINT8, FFI_TYPE_SINT8, const);
+FFI_TYPEDEF(uint16, UINT16, FFI_TYPE_UINT16, const);
+FFI_TYPEDEF(sint16, SINT16, FFI_TYPE_SINT16, const);
+FFI_TYPEDEF(uint32, UINT32, FFI_TYPE_UINT32, const);
+FFI_TYPEDEF(sint32, SINT32, FFI_TYPE_SINT32, const);
+FFI_TYPEDEF(uint64, UINT64, FFI_TYPE_UINT64, const);
+FFI_TYPEDEF(sint64, SINT64, FFI_TYPE_SINT64, const);
 
-FFI_TYPEDEF(pointer, void*, FFI_TYPE_POINTER, const); 
+FFI_TYPEDEF(pointer, void*, FFI_TYPE_POINTER, const);
 
-FFI_TYPEDEF(float, float, FFI_TYPE_FLOAT, const); 
-FFI_TYPEDEF(double, double, FFI_TYPE_DOUBLE, const); 
+FFI_TYPEDEF(float, float, FFI_TYPE_FLOAT, const);
+FFI_TYPEDEF(double, double, FFI_TYPE_DOUBLE, const);
+
+#if !defined HAVE_LONG_DOUBLE_VARIANT || defined __alpha__
+#define FFI_LDBL_CONST const
+#else
+#define FFI_LDBL_CONST
+#endif
 
-#if !defined HAVE_LONG_DOUBLE_VARIANT || defined __alpha__ 
-#define FFI_LDBL_CONST const 
-#else 
-#define FFI_LDBL_CONST 
-#endif 
- 
 #ifdef __alpha__
 /* Even if we're not configured to default to 128-bit long double, 
    maintain binary compatibility, as -mlong-double-128 can be used
@@ -96,13 +96,13 @@ FFI_TYPEDEF(double, double, FFI_TYPE_DOUBLE, const);
 # endif
 const ffi_type ffi_type_longdouble = { 16, 16, 4, NULL };
 #elif FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
-FFI_TYPEDEF(longdouble, long double, FFI_TYPE_LONGDOUBLE, FFI_LDBL_CONST); 
+FFI_TYPEDEF(longdouble, long double, FFI_TYPE_LONGDOUBLE, FFI_LDBL_CONST);
+#endif
+
+#ifdef FFI_TARGET_HAS_COMPLEX_TYPE
+FFI_COMPLEX_TYPEDEF(float, float, const);
+FFI_COMPLEX_TYPEDEF(double, double, const);
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+FFI_COMPLEX_TYPEDEF(longdouble, long double, FFI_LDBL_CONST);
+#endif
 #endif
- 
-#ifdef FFI_TARGET_HAS_COMPLEX_TYPE 
-FFI_COMPLEX_TYPEDEF(float, float, const); 
-FFI_COMPLEX_TYPEDEF(double, double, const); 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-FFI_COMPLEX_TYPEDEF(longdouble, long double, FFI_LDBL_CONST); 
-#endif 
-#endif 
diff --git a/contrib/restricted/libffi/src/x86/asmnames.h b/contrib/restricted/libffi/src/x86/asmnames.h
index fc5c4cb9e7..7551021e17 100644
--- a/contrib/restricted/libffi/src/x86/asmnames.h
+++ b/contrib/restricted/libffi/src/x86/asmnames.h
@@ -1,30 +1,30 @@
-#ifndef ASMNAMES_H 
-#define ASMNAMES_H 
- 
-#define C2(X, Y)  X ## Y 
-#define C1(X, Y)  C2(X, Y) 
-#ifdef __USER_LABEL_PREFIX__ 
-# define C(X)     C1(__USER_LABEL_PREFIX__, X) 
-#else 
-# define C(X)     X 
-#endif 
- 
-#ifdef __APPLE__ 
-# define L(X)     C1(L, X) 
-#else 
-# define L(X)     C1(.L, X) 
-#endif 
- 
-#if defined(__ELF__) && defined(__PIC__) 
-# define PLT(X)	  X@PLT 
-#else 
-# define PLT(X)	  X 
-#endif 
- 
-#ifdef __ELF__ 
-# define ENDF(X)  .type	X,@function; .size X, . - X 
-#else 
-# define ENDF(X) 
-#endif 
- 
-#endif /* ASMNAMES_H */ 
+#ifndef ASMNAMES_H
+#define ASMNAMES_H
+
+#define C2(X, Y)  X ## Y
+#define C1(X, Y)  C2(X, Y)
+#ifdef __USER_LABEL_PREFIX__
+# define C(X)     C1(__USER_LABEL_PREFIX__, X)
+#else
+# define C(X)     X
+#endif
+
+#ifdef __APPLE__
+# define L(X)     C1(L, X)
+#else
+# define L(X)     C1(.L, X)
+#endif
+
+#if defined(__ELF__) && defined(__PIC__)
+# define PLT(X)	  X@PLT
+#else
+# define PLT(X)	  X
+#endif
+
+#ifdef __ELF__
+# define ENDF(X)  .type	X,@function; .size X, . - X
+#else
+# define ENDF(X)
+#endif
+
+#endif /* ASMNAMES_H */
diff --git a/contrib/restricted/libffi/src/x86/ffi.c b/contrib/restricted/libffi/src/x86/ffi.c
index 2950ba8fca..9a592185a1 100644
--- a/contrib/restricted/libffi/src/x86/ffi.c
+++ b/contrib/restricted/libffi/src/x86/ffi.c
@@ -1,6 +1,6 @@
 /* -----------------------------------------------------------------------
-   ffi.c - Copyright (c) 2017  Anthony Green 
-           Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008  Red Hat, Inc. 
+   ffi.c - Copyright (c) 2017  Anthony Green
+           Copyright (c) 1996, 1998, 1999, 2001, 2007, 2008  Red Hat, Inc.
            Copyright (c) 2002  Ranjit Mathew
            Copyright (c) 2002  Bo Thorsen
            Copyright (c) 2002  Roger Sayle
@@ -29,502 +29,502 @@
    DEALINGS IN THE SOFTWARE.
    ----------------------------------------------------------------------- */
 
-#if defined(__i386__) || defined(_M_IX86) 
+#if defined(__i386__) || defined(_M_IX86)
 #include <ffi.h>
 #include <ffi_common.h>
-#include <stdint.h> 
+#include <stdint.h>
 #include <stdlib.h>
-#include "internal.h" 
-
-/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; 
-   all further uses in this file will refer to the 80-bit type.  */ 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-# if FFI_TYPE_LONGDOUBLE != 4 
-#  error FFI_TYPE_LONGDOUBLE out of date 
-# endif 
-#else 
-# undef FFI_TYPE_LONGDOUBLE 
-# define FFI_TYPE_LONGDOUBLE 4 
+#include "internal.h"
+
+/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE;
+   all further uses in this file will refer to the 80-bit type.  */
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+# if FFI_TYPE_LONGDOUBLE != 4
+#  error FFI_TYPE_LONGDOUBLE out of date
+# endif
+#else
+# undef FFI_TYPE_LONGDOUBLE
+# define FFI_TYPE_LONGDOUBLE 4
 #endif
 
-#if defined(__GNUC__) && !defined(__declspec) 
-# define __declspec(x)  __attribute__((x)) 
+#if defined(__GNUC__) && !defined(__declspec)
+# define __declspec(x)  __attribute__((x))
 #endif
 
-#if defined(_MSC_VER) && defined(_M_IX86) 
-/* Stack is not 16-byte aligned on Windows.  */ 
-#define STACK_ALIGN(bytes) (bytes) 
-#else 
-#define STACK_ALIGN(bytes) FFI_ALIGN (bytes, 16) 
+#if defined(_MSC_VER) && defined(_M_IX86)
+/* Stack is not 16-byte aligned on Windows.  */
+#define STACK_ALIGN(bytes) (bytes)
+#else
+#define STACK_ALIGN(bytes) FFI_ALIGN (bytes, 16)
 #endif
 
-/* Perform machine dependent cif processing.  */ 
-ffi_status FFI_HIDDEN 
-ffi_prep_cif_machdep(ffi_cif *cif) 
-{ 
-  size_t bytes = 0; 
-  int i, n, flags, cabi = cif->abi; 
+/* Perform machine dependent cif processing.  */
+ffi_status FFI_HIDDEN
+ffi_prep_cif_machdep(ffi_cif *cif)
+{
+  size_t bytes = 0;
+  int i, n, flags, cabi = cif->abi;
 
-  switch (cabi) 
+  switch (cabi)
     {
-    case FFI_SYSV: 
-    case FFI_STDCALL: 
-    case FFI_THISCALL: 
-    case FFI_FASTCALL: 
-    case FFI_MS_CDECL: 
-    case FFI_PASCAL: 
-    case FFI_REGISTER: 
-      break; 
-    default: 
-      return FFI_BAD_ABI; 
+    case FFI_SYSV:
+    case FFI_STDCALL:
+    case FFI_THISCALL:
+    case FFI_FASTCALL:
+    case FFI_MS_CDECL:
+    case FFI_PASCAL:
+    case FFI_REGISTER:
+      break;
+    default:
+      return FFI_BAD_ABI;
     }
 
   switch (cif->rtype->type)
     {
     case FFI_TYPE_VOID:
-      flags = X86_RET_VOID; 
-      break; 
-    case FFI_TYPE_FLOAT: 
-      flags = X86_RET_FLOAT; 
-      break; 
-    case FFI_TYPE_DOUBLE: 
-      flags = X86_RET_DOUBLE; 
-      break; 
-    case FFI_TYPE_LONGDOUBLE: 
-      flags = X86_RET_LDOUBLE; 
-      break; 
+      flags = X86_RET_VOID;
+      break;
+    case FFI_TYPE_FLOAT:
+      flags = X86_RET_FLOAT;
+      break;
+    case FFI_TYPE_DOUBLE:
+      flags = X86_RET_DOUBLE;
+      break;
+    case FFI_TYPE_LONGDOUBLE:
+      flags = X86_RET_LDOUBLE;
+      break;
     case FFI_TYPE_UINT8:
-      flags = X86_RET_UINT8; 
-      break; 
+      flags = X86_RET_UINT8;
+      break;
     case FFI_TYPE_UINT16:
-      flags = X86_RET_UINT16; 
-      break; 
+      flags = X86_RET_UINT16;
+      break;
     case FFI_TYPE_SINT8:
-      flags = X86_RET_SINT8; 
-      break; 
+      flags = X86_RET_SINT8;
+      break;
     case FFI_TYPE_SINT16:
-      flags = X86_RET_SINT16; 
-      break; 
-    case FFI_TYPE_INT: 
-    case FFI_TYPE_SINT32: 
+      flags = X86_RET_SINT16;
+      break;
+    case FFI_TYPE_INT:
+    case FFI_TYPE_SINT32:
     case FFI_TYPE_UINT32:
-    case FFI_TYPE_POINTER: 
-      flags = X86_RET_INT32; 
-      break; 
+    case FFI_TYPE_POINTER:
+      flags = X86_RET_INT32;
+      break;
     case FFI_TYPE_SINT64:
     case FFI_TYPE_UINT64:
-      flags = X86_RET_INT64; 
+      flags = X86_RET_INT64;
       break;
     case FFI_TYPE_STRUCT:
 #ifndef X86
-      /* ??? This should be a different ABI rather than an ifdef.  */ 
+      /* ??? This should be a different ABI rather than an ifdef.  */
       if (cif->rtype->size == 1)
-	flags = X86_RET_STRUCT_1B; 
+	flags = X86_RET_STRUCT_1B;
       else if (cif->rtype->size == 2)
-	flags = X86_RET_STRUCT_2B; 
+	flags = X86_RET_STRUCT_2B;
       else if (cif->rtype->size == 4)
-	flags = X86_RET_INT32; 
+	flags = X86_RET_INT32;
       else if (cif->rtype->size == 8)
-	flags = X86_RET_INT64; 
+	flags = X86_RET_INT64;
       else
 #endif
-	{ 
-	do_struct: 
-	  switch (cabi) 
-	    { 
-	    case FFI_THISCALL: 
-	    case FFI_FASTCALL: 
-	    case FFI_STDCALL: 
-	    case FFI_MS_CDECL: 
-	      flags = X86_RET_STRUCTARG; 
-	      break; 
-	    default: 
-	      flags = X86_RET_STRUCTPOP; 
-	      break; 
-	    } 
-	  /* Allocate space for return value pointer.  */ 
-	  bytes += FFI_ALIGN (sizeof(void*), FFI_SIZEOF_ARG); 
-	} 
+	{
+	do_struct:
+	  switch (cabi)
+	    {
+	    case FFI_THISCALL:
+	    case FFI_FASTCALL:
+	    case FFI_STDCALL:
+	    case FFI_MS_CDECL:
+	      flags = X86_RET_STRUCTARG;
+	      break;
+	    default:
+	      flags = X86_RET_STRUCTPOP;
+	      break;
+	    }
+	  /* Allocate space for return value pointer.  */
+	  bytes += FFI_ALIGN (sizeof(void*), FFI_SIZEOF_ARG);
+	}
+      break;
+    case FFI_TYPE_COMPLEX:
+      switch (cif->rtype->elements[0]->type)
+	{
+	case FFI_TYPE_DOUBLE:
+	case FFI_TYPE_LONGDOUBLE:
+	case FFI_TYPE_SINT64:
+	case FFI_TYPE_UINT64:
+	  goto do_struct;
+	case FFI_TYPE_FLOAT:
+	case FFI_TYPE_INT:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT32:
+	  flags = X86_RET_INT64;
+	  break;
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_UINT16:
+	  flags = X86_RET_INT32;
+	  break;
+	case FFI_TYPE_SINT8:
+	case FFI_TYPE_UINT8:
+	  flags = X86_RET_STRUCT_2B;
+	  break;
+	default:
+	  return FFI_BAD_TYPEDEF;
+	}
       break;
-    case FFI_TYPE_COMPLEX: 
-      switch (cif->rtype->elements[0]->type) 
-	{ 
-	case FFI_TYPE_DOUBLE: 
-	case FFI_TYPE_LONGDOUBLE: 
-	case FFI_TYPE_SINT64: 
-	case FFI_TYPE_UINT64: 
-	  goto do_struct; 
-	case FFI_TYPE_FLOAT: 
-	case FFI_TYPE_INT: 
-	case FFI_TYPE_SINT32: 
-	case FFI_TYPE_UINT32: 
-	  flags = X86_RET_INT64; 
-	  break; 
-	case FFI_TYPE_SINT16: 
-	case FFI_TYPE_UINT16: 
-	  flags = X86_RET_INT32; 
-	  break; 
-	case FFI_TYPE_SINT8: 
-	case FFI_TYPE_UINT8: 
-	  flags = X86_RET_STRUCT_2B; 
-	  break; 
-	default: 
-	  return FFI_BAD_TYPEDEF; 
-	} 
-      break; 
     default:
-      return FFI_BAD_TYPEDEF; 
+      return FFI_BAD_TYPEDEF;
+    }
+  cif->flags = flags;
+
+  for (i = 0, n = cif->nargs; i < n; i++)
+    {
+      ffi_type *t = cif->arg_types[i];
+
+      bytes = FFI_ALIGN (bytes, t->alignment);
+      bytes += FFI_ALIGN (t->size, FFI_SIZEOF_ARG);
     }
-  cif->flags = flags; 
+  cif->bytes = bytes;
+
+  return FFI_OK;
+}
 
-  for (i = 0, n = cif->nargs; i < n; i++) 
+static ffi_arg
+extend_basic_type(void *arg, int type)
+{
+  switch (type)
     {
-      ffi_type *t = cif->arg_types[i]; 
- 
-      bytes = FFI_ALIGN (bytes, t->alignment); 
-      bytes += FFI_ALIGN (t->size, FFI_SIZEOF_ARG); 
+    case FFI_TYPE_SINT8:
+      return *(SINT8 *)arg;
+    case FFI_TYPE_UINT8:
+      return *(UINT8 *)arg;
+    case FFI_TYPE_SINT16:
+      return *(SINT16 *)arg;
+    case FFI_TYPE_UINT16:
+      return *(UINT16 *)arg;
+
+    case FFI_TYPE_SINT32:
+    case FFI_TYPE_UINT32:
+    case FFI_TYPE_POINTER:
+    case FFI_TYPE_FLOAT:
+      return *(UINT32 *)arg;
+
+    default:
+      abort();
     }
-  cif->bytes = bytes; 
-
-  return FFI_OK; 
-} 
-
-static ffi_arg 
-extend_basic_type(void *arg, int type) 
-{ 
-  switch (type) 
-    { 
-    case FFI_TYPE_SINT8: 
-      return *(SINT8 *)arg; 
-    case FFI_TYPE_UINT8: 
-      return *(UINT8 *)arg; 
-    case FFI_TYPE_SINT16: 
-      return *(SINT16 *)arg; 
-    case FFI_TYPE_UINT16: 
-      return *(UINT16 *)arg; 
-
-    case FFI_TYPE_SINT32: 
-    case FFI_TYPE_UINT32: 
-    case FFI_TYPE_POINTER: 
-    case FFI_TYPE_FLOAT: 
-      return *(UINT32 *)arg; 
- 
-    default: 
-      abort(); 
-    } 
 }
 
-struct call_frame 
-{ 
-  void *ebp;		/* 0 */ 
-  void *retaddr;	/* 4 */ 
-  void (*fn)(void);	/* 8 */ 
-  int flags;		/* 12 */ 
-  void *rvalue;		/* 16 */ 
-  unsigned regs[3];	/* 20-28 */ 
-}; 
- 
-struct abi_params 
-{ 
-  int dir;		/* parameter growth direction */ 
-  int static_chain;	/* the static chain register used by gcc */ 
-  int nregs;		/* number of register parameters */ 
-  int regs[3]; 
-}; 
- 
-static const struct abi_params abi_params[FFI_LAST_ABI] = { 
-  [FFI_SYSV] = { 1, R_ECX, 0 }, 
-  [FFI_THISCALL] = { 1, R_EAX, 1, { R_ECX } }, 
-  [FFI_FASTCALL] = { 1, R_EAX, 2, { R_ECX, R_EDX } }, 
-  [FFI_STDCALL] = { 1, R_ECX, 0 }, 
-  [FFI_PASCAL] = { -1, R_ECX, 0 }, 
-  /* ??? No defined static chain; gcc does not support REGISTER.  */ 
-  [FFI_REGISTER] = { -1, R_ECX, 3, { R_EAX, R_EDX, R_ECX } }, 
-  [FFI_MS_CDECL] = { 1, R_ECX, 0 } 
-}; 
- 
-#ifdef HAVE_FASTCALL 
-  #ifdef _MSC_VER 
-    #define FFI_DECLARE_FASTCALL __fastcall 
-  #else 
-    #define FFI_DECLARE_FASTCALL __declspec(fastcall) 
-  #endif 
+struct call_frame
+{
+  void *ebp;		/* 0 */
+  void *retaddr;	/* 4 */
+  void (*fn)(void);	/* 8 */
+  int flags;		/* 12 */
+  void *rvalue;		/* 16 */
+  unsigned regs[3];	/* 20-28 */
+};
+
+struct abi_params
+{
+  int dir;		/* parameter growth direction */
+  int static_chain;	/* the static chain register used by gcc */
+  int nregs;		/* number of register parameters */
+  int regs[3];
+};
+
+static const struct abi_params abi_params[FFI_LAST_ABI] = {
+  [FFI_SYSV] = { 1, R_ECX, 0 },
+  [FFI_THISCALL] = { 1, R_EAX, 1, { R_ECX } },
+  [FFI_FASTCALL] = { 1, R_EAX, 2, { R_ECX, R_EDX } },
+  [FFI_STDCALL] = { 1, R_ECX, 0 },
+  [FFI_PASCAL] = { -1, R_ECX, 0 },
+  /* ??? No defined static chain; gcc does not support REGISTER.  */
+  [FFI_REGISTER] = { -1, R_ECX, 3, { R_EAX, R_EDX, R_ECX } },
+  [FFI_MS_CDECL] = { 1, R_ECX, 0 }
+};
+
+#ifdef HAVE_FASTCALL
+  #ifdef _MSC_VER
+    #define FFI_DECLARE_FASTCALL __fastcall
+  #else
+    #define FFI_DECLARE_FASTCALL __declspec(fastcall)
+  #endif
 #else
-  #define FFI_DECLARE_FASTCALL 
+  #define FFI_DECLARE_FASTCALL
 #endif
 
-extern void FFI_DECLARE_FASTCALL ffi_call_i386(struct call_frame *, char *) FFI_HIDDEN; 
- 
-static void 
-ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, 
-	      void **avalue, void *closure) 
+extern void FFI_DECLARE_FASTCALL ffi_call_i386(struct call_frame *, char *) FFI_HIDDEN;
+
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
+	      void **avalue, void *closure)
 {
-  size_t rsize, bytes; 
-  struct call_frame *frame; 
-  char *stack, *argp; 
-  ffi_type **arg_types; 
-  int flags, cabi, i, n, dir, narg_reg; 
-  const struct abi_params *pabi; 
-
-  flags = cif->flags; 
-  cabi = cif->abi; 
-  pabi = &abi_params[cabi]; 
-  dir = pabi->dir; 
-
-  rsize = 0; 
-  if (rvalue == NULL) 
+  size_t rsize, bytes;
+  struct call_frame *frame;
+  char *stack, *argp;
+  ffi_type **arg_types;
+  int flags, cabi, i, n, dir, narg_reg;
+  const struct abi_params *pabi;
+
+  flags = cif->flags;
+  cabi = cif->abi;
+  pabi = &abi_params[cabi];
+  dir = pabi->dir;
+
+  rsize = 0;
+  if (rvalue == NULL)
     {
-      switch (flags) 
-	{ 
-	case X86_RET_FLOAT: 
-	case X86_RET_DOUBLE: 
-	case X86_RET_LDOUBLE: 
-	case X86_RET_STRUCTPOP: 
-	case X86_RET_STRUCTARG: 
-	  /* The float cases need to pop the 387 stack. 
-	     The struct cases need to pass a valid pointer to the callee.  */ 
-	  rsize = cif->rtype->size; 
-	  break; 
-	default: 
-	  /* We can pretend that the callee returns nothing.  */ 
-	  flags = X86_RET_VOID; 
-	  break; 
-	} 
+      switch (flags)
+	{
+	case X86_RET_FLOAT:
+	case X86_RET_DOUBLE:
+	case X86_RET_LDOUBLE:
+	case X86_RET_STRUCTPOP:
+	case X86_RET_STRUCTARG:
+	  /* The float cases need to pop the 387 stack.
+	     The struct cases need to pass a valid pointer to the callee.  */
+	  rsize = cif->rtype->size;
+	  break;
+	default:
+	  /* We can pretend that the callee returns nothing.  */
+	  flags = X86_RET_VOID;
+	  break;
+	}
     }
 
-  bytes = STACK_ALIGN (cif->bytes); 
-  stack = alloca(bytes + sizeof(*frame) + rsize); 
-  argp = (dir < 0 ? stack + bytes : stack); 
-  frame = (struct call_frame *)(stack + bytes); 
-  if (rsize) 
-    rvalue = frame + 1; 
-
-  frame->fn = fn; 
-  frame->flags = flags; 
-  frame->rvalue = rvalue; 
-  frame->regs[pabi->static_chain] = (unsigned)closure; 
-
-  narg_reg = 0; 
-  switch (flags) 
-    { 
-    case X86_RET_STRUCTARG: 
-      /* The pointer is passed as the first argument.  */ 
-      if (pabi->nregs > 0) 
-	{ 
-	  frame->regs[pabi->regs[0]] = (unsigned)rvalue; 
-	  narg_reg = 1; 
-	  break; 
-	} 
-      /* fallthru */ 
-    case X86_RET_STRUCTPOP: 
-      *(void **)argp = rvalue; 
-      argp += sizeof(void *); 
+  bytes = STACK_ALIGN (cif->bytes);
+  stack = alloca(bytes + sizeof(*frame) + rsize);
+  argp = (dir < 0 ? stack + bytes : stack);
+  frame = (struct call_frame *)(stack + bytes);
+  if (rsize)
+    rvalue = frame + 1;
+
+  frame->fn = fn;
+  frame->flags = flags;
+  frame->rvalue = rvalue;
+  frame->regs[pabi->static_chain] = (unsigned)closure;
+
+  narg_reg = 0;
+  switch (flags)
+    {
+    case X86_RET_STRUCTARG:
+      /* The pointer is passed as the first argument.  */
+      if (pabi->nregs > 0)
+	{
+	  frame->regs[pabi->regs[0]] = (unsigned)rvalue;
+	  narg_reg = 1;
+	  break;
+	}
+      /* fallthru */
+    case X86_RET_STRUCTPOP:
+      *(void **)argp = rvalue;
+      argp += sizeof(void *);
       break;
     }
 
-  arg_types = cif->arg_types; 
-  for (i = 0, n = cif->nargs; i < n; i++) 
-    { 
-      ffi_type *ty = arg_types[i]; 
-      void *valp = avalue[i]; 
-      size_t z = ty->size; 
-      int t = ty->type; 
-
-      if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT) 
-        { 
-	  ffi_arg val = extend_basic_type (valp, t); 
-
-	  if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs) 
-	    frame->regs[pabi->regs[narg_reg++]] = val; 
-	  else if (dir < 0) 
-	    { 
-	      argp -= 4; 
-	      *(ffi_arg *)argp = val; 
-	    } 
-	  else 
-	    { 
-	      *(ffi_arg *)argp = val; 
-	      argp += 4; 
-	    } 
-	} 
-      else 
-	{ 
-	  size_t za = FFI_ALIGN (z, FFI_SIZEOF_ARG); 
-	  size_t align = FFI_SIZEOF_ARG; 
-
-	  /* Issue 434: For thiscall and fastcall, if the paramter passed 
-	     as 64-bit integer or struct, all following integer paramters 
-	     will be passed on stack.  */ 
-	  if ((cabi == FFI_THISCALL || cabi == FFI_FASTCALL) 
-	      && (t == FFI_TYPE_SINT64 
-		  || t == FFI_TYPE_UINT64 
-		  || t == FFI_TYPE_STRUCT)) 
-	    narg_reg = 2; 
-
-	  /* Alignment rules for arguments are quite complex.  Vectors and 
-	     structures with 16 byte alignment get it.  Note that long double 
-	     on Darwin does have 16 byte alignment, and does not get this 
-	     alignment if passed directly; a structure with a long double 
-	     inside, however, would get 16 byte alignment.  Since libffi does 
-	     not support vectors, we need non concern ourselves with other 
-	     cases.  */ 
-	  if (t == FFI_TYPE_STRUCT && ty->alignment >= 16) 
-	    align = 16; 
-	     
-	  if (dir < 0) 
-	    { 
-	      /* ??? These reverse argument ABIs are probably too old 
-		 to have cared about alignment.  Someone should check.  */ 
-	      argp -= za; 
-	      memcpy (argp, valp, z); 
-	    } 
-	  else 
-	    { 
-	      argp = (char *)FFI_ALIGN (argp, align); 
-	      memcpy (argp, valp, z); 
-	      argp += za; 
-	    } 
-	} 
-    } 
-  FFI_ASSERT (dir > 0 || argp == stack); 
-
-  ffi_call_i386 (frame, stack); 
-} 
-
-void 
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) 
-{ 
-  ffi_call_int (cif, fn, rvalue, avalue, NULL); 
+  arg_types = cif->arg_types;
+  for (i = 0, n = cif->nargs; i < n; i++)
+    {
+      ffi_type *ty = arg_types[i];
+      void *valp = avalue[i];
+      size_t z = ty->size;
+      int t = ty->type;
+
+      if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT)
+        {
+	  ffi_arg val = extend_basic_type (valp, t);
+
+	  if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs)
+	    frame->regs[pabi->regs[narg_reg++]] = val;
+	  else if (dir < 0)
+	    {
+	      argp -= 4;
+	      *(ffi_arg *)argp = val;
+	    }
+	  else
+	    {
+	      *(ffi_arg *)argp = val;
+	      argp += 4;
+	    }
+	}
+      else
+	{
+	  size_t za = FFI_ALIGN (z, FFI_SIZEOF_ARG);
+	  size_t align = FFI_SIZEOF_ARG;
+
+	  /* Issue 434: For thiscall and fastcall, if a parameter is passed
+	     as a 64-bit integer or struct, all following integer parameters
+	     will be passed on the stack.  */
+	  if ((cabi == FFI_THISCALL || cabi == FFI_FASTCALL)
+	      && (t == FFI_TYPE_SINT64
+		  || t == FFI_TYPE_UINT64
+		  || t == FFI_TYPE_STRUCT))
+	    narg_reg = 2;
+
+	  /* Alignment rules for arguments are quite complex.  Vectors and
+	     structures with 16 byte alignment get it.  Note that long double
+	     on Darwin does have 16 byte alignment, and does not get this
+	     alignment if passed directly; a structure with a long double
+	     inside, however, would get 16 byte alignment.  Since libffi does
+	     not support vectors, we need not concern ourselves with other
+	     cases.  */
+	  if (t == FFI_TYPE_STRUCT && ty->alignment >= 16)
+	    align = 16;
+	    
+	  if (dir < 0)
+	    {
+	      /* ??? These reverse argument ABIs are probably too old
+		 to have cared about alignment.  Someone should check.  */
+	      argp -= za;
+	      memcpy (argp, valp, z);
+	    }
+	  else
+	    {
+	      argp = (char *)FFI_ALIGN (argp, align);
+	      memcpy (argp, valp, z);
+	      argp += za;
+	    }
+	}
+    }
+  FFI_ASSERT (dir > 0 || argp == stack);
+
+  ffi_call_i386 (frame, stack);
+}
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+	     void **avalue, void *closure)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, closure);
 }
 
-void 
-ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, 
-	     void **avalue, void *closure) 
+/** private members **/
+
+void FFI_HIDDEN ffi_closure_i386(void);
+void FFI_HIDDEN ffi_closure_STDCALL(void);
+void FFI_HIDDEN ffi_closure_REGISTER(void);
+
+struct closure_frame
 {
-  ffi_call_int (cif, fn, rvalue, avalue, closure); 
-} 
-
-/** private members **/ 
-
-void FFI_HIDDEN ffi_closure_i386(void); 
-void FFI_HIDDEN ffi_closure_STDCALL(void); 
-void FFI_HIDDEN ffi_closure_REGISTER(void); 
-
-struct closure_frame 
-{ 
-  unsigned rettemp[4];				/* 0 */ 
-  unsigned regs[3];				/* 16-24 */ 
-  ffi_cif *cif;					/* 28 */ 
-  void (*fun)(ffi_cif*,void*,void**,void*);	/* 32 */ 
-  void *user_data;				/* 36 */ 
-}; 
-
-int FFI_HIDDEN FFI_DECLARE_FASTCALL 
-ffi_closure_inner (struct closure_frame *frame, char *stack) 
+  unsigned rettemp[4];				/* 0 */
+  unsigned regs[3];				/* 16-24 */
+  ffi_cif *cif;					/* 28 */
+  void (*fun)(ffi_cif*,void*,void**,void*);	/* 32 */
+  void *user_data;				/* 36 */
+};
+
+int FFI_HIDDEN FFI_DECLARE_FASTCALL
+ffi_closure_inner (struct closure_frame *frame, char *stack)
 {
-  ffi_cif *cif = frame->cif; 
-  int cabi, i, n, flags, dir, narg_reg; 
-  const struct abi_params *pabi; 
-  ffi_type **arg_types; 
+  ffi_cif *cif = frame->cif;
+  int cabi, i, n, flags, dir, narg_reg;
+  const struct abi_params *pabi;
+  ffi_type **arg_types;
   char *argp;
-  void *rvalue; 
-  void **avalue; 
-
-  cabi = cif->abi; 
-  flags = cif->flags; 
-  narg_reg = 0; 
-  rvalue = frame->rettemp; 
-  pabi = &abi_params[cabi]; 
-  dir = pabi->dir; 
-  argp = (dir < 0 ? stack + STACK_ALIGN (cif->bytes) : stack); 
-
-  switch (flags) 
-    { 
-    case X86_RET_STRUCTARG: 
-      if (pabi->nregs > 0) 
-	{ 
-	  rvalue = (void *)frame->regs[pabi->regs[0]]; 
-	  narg_reg = 1; 
-	  frame->rettemp[0] = (unsigned)rvalue; 
-	  break; 
-	} 
-      /* fallthru */ 
-    case X86_RET_STRUCTPOP: 
-      rvalue = *(void **)argp; 
-      argp += sizeof(void *); 
-      frame->rettemp[0] = (unsigned)rvalue; 
-      break; 
-    } 
-
-  n = cif->nargs; 
-  avalue = alloca(sizeof(void *) * n); 
-
-  arg_types = cif->arg_types; 
-  for (i = 0; i < n; ++i) 
+  void *rvalue;
+  void **avalue;
+
+  cabi = cif->abi;
+  flags = cif->flags;
+  narg_reg = 0;
+  rvalue = frame->rettemp;
+  pabi = &abi_params[cabi];
+  dir = pabi->dir;
+  argp = (dir < 0 ? stack + STACK_ALIGN (cif->bytes) : stack);
+
+  switch (flags)
+    {
+    case X86_RET_STRUCTARG:
+      if (pabi->nregs > 0)
+	{
+	  rvalue = (void *)frame->regs[pabi->regs[0]];
+	  narg_reg = 1;
+	  frame->rettemp[0] = (unsigned)rvalue;
+	  break;
+	}
+      /* fallthru */
+    case X86_RET_STRUCTPOP:
+      rvalue = *(void **)argp;
+      argp += sizeof(void *);
+      frame->rettemp[0] = (unsigned)rvalue;
+      break;
+    }
+
+  n = cif->nargs;
+  avalue = alloca(sizeof(void *) * n);
+
+  arg_types = cif->arg_types;
+  for (i = 0; i < n; ++i)
     {
-      ffi_type *ty = arg_types[i]; 
-      size_t z = ty->size; 
-      int t = ty->type; 
-      void *valp; 
-
-      if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT) 
-	{ 
-	  if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs) 
-	    valp = &frame->regs[pabi->regs[narg_reg++]]; 
-	  else if (dir < 0) 
-	    { 
-	      argp -= 4; 
-	      valp = argp; 
-	    } 
-	  else 
-	    { 
-	      valp = argp; 
-	      argp += 4; 
-	    } 
-	} 
+      ffi_type *ty = arg_types[i];
+      size_t z = ty->size;
+      int t = ty->type;
+      void *valp;
+
+      if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT)
+	{
+	  if (t != FFI_TYPE_FLOAT && narg_reg < pabi->nregs)
+	    valp = &frame->regs[pabi->regs[narg_reg++]];
+	  else if (dir < 0)
+	    {
+	      argp -= 4;
+	      valp = argp;
+	    }
+	  else
+	    {
+	      valp = argp;
+	      argp += 4;
+	    }
+	}
       else
-	{ 
-	  size_t za = FFI_ALIGN (z, FFI_SIZEOF_ARG); 
-	  size_t align = FFI_SIZEOF_ARG; 
-
-	  /* See the comment in ffi_call_int.  */ 
-	  if (t == FFI_TYPE_STRUCT && ty->alignment >= 16) 
-	    align = 16; 
-
-	  /* Issue 434: For thiscall and fastcall, if the paramter passed 
-	     as 64-bit integer or struct, all following integer paramters 
-	     will be passed on stack.  */ 
-	  if ((cabi == FFI_THISCALL || cabi == FFI_FASTCALL) 
-	      && (t == FFI_TYPE_SINT64 
-		  || t == FFI_TYPE_UINT64 
-		  || t == FFI_TYPE_STRUCT)) 
-	    narg_reg = 2; 
-
-	  if (dir < 0) 
-	    { 
-	      /* ??? These reverse argument ABIs are probably too old 
-		 to have cared about alignment.  Someone should check.  */ 
-	      argp -= za; 
-	      valp = argp; 
-	    } 
-	  else 
-	    { 
-	      argp = (char *)FFI_ALIGN (argp, align); 
-	      valp = argp; 
-	      argp += za; 
-	    } 
-	} 
-
-      avalue[i] = valp; 
-    } 
-
-  frame->fun (cif, rvalue, avalue, frame->user_data); 
-
-  if (cabi == FFI_STDCALL) 
-    return flags + (cif->bytes << X86_RET_POP_SHIFT); 
-  else 
-    return flags; 
-} 
+	{
+	  size_t za = FFI_ALIGN (z, FFI_SIZEOF_ARG);
+	  size_t align = FFI_SIZEOF_ARG;
+
+	  /* See the comment in ffi_call_int.  */
+	  if (t == FFI_TYPE_STRUCT && ty->alignment >= 16)
+	    align = 16;
+
+	  /* Issue 434: For thiscall and fastcall, if a parameter is passed
+	     as a 64-bit integer or struct, all following integer parameters
+	     will be passed on the stack.  */
+	  if ((cabi == FFI_THISCALL || cabi == FFI_FASTCALL)
+	      && (t == FFI_TYPE_SINT64
+		  || t == FFI_TYPE_UINT64
+		  || t == FFI_TYPE_STRUCT))
+	    narg_reg = 2;
+
+	  if (dir < 0)
+	    {
+	      /* ??? These reverse argument ABIs are probably too old
+		 to have cared about alignment.  Someone should check.  */
+	      argp -= za;
+	      valp = argp;
+	    }
+	  else
+	    {
+	      argp = (char *)FFI_ALIGN (argp, align);
+	      valp = argp;
+	      argp += za;
+	    }
+	}
+
+      avalue[i] = valp;
+    }
+
+  frame->fun (cif, rvalue, avalue, frame->user_data);
+
+  if (cabi == FFI_STDCALL)
+    return flags + (cif->bytes << X86_RET_POP_SHIFT);
+  else
+    return flags;
+}
 
 ffi_status
 ffi_prep_closure_loc (ffi_closure* closure,
@@ -533,78 +533,78 @@ ffi_prep_closure_loc (ffi_closure* closure,
                       void *user_data,
                       void *codeloc)
 {
-  char *tramp = closure->tramp; 
-  void (*dest)(void); 
-  int op = 0xb8;  /* movl imm, %eax */ 
- 
-  switch (cif->abi) 
+  char *tramp = closure->tramp;
+  void (*dest)(void);
+  int op = 0xb8;  /* movl imm, %eax */
+
+  switch (cif->abi)
     {
-    case FFI_SYSV: 
-    case FFI_THISCALL: 
-    case FFI_FASTCALL: 
-    case FFI_MS_CDECL: 
-      dest = ffi_closure_i386; 
-      break; 
-    case FFI_STDCALL: 
-    case FFI_PASCAL: 
-      dest = ffi_closure_STDCALL; 
-      break; 
-    case FFI_REGISTER: 
-      dest = ffi_closure_REGISTER; 
-      op = 0x68;  /* pushl imm */ 
-      break; 
-    default: 
-      return FFI_BAD_ABI; 
+    case FFI_SYSV:
+    case FFI_THISCALL:
+    case FFI_FASTCALL:
+    case FFI_MS_CDECL:
+      dest = ffi_closure_i386;
+      break;
+    case FFI_STDCALL:
+    case FFI_PASCAL:
+      dest = ffi_closure_STDCALL;
+      break;
+    case FFI_REGISTER:
+      dest = ffi_closure_REGISTER;
+      op = 0x68;  /* pushl imm */
+      break;
+    default:
+      return FFI_BAD_ABI;
     }
- 
-  /* movl or pushl immediate.  */ 
-  tramp[0] = op; 
-  *(void **)(tramp + 1) = codeloc; 
- 
-  /* jmp dest */ 
-  tramp[5] = 0xe9; 
-  *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10); 
- 
-  closure->cif = cif; 
-  closure->fun = fun; 
-  closure->user_data = user_data; 
- 
-  return FFI_OK; 
-} 
- 
-void FFI_HIDDEN ffi_go_closure_EAX(void); 
-void FFI_HIDDEN ffi_go_closure_ECX(void); 
-void FFI_HIDDEN ffi_go_closure_STDCALL(void); 
- 
-ffi_status 
-ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, 
-		     void (*fun)(ffi_cif*,void*,void**,void*)) 
-{ 
-  void (*dest)(void); 
- 
-  switch (cif->abi) 
+
+  /* movl or pushl immediate.  */
+  tramp[0] = op;
+  *(void **)(tramp + 1) = codeloc;
+
+  /* jmp dest */
+  tramp[5] = 0xe9;
+  *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10);
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+
+  return FFI_OK;
+}
+
+void FFI_HIDDEN ffi_go_closure_EAX(void);
+void FFI_HIDDEN ffi_go_closure_ECX(void);
+void FFI_HIDDEN ffi_go_closure_STDCALL(void);
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+		     void (*fun)(ffi_cif*,void*,void**,void*))
+{
+  void (*dest)(void);
+
+  switch (cif->abi)
     {
-    case FFI_SYSV: 
-    case FFI_MS_CDECL: 
-      dest = ffi_go_closure_ECX; 
-      break; 
-    case FFI_THISCALL: 
-    case FFI_FASTCALL: 
-      dest = ffi_go_closure_EAX; 
-      break; 
-    case FFI_STDCALL: 
-    case FFI_PASCAL: 
-      dest = ffi_go_closure_STDCALL; 
-      break; 
-    case FFI_REGISTER: 
-    default: 
+    case FFI_SYSV:
+    case FFI_MS_CDECL:
+      dest = ffi_go_closure_ECX;
+      break;
+    case FFI_THISCALL:
+    case FFI_FASTCALL:
+      dest = ffi_go_closure_EAX;
+      break;
+    case FFI_STDCALL:
+    case FFI_PASCAL:
+      dest = ffi_go_closure_STDCALL;
+      break;
+    case FFI_REGISTER:
+    default:
       return FFI_BAD_ABI;
     }
 
-  closure->tramp = dest; 
-  closure->cif = cif; 
-  closure->fun = fun; 
- 
+  closure->tramp = dest;
+  closure->cif = cif;
+  closure->fun = fun;
+
   return FFI_OK;
 }
 
@@ -612,150 +612,150 @@ ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
 
 #if !FFI_NO_RAW_API
 
-void FFI_HIDDEN ffi_closure_raw_SYSV(void); 
-void FFI_HIDDEN ffi_closure_raw_THISCALL(void); 
- 
+void FFI_HIDDEN ffi_closure_raw_SYSV(void);
+void FFI_HIDDEN ffi_closure_raw_THISCALL(void);
+
 ffi_status
-ffi_prep_raw_closure_loc (ffi_raw_closure *closure, 
-                          ffi_cif *cif, 
+ffi_prep_raw_closure_loc (ffi_raw_closure *closure,
+                          ffi_cif *cif,
                           void (*fun)(ffi_cif*,void*,ffi_raw*,void*),
                           void *user_data,
                           void *codeloc)
 {
-  char *tramp = closure->tramp; 
-  void (*dest)(void); 
+  char *tramp = closure->tramp;
+  void (*dest)(void);
   int i;
 
-  /* We currently don't support certain kinds of arguments for raw 
+  /* We currently don't support certain kinds of arguments for raw
      closures.  This should be implemented by a separate assembly
      language routine, since it would require argument processing,
      something we don't do now for performance.  */
-  for (i = cif->nargs-1; i >= 0; i--) 
-    switch (cif->arg_types[i]->type) 
-      { 
-      case FFI_TYPE_STRUCT: 
-      case FFI_TYPE_LONGDOUBLE: 
-	return FFI_BAD_TYPEDEF; 
-      } 
-
-  switch (cif->abi) 
+  for (i = cif->nargs-1; i >= 0; i--)
+    switch (cif->arg_types[i]->type)
+      {
+      case FFI_TYPE_STRUCT:
+      case FFI_TYPE_LONGDOUBLE:
+	return FFI_BAD_TYPEDEF;
+      }
+
+  switch (cif->abi)
     {
-    case FFI_THISCALL: 
-      dest = ffi_closure_raw_THISCALL; 
-      break; 
-    case FFI_SYSV: 
-      dest = ffi_closure_raw_SYSV; 
-      break; 
-    default: 
-      return FFI_BAD_ABI; 
+    case FFI_THISCALL:
+      dest = ffi_closure_raw_THISCALL;
+      break;
+    case FFI_SYSV:
+      dest = ffi_closure_raw_SYSV;
+      break;
+    default:
+      return FFI_BAD_ABI;
     }
- 
-  /* movl imm, %eax.  */ 
-  tramp[0] = 0xb8; 
-  *(void **)(tramp + 1) = codeloc; 
- 
-  /* jmp dest */ 
-  tramp[5] = 0xe9; 
-  *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10); 
- 
-  closure->cif = cif; 
-  closure->fun = fun; 
+
+  /* movl imm, %eax.  */
+  tramp[0] = 0xb8;
+  *(void **)(tramp + 1) = codeloc;
+
+  /* jmp dest */
+  tramp[5] = 0xe9;
+  *(unsigned *)(tramp + 6) = (unsigned)dest - ((unsigned)codeloc + 10);
+
+  closure->cif = cif;
+  closure->fun = fun;
   closure->user_data = user_data;
 
   return FFI_OK;
 }
 
 void
-ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue) 
+ffi_raw_call(ffi_cif *cif, void (*fn)(void), void *rvalue, ffi_raw *avalue)
 {
-  size_t rsize, bytes; 
-  struct call_frame *frame; 
-  char *stack, *argp; 
-  ffi_type **arg_types; 
-  int flags, cabi, i, n, narg_reg; 
-  const struct abi_params *pabi; 
-
-  flags = cif->flags; 
-  cabi = cif->abi; 
-  pabi = &abi_params[cabi]; 
-
-  rsize = 0; 
-  if (rvalue == NULL) 
+  size_t rsize, bytes;
+  struct call_frame *frame;
+  char *stack, *argp;
+  ffi_type **arg_types;
+  int flags, cabi, i, n, narg_reg;
+  const struct abi_params *pabi;
+
+  flags = cif->flags;
+  cabi = cif->abi;
+  pabi = &abi_params[cabi];
+
+  rsize = 0;
+  if (rvalue == NULL)
     {
-      switch (flags) 
-	{ 
-	case X86_RET_FLOAT: 
-	case X86_RET_DOUBLE: 
-	case X86_RET_LDOUBLE: 
-	case X86_RET_STRUCTPOP: 
-	case X86_RET_STRUCTARG: 
-	  /* The float cases need to pop the 387 stack. 
-	     The struct cases need to pass a valid pointer to the callee.  */ 
-	  rsize = cif->rtype->size; 
-	  break; 
-	default: 
-	  /* We can pretend that the callee returns nothing.  */ 
-	  flags = X86_RET_VOID; 
-	  break; 
-	} 
+      switch (flags)
+	{
+	case X86_RET_FLOAT:
+	case X86_RET_DOUBLE:
+	case X86_RET_LDOUBLE:
+	case X86_RET_STRUCTPOP:
+	case X86_RET_STRUCTARG:
+	  /* The float cases need to pop the 387 stack.
+	     The struct cases need to pass a valid pointer to the callee.  */
+	  rsize = cif->rtype->size;
+	  break;
+	default:
+	  /* We can pretend that the callee returns nothing.  */
+	  flags = X86_RET_VOID;
+	  break;
+	}
     }
 
-  bytes = STACK_ALIGN (cif->bytes); 
-  argp = stack = 
-      (void *)((uintptr_t)alloca(bytes + sizeof(*frame) + rsize + 15) & ~16); 
-  frame = (struct call_frame *)(stack + bytes); 
-  if (rsize) 
-    rvalue = frame + 1; 
-
-  frame->fn = fn; 
-  frame->flags = flags; 
-  frame->rvalue = rvalue; 
-
-  narg_reg = 0; 
-  switch (flags) 
-    { 
-    case X86_RET_STRUCTARG: 
-      /* The pointer is passed as the first argument.  */ 
-      if (pabi->nregs > 0) 
-	{ 
-	  frame->regs[pabi->regs[0]] = (unsigned)rvalue; 
-	  narg_reg = 1; 
-	  break; 
-	} 
-      /* fallthru */ 
-    case X86_RET_STRUCTPOP: 
-      *(void **)argp = rvalue; 
-      argp += sizeof(void *); 
-      bytes -= sizeof(void *); 
+  bytes = STACK_ALIGN (cif->bytes);
+  argp = stack =
+      (void *)((uintptr_t)alloca(bytes + sizeof(*frame) + rsize + 15) & ~16);
+  frame = (struct call_frame *)(stack + bytes);
+  if (rsize)
+    rvalue = frame + 1;
+
+  frame->fn = fn;
+  frame->flags = flags;
+  frame->rvalue = rvalue;
+
+  narg_reg = 0;
+  switch (flags)
+    {
+    case X86_RET_STRUCTARG:
+      /* The pointer is passed as the first argument.  */
+      if (pabi->nregs > 0)
+	{
+	  frame->regs[pabi->regs[0]] = (unsigned)rvalue;
+	  narg_reg = 1;
+	  break;
+	}
+      /* fallthru */
+    case X86_RET_STRUCTPOP:
+      *(void **)argp = rvalue;
+      argp += sizeof(void *);
+      bytes -= sizeof(void *);
       break;
     }
 
-  arg_types = cif->arg_types; 
-  for (i = 0, n = cif->nargs; narg_reg < pabi->nregs && i < n; i++) 
-    { 
-      ffi_type *ty = arg_types[i]; 
-      size_t z = ty->size; 
-      int t = ty->type; 
-
-      if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT && t != FFI_TYPE_FLOAT) 
-	{ 
-	  ffi_arg val = extend_basic_type (avalue, t); 
-	  frame->regs[pabi->regs[narg_reg++]] = val; 
-	  z = FFI_SIZEOF_ARG; 
-	} 
-      else 
-	{ 
-	  memcpy (argp, avalue, z); 
-	  z = FFI_ALIGN (z, FFI_SIZEOF_ARG); 
-	  argp += z; 
-	} 
-      avalue += z; 
-      bytes -= z; 
-    } 
-  if (i < n) 
-    memcpy (argp, avalue, bytes); 
-
-  ffi_call_i386 (frame, stack); 
-} 
-#endif /* !FFI_NO_RAW_API */ 
-#endif /* __i386__ */ 
+  arg_types = cif->arg_types;
+  for (i = 0, n = cif->nargs; narg_reg < pabi->nregs && i < n; i++)
+    {
+      ffi_type *ty = arg_types[i];
+      size_t z = ty->size;
+      int t = ty->type;
+
+      if (z <= FFI_SIZEOF_ARG && t != FFI_TYPE_STRUCT && t != FFI_TYPE_FLOAT)
+	{
+	  ffi_arg val = extend_basic_type (avalue, t);
+	  frame->regs[pabi->regs[narg_reg++]] = val;
+	  z = FFI_SIZEOF_ARG;
+	}
+      else
+	{
+	  memcpy (argp, avalue, z);
+	  z = FFI_ALIGN (z, FFI_SIZEOF_ARG);
+	  argp += z;
+	}
+      avalue += z;
+      bytes -= z;
+    }
+  if (i < n)
+    memcpy (argp, avalue, bytes);
+
+  ffi_call_i386 (frame, stack);
+}
+#endif /* !FFI_NO_RAW_API */
+#endif /* __i386__ */
diff --git a/contrib/restricted/libffi/src/x86/ffi64.c b/contrib/restricted/libffi/src/x86/ffi64.c
index 2d493d0b0a..dec331c958 100644
--- a/contrib/restricted/libffi/src/x86/ffi64.c
+++ b/contrib/restricted/libffi/src/x86/ffi64.c
@@ -1,11 +1,11 @@
 /* -----------------------------------------------------------------------
-   ffi64.c - Copyright (c) 2011, 2018  Anthony Green 
-             Copyright (c) 2013  The Written Word, Inc. 
+   ffi64.c - Copyright (c) 2011, 2018  Anthony Green
+             Copyright (c) 2013  The Written Word, Inc.
              Copyright (c) 2008, 2010  Red Hat, Inc.
              Copyright (c) 2002, 2007  Bo Thorsen <bo@suse.de>
 
-   x86-64 Foreign Function Interface 
- 
+   x86-64 Foreign Function Interface
+
    Permission is hereby granted, free of charge, to any person obtaining
    a copy of this software and associated documentation files (the
    ``Software''), to deal in the Software without restriction, including
@@ -32,8 +32,8 @@
 
 #include <stdlib.h>
 #include <stdarg.h>
-#include <stdint.h> 
-#include "internal64.h" 
+#include <stdint.h>
+#include "internal64.h"
 
 #ifdef __x86_64__
 
@@ -41,7 +41,7 @@
 #define MAX_SSE_REGS 8
 
 #if defined(__INTEL_COMPILER)
-#include "xmmintrin.h" 
+#include "xmmintrin.h"
 #define UINT128 __m128
 #else
 #if defined(__SUNPRO_C)
@@ -63,13 +63,13 @@ struct register_args
 {
   /* Registers for argument passing.  */
   UINT64 gpr[MAX_GPR_REGS];
-  union big_int_union sse[MAX_SSE_REGS]; 
-  UINT64 rax;	/* ssecount */ 
-  UINT64 r10;	/* static chain */ 
+  union big_int_union sse[MAX_SSE_REGS];
+  UINT64 rax;	/* ssecount */
+  UINT64 r10;	/* static chain */
 };
 
 extern void ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
-			     void *raddr, void (*fnaddr)(void)) FFI_HIDDEN; 
+			     void *raddr, void (*fnaddr)(void)) FFI_HIDDEN;
 
 /* All reference to register classes here is identical to the code in
    gcc/config/i386/i386.c. Do *not* change one without the other.  */
@@ -156,7 +156,7 @@ merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
 
    See the x86-64 PS ABI for details.
 */
-static size_t 
+static size_t
 classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
 		   size_t byte_offset)
 {
@@ -171,9 +171,9 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
     case FFI_TYPE_UINT64:
     case FFI_TYPE_SINT64:
     case FFI_TYPE_POINTER:
-    do_integer: 
+    do_integer:
       {
-	size_t size = byte_offset + type->size; 
+	size_t size = byte_offset + type->size;
 
 	if (size <= 4)
 	  {
@@ -193,7 +193,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
 	  }
 	else if (size <= 16)
 	  {
-	    classes[0] = classes[1] = X86_64_INTEGER_CLASS; 
+	    classes[0] = classes[1] = X86_64_INTEGER_CLASS;
 	    return 2;
 	  }
 	else
@@ -208,18 +208,18 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
     case FFI_TYPE_DOUBLE:
       classes[0] = X86_64_SSEDF_CLASS;
       return 1;
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
     case FFI_TYPE_LONGDOUBLE:
       classes[0] = X86_64_X87_CLASS;
       classes[1] = X86_64_X87UP_CLASS;
       return 2;
-#endif 
+#endif
     case FFI_TYPE_STRUCT:
       {
-	const size_t UNITS_PER_WORD = 8; 
-	size_t words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 
-	ffi_type **ptr; 
-	unsigned int i; 
+	const size_t UNITS_PER_WORD = 8;
+	size_t words = (type->size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
+	ffi_type **ptr;
+	unsigned int i;
 	enum x86_64_reg_class subclasses[MAX_CLASSES];
 
 	/* If the struct is larger than 32 bytes, pass it on the stack.  */
@@ -233,7 +233,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
 	   signalize memory class, so handle it as special case.  */
 	if (!words)
 	  {
-    case FFI_TYPE_VOID: 
+    case FFI_TYPE_VOID:
 	    classes[0] = X86_64_NO_CLASS;
 	    return 1;
 	  }
@@ -241,16 +241,16 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
 	/* Merge the fields of structure.  */
 	for (ptr = type->elements; *ptr != NULL; ptr++)
 	  {
-	    size_t num; 
+	    size_t num;
 
-	    byte_offset = FFI_ALIGN (byte_offset, (*ptr)->alignment); 
+	    byte_offset = FFI_ALIGN (byte_offset, (*ptr)->alignment);
 
 	    num = classify_argument (*ptr, subclasses, byte_offset % 8);
 	    if (num == 0)
 	      return 0;
 	    for (i = 0; i < num; i++)
 	      {
-		size_t pos = byte_offset / 8; 
+		size_t pos = byte_offset / 8;
 		classes[i + pos] =
 		  merge_classes (subclasses[i], classes[i + pos]);
 	      }
@@ -282,7 +282,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
 
 	    /* The X86_64_SSEUP_CLASS should be always preceded by
 	       X86_64_SSE_CLASS or X86_64_SSEUP_CLASS.  */
-	    if (i > 1 && classes[i] == X86_64_SSEUP_CLASS 
+	    if (i > 1 && classes[i] == X86_64_SSEUP_CLASS
 		&& classes[i - 1] != X86_64_SSE_CLASS
 		&& classes[i - 1] != X86_64_SSEUP_CLASS)
 	      {
@@ -293,7 +293,7 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
 
 	    /*  If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
 		everything should be passed in memory.  */
-	    if (i > 1 && classes[i] == X86_64_X87UP_CLASS 
+	    if (i > 1 && classes[i] == X86_64_X87UP_CLASS
 		&& (classes[i - 1] != X86_64_X87_CLASS))
 	      {
 		/* The first one should never be X86_64_X87UP_CLASS.  */
@@ -303,55 +303,55 @@ classify_argument (ffi_type *type, enum x86_64_reg_class classes[],
 	  }
 	return words;
       }
-    case FFI_TYPE_COMPLEX: 
-      { 
-	ffi_type *inner = type->elements[0]; 
-	switch (inner->type) 
-	  { 
-	  case FFI_TYPE_INT: 
-	  case FFI_TYPE_UINT8: 
-	  case FFI_TYPE_SINT8: 
-	  case FFI_TYPE_UINT16: 
-	  case FFI_TYPE_SINT16: 
-	  case FFI_TYPE_UINT32: 
-	  case FFI_TYPE_SINT32: 
-	  case FFI_TYPE_UINT64: 
-	  case FFI_TYPE_SINT64: 
-	    goto do_integer; 
-
-	  case FFI_TYPE_FLOAT: 
-	    classes[0] = X86_64_SSE_CLASS; 
-	    if (byte_offset % 8) 
-	      { 
-		classes[1] = X86_64_SSESF_CLASS; 
-		return 2; 
-	      } 
-	    return 1; 
-	  case FFI_TYPE_DOUBLE: 
-	    classes[0] = classes[1] = X86_64_SSEDF_CLASS; 
-	    return 2; 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-	  case FFI_TYPE_LONGDOUBLE: 
-	    classes[0] = X86_64_COMPLEX_X87_CLASS; 
-	    return 1; 
-#endif 
-	  } 
-      } 
+    case FFI_TYPE_COMPLEX:
+      {
+	ffi_type *inner = type->elements[0];
+	switch (inner->type)
+	  {
+	  case FFI_TYPE_INT:
+	  case FFI_TYPE_UINT8:
+	  case FFI_TYPE_SINT8:
+	  case FFI_TYPE_UINT16:
+	  case FFI_TYPE_SINT16:
+	  case FFI_TYPE_UINT32:
+	  case FFI_TYPE_SINT32:
+	  case FFI_TYPE_UINT64:
+	  case FFI_TYPE_SINT64:
+	    goto do_integer;
+
+	  case FFI_TYPE_FLOAT:
+	    classes[0] = X86_64_SSE_CLASS;
+	    if (byte_offset % 8)
+	      {
+		classes[1] = X86_64_SSESF_CLASS;
+		return 2;
+	      }
+	    return 1;
+	  case FFI_TYPE_DOUBLE:
+	    classes[0] = classes[1] = X86_64_SSEDF_CLASS;
+	    return 2;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+	  case FFI_TYPE_LONGDOUBLE:
+	    classes[0] = X86_64_COMPLEX_X87_CLASS;
+	    return 1;
+#endif
+	  }
+      }
     }
-  abort(); 
+  abort();
 }
 
 /* Examine the argument and return set number of register required in each
    class.  Return zero iff parameter should be passed in memory, otherwise
    the number of registers.  */
 
-static size_t 
+static size_t
 examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
 		  _Bool in_return, int *pngpr, int *pnsse)
 {
-  size_t n; 
-  unsigned int i; 
-  int ngpr, nsse; 
+  size_t n;
+  unsigned int i;
+  int ngpr, nsse;
 
   n = classify_argument (type, classes, 0);
   if (n == 0)
@@ -389,74 +389,74 @@ examine_argument (ffi_type *type, enum x86_64_reg_class classes[MAX_CLASSES],
 
 /* Perform machine dependent cif processing.  */
 
-#ifndef __ILP32__ 
-extern ffi_status 
-ffi_prep_cif_machdep_efi64(ffi_cif *cif); 
-#endif 
- 
-ffi_status FFI_HIDDEN 
+#ifndef __ILP32__
+extern ffi_status
+ffi_prep_cif_machdep_efi64(ffi_cif *cif);
+#endif
+
+ffi_status FFI_HIDDEN
 ffi_prep_cif_machdep (ffi_cif *cif)
 {
-  int gprcount, ssecount, i, avn, ngpr, nsse; 
-  unsigned flags; 
+  int gprcount, ssecount, i, avn, ngpr, nsse;
+  unsigned flags;
   enum x86_64_reg_class classes[MAX_CLASSES];
-  size_t bytes, n, rtype_size; 
-  ffi_type *rtype; 
-
-#ifndef __ILP32__ 
-  if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) 
-    return ffi_prep_cif_machdep_efi64(cif); 
-#endif 
-  if (cif->abi != FFI_UNIX64) 
-    return FFI_BAD_ABI; 
- 
+  size_t bytes, n, rtype_size;
+  ffi_type *rtype;
+
+#ifndef __ILP32__
+  if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64)
+    return ffi_prep_cif_machdep_efi64(cif);
+#endif
+  if (cif->abi != FFI_UNIX64)
+    return FFI_BAD_ABI;
+
   gprcount = ssecount = 0;
 
-  rtype = cif->rtype; 
-  rtype_size = rtype->size; 
-  switch (rtype->type) 
+  rtype = cif->rtype;
+  rtype_size = rtype->size;
+  switch (rtype->type)
     {
-    case FFI_TYPE_VOID: 
-      flags = UNIX64_RET_VOID; 
-      break; 
-    case FFI_TYPE_UINT8: 
-      flags = UNIX64_RET_UINT8; 
-      break; 
-    case FFI_TYPE_SINT8: 
-      flags = UNIX64_RET_SINT8; 
-      break; 
-    case FFI_TYPE_UINT16: 
-      flags = UNIX64_RET_UINT16; 
-      break; 
-    case FFI_TYPE_SINT16: 
-      flags = UNIX64_RET_SINT16; 
-      break; 
-    case FFI_TYPE_UINT32: 
-      flags = UNIX64_RET_UINT32; 
-      break; 
-    case FFI_TYPE_INT: 
-    case FFI_TYPE_SINT32: 
-      flags = UNIX64_RET_SINT32; 
-      break; 
-    case FFI_TYPE_UINT64: 
-    case FFI_TYPE_SINT64: 
-      flags = UNIX64_RET_INT64; 
-      break; 
-    case FFI_TYPE_POINTER: 
-      flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64); 
-      break; 
-    case FFI_TYPE_FLOAT: 
-      flags = UNIX64_RET_XMM32; 
-      break; 
-    case FFI_TYPE_DOUBLE: 
-      flags = UNIX64_RET_XMM64; 
-      break; 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-    case FFI_TYPE_LONGDOUBLE: 
-      flags = UNIX64_RET_X87; 
-      break; 
-#endif 
-    case FFI_TYPE_STRUCT: 
+    case FFI_TYPE_VOID:
+      flags = UNIX64_RET_VOID;
+      break;
+    case FFI_TYPE_UINT8:
+      flags = UNIX64_RET_UINT8;
+      break;
+    case FFI_TYPE_SINT8:
+      flags = UNIX64_RET_SINT8;
+      break;
+    case FFI_TYPE_UINT16:
+      flags = UNIX64_RET_UINT16;
+      break;
+    case FFI_TYPE_SINT16:
+      flags = UNIX64_RET_SINT16;
+      break;
+    case FFI_TYPE_UINT32:
+      flags = UNIX64_RET_UINT32;
+      break;
+    case FFI_TYPE_INT:
+    case FFI_TYPE_SINT32:
+      flags = UNIX64_RET_SINT32;
+      break;
+    case FFI_TYPE_UINT64:
+    case FFI_TYPE_SINT64:
+      flags = UNIX64_RET_INT64;
+      break;
+    case FFI_TYPE_POINTER:
+      flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64);
+      break;
+    case FFI_TYPE_FLOAT:
+      flags = UNIX64_RET_XMM32;
+      break;
+    case FFI_TYPE_DOUBLE:
+      flags = UNIX64_RET_XMM64;
+      break;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+    case FFI_TYPE_LONGDOUBLE:
+      flags = UNIX64_RET_X87;
+      break;
+#endif
+    case FFI_TYPE_STRUCT:
       n = examine_argument (cif->rtype, classes, 1, &ngpr, &nsse);
       if (n == 0)
 	{
@@ -464,62 +464,62 @@ ffi_prep_cif_machdep (ffi_cif *cif)
 	     memory is the first argument.  Allocate a register for it.  */
 	  gprcount++;
 	  /* We don't have to do anything in asm for the return.  */
-	  flags = UNIX64_RET_VOID | UNIX64_FLAG_RET_IN_MEM; 
+	  flags = UNIX64_RET_VOID | UNIX64_FLAG_RET_IN_MEM;
 	}
-      else 
+      else
 	{
 	  _Bool sse0 = SSE_CLASS_P (classes[0]);
- 
-	  if (rtype_size == 4 && sse0) 
-	    flags = UNIX64_RET_XMM32; 
-	  else if (rtype_size == 8) 
-	    flags = sse0 ? UNIX64_RET_XMM64 : UNIX64_RET_INT64; 
-	  else 
-	    { 
-	      _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]); 
-	      if (sse0 && sse1) 
-		flags = UNIX64_RET_ST_XMM0_XMM1; 
-	      else if (sse0) 
-		flags = UNIX64_RET_ST_XMM0_RAX; 
-	      else if (sse1) 
-		flags = UNIX64_RET_ST_RAX_XMM0; 
-	      else 
-		flags = UNIX64_RET_ST_RAX_RDX; 
-	      flags |= rtype_size << UNIX64_SIZE_SHIFT; 
-	    } 
+
+	  if (rtype_size == 4 && sse0)
+	    flags = UNIX64_RET_XMM32;
+	  else if (rtype_size == 8)
+	    flags = sse0 ? UNIX64_RET_XMM64 : UNIX64_RET_INT64;
+	  else
+	    {
+	      _Bool sse1 = n == 2 && SSE_CLASS_P (classes[1]);
+	      if (sse0 && sse1)
+		flags = UNIX64_RET_ST_XMM0_XMM1;
+	      else if (sse0)
+		flags = UNIX64_RET_ST_XMM0_RAX;
+	      else if (sse1)
+		flags = UNIX64_RET_ST_RAX_XMM0;
+	      else
+		flags = UNIX64_RET_ST_RAX_RDX;
+	      flags |= rtype_size << UNIX64_SIZE_SHIFT;
+	    }
+	}
+      break;
+    case FFI_TYPE_COMPLEX:
+      switch (rtype->elements[0]->type)
+	{
+	case FFI_TYPE_UINT8:
+	case FFI_TYPE_SINT8:
+	case FFI_TYPE_UINT16:
+	case FFI_TYPE_SINT16:
+	case FFI_TYPE_INT:
+	case FFI_TYPE_UINT32:
+	case FFI_TYPE_SINT32:
+	case FFI_TYPE_UINT64:
+	case FFI_TYPE_SINT64:
+	  flags = UNIX64_RET_ST_RAX_RDX | ((unsigned) rtype_size << UNIX64_SIZE_SHIFT);
+	  break;
+	case FFI_TYPE_FLOAT:
+	  flags = UNIX64_RET_XMM64;
+	  break;
+	case FFI_TYPE_DOUBLE:
+	  flags = UNIX64_RET_ST_XMM0_XMM1 | (16 << UNIX64_SIZE_SHIFT);
+	  break;
+#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE
+	case FFI_TYPE_LONGDOUBLE:
+	  flags = UNIX64_RET_X87_2;
+	  break;
+#endif
+	default:
+	  return FFI_BAD_TYPEDEF;
 	}
-      break; 
-    case FFI_TYPE_COMPLEX: 
-      switch (rtype->elements[0]->type) 
-	{ 
-	case FFI_TYPE_UINT8: 
-	case FFI_TYPE_SINT8: 
-	case FFI_TYPE_UINT16: 
-	case FFI_TYPE_SINT16: 
-	case FFI_TYPE_INT: 
-	case FFI_TYPE_UINT32: 
-	case FFI_TYPE_SINT32: 
-	case FFI_TYPE_UINT64: 
-	case FFI_TYPE_SINT64: 
-	  flags = UNIX64_RET_ST_RAX_RDX | ((unsigned) rtype_size << UNIX64_SIZE_SHIFT); 
-	  break; 
-	case FFI_TYPE_FLOAT: 
-	  flags = UNIX64_RET_XMM64; 
-	  break; 
-	case FFI_TYPE_DOUBLE: 
-	  flags = UNIX64_RET_ST_XMM0_XMM1 | (16 << UNIX64_SIZE_SHIFT); 
-	  break; 
-#if FFI_TYPE_LONGDOUBLE != FFI_TYPE_DOUBLE 
-	case FFI_TYPE_LONGDOUBLE: 
-	  flags = UNIX64_RET_X87_2; 
-	  break; 
-#endif 
-	default: 
-	  return FFI_BAD_TYPEDEF; 
-	} 
-      break; 
-    default: 
-      return FFI_BAD_TYPEDEF; 
+      break;
+    default:
+      return FFI_BAD_TYPEDEF;
     }
 
   /* Go over all arguments and determine the way they should be passed.
@@ -536,7 +536,7 @@ ffi_prep_cif_machdep (ffi_cif *cif)
 	  if (align < 8)
 	    align = 8;
 
-	  bytes = FFI_ALIGN (bytes, align); 
+	  bytes = FFI_ALIGN (bytes, align);
 	  bytes += cif->arg_types[i]->size;
 	}
       else
@@ -546,50 +546,50 @@ ffi_prep_cif_machdep (ffi_cif *cif)
 	}
     }
   if (ssecount)
-    flags |= UNIX64_FLAG_XMM_ARGS; 
- 
+    flags |= UNIX64_FLAG_XMM_ARGS;
+
   cif->flags = flags;
-  cif->bytes = (unsigned) FFI_ALIGN (bytes, 8); 
+  cif->bytes = (unsigned) FFI_ALIGN (bytes, 8);
 
   return FFI_OK;
 }
 
-static void 
-ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, 
-	      void **avalue, void *closure) 
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
+	      void **avalue, void *closure)
 {
   enum x86_64_reg_class classes[MAX_CLASSES];
   char *stack, *argp;
   ffi_type **arg_types;
-  int gprcount, ssecount, ngpr, nsse, i, avn, flags; 
+  int gprcount, ssecount, ngpr, nsse, i, avn, flags;
   struct register_args *reg_args;
 
   /* Can't call 32-bit mode from 64-bit mode.  */
   FFI_ASSERT (cif->abi == FFI_UNIX64);
 
   /* If the return value is a struct and we don't have a return value
-     address then we need to make one.  Otherwise we can ignore it.  */ 
-  flags = cif->flags; 
-  if (rvalue == NULL) 
-    { 
-      if (flags & UNIX64_FLAG_RET_IN_MEM) 
-	rvalue = alloca (cif->rtype->size); 
-      else 
-	flags = UNIX64_RET_VOID; 
-    } 
+     address then we need to make one.  Otherwise we can ignore it.  */
+  flags = cif->flags;
+  if (rvalue == NULL)
+    {
+      if (flags & UNIX64_FLAG_RET_IN_MEM)
+	rvalue = alloca (cif->rtype->size);
+      else
+	flags = UNIX64_RET_VOID;
+    }
 
   /* Allocate the space for the arguments, plus 4 words of temp space.  */
   stack = alloca (sizeof (struct register_args) + cif->bytes + 4*8);
   reg_args = (struct register_args *) stack;
   argp = stack + sizeof (struct register_args);
 
-  reg_args->r10 = (uintptr_t) closure; 
- 
+  reg_args->r10 = (uintptr_t) closure;
+
   gprcount = ssecount = 0;
 
   /* If the return value is passed in memory, add the pointer as the
      first integer argument.  */
-  if (flags & UNIX64_FLAG_RET_IN_MEM) 
+  if (flags & UNIX64_FLAG_RET_IN_MEM)
     reg_args->gpr[gprcount++] = (unsigned long) rvalue;
 
   avn = cif->nargs;
@@ -597,7 +597,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
 
   for (i = 0; i < avn; ++i)
     {
-      size_t n, size = arg_types[i]->size; 
+      size_t n, size = arg_types[i]->size;
 
       n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
       if (n == 0
@@ -611,7 +611,7 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
 	    align = 8;
 
 	  /* Pass this argument in memory.  */
-	  argp = (void *) FFI_ALIGN (argp, align); 
+	  argp = (void *) FFI_ALIGN (argp, align);
 	  memcpy (argp, avalue[i], size);
 	  argp += size;
 	}
@@ -619,15 +619,15 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
 	{
 	  /* The argument is passed entirely in registers.  */
 	  char *a = (char *) avalue[i];
-	  unsigned int j; 
+	  unsigned int j;
 
 	  for (j = 0; j < n; j++, a += 8, size -= 8)
 	    {
 	      switch (classes[j])
 		{
-		case X86_64_NO_CLASS: 
-		case X86_64_SSEUP_CLASS: 
-		  break; 
+		case X86_64_NO_CLASS:
+		case X86_64_SSEUP_CLASS:
+		  break;
 		case X86_64_INTEGER_CLASS:
 		case X86_64_INTEGERSI_CLASS:
 		  /* Sign-extend integer arguments passed in general
@@ -637,26 +637,26 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
 		  switch (arg_types[i]->type)
 		    {
 		    case FFI_TYPE_SINT8:
-		      reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a); 
+		      reg_args->gpr[gprcount] = (SINT64) *((SINT8 *) a);
 		      break;
 		    case FFI_TYPE_SINT16:
-		      reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a); 
+		      reg_args->gpr[gprcount] = (SINT64) *((SINT16 *) a);
 		      break;
 		    case FFI_TYPE_SINT32:
-		      reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a); 
+		      reg_args->gpr[gprcount] = (SINT64) *((SINT32 *) a);
 		      break;
 		    default:
 		      reg_args->gpr[gprcount] = 0;
-		      memcpy (&reg_args->gpr[gprcount], a, size); 
+		      memcpy (&reg_args->gpr[gprcount], a, size);
 		    }
 		  gprcount++;
 		  break;
 		case X86_64_SSE_CLASS:
 		case X86_64_SSEDF_CLASS:
-		  memcpy (&reg_args->sse[ssecount++].i64, a, sizeof(UINT64)); 
+		  memcpy (&reg_args->sse[ssecount++].i64, a, sizeof(UINT64));
 		  break;
 		case X86_64_SSESF_CLASS:
-		  memcpy (&reg_args->sse[ssecount++].i32, a, sizeof(UINT32)); 
+		  memcpy (&reg_args->sse[ssecount++].i32, a, sizeof(UINT32));
 		  break;
 		default:
 		  abort();
@@ -664,63 +664,63 @@ ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
 	    }
 	}
     }
-  reg_args->rax = ssecount; 
+  reg_args->rax = ssecount;
 
   ffi_call_unix64 (stack, cif->bytes + sizeof (struct register_args),
-		   flags, rvalue, fn); 
+		   flags, rvalue, fn);
+}
+
+#ifndef __ILP32__
+extern void
+ffi_call_efi64(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue);
+#endif
+
+void
+ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+#ifndef __ILP32__
+  if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64)
+    {
+      ffi_call_efi64(cif, fn, rvalue, avalue);
+      return;
+    }
+#endif
+  ffi_call_int (cif, fn, rvalue, avalue, NULL);
 }
 
-#ifndef __ILP32__ 
-extern void 
-ffi_call_efi64(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue); 
-#endif 
-
-void 
-ffi_call (ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) 
-{ 
-#ifndef __ILP32__ 
-  if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) 
-    { 
-      ffi_call_efi64(cif, fn, rvalue, avalue); 
-      return; 
-    } 
-#endif 
-  ffi_call_int (cif, fn, rvalue, avalue, NULL); 
-} 
-
-#ifndef __ILP32__ 
-extern void 
-ffi_call_go_efi64(ffi_cif *cif, void (*fn)(void), void *rvalue, 
-		  void **avalue, void *closure); 
-#endif 
- 
-void 
-ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue, 
-	     void **avalue, void *closure) 
-{ 
-#ifndef __ILP32__ 
-  if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) 
-    { 
-      ffi_call_go_efi64(cif, fn, rvalue, avalue, closure); 
-      return; 
-    } 
-#endif 
-  ffi_call_int (cif, fn, rvalue, avalue, closure); 
-} 
- 
- 
-extern void ffi_closure_unix64(void) FFI_HIDDEN; 
-extern void ffi_closure_unix64_sse(void) FFI_HIDDEN; 
- 
-#ifndef __ILP32__ 
-extern ffi_status 
-ffi_prep_closure_loc_efi64(ffi_closure* closure, 
-			   ffi_cif* cif, 
-			   void (*fun)(ffi_cif*, void*, void**, void*), 
-			   void *user_data, 
-			   void *codeloc); 
-#endif 
- 
+#ifndef __ILP32__
+extern void
+ffi_call_go_efi64(ffi_cif *cif, void (*fn)(void), void *rvalue,
+		  void **avalue, void *closure);
+#endif
+
+void
+ffi_call_go (ffi_cif *cif, void (*fn)(void), void *rvalue,
+	     void **avalue, void *closure)
+{
+#ifndef __ILP32__
+  if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64)
+    {
+      ffi_call_go_efi64(cif, fn, rvalue, avalue, closure);
+      return;
+    }
+#endif
+  ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
+
+
+extern void ffi_closure_unix64(void) FFI_HIDDEN;
+extern void ffi_closure_unix64_sse(void) FFI_HIDDEN;
+
+#ifndef __ILP32__
+extern ffi_status
+ffi_prep_closure_loc_efi64(ffi_closure* closure,
+			   ffi_cif* cif,
+			   void (*fun)(ffi_cif*, void*, void**, void*),
+			   void *user_data,
+			   void *codeloc);
+#endif
+
 ffi_status
 ffi_prep_closure_loc (ffi_closure* closure,
 		      ffi_cif* cif,
@@ -728,31 +728,31 @@ ffi_prep_closure_loc (ffi_closure* closure,
 		      void *user_data,
 		      void *codeloc)
 {
-  static const unsigned char trampoline[16] = { 
-    /* leaq  -0x7(%rip),%r10   # 0x0  */ 
-    0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff, 
-    /* jmpq  *0x3(%rip)        # 0x10 */ 
-    0xff, 0x25, 0x03, 0x00, 0x00, 0x00, 
-    /* nopl  (%rax) */ 
-    0x0f, 0x1f, 0x00 
-  }; 
-  void (*dest)(void); 
-  char *tramp = closure->tramp; 
-
-#ifndef __ILP32__ 
-  if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) 
-    return ffi_prep_closure_loc_efi64(closure, cif, fun, user_data, codeloc); 
-#endif 
-  if (cif->abi != FFI_UNIX64) 
-    return FFI_BAD_ABI; 
-
-  if (cif->flags & UNIX64_FLAG_XMM_ARGS) 
-    dest = ffi_closure_unix64_sse; 
-  else 
-    dest = ffi_closure_unix64; 
-
-  memcpy (tramp, trampoline, sizeof(trampoline)); 
-  *(UINT64 *)(tramp + 16) = (uintptr_t)dest; 
+  static const unsigned char trampoline[16] = {
+    /* leaq  -0x7(%rip),%r10   # 0x0  */
+    0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
+    /* jmpq  *0x3(%rip)        # 0x10 */
+    0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
+    /* nopl  (%rax) */
+    0x0f, 0x1f, 0x00
+  };
+  void (*dest)(void);
+  char *tramp = closure->tramp;
+
+#ifndef __ILP32__
+  if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64)
+    return ffi_prep_closure_loc_efi64(closure, cif, fun, user_data, codeloc);
+#endif
+  if (cif->abi != FFI_UNIX64)
+    return FFI_BAD_ABI;
+
+  if (cif->flags & UNIX64_FLAG_XMM_ARGS)
+    dest = ffi_closure_unix64_sse;
+  else
+    dest = ffi_closure_unix64;
+
+  memcpy (tramp, trampoline, sizeof(trampoline));
+  *(UINT64 *)(tramp + 16) = (uintptr_t)dest;
 
   closure->cif = cif;
   closure->fun = fun;
@@ -761,40 +761,40 @@ ffi_prep_closure_loc (ffi_closure* closure,
   return FFI_OK;
 }
 
-int FFI_HIDDEN 
-ffi_closure_unix64_inner(ffi_cif *cif, 
-			 void (*fun)(ffi_cif*, void*, void**, void*), 
-			 void *user_data, 
-			 void *rvalue, 
-			 struct register_args *reg_args, 
-			 char *argp) 
+int FFI_HIDDEN
+ffi_closure_unix64_inner(ffi_cif *cif,
+			 void (*fun)(ffi_cif*, void*, void**, void*),
+			 void *user_data,
+			 void *rvalue,
+			 struct register_args *reg_args,
+			 char *argp)
 {
   void **avalue;
   ffi_type **arg_types;
   long i, avn;
   int gprcount, ssecount, ngpr, nsse;
-  int flags; 
+  int flags;
 
-  avn = cif->nargs; 
-  flags = cif->flags; 
-  avalue = alloca(avn * sizeof(void *)); 
+  avn = cif->nargs;
+  flags = cif->flags;
+  avalue = alloca(avn * sizeof(void *));
   gprcount = ssecount = 0;
 
-  if (flags & UNIX64_FLAG_RET_IN_MEM) 
+  if (flags & UNIX64_FLAG_RET_IN_MEM)
     {
-      /* On return, %rax will contain the address that was passed 
-	 by the caller in %rdi.  */ 
-      void *r = (void *)(uintptr_t)reg_args->gpr[gprcount++]; 
-      *(void **)rvalue = r; 
-      rvalue = r; 
-      flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64); 
+      /* On return, %rax will contain the address that was passed
+	 by the caller in %rdi.  */
+      void *r = (void *)(uintptr_t)reg_args->gpr[gprcount++];
+      *(void **)rvalue = r;
+      rvalue = r;
+      flags = (sizeof(void *) == 4 ? UNIX64_RET_UINT32 : UNIX64_RET_INT64);
     }
 
   arg_types = cif->arg_types;
   for (i = 0; i < avn; ++i)
     {
       enum x86_64_reg_class classes[MAX_CLASSES];
-      size_t n; 
+      size_t n;
 
       n = examine_argument (arg_types[i], classes, 0, &ngpr, &nsse);
       if (n == 0
@@ -808,7 +808,7 @@ ffi_closure_unix64_inner(ffi_cif *cif,
 	    align = 8;
 
 	  /* Pass this argument in memory.  */
-	  argp = (void *) FFI_ALIGN (argp, align); 
+	  argp = (void *) FFI_ALIGN (argp, align);
 	  avalue[i] = argp;
 	  argp += arg_types[i]->size;
 	}
@@ -834,7 +834,7 @@ ffi_closure_unix64_inner(ffi_cif *cif,
       else
 	{
 	  char *a = alloca (16);
-	  unsigned int j; 
+	  unsigned int j;
 
 	  avalue[i] = a;
 	  for (j = 0; j < n; j++, a += 8)
@@ -848,39 +848,39 @@ ffi_closure_unix64_inner(ffi_cif *cif,
     }
 
   /* Invoke the closure.  */
-  fun (cif, rvalue, avalue, user_data); 
+  fun (cif, rvalue, avalue, user_data);
 
   /* Tell assembly how to perform return type promotions.  */
-  return flags; 
+  return flags;
+}
+
+extern void ffi_go_closure_unix64(void) FFI_HIDDEN;
+extern void ffi_go_closure_unix64_sse(void) FFI_HIDDEN;
+
+#ifndef __ILP32__
+extern ffi_status
+ffi_prep_go_closure_efi64(ffi_go_closure* closure, ffi_cif* cif,
+			  void (*fun)(ffi_cif*, void*, void**, void*));
+#endif
+
+ffi_status
+ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif,
+		     void (*fun)(ffi_cif*, void*, void**, void*))
+{
+#ifndef __ILP32__
+  if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64)
+    return ffi_prep_go_closure_efi64(closure, cif, fun);
+#endif
+  if (cif->abi != FFI_UNIX64)
+    return FFI_BAD_ABI;
+
+  closure->tramp = (cif->flags & UNIX64_FLAG_XMM_ARGS
+		    ? ffi_go_closure_unix64_sse
+		    : ffi_go_closure_unix64);
+  closure->cif = cif;
+  closure->fun = fun;
+
+  return FFI_OK;
 }
 
-extern void ffi_go_closure_unix64(void) FFI_HIDDEN; 
-extern void ffi_go_closure_unix64_sse(void) FFI_HIDDEN; 
- 
-#ifndef __ILP32__ 
-extern ffi_status 
-ffi_prep_go_closure_efi64(ffi_go_closure* closure, ffi_cif* cif, 
-			  void (*fun)(ffi_cif*, void*, void**, void*)); 
-#endif 
- 
-ffi_status 
-ffi_prep_go_closure (ffi_go_closure* closure, ffi_cif* cif, 
-		     void (*fun)(ffi_cif*, void*, void**, void*)) 
-{ 
-#ifndef __ILP32__ 
-  if (cif->abi == FFI_EFI64 || cif->abi == FFI_GNUW64) 
-    return ffi_prep_go_closure_efi64(closure, cif, fun); 
-#endif 
-  if (cif->abi != FFI_UNIX64) 
-    return FFI_BAD_ABI; 
- 
-  closure->tramp = (cif->flags & UNIX64_FLAG_XMM_ARGS 
-		    ? ffi_go_closure_unix64_sse 
-		    : ffi_go_closure_unix64); 
-  closure->cif = cif; 
-  closure->fun = fun; 
- 
-  return FFI_OK; 
-} 
- 
 #endif /* __x86_64__ */
diff --git a/contrib/restricted/libffi/src/x86/ffitarget.h b/contrib/restricted/libffi/src/x86/ffitarget.h
index ab04dfa791..85ccedfedc 100644
--- a/contrib/restricted/libffi/src/x86/ffitarget.h
+++ b/contrib/restricted/libffi/src/x86/ffitarget.h
@@ -1,5 +1,5 @@
 /* -----------------------------------------------------------------*-C-*-
-   ffitarget.h - Copyright (c) 2012, 2014, 2018  Anthony Green 
+   ffitarget.h - Copyright (c) 2012, 2014, 2018  Anthony Green
                  Copyright (c) 1996-2003, 2010  Red Hat, Inc.
                  Copyright (C) 2008  Free Software Foundation, Inc.
 
@@ -49,11 +49,11 @@
 #define USE_BUILTIN_FFS 0 /* not yet implemented in mingw-64 */
 #endif
 
-#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION 
-#ifndef _MSC_VER 
-#define FFI_TARGET_HAS_COMPLEX_TYPE 
-#endif 
- 
+#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION
+#ifndef _MSC_VER
+#define FFI_TARGET_HAS_COMPLEX_TYPE
+#endif
+
 /* ---- Generic type definitions ----------------------------------------- */
 
 #ifndef LIBFFI_ASM
@@ -78,46 +78,46 @@ typedef signed long            ffi_sarg;
 #endif
 
 typedef enum ffi_abi {
-#if defined(X86_WIN64) 
+#if defined(X86_WIN64)
   FFI_FIRST_ABI = 0,
-  FFI_WIN64,            /* sizeof(long double) == 8  - microsoft compilers */ 
-  FFI_GNUW64,           /* sizeof(long double) == 16 - GNU compilers */ 
+  FFI_WIN64,            /* sizeof(long double) == 8  - microsoft compilers */
+  FFI_GNUW64,           /* sizeof(long double) == 16 - GNU compilers */
   FFI_LAST_ABI,
-#ifdef __GNUC__ 
-  FFI_DEFAULT_ABI = FFI_GNUW64 
-#else   
-  FFI_DEFAULT_ABI = FFI_WIN64 
-#endif   
-
-#elif defined(X86_64) || (defined (__x86_64__) && defined (X86_DARWIN)) 
-  FFI_FIRST_ABI = 1, 
-  FFI_UNIX64, 
+#ifdef __GNUC__
+  FFI_DEFAULT_ABI = FFI_GNUW64
+#else  
+  FFI_DEFAULT_ABI = FFI_WIN64
+#endif  
+
+#elif defined(X86_64) || (defined (__x86_64__) && defined (X86_DARWIN))
+  FFI_FIRST_ABI = 1,
+  FFI_UNIX64,
   FFI_WIN64,
-  FFI_EFI64 = FFI_WIN64, 
-  FFI_GNUW64, 
+  FFI_EFI64 = FFI_WIN64,
+  FFI_GNUW64,
   FFI_LAST_ABI,
-  FFI_DEFAULT_ABI = FFI_UNIX64 
-
-#elif defined(X86_WIN32) 
-  FFI_FIRST_ABI = 0, 
-  FFI_SYSV      = 1, 
-  FFI_STDCALL   = 2, 
-  FFI_THISCALL  = 3, 
-  FFI_FASTCALL  = 4, 
-  FFI_MS_CDECL  = 5, 
-  FFI_PASCAL    = 6, 
-  FFI_REGISTER  = 7, 
-  FFI_LAST_ABI, 
-  FFI_DEFAULT_ABI = FFI_MS_CDECL 
+  FFI_DEFAULT_ABI = FFI_UNIX64
+
+#elif defined(X86_WIN32)
+  FFI_FIRST_ABI = 0,
+  FFI_SYSV      = 1,
+  FFI_STDCALL   = 2,
+  FFI_THISCALL  = 3,
+  FFI_FASTCALL  = 4,
+  FFI_MS_CDECL  = 5,
+  FFI_PASCAL    = 6,
+  FFI_REGISTER  = 7,
+  FFI_LAST_ABI,
+  FFI_DEFAULT_ABI = FFI_MS_CDECL
 #else
-  FFI_FIRST_ABI = 0, 
-  FFI_SYSV      = 1, 
-  FFI_THISCALL  = 3, 
-  FFI_FASTCALL  = 4, 
-  FFI_STDCALL   = 5, 
-  FFI_PASCAL    = 6, 
-  FFI_REGISTER  = 7, 
-  FFI_MS_CDECL  = 8, 
+  FFI_FIRST_ABI = 0,
+  FFI_SYSV      = 1,
+  FFI_THISCALL  = 3,
+  FFI_FASTCALL  = 4,
+  FFI_STDCALL   = 5,
+  FFI_PASCAL    = 6,
+  FFI_REGISTER  = 7,
+  FFI_MS_CDECL  = 8,
   FFI_LAST_ABI,
   FFI_DEFAULT_ABI = FFI_SYSV
 #endif
@@ -127,20 +127,20 @@ typedef enum ffi_abi {
 /* ---- Definitions for closures ----------------------------------------- */
 
 #define FFI_CLOSURES 1
-#define FFI_GO_CLOSURES 1 
- 
+#define FFI_GO_CLOSURES 1
+
 #define FFI_TYPE_SMALL_STRUCT_1B (FFI_TYPE_LAST + 1)
 #define FFI_TYPE_SMALL_STRUCT_2B (FFI_TYPE_LAST + 2)
 #define FFI_TYPE_SMALL_STRUCT_4B (FFI_TYPE_LAST + 3)
 #define FFI_TYPE_MS_STRUCT       (FFI_TYPE_LAST + 4)
 
-#if defined (X86_64) || defined(X86_WIN64) \ 
-    || (defined (__x86_64__) && defined (X86_DARWIN)) 
-# define FFI_TRAMPOLINE_SIZE 24 
-# define FFI_NATIVE_RAW_API 0 
+#if defined (X86_64) || defined(X86_WIN64) \
+    || (defined (__x86_64__) && defined (X86_DARWIN))
+# define FFI_TRAMPOLINE_SIZE 24
+# define FFI_NATIVE_RAW_API 0
 #else
-# define FFI_TRAMPOLINE_SIZE 12 
-# define FFI_NATIVE_RAW_API 1  /* x86 has native raw api support */ 
+# define FFI_TRAMPOLINE_SIZE 12
+# define FFI_NATIVE_RAW_API 1  /* x86 has native raw api support */
 #endif
 
 #endif
diff --git a/contrib/restricted/libffi/src/x86/ffiw64.c b/contrib/restricted/libffi/src/x86/ffiw64.c
index 3eafc9d3e9..b68f69ccf6 100644
--- a/contrib/restricted/libffi/src/x86/ffiw64.c
+++ b/contrib/restricted/libffi/src/x86/ffiw64.c
@@ -1,311 +1,311 @@
-/* ----------------------------------------------------------------------- 
-   ffiw64.c - Copyright (c) 2018 Anthony Green 
-              Copyright (c) 2014 Red Hat, Inc. 
- 
-   x86 win64 Foreign Function Interface 
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
-   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
-   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
-   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
-   DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-#if defined(__x86_64__) || defined(_M_AMD64) 
-#include <ffi.h> 
-#include <ffi_common.h> 
-#include <stdlib.h> 
-#include <stdint.h> 
- 
-#ifdef X86_WIN64 
-#define EFI64(name) name 
-#else 
-#define EFI64(name) FFI_HIDDEN name##_efi64 
-#endif 
- 
-struct win64_call_frame 
-{ 
-  UINT64 rbp;		/* 0 */ 
-  UINT64 retaddr;	/* 8 */ 
-  UINT64 fn;		/* 16 */ 
-  UINT64 flags;		/* 24 */ 
-  UINT64 rvalue;	/* 32 */ 
-}; 
- 
-extern void ffi_call_win64 (void *stack, struct win64_call_frame *, 
-			    void *closure) FFI_HIDDEN; 
- 
-ffi_status FFI_HIDDEN 
-EFI64(ffi_prep_cif_machdep)(ffi_cif *cif) 
-{ 
-  int flags, n; 
- 
-  switch (cif->abi) 
-    { 
-    case FFI_WIN64: 
-    case FFI_GNUW64: 
-      break; 
-    default: 
-      return FFI_BAD_ABI; 
-    } 
- 
-  flags = cif->rtype->type; 
-  switch (flags) 
-    { 
-    default: 
-      break; 
-    case FFI_TYPE_LONGDOUBLE: 
-      /* GCC returns long double values by reference, like a struct */ 
-      if (cif->abi == FFI_GNUW64) 
-	flags = FFI_TYPE_STRUCT; 
-      break; 
-    case FFI_TYPE_COMPLEX: 
-      flags = FFI_TYPE_STRUCT; 
-      /* FALLTHRU */ 
-    case FFI_TYPE_STRUCT: 
-      switch (cif->rtype->size) 
-	{ 
-	case 8: 
-	  flags = FFI_TYPE_UINT64; 
-	  break; 
-	case 4: 
-	  flags = FFI_TYPE_SMALL_STRUCT_4B; 
-	  break; 
-	case 2: 
-	  flags = FFI_TYPE_SMALL_STRUCT_2B; 
-	  break; 
-	case 1: 
-	  flags = FFI_TYPE_SMALL_STRUCT_1B; 
-	  break; 
-	} 
-      break; 
-    } 
-  cif->flags = flags; 
- 
-  /* Each argument either fits in a register, an 8 byte slot, or is 
-     passed by reference with the pointer in the 8 byte slot.  */ 
-  n = cif->nargs; 
-  n += (flags == FFI_TYPE_STRUCT); 
-  if (n < 4) 
-    n = 4; 
-  cif->bytes = n * 8; 
- 
-  return FFI_OK; 
-} 
- 
-static void 
-ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue, 
-	      void **avalue, void *closure) 
-{ 
-  int i, j, n, flags; 
-  UINT64 *stack; 
-  size_t rsize; 
-  struct win64_call_frame *frame; 
- 
-  FFI_ASSERT(cif->abi == FFI_GNUW64 || cif->abi == FFI_WIN64); 
- 
-  flags = cif->flags; 
-  rsize = 0; 
- 
-  /* If we have no return value for a structure, we need to create one. 
-     Otherwise we can ignore the return type entirely.  */ 
-  if (rvalue == NULL) 
-    { 
-      if (flags == FFI_TYPE_STRUCT) 
-	rsize = cif->rtype->size; 
-      else 
-	flags = FFI_TYPE_VOID; 
-    } 
- 
-  stack = alloca(cif->bytes + sizeof(struct win64_call_frame) + rsize); 
-  frame = (struct win64_call_frame *)((char *)stack + cif->bytes); 
-  if (rsize) 
-    rvalue = frame + 1; 
- 
-  frame->fn = (uintptr_t)fn; 
-  frame->flags = flags; 
-  frame->rvalue = (uintptr_t)rvalue; 
- 
-  j = 0; 
-  if (flags == FFI_TYPE_STRUCT) 
-    { 
-      stack[0] = (uintptr_t)rvalue; 
-      j = 1; 
-    } 
- 
-  for (i = 0, n = cif->nargs; i < n; ++i, ++j) 
-    { 
-      switch (cif->arg_types[i]->size) 
-	{ 
-	case 8: 
-	  stack[j] = *(UINT64 *)avalue[i]; 
-	  break; 
-	case 4: 
-	  stack[j] = *(UINT32 *)avalue[i]; 
-	  break; 
-	case 2: 
-	  stack[j] = *(UINT16 *)avalue[i]; 
-	  break; 
-	case 1: 
-	  stack[j] = *(UINT8 *)avalue[i]; 
-	  break; 
-	default: 
-	  stack[j] = (uintptr_t)avalue[i]; 
-	  break; 
-	} 
-    } 
- 
-  ffi_call_win64 (stack, frame, closure); 
-} 
- 
-void 
-EFI64(ffi_call)(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue) 
-{ 
-  ffi_call_int (cif, fn, rvalue, avalue, NULL); 
-} 
- 
-void 
-EFI64(ffi_call_go)(ffi_cif *cif, void (*fn)(void), void *rvalue, 
-	     void **avalue, void *closure) 
-{ 
-  ffi_call_int (cif, fn, rvalue, avalue, closure); 
-} 
- 
- 
-extern void ffi_closure_win64(void) FFI_HIDDEN; 
-extern void ffi_go_closure_win64(void) FFI_HIDDEN; 
- 
-ffi_status 
-EFI64(ffi_prep_closure_loc)(ffi_closure* closure, 
-		      ffi_cif* cif, 
-		      void (*fun)(ffi_cif*, void*, void**, void*), 
-		      void *user_data, 
-		      void *codeloc) 
-{ 
-  static const unsigned char trampoline[16] = { 
-    /* leaq  -0x7(%rip),%r10   # 0x0  */ 
-    0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff, 
-    /* jmpq  *0x3(%rip)        # 0x10 */ 
-    0xff, 0x25, 0x03, 0x00, 0x00, 0x00, 
-    /* nopl  (%rax) */ 
-    0x0f, 0x1f, 0x00 
-  }; 
-  char *tramp = closure->tramp; 
- 
-  switch (cif->abi) 
-    { 
-    case FFI_WIN64: 
-    case FFI_GNUW64: 
-      break; 
-    default: 
-      return FFI_BAD_ABI; 
-    } 
- 
-  memcpy (tramp, trampoline, sizeof(trampoline)); 
-  *(UINT64 *)(tramp + 16) = (uintptr_t)ffi_closure_win64; 
- 
-  closure->cif = cif; 
-  closure->fun = fun; 
-  closure->user_data = user_data; 
- 
-  return FFI_OK; 
-} 
- 
-ffi_status 
-EFI64(ffi_prep_go_closure)(ffi_go_closure* closure, ffi_cif* cif, 
-		     void (*fun)(ffi_cif*, void*, void**, void*)) 
-{ 
-  switch (cif->abi) 
-    { 
-    case FFI_WIN64: 
-    case FFI_GNUW64: 
-      break; 
-    default: 
-      return FFI_BAD_ABI; 
-    } 
- 
-  closure->tramp = ffi_go_closure_win64; 
-  closure->cif = cif; 
-  closure->fun = fun; 
- 
-  return FFI_OK; 
-} 
- 
-struct win64_closure_frame 
-{ 
-  UINT64 rvalue[2]; 
-  UINT64 fargs[4]; 
-  UINT64 retaddr; 
-  UINT64 args[]; 
-}; 
- 
-/* Force the inner function to use the MS ABI.  When compiling on win64 
-   this is a nop.  When compiling on unix, this simplifies the assembly, 
-   and places the burden of saving the extra call-saved registers on 
-   the compiler.  */ 
-int FFI_HIDDEN __attribute__((ms_abi)) 
-ffi_closure_win64_inner(ffi_cif *cif, 
-			void (*fun)(ffi_cif*, void*, void**, void*), 
-			void *user_data, 
-			struct win64_closure_frame *frame) 
-{ 
-  void **avalue; 
-  void *rvalue; 
-  int i, n, nreg, flags; 
- 
-  avalue = alloca(cif->nargs * sizeof(void *)); 
-  rvalue = frame->rvalue; 
-  nreg = 0; 
- 
-  /* When returning a structure, the address is in the first argument. 
-     We must also be prepared to return the same address in eax, so 
-     install that address in the frame and pretend we return a pointer.  */ 
-  flags = cif->flags; 
-  if (flags == FFI_TYPE_STRUCT) 
-    { 
-      rvalue = (void *)(uintptr_t)frame->args[0]; 
-      frame->rvalue[0] = frame->args[0]; 
-      nreg = 1; 
-    } 
- 
-  for (i = 0, n = cif->nargs; i < n; ++i, ++nreg) 
-    { 
-      size_t size = cif->arg_types[i]->size; 
-      size_t type = cif->arg_types[i]->type; 
-      void *a; 
- 
-      if (type == FFI_TYPE_DOUBLE || type == FFI_TYPE_FLOAT) 
-	{ 
-	  if (nreg < 4) 
-	    a = &frame->fargs[nreg]; 
-	  else 
-	    a = &frame->args[nreg]; 
-	} 
-      else if (size == 1 || size == 2 || size == 4 || size == 8) 
-	a = &frame->args[nreg]; 
-      else 
-	a = (void *)(uintptr_t)frame->args[nreg]; 
- 
-      avalue[i] = a; 
-    } 
- 
-  /* Invoke the closure.  */ 
-  fun (cif, rvalue, avalue, user_data); 
-  return flags; 
-} 
- 
-#endif /* __x86_64__ */ 
+/* -----------------------------------------------------------------------
+   ffiw64.c - Copyright (c) 2018 Anthony Green
+              Copyright (c) 2014 Red Hat, Inc.
+
+   x86 win64 Foreign Function Interface
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#if defined(__x86_64__) || defined(_M_AMD64)
+#include <ffi.h>
+#include <ffi_common.h>
+#include <stdlib.h>
+#include <stdint.h>
+
+#ifdef X86_WIN64
+#define EFI64(name) name
+#else
+#define EFI64(name) FFI_HIDDEN name##_efi64
+#endif
+
+struct win64_call_frame
+{
+  UINT64 rbp;		/* 0 */
+  UINT64 retaddr;	/* 8 */
+  UINT64 fn;		/* 16 */
+  UINT64 flags;		/* 24 */
+  UINT64 rvalue;	/* 32 */
+};
+
+extern void ffi_call_win64 (void *stack, struct win64_call_frame *,
+			    void *closure) FFI_HIDDEN;
+
+ffi_status FFI_HIDDEN
+EFI64(ffi_prep_cif_machdep)(ffi_cif *cif)
+{
+  int flags, n;
+
+  switch (cif->abi)
+    {
+    case FFI_WIN64:
+    case FFI_GNUW64:
+      break;
+    default:
+      return FFI_BAD_ABI;
+    }
+
+  flags = cif->rtype->type;
+  switch (flags)
+    {
+    default:
+      break;
+    case FFI_TYPE_LONGDOUBLE:
+      /* GCC returns long double values by reference, like a struct */
+      if (cif->abi == FFI_GNUW64)
+	flags = FFI_TYPE_STRUCT;
+      break;
+    case FFI_TYPE_COMPLEX:
+      flags = FFI_TYPE_STRUCT;
+      /* FALLTHRU */
+    case FFI_TYPE_STRUCT:
+      switch (cif->rtype->size)
+	{
+	case 8:
+	  flags = FFI_TYPE_UINT64;
+	  break;
+	case 4:
+	  flags = FFI_TYPE_SMALL_STRUCT_4B;
+	  break;
+	case 2:
+	  flags = FFI_TYPE_SMALL_STRUCT_2B;
+	  break;
+	case 1:
+	  flags = FFI_TYPE_SMALL_STRUCT_1B;
+	  break;
+	}
+      break;
+    }
+  cif->flags = flags;
+
+  /* Each argument either fits in a register, an 8 byte slot, or is
+     passed by reference with the pointer in the 8 byte slot.  */
+  n = cif->nargs;
+  n += (flags == FFI_TYPE_STRUCT);
+  if (n < 4)
+    n = 4;
+  cif->bytes = n * 8;
+
+  return FFI_OK;
+}
+
+static void
+ffi_call_int (ffi_cif *cif, void (*fn)(void), void *rvalue,
+	      void **avalue, void *closure)
+{
+  int i, j, n, flags;
+  UINT64 *stack;
+  size_t rsize;
+  struct win64_call_frame *frame;
+
+  FFI_ASSERT(cif->abi == FFI_GNUW64 || cif->abi == FFI_WIN64);
+
+  flags = cif->flags;
+  rsize = 0;
+
+  /* If we have no return value for a structure, we need to create one.
+     Otherwise we can ignore the return type entirely.  */
+  if (rvalue == NULL)
+    {
+      if (flags == FFI_TYPE_STRUCT)
+	rsize = cif->rtype->size;
+      else
+	flags = FFI_TYPE_VOID;
+    }
+
+  stack = alloca(cif->bytes + sizeof(struct win64_call_frame) + rsize);
+  frame = (struct win64_call_frame *)((char *)stack + cif->bytes);
+  if (rsize)
+    rvalue = frame + 1;
+
+  frame->fn = (uintptr_t)fn;
+  frame->flags = flags;
+  frame->rvalue = (uintptr_t)rvalue;
+
+  j = 0;
+  if (flags == FFI_TYPE_STRUCT)
+    {
+      stack[0] = (uintptr_t)rvalue;
+      j = 1;
+    }
+
+  for (i = 0, n = cif->nargs; i < n; ++i, ++j)
+    {
+      switch (cif->arg_types[i]->size)
+	{
+	case 8:
+	  stack[j] = *(UINT64 *)avalue[i];
+	  break;
+	case 4:
+	  stack[j] = *(UINT32 *)avalue[i];
+	  break;
+	case 2:
+	  stack[j] = *(UINT16 *)avalue[i];
+	  break;
+	case 1:
+	  stack[j] = *(UINT8 *)avalue[i];
+	  break;
+	default:
+	  stack[j] = (uintptr_t)avalue[i];
+	  break;
+	}
+    }
+
+  ffi_call_win64 (stack, frame, closure);
+}
+
+void
+EFI64(ffi_call)(ffi_cif *cif, void (*fn)(void), void *rvalue, void **avalue)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, NULL);
+}
+
+void
+EFI64(ffi_call_go)(ffi_cif *cif, void (*fn)(void), void *rvalue,
+	     void **avalue, void *closure)
+{
+  ffi_call_int (cif, fn, rvalue, avalue, closure);
+}
+
+
+extern void ffi_closure_win64(void) FFI_HIDDEN;
+extern void ffi_go_closure_win64(void) FFI_HIDDEN;
+
+ffi_status
+EFI64(ffi_prep_closure_loc)(ffi_closure* closure,
+		      ffi_cif* cif,
+		      void (*fun)(ffi_cif*, void*, void**, void*),
+		      void *user_data,
+		      void *codeloc)
+{
+  static const unsigned char trampoline[16] = {
+    /* leaq  -0x7(%rip),%r10   # 0x0  */
+    0x4c, 0x8d, 0x15, 0xf9, 0xff, 0xff, 0xff,
+    /* jmpq  *0x3(%rip)        # 0x10 */
+    0xff, 0x25, 0x03, 0x00, 0x00, 0x00,
+    /* nopl  (%rax) */
+    0x0f, 0x1f, 0x00
+  };
+  char *tramp = closure->tramp;
+
+  switch (cif->abi)
+    {
+    case FFI_WIN64:
+    case FFI_GNUW64:
+      break;
+    default:
+      return FFI_BAD_ABI;
+    }
+
+  memcpy (tramp, trampoline, sizeof(trampoline));
+  *(UINT64 *)(tramp + 16) = (uintptr_t)ffi_closure_win64;
+
+  closure->cif = cif;
+  closure->fun = fun;
+  closure->user_data = user_data;
+
+  return FFI_OK;
+}
+
+ffi_status
+EFI64(ffi_prep_go_closure)(ffi_go_closure* closure, ffi_cif* cif,
+		     void (*fun)(ffi_cif*, void*, void**, void*))
+{
+  switch (cif->abi)
+    {
+    case FFI_WIN64:
+    case FFI_GNUW64:
+      break;
+    default:
+      return FFI_BAD_ABI;
+    }
+
+  closure->tramp = ffi_go_closure_win64;
+  closure->cif = cif;
+  closure->fun = fun;
+
+  return FFI_OK;
+}
+
+struct win64_closure_frame
+{
+  UINT64 rvalue[2];
+  UINT64 fargs[4];
+  UINT64 retaddr;
+  UINT64 args[];
+};
+
+/* Force the inner function to use the MS ABI.  When compiling on win64
+   this is a nop.  When compiling on unix, this simplifies the assembly,
+   and places the burden of saving the extra call-saved registers on
+   the compiler.  */
+int FFI_HIDDEN __attribute__((ms_abi))
+ffi_closure_win64_inner(ffi_cif *cif,
+			void (*fun)(ffi_cif*, void*, void**, void*),
+			void *user_data,
+			struct win64_closure_frame *frame)
+{
+  void **avalue;
+  void *rvalue;
+  int i, n, nreg, flags;
+
+  avalue = alloca(cif->nargs * sizeof(void *));
+  rvalue = frame->rvalue;
+  nreg = 0;
+
+  /* When returning a structure, the address is in the first argument.
+     We must also be prepared to return the same address in eax, so
+     install that address in the frame and pretend we return a pointer.  */
+  flags = cif->flags;
+  if (flags == FFI_TYPE_STRUCT)
+    {
+      rvalue = (void *)(uintptr_t)frame->args[0];
+      frame->rvalue[0] = frame->args[0];
+      nreg = 1;
+    }
+
+  for (i = 0, n = cif->nargs; i < n; ++i, ++nreg)
+    {
+      size_t size = cif->arg_types[i]->size;
+      size_t type = cif->arg_types[i]->type;
+      void *a;
+
+      if (type == FFI_TYPE_DOUBLE || type == FFI_TYPE_FLOAT)
+	{
+	  if (nreg < 4)
+	    a = &frame->fargs[nreg];
+	  else
+	    a = &frame->args[nreg];
+	}
+      else if (size == 1 || size == 2 || size == 4 || size == 8)
+	a = &frame->args[nreg];
+      else
+	a = (void *)(uintptr_t)frame->args[nreg];
+
+      avalue[i] = a;
+    }
+
+  /* Invoke the closure.  */
+  fun (cif, rvalue, avalue, user_data);
+  return flags;
+}
+
+#endif /* __x86_64__ */
diff --git a/contrib/restricted/libffi/src/x86/internal.h b/contrib/restricted/libffi/src/x86/internal.h
index 7cfca13a30..09771ba8cf 100644
--- a/contrib/restricted/libffi/src/x86/internal.h
+++ b/contrib/restricted/libffi/src/x86/internal.h
@@ -1,29 +1,29 @@
-#define X86_RET_FLOAT		0 
-#define X86_RET_DOUBLE		1 
-#define X86_RET_LDOUBLE		2 
-#define X86_RET_SINT8		3 
-#define X86_RET_SINT16		4 
-#define X86_RET_UINT8		5 
-#define X86_RET_UINT16		6 
-#define X86_RET_INT64		7 
-#define X86_RET_INT32		8 
-#define X86_RET_VOID		9 
-#define X86_RET_STRUCTPOP	10 
-#define X86_RET_STRUCTARG       11 
-#define X86_RET_STRUCT_1B	12 
-#define X86_RET_STRUCT_2B	13 
-#define X86_RET_UNUSED14	14 
-#define X86_RET_UNUSED15	15 
- 
-#define X86_RET_TYPE_MASK	15 
-#define X86_RET_POP_SHIFT	4 
- 
-#define R_EAX	0 
-#define R_EDX	1 
-#define R_ECX	2 
- 
-#ifdef __PCC__ 
-# define HAVE_FASTCALL 0 
-#else 
-# define HAVE_FASTCALL 1 
-#endif 
+#define X86_RET_FLOAT		0
+#define X86_RET_DOUBLE		1
+#define X86_RET_LDOUBLE		2
+#define X86_RET_SINT8		3
+#define X86_RET_SINT16		4
+#define X86_RET_UINT8		5
+#define X86_RET_UINT16		6
+#define X86_RET_INT64		7
+#define X86_RET_INT32		8
+#define X86_RET_VOID		9
+#define X86_RET_STRUCTPOP	10
+#define X86_RET_STRUCTARG       11
+#define X86_RET_STRUCT_1B	12
+#define X86_RET_STRUCT_2B	13
+#define X86_RET_UNUSED14	14
+#define X86_RET_UNUSED15	15
+
+#define X86_RET_TYPE_MASK	15
+#define X86_RET_POP_SHIFT	4
+
+#define R_EAX	0
+#define R_EDX	1
+#define R_ECX	2
+
+#ifdef __PCC__
+# define HAVE_FASTCALL 0
+#else
+# define HAVE_FASTCALL 1
+#endif
diff --git a/contrib/restricted/libffi/src/x86/internal64.h b/contrib/restricted/libffi/src/x86/internal64.h
index 62afd4a7e4..512e95523e 100644
--- a/contrib/restricted/libffi/src/x86/internal64.h
+++ b/contrib/restricted/libffi/src/x86/internal64.h
@@ -1,22 +1,22 @@
-#define UNIX64_RET_VOID		0 
-#define UNIX64_RET_UINT8	1 
-#define UNIX64_RET_UINT16	2 
-#define UNIX64_RET_UINT32	3 
-#define UNIX64_RET_SINT8	4 
-#define UNIX64_RET_SINT16	5 
-#define UNIX64_RET_SINT32	6 
-#define UNIX64_RET_INT64	7 
-#define UNIX64_RET_XMM32	8 
-#define UNIX64_RET_XMM64	9 
-#define UNIX64_RET_X87		10 
-#define UNIX64_RET_X87_2	11 
-#define UNIX64_RET_ST_XMM0_RAX	12 
-#define UNIX64_RET_ST_RAX_XMM0	13 
-#define UNIX64_RET_ST_XMM0_XMM1	14 
-#define UNIX64_RET_ST_RAX_RDX	15 
- 
-#define UNIX64_RET_LAST		15 
- 
-#define UNIX64_FLAG_RET_IN_MEM	(1 << 10) 
-#define UNIX64_FLAG_XMM_ARGS	(1 << 11) 
-#define UNIX64_SIZE_SHIFT	12 
+#define UNIX64_RET_VOID		0
+#define UNIX64_RET_UINT8	1
+#define UNIX64_RET_UINT16	2
+#define UNIX64_RET_UINT32	3
+#define UNIX64_RET_SINT8	4
+#define UNIX64_RET_SINT16	5
+#define UNIX64_RET_SINT32	6
+#define UNIX64_RET_INT64	7
+#define UNIX64_RET_XMM32	8
+#define UNIX64_RET_XMM64	9
+#define UNIX64_RET_X87		10
+#define UNIX64_RET_X87_2	11
+#define UNIX64_RET_ST_XMM0_RAX	12
+#define UNIX64_RET_ST_RAX_XMM0	13
+#define UNIX64_RET_ST_XMM0_XMM1	14
+#define UNIX64_RET_ST_RAX_RDX	15
+
+#define UNIX64_RET_LAST		15
+
+#define UNIX64_FLAG_RET_IN_MEM	(1 << 10)
+#define UNIX64_FLAG_XMM_ARGS	(1 << 11)
+#define UNIX64_SIZE_SHIFT	12
diff --git a/contrib/restricted/libffi/src/x86/sysv.S b/contrib/restricted/libffi/src/x86/sysv.S
index 5cf58668fe..7c9598c93c 100644
--- a/contrib/restricted/libffi/src/x86/sysv.S
+++ b/contrib/restricted/libffi/src/x86/sysv.S
@@ -1,7 +1,7 @@
 /* -----------------------------------------------------------------------
-   sysv.S - Copyright (c) 2017  Anthony Green 
-          - Copyright (c) 2013  The Written Word, Inc. 
-          - Copyright (c) 1996,1998,2001-2003,2005,2008,2010  Red Hat, Inc. 
+   sysv.S - Copyright (c) 2017  Anthony Green
+          - Copyright (c) 2013  The Written Word, Inc.
+          - Copyright (c) 1996,1998,2001-2003,2005,2008,2010  Red Hat, Inc.
    
    X86 Foreign Function Interface 
 
@@ -26,1104 +26,1104 @@
    DEALINGS IN THE SOFTWARE.
    ----------------------------------------------------------------------- */
 
-#ifdef __i386__ 
-#ifndef _MSC_VER 
+#ifdef __i386__
+#ifndef _MSC_VER
 
 #define LIBFFI_ASM	
 #include <fficonfig.h>
 #include <ffi.h>
-#include "internal.h" 
-
-#define C2(X, Y)  X ## Y 
-#define C1(X, Y)  C2(X, Y) 
-#ifdef __USER_LABEL_PREFIX__ 
-# define C(X)     C1(__USER_LABEL_PREFIX__, X) 
-#else 
-# define C(X)     X 
-#endif 
-
-#ifdef X86_DARWIN 
-# define L(X)     C1(L, X) 
-#else 
-# define L(X)     C1(.L, X) 
-#endif 
-
-#ifdef __ELF__ 
-# define ENDF(X)  .type	X,@function; .size X, . - X 
-#else 
-# define ENDF(X) 
-#endif 
-
-/* Handle win32 fastcall name mangling.  */ 
-#ifdef X86_WIN32 
-# define ffi_call_i386		@ffi_call_i386@8 
-# define ffi_closure_inner	@ffi_closure_inner@8 
-#else 
-# define ffi_call_i386		C(ffi_call_i386) 
-# define ffi_closure_inner	C(ffi_closure_inner) 
-#endif 
-
-/* This macro allows the safe creation of jump tables without an 
-   actual table.  The entry points into the table are all 8 bytes. 
-   The use of ORG asserts that we're at the correct location.  */ 
-/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */ 
-#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) 
-# define E(BASE, X)	.balign 8 
-#else 
-# define E(BASE, X)	.balign 8; .org BASE + X * 8 
-#endif 
-
-	.text 
-	.balign	16 
-	.globl	ffi_call_i386 
-	FFI_HIDDEN(ffi_call_i386) 
-
-/* This is declared as 
-
-   void ffi_call_i386(struct call_frame *frame, char *argp) 
-        __attribute__((fastcall)); 
-
-   Thus the arguments are present in 
-
-        ecx: frame 
-        edx: argp 
-*/ 
-
-ffi_call_i386: 
-L(UW0): 
-	# cfi_startproc 
-#if !HAVE_FASTCALL 
-	movl	4(%esp), %ecx 
-	movl	8(%esp), %edx 
-#endif 
-	movl	(%esp), %eax		/* move the return address */ 
-	movl	%ebp, (%ecx)		/* store %ebp into local frame */ 
-	movl	%eax, 4(%ecx)		/* store retaddr into local frame */ 
-
-	/* New stack frame based off ebp.  This is a itty bit of unwind 
-	   trickery in that the CFA *has* changed.  There is no easy way 
-	   to describe it correctly on entry to the function.  Fortunately, 
-	   it doesn't matter too much since at all points we can correctly 
-	   unwind back to ffi_call.  Note that the location to which we 
-	   moved the return address is (the new) CFA-4, so from the 
-	   perspective of the unwind info, it hasn't moved.  */ 
-	movl	%ecx, %ebp 
-L(UW1): 
-	# cfi_def_cfa(%ebp, 8) 
-	# cfi_rel_offset(%ebp, 0) 
-
-	movl	%edx, %esp		/* set outgoing argument stack */ 
-	movl	20+R_EAX*4(%ebp), %eax	/* set register arguments */ 
-	movl	20+R_EDX*4(%ebp), %edx 
-	movl	20+R_ECX*4(%ebp), %ecx 
-
-	call	*8(%ebp) 
-
-	movl	12(%ebp), %ecx		/* load return type code */ 
-	movl	%ebx, 8(%ebp)		/* preserve %ebx */ 
-L(UW2): 
-	# cfi_rel_offset(%ebx, 8) 
-
-	andl	$X86_RET_TYPE_MASK, %ecx 
-#ifdef __PIC__ 
-	call	C(__x86.get_pc_thunk.bx) 
-L(pc1): 
-	leal	L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx 
-#else 
-	leal	L(store_table)(,%ecx, 8), %ebx 
-#endif 
-	movl	16(%ebp), %ecx		/* load result address */ 
-	jmp	*%ebx 
-
-	.balign	8 
-L(store_table): 
-E(L(store_table), X86_RET_FLOAT) 
-	fstps	(%ecx) 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_DOUBLE) 
-	fstpl	(%ecx) 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_LDOUBLE) 
-	fstpt	(%ecx) 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_SINT8) 
-	movsbl	%al, %eax 
-	mov	%eax, (%ecx) 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_SINT16) 
-	movswl	%ax, %eax 
-	mov	%eax, (%ecx) 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_UINT8) 
-	movzbl	%al, %eax 
-	mov	%eax, (%ecx) 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_UINT16) 
-	movzwl	%ax, %eax 
-	mov	%eax, (%ecx) 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_INT64) 
-	movl	%edx, 4(%ecx) 
-	/* fallthru */ 
-E(L(store_table), X86_RET_INT32) 
-	movl	%eax, (%ecx) 
-	/* fallthru */ 
-E(L(store_table), X86_RET_VOID) 
-L(e1): 
-	movl	8(%ebp), %ebx 
-	movl	%ebp, %esp 
-	popl	%ebp 
-L(UW3): 
-	# cfi_remember_state 
-	# cfi_def_cfa(%esp, 4) 
-	# cfi_restore(%ebx) 
-	# cfi_restore(%ebp) 
-	ret 
-L(UW4): 
-	# cfi_restore_state 
-
-E(L(store_table), X86_RET_STRUCTPOP) 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_STRUCTARG) 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_STRUCT_1B) 
-	movb	%al, (%ecx) 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_STRUCT_2B) 
-	movw	%ax, (%ecx) 
-	jmp	L(e1) 
-
-	/* Fill out the table so that bad values are predictable.  */ 
-E(L(store_table), X86_RET_UNUSED14) 
-	ud2 
-E(L(store_table), X86_RET_UNUSED15) 
-	ud2 
-
-L(UW5): 
-	# cfi_endproc 
-ENDF(ffi_call_i386) 
-
-/* The inner helper is declared as 
-
-   void ffi_closure_inner(struct closure_frame *frame, char *argp) 
-	__attribute_((fastcall)) 
-
-   Thus the arguments are placed in 
-
-	ecx:	frame 
-	edx:	argp 
-*/ 
-
-/* Macros to help setting up the closure_data structure.  */ 
-
-#if HAVE_FASTCALL 
-# define closure_FS	(40 + 4) 
-# define closure_CF	0 
-#else 
-# define closure_FS	(8 + 40 + 12) 
-# define closure_CF	8 
-#endif 
-
-#define FFI_CLOSURE_SAVE_REGS		\ 
-	movl	%eax, closure_CF+16+R_EAX*4(%esp);	\ 
-	movl	%edx, closure_CF+16+R_EDX*4(%esp);	\ 
-	movl	%ecx, closure_CF+16+R_ECX*4(%esp) 
-
-#define FFI_CLOSURE_COPY_TRAMP_DATA					\ 
-	movl	FFI_TRAMPOLINE_SIZE(%eax), %edx;	/* copy cif */	\ 
-	movl	FFI_TRAMPOLINE_SIZE+4(%eax), %ecx;	/* copy fun */	\ 
-	movl	FFI_TRAMPOLINE_SIZE+8(%eax), %eax;	/* copy user_data */ \ 
-	movl	%edx, closure_CF+28(%esp);				\ 
-	movl	%ecx, closure_CF+32(%esp);				\ 
-	movl	%eax, closure_CF+36(%esp) 
- 
-#if HAVE_FASTCALL 
-# define FFI_CLOSURE_PREP_CALL						\ 
-	movl	%esp, %ecx;			/* load closure_data */	\ 
-	leal	closure_FS+4(%esp), %edx;	/* load incoming stack */ 
+#include "internal.h"
+
+#define C2(X, Y)  X ## Y
+#define C1(X, Y)  C2(X, Y)
+#ifdef __USER_LABEL_PREFIX__
+# define C(X)     C1(__USER_LABEL_PREFIX__, X)
+#else
+# define C(X)     X
+#endif
+
+#ifdef X86_DARWIN
+# define L(X)     C1(L, X)
+#else
+# define L(X)     C1(.L, X)
+#endif
+
+#ifdef __ELF__
+# define ENDF(X)  .type	X,@function; .size X, . - X
+#else
+# define ENDF(X)
+#endif
+
+/* Handle win32 fastcall name mangling.  */
+#ifdef X86_WIN32
+# define ffi_call_i386		@ffi_call_i386@8
+# define ffi_closure_inner	@ffi_closure_inner@8
+#else
+# define ffi_call_i386		C(ffi_call_i386)
+# define ffi_closure_inner	C(ffi_closure_inner)
+#endif
+
+/* This macro allows the safe creation of jump tables without an
+   actual table.  The entry points into the table are all 8 bytes.
+   The use of ORG asserts that we're at the correct location.  */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X)	.balign 8
+#else
+# define E(BASE, X)	.balign 8; .org BASE + X * 8
+#endif
+
+	.text
+	.balign	16
+	.globl	ffi_call_i386
+	FFI_HIDDEN(ffi_call_i386)
+
+/* This is declared as
+
+   void ffi_call_i386(struct call_frame *frame, char *argp)
+        __attribute__((fastcall));
+
+   Thus the arguments are present in
+
+        ecx: frame
+        edx: argp
+*/
+
+ffi_call_i386:
+L(UW0):
+	# cfi_startproc
+#if !HAVE_FASTCALL
+	movl	4(%esp), %ecx
+	movl	8(%esp), %edx
+#endif
+	movl	(%esp), %eax		/* move the return address */
+	movl	%ebp, (%ecx)		/* store %ebp into local frame */
+	movl	%eax, 4(%ecx)		/* store retaddr into local frame */
+
+	/* New stack frame based off ebp.  This is an itty bit of unwind
+	   trickery in that the CFA *has* changed.  There is no easy way
+	   to describe it correctly on entry to the function.  Fortunately,
+	   it doesn't matter too much since at all points we can correctly
+	   unwind back to ffi_call.  Note that the location to which we
+	   moved the return address is (the new) CFA-4, so from the
+	   perspective of the unwind info, it hasn't moved.  */
+	movl	%ecx, %ebp
+L(UW1):
+	# cfi_def_cfa(%ebp, 8)
+	# cfi_rel_offset(%ebp, 0)
+
+	movl	%edx, %esp		/* set outgoing argument stack */
+	movl	20+R_EAX*4(%ebp), %eax	/* set register arguments */
+	movl	20+R_EDX*4(%ebp), %edx
+	movl	20+R_ECX*4(%ebp), %ecx
+
+	call	*8(%ebp)
+
+	movl	12(%ebp), %ecx		/* load return type code */
+	movl	%ebx, 8(%ebp)		/* preserve %ebx */
+L(UW2):
+	# cfi_rel_offset(%ebx, 8)
+
+	andl	$X86_RET_TYPE_MASK, %ecx
+#ifdef __PIC__
+	call	C(__x86.get_pc_thunk.bx)
+L(pc1):
+	leal	L(store_table)-L(pc1)(%ebx, %ecx, 8), %ebx
+#else
+	leal	L(store_table)(,%ecx, 8), %ebx
+#endif
+	movl	16(%ebp), %ecx		/* load result address */
+	jmp	*%ebx
+
+	.balign	8
+L(store_table):
+E(L(store_table), X86_RET_FLOAT)
+	fstps	(%ecx)
+	jmp	L(e1)
+E(L(store_table), X86_RET_DOUBLE)
+	fstpl	(%ecx)
+	jmp	L(e1)
+E(L(store_table), X86_RET_LDOUBLE)
+	fstpt	(%ecx)
+	jmp	L(e1)
+E(L(store_table), X86_RET_SINT8)
+	movsbl	%al, %eax
+	mov	%eax, (%ecx)
+	jmp	L(e1)
+E(L(store_table), X86_RET_SINT16)
+	movswl	%ax, %eax
+	mov	%eax, (%ecx)
+	jmp	L(e1)
+E(L(store_table), X86_RET_UINT8)
+	movzbl	%al, %eax
+	mov	%eax, (%ecx)
+	jmp	L(e1)
+E(L(store_table), X86_RET_UINT16)
+	movzwl	%ax, %eax
+	mov	%eax, (%ecx)
+	jmp	L(e1)
+E(L(store_table), X86_RET_INT64)
+	movl	%edx, 4(%ecx)
+	/* fallthru */
+E(L(store_table), X86_RET_INT32)
+	movl	%eax, (%ecx)
+	/* fallthru */
+E(L(store_table), X86_RET_VOID)
+L(e1):
+	movl	8(%ebp), %ebx
+	movl	%ebp, %esp
+	popl	%ebp
+L(UW3):
+	# cfi_remember_state
+	# cfi_def_cfa(%esp, 4)
+	# cfi_restore(%ebx)
+	# cfi_restore(%ebp)
+	ret
+L(UW4):
+	# cfi_restore_state
+
+E(L(store_table), X86_RET_STRUCTPOP)
+	jmp	L(e1)
+E(L(store_table), X86_RET_STRUCTARG)
+	jmp	L(e1)
+E(L(store_table), X86_RET_STRUCT_1B)
+	movb	%al, (%ecx)
+	jmp	L(e1)
+E(L(store_table), X86_RET_STRUCT_2B)
+	movw	%ax, (%ecx)
+	jmp	L(e1)
+
+	/* Fill out the table so that bad values are predictable.  */
+E(L(store_table), X86_RET_UNUSED14)
+	ud2
+E(L(store_table), X86_RET_UNUSED15)
+	ud2
+
+L(UW5):
+	# cfi_endproc
+ENDF(ffi_call_i386)
+
+/* The inner helper is declared as
+
+   void ffi_closure_inner(struct closure_frame *frame, char *argp)
+	__attribute__((fastcall))
+
+   Thus the arguments are placed in
+
+	ecx:	frame
+	edx:	argp
+*/
+
+/* Macros to help setting up the closure_data structure.  */
+
+#if HAVE_FASTCALL
+# define closure_FS	(40 + 4)
+# define closure_CF	0
 #else
-# define FFI_CLOSURE_PREP_CALL						\ 
-	leal	closure_CF(%esp), %ecx;		/* load closure_data */	\ 
-	leal	closure_FS+4(%esp), %edx;	/* load incoming stack */ \ 
-	movl	%ecx, (%esp);						\ 
-	movl	%edx, 4(%esp) 
+# define closure_FS	(8 + 40 + 12)
+# define closure_CF	8
 #endif
 
-#define FFI_CLOSURE_CALL_INNER(UWN) \ 
-	call	ffi_closure_inner 
- 
-#define FFI_CLOSURE_MASK_AND_JUMP(N, UW)				\ 
-	andl	$X86_RET_TYPE_MASK, %eax;				\ 
-	leal	L(C1(load_table,N))(, %eax, 8), %edx;			\ 
-	movl	closure_CF(%esp), %eax;		/* optimiztic load */	\ 
-	jmp	*%edx 
- 
-#ifdef __PIC__ 
-# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE 
-#  undef FFI_CLOSURE_MASK_AND_JUMP 
-#  define FFI_CLOSURE_MASK_AND_JUMP(N, UW)				\ 
-	andl	$X86_RET_TYPE_MASK, %eax;				\ 
-	call	C(__x86.get_pc_thunk.dx);				\ 
-L(C1(pc,N)):								\ 
-	leal	L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx;	\ 
-	movl	closure_CF(%esp), %eax;		/* optimiztic load */	\ 
-	jmp	*%edx 
-# else 
-#  define FFI_CLOSURE_CALL_INNER_SAVE_EBX 
-#  undef FFI_CLOSURE_CALL_INNER 
-#  define FFI_CLOSURE_CALL_INNER(UWN)					\ 
-	movl	%ebx, 40(%esp);			/* save ebx */		\ 
-L(C1(UW,UWN)):								\ 
-	/* cfi_rel_offset(%ebx, 40); */					\ 
-	call	C(__x86.get_pc_thunk.bx);	/* load got register */	\ 
-	addl	$C(_GLOBAL_OFFSET_TABLE_), %ebx;			\ 
-	call	ffi_closure_inner@PLT 
-#  undef FFI_CLOSURE_MASK_AND_JUMP 
-#  define FFI_CLOSURE_MASK_AND_JUMP(N, UWN)				\ 
-	andl	$X86_RET_TYPE_MASK, %eax;				\ 
-	leal	L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), %edx;	\ 
-	movl	40(%esp), %ebx;			/* restore ebx */	\ 
-L(C1(UW,UWN)):								\ 
-	/* cfi_restore(%ebx); */					\ 
-	movl	closure_CF(%esp), %eax;		/* optimiztic load */	\ 
-	jmp	*%edx 
-# endif /* DARWIN || HIDDEN */ 
-#endif /* __PIC__ */ 
- 
-	.balign	16 
-	.globl	C(ffi_go_closure_EAX) 
-	FFI_HIDDEN(C(ffi_go_closure_EAX)) 
-C(ffi_go_closure_EAX): 
-L(UW6): 
-	# cfi_startproc 
-	subl	$closure_FS, %esp 
-L(UW7): 
-	# cfi_def_cfa_offset(closure_FS + 4) 
-	FFI_CLOSURE_SAVE_REGS 
-	movl	4(%eax), %edx			/* copy cif */ 
-	movl	8(%eax), %ecx			/* copy fun */ 
-	movl	%edx, closure_CF+28(%esp) 
-	movl	%ecx, closure_CF+32(%esp) 
-	movl	%eax, closure_CF+36(%esp)	/* closure is user_data */ 
-	jmp	L(do_closure_i386) 
-L(UW8): 
-	# cfi_endproc 
-ENDF(C(ffi_go_closure_EAX)) 
- 
-	.balign	16 
-	.globl	C(ffi_go_closure_ECX) 
-	FFI_HIDDEN(C(ffi_go_closure_ECX)) 
-C(ffi_go_closure_ECX): 
-L(UW9): 
-	# cfi_startproc 
-	subl	$closure_FS, %esp 
-L(UW10): 
-	# cfi_def_cfa_offset(closure_FS + 4) 
-	FFI_CLOSURE_SAVE_REGS 
-	movl	4(%ecx), %edx			/* copy cif */ 
-	movl	8(%ecx), %eax			/* copy fun */ 
-	movl	%edx, closure_CF+28(%esp) 
-	movl	%eax, closure_CF+32(%esp) 
-	movl	%ecx, closure_CF+36(%esp)	/* closure is user_data */ 
-	jmp	L(do_closure_i386) 
-L(UW11): 
-	# cfi_endproc 
-ENDF(C(ffi_go_closure_ECX)) 
- 
-/* The closure entry points are reached from the ffi_closure trampoline. 
-   On entry, %eax contains the address of the ffi_closure.  */ 
- 
-	.balign	16 
-	.globl	C(ffi_closure_i386) 
-	FFI_HIDDEN(C(ffi_closure_i386)) 
- 
-C(ffi_closure_i386): 
-L(UW12): 
-	# cfi_startproc 
-	subl	$closure_FS, %esp 
-L(UW13): 
-	# cfi_def_cfa_offset(closure_FS + 4) 
- 
-	FFI_CLOSURE_SAVE_REGS 
-	FFI_CLOSURE_COPY_TRAMP_DATA 
- 
-	/* Entry point from preceeding Go closures.  */ 
-L(do_closure_i386): 
- 
-	FFI_CLOSURE_PREP_CALL 
-	FFI_CLOSURE_CALL_INNER(14) 
-	FFI_CLOSURE_MASK_AND_JUMP(2, 15) 
- 
-	.balign	8 
-L(load_table2): 
-E(L(load_table2), X86_RET_FLOAT) 
-	flds	closure_CF(%esp) 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_DOUBLE) 
-	fldl	closure_CF(%esp) 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_LDOUBLE) 
-	fldt	closure_CF(%esp) 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_SINT8) 
-	movsbl	%al, %eax 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_SINT16) 
-	movswl	%ax, %eax 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_UINT8) 
-	movzbl	%al, %eax 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_UINT16) 
-	movzwl	%ax, %eax 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_INT64) 
-	movl	closure_CF+4(%esp), %edx 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_INT32) 
-	nop 
-	/* fallthru */ 
-E(L(load_table2), X86_RET_VOID) 
-L(e2): 
-	addl	$closure_FS, %esp 
-L(UW16): 
-	# cfi_adjust_cfa_offset(-closure_FS) 
+#define FFI_CLOSURE_SAVE_REGS		\
+	movl	%eax, closure_CF+16+R_EAX*4(%esp);	\
+	movl	%edx, closure_CF+16+R_EDX*4(%esp);	\
+	movl	%ecx, closure_CF+16+R_ECX*4(%esp)
+
+#define FFI_CLOSURE_COPY_TRAMP_DATA					\
+	movl	FFI_TRAMPOLINE_SIZE(%eax), %edx;	/* copy cif */	\
+	movl	FFI_TRAMPOLINE_SIZE+4(%eax), %ecx;	/* copy fun */	\
+	movl	FFI_TRAMPOLINE_SIZE+8(%eax), %eax;	/* copy user_data */ \
+	movl	%edx, closure_CF+28(%esp);				\
+	movl	%ecx, closure_CF+32(%esp);				\
+	movl	%eax, closure_CF+36(%esp)
+
+#if HAVE_FASTCALL
+# define FFI_CLOSURE_PREP_CALL						\
+	movl	%esp, %ecx;			/* load closure_data */	\
+	leal	closure_FS+4(%esp), %edx;	/* load incoming stack */
+#else
+# define FFI_CLOSURE_PREP_CALL						\
+	leal	closure_CF(%esp), %ecx;		/* load closure_data */	\
+	leal	closure_FS+4(%esp), %edx;	/* load incoming stack */ \
+	movl	%ecx, (%esp);						\
+	movl	%edx, 4(%esp)
+#endif
+
+#define FFI_CLOSURE_CALL_INNER(UWN) \
+	call	ffi_closure_inner
+
+#define FFI_CLOSURE_MASK_AND_JUMP(N, UW)				\
+	andl	$X86_RET_TYPE_MASK, %eax;				\
+	leal	L(C1(load_table,N))(, %eax, 8), %edx;			\
+	movl	closure_CF(%esp), %eax;		/* optimistic load */	\
+	jmp	*%edx
+
+#ifdef __PIC__
+# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
+#  undef FFI_CLOSURE_MASK_AND_JUMP
+#  define FFI_CLOSURE_MASK_AND_JUMP(N, UW)				\
+	andl	$X86_RET_TYPE_MASK, %eax;				\
+	call	C(__x86.get_pc_thunk.dx);				\
+L(C1(pc,N)):								\
+	leal	L(C1(load_table,N))-L(C1(pc,N))(%edx, %eax, 8), %edx;	\
+	movl	closure_CF(%esp), %eax;		/* optimistic load */	\
+	jmp	*%edx
+# else
+#  define FFI_CLOSURE_CALL_INNER_SAVE_EBX
+#  undef FFI_CLOSURE_CALL_INNER
+#  define FFI_CLOSURE_CALL_INNER(UWN)					\
+	movl	%ebx, 40(%esp);			/* save ebx */		\
+L(C1(UW,UWN)):								\
+	/* cfi_rel_offset(%ebx, 40); */					\
+	call	C(__x86.get_pc_thunk.bx);	/* load got register */	\
+	addl	$C(_GLOBAL_OFFSET_TABLE_), %ebx;			\
+	call	ffi_closure_inner@PLT
+#  undef FFI_CLOSURE_MASK_AND_JUMP
+#  define FFI_CLOSURE_MASK_AND_JUMP(N, UWN)				\
+	andl	$X86_RET_TYPE_MASK, %eax;				\
+	leal	L(C1(load_table,N))@GOTOFF(%ebx, %eax, 8), %edx;	\
+	movl	40(%esp), %ebx;			/* restore ebx */	\
+L(C1(UW,UWN)):								\
+	/* cfi_restore(%ebx); */					\
+	movl	closure_CF(%esp), %eax;		/* optimistic load */	\
+	jmp	*%edx
+# endif /* DARWIN || HIDDEN */
+#endif /* __PIC__ */
+
+	.balign	16
+	.globl	C(ffi_go_closure_EAX)
+	FFI_HIDDEN(C(ffi_go_closure_EAX))
+C(ffi_go_closure_EAX):
+L(UW6):
+	# cfi_startproc
+	subl	$closure_FS, %esp
+L(UW7):
+	# cfi_def_cfa_offset(closure_FS + 4)
+	FFI_CLOSURE_SAVE_REGS
+	movl	4(%eax), %edx			/* copy cif */
+	movl	8(%eax), %ecx			/* copy fun */
+	movl	%edx, closure_CF+28(%esp)
+	movl	%ecx, closure_CF+32(%esp)
+	movl	%eax, closure_CF+36(%esp)	/* closure is user_data */
+	jmp	L(do_closure_i386)
+L(UW8):
+	# cfi_endproc
+ENDF(C(ffi_go_closure_EAX))
+
+	.balign	16
+	.globl	C(ffi_go_closure_ECX)
+	FFI_HIDDEN(C(ffi_go_closure_ECX))
+C(ffi_go_closure_ECX):
+L(UW9):
+	# cfi_startproc
+	subl	$closure_FS, %esp
+L(UW10):
+	# cfi_def_cfa_offset(closure_FS + 4)
+	FFI_CLOSURE_SAVE_REGS
+	movl	4(%ecx), %edx			/* copy cif */
+	movl	8(%ecx), %eax			/* copy fun */
+	movl	%edx, closure_CF+28(%esp)
+	movl	%eax, closure_CF+32(%esp)
+	movl	%ecx, closure_CF+36(%esp)	/* closure is user_data */
+	jmp	L(do_closure_i386)
+L(UW11):
+	# cfi_endproc
+ENDF(C(ffi_go_closure_ECX))
+
+/* The closure entry points are reached from the ffi_closure trampoline.
+   On entry, %eax contains the address of the ffi_closure.  */
+
+	.balign	16
+	.globl	C(ffi_closure_i386)
+	FFI_HIDDEN(C(ffi_closure_i386))
+
+C(ffi_closure_i386):
+L(UW12):
+	# cfi_startproc
+	subl	$closure_FS, %esp
+L(UW13):
+	# cfi_def_cfa_offset(closure_FS + 4)
+
+	FFI_CLOSURE_SAVE_REGS
+	FFI_CLOSURE_COPY_TRAMP_DATA
+
+	/* Entry point from preceding Go closures.  */
+L(do_closure_i386):
+
+	FFI_CLOSURE_PREP_CALL
+	FFI_CLOSURE_CALL_INNER(14)
+	FFI_CLOSURE_MASK_AND_JUMP(2, 15)
+
+	.balign	8
+L(load_table2):
+E(L(load_table2), X86_RET_FLOAT)
+	flds	closure_CF(%esp)
+	jmp	L(e2)
+E(L(load_table2), X86_RET_DOUBLE)
+	fldl	closure_CF(%esp)
+	jmp	L(e2)
+E(L(load_table2), X86_RET_LDOUBLE)
+	fldt	closure_CF(%esp)
+	jmp	L(e2)
+E(L(load_table2), X86_RET_SINT8)
+	movsbl	%al, %eax
+	jmp	L(e2)
+E(L(load_table2), X86_RET_SINT16)
+	movswl	%ax, %eax
+	jmp	L(e2)
+E(L(load_table2), X86_RET_UINT8)
+	movzbl	%al, %eax
+	jmp	L(e2)
+E(L(load_table2), X86_RET_UINT16)
+	movzwl	%ax, %eax
+	jmp	L(e2)
+E(L(load_table2), X86_RET_INT64)
+	movl	closure_CF+4(%esp), %edx
+	jmp	L(e2)
+E(L(load_table2), X86_RET_INT32)
+	nop
+	/* fallthru */
+E(L(load_table2), X86_RET_VOID)
+L(e2):
+	addl	$closure_FS, %esp
+L(UW16):
+	# cfi_adjust_cfa_offset(-closure_FS)
 	ret
-L(UW17): 
-	# cfi_adjust_cfa_offset(closure_FS) 
-E(L(load_table2), X86_RET_STRUCTPOP) 
-	addl	$closure_FS, %esp 
-L(UW18): 
-	# cfi_adjust_cfa_offset(-closure_FS) 
+L(UW17):
+	# cfi_adjust_cfa_offset(closure_FS)
+E(L(load_table2), X86_RET_STRUCTPOP)
+	addl	$closure_FS, %esp
+L(UW18):
+	# cfi_adjust_cfa_offset(-closure_FS)
 	ret	$4
-L(UW19): 
-	# cfi_adjust_cfa_offset(closure_FS) 
-E(L(load_table2), X86_RET_STRUCTARG) 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_STRUCT_1B) 
-	movzbl	%al, %eax 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_STRUCT_2B) 
-	movzwl	%ax, %eax 
-	jmp	L(e2) 
-
-	/* Fill out the table so that bad values are predictable.  */ 
-E(L(load_table2), X86_RET_UNUSED14) 
-	ud2 
-E(L(load_table2), X86_RET_UNUSED15) 
-	ud2 
- 
-L(UW20): 
-	# cfi_endproc 
-ENDF(C(ffi_closure_i386)) 
- 
-	.balign	16 
-	.globl	C(ffi_go_closure_STDCALL) 
-	FFI_HIDDEN(C(ffi_go_closure_STDCALL)) 
-C(ffi_go_closure_STDCALL): 
-L(UW21): 
-	# cfi_startproc 
-	subl	$closure_FS, %esp 
-L(UW22): 
-	# cfi_def_cfa_offset(closure_FS + 4) 
-	FFI_CLOSURE_SAVE_REGS 
-	movl	4(%ecx), %edx			/* copy cif */ 
-	movl	8(%ecx), %eax			/* copy fun */ 
-	movl	%edx, closure_CF+28(%esp) 
-	movl	%eax, closure_CF+32(%esp) 
-	movl	%ecx, closure_CF+36(%esp)	/* closure is user_data */ 
-	jmp	L(do_closure_STDCALL) 
-L(UW23): 
-	# cfi_endproc 
-ENDF(C(ffi_go_closure_STDCALL)) 
- 
-/* For REGISTER, we have no available parameter registers, and so we 
-   enter here having pushed the closure onto the stack.  */ 
- 
-	.balign	16 
-	.globl	C(ffi_closure_REGISTER) 
-	FFI_HIDDEN(C(ffi_closure_REGISTER)) 
-C(ffi_closure_REGISTER): 
-L(UW24): 
-	# cfi_startproc 
-	# cfi_def_cfa(%esp, 8) 
-	# cfi_offset(%eip, -8) 
-	subl	$closure_FS-4, %esp 
-L(UW25): 
-	# cfi_def_cfa_offset(closure_FS + 4) 
-	FFI_CLOSURE_SAVE_REGS 
-	movl	closure_FS-4(%esp), %ecx	/* load retaddr */ 
-	movl	closure_FS(%esp), %eax		/* load closure */ 
-	movl	%ecx, closure_FS(%esp)		/* move retaddr */ 
-	jmp	L(do_closure_REGISTER) 
-L(UW26): 
-	# cfi_endproc 
-ENDF(C(ffi_closure_REGISTER)) 
- 
-/* For STDCALL (and others), we need to pop N bytes of arguments off 
-   the stack following the closure.  The amount needing to be popped 
-   is returned to us from ffi_closure_inner.  */ 
- 
-	.balign	16 
-	.globl	C(ffi_closure_STDCALL) 
-	FFI_HIDDEN(C(ffi_closure_STDCALL)) 
-C(ffi_closure_STDCALL): 
-L(UW27): 
-	# cfi_startproc 
-	subl	$closure_FS, %esp 
-L(UW28): 
-	# cfi_def_cfa_offset(closure_FS + 4) 
- 
-	FFI_CLOSURE_SAVE_REGS 
- 
-	/* Entry point from ffi_closure_REGISTER.  */ 
-L(do_closure_REGISTER): 
- 
-	FFI_CLOSURE_COPY_TRAMP_DATA 
- 
-	/* Entry point from preceeding Go closure.  */ 
-L(do_closure_STDCALL): 
- 
-	FFI_CLOSURE_PREP_CALL 
-	FFI_CLOSURE_CALL_INNER(29) 
- 
-	movl	%eax, %ecx 
-	shrl	$X86_RET_POP_SHIFT, %ecx	/* isolate pop count */ 
-	leal	closure_FS(%esp, %ecx), %ecx	/* compute popped esp */ 
-	movl	closure_FS(%esp), %edx		/* move return address */ 
-	movl	%edx, (%ecx) 
- 
-	/* From this point on, the value of %esp upon return is %ecx+4, 
-	   and we've copied the return address to %ecx to make return easy. 
-	   There's no point in representing this in the unwind info, as 
-	   there is always a window between the mov and the ret which 
-	   will be wrong from one point of view or another.  */ 
- 
-	FFI_CLOSURE_MASK_AND_JUMP(3, 30) 
- 
-	.balign	8 
-L(load_table3): 
-E(L(load_table3), X86_RET_FLOAT) 
-	flds    closure_CF(%esp) 
-	movl    %ecx, %esp 
-	ret 
-E(L(load_table3), X86_RET_DOUBLE) 
-	fldl    closure_CF(%esp) 
-	movl    %ecx, %esp 
-	ret 
-E(L(load_table3), X86_RET_LDOUBLE) 
-	fldt    closure_CF(%esp) 
-	movl    %ecx, %esp 
-	ret 
-E(L(load_table3), X86_RET_SINT8) 
-	movsbl  %al, %eax 
-	movl    %ecx, %esp 
-	ret 
-E(L(load_table3), X86_RET_SINT16) 
-	movswl  %ax, %eax 
-	movl    %ecx, %esp 
-	ret 
-E(L(load_table3), X86_RET_UINT8) 
-	movzbl  %al, %eax 
-	movl    %ecx, %esp 
-	ret 
-E(L(load_table3), X86_RET_UINT16) 
-	movzwl  %ax, %eax 
-	movl    %ecx, %esp 
-	ret 
-E(L(load_table3), X86_RET_INT64) 
-	movl	closure_CF+4(%esp), %edx 
-	movl    %ecx, %esp 
-	ret 
-E(L(load_table3), X86_RET_INT32) 
-	movl    %ecx, %esp 
-	ret 
-E(L(load_table3), X86_RET_VOID) 
-	movl    %ecx, %esp 
-	ret 
-E(L(load_table3), X86_RET_STRUCTPOP) 
-	movl    %ecx, %esp 
-	ret 
-E(L(load_table3), X86_RET_STRUCTARG) 
-	movl	%ecx, %esp 
-	ret 
-E(L(load_table3), X86_RET_STRUCT_1B) 
-	movzbl	%al, %eax 
-	movl	%ecx, %esp 
-	ret 
-E(L(load_table3), X86_RET_STRUCT_2B) 
-	movzwl	%ax, %eax 
-	movl	%ecx, %esp 
-	ret 
- 
-	/* Fill out the table so that bad values are predictable.  */ 
-E(L(load_table3), X86_RET_UNUSED14) 
-	ud2 
-E(L(load_table3), X86_RET_UNUSED15) 
-	ud2 
- 
-L(UW31): 
-	# cfi_endproc 
-ENDF(C(ffi_closure_STDCALL)) 
- 
+L(UW19):
+	# cfi_adjust_cfa_offset(closure_FS)
+E(L(load_table2), X86_RET_STRUCTARG)
+	jmp	L(e2)
+E(L(load_table2), X86_RET_STRUCT_1B)
+	movzbl	%al, %eax
+	jmp	L(e2)
+E(L(load_table2), X86_RET_STRUCT_2B)
+	movzwl	%ax, %eax
+	jmp	L(e2)
+
+	/* Fill out the table so that bad values are predictable.  */
+E(L(load_table2), X86_RET_UNUSED14)
+	ud2
+E(L(load_table2), X86_RET_UNUSED15)
+	ud2
+
+L(UW20):
+	# cfi_endproc
+ENDF(C(ffi_closure_i386))
+
+	.balign	16
+	.globl	C(ffi_go_closure_STDCALL)
+	FFI_HIDDEN(C(ffi_go_closure_STDCALL))
+C(ffi_go_closure_STDCALL):
+L(UW21):
+	# cfi_startproc
+	subl	$closure_FS, %esp
+L(UW22):
+	# cfi_def_cfa_offset(closure_FS + 4)
+	FFI_CLOSURE_SAVE_REGS
+	movl	4(%ecx), %edx			/* copy cif */
+	movl	8(%ecx), %eax			/* copy fun */
+	movl	%edx, closure_CF+28(%esp)
+	movl	%eax, closure_CF+32(%esp)
+	movl	%ecx, closure_CF+36(%esp)	/* closure is user_data */
+	jmp	L(do_closure_STDCALL)
+L(UW23):
+	# cfi_endproc
+ENDF(C(ffi_go_closure_STDCALL))
+
+/* For REGISTER, we have no available parameter registers, and so we
+   enter here having pushed the closure onto the stack.  */
+
+	.balign	16
+	.globl	C(ffi_closure_REGISTER)
+	FFI_HIDDEN(C(ffi_closure_REGISTER))
+C(ffi_closure_REGISTER):
+L(UW24):
+	# cfi_startproc
+	# cfi_def_cfa(%esp, 8)
+	# cfi_offset(%eip, -8)
+	subl	$closure_FS-4, %esp
+L(UW25):
+	# cfi_def_cfa_offset(closure_FS + 4)
+	FFI_CLOSURE_SAVE_REGS
+	movl	closure_FS-4(%esp), %ecx	/* load retaddr */
+	movl	closure_FS(%esp), %eax		/* load closure */
+	movl	%ecx, closure_FS(%esp)		/* move retaddr */
+	jmp	L(do_closure_REGISTER)
+L(UW26):
+	# cfi_endproc
+ENDF(C(ffi_closure_REGISTER))
+
+/* For STDCALL (and others), we need to pop N bytes of arguments off
+   the stack following the closure.  The amount needing to be popped
+   is returned to us from ffi_closure_inner.  */
+
+	.balign	16
+	.globl	C(ffi_closure_STDCALL)
+	FFI_HIDDEN(C(ffi_closure_STDCALL))
+C(ffi_closure_STDCALL):
+L(UW27):
+	# cfi_startproc
+	subl	$closure_FS, %esp
+L(UW28):
+	# cfi_def_cfa_offset(closure_FS + 4)
+
+	FFI_CLOSURE_SAVE_REGS
+
+	/* Entry point from ffi_closure_REGISTER.  */
+L(do_closure_REGISTER):
+
+	FFI_CLOSURE_COPY_TRAMP_DATA
+
+	/* Entry point from preceding Go closure.  */
+L(do_closure_STDCALL):
+
+	FFI_CLOSURE_PREP_CALL
+	FFI_CLOSURE_CALL_INNER(29)
+
+	movl	%eax, %ecx
+	shrl	$X86_RET_POP_SHIFT, %ecx	/* isolate pop count */
+	leal	closure_FS(%esp, %ecx), %ecx	/* compute popped esp */
+	movl	closure_FS(%esp), %edx		/* move return address */
+	movl	%edx, (%ecx)
+
+	/* From this point on, the value of %esp upon return is %ecx+4,
+	   and we've copied the return address to %ecx to make return easy.
+	   There's no point in representing this in the unwind info, as
+	   there is always a window between the mov and the ret which
+	   will be wrong from one point of view or another.  */
+
+	FFI_CLOSURE_MASK_AND_JUMP(3, 30)
+
+	.balign	8
+L(load_table3):
+E(L(load_table3), X86_RET_FLOAT)
+	flds    closure_CF(%esp)
+	movl    %ecx, %esp
+	ret
+E(L(load_table3), X86_RET_DOUBLE)
+	fldl    closure_CF(%esp)
+	movl    %ecx, %esp
+	ret
+E(L(load_table3), X86_RET_LDOUBLE)
+	fldt    closure_CF(%esp)
+	movl    %ecx, %esp
+	ret
+E(L(load_table3), X86_RET_SINT8)
+	movsbl  %al, %eax
+	movl    %ecx, %esp
+	ret
+E(L(load_table3), X86_RET_SINT16)
+	movswl  %ax, %eax
+	movl    %ecx, %esp
+	ret
+E(L(load_table3), X86_RET_UINT8)
+	movzbl  %al, %eax
+	movl    %ecx, %esp
+	ret
+E(L(load_table3), X86_RET_UINT16)
+	movzwl  %ax, %eax
+	movl    %ecx, %esp
+	ret
+E(L(load_table3), X86_RET_INT64)
+	movl	closure_CF+4(%esp), %edx
+	movl    %ecx, %esp
+	ret
+E(L(load_table3), X86_RET_INT32)
+	movl    %ecx, %esp
+	ret
+E(L(load_table3), X86_RET_VOID)
+	movl    %ecx, %esp
+	ret
+E(L(load_table3), X86_RET_STRUCTPOP)
+	movl    %ecx, %esp
+	ret
+E(L(load_table3), X86_RET_STRUCTARG)
+	movl	%ecx, %esp
+	ret
+E(L(load_table3), X86_RET_STRUCT_1B)
+	movzbl	%al, %eax
+	movl	%ecx, %esp
+	ret
+E(L(load_table3), X86_RET_STRUCT_2B)
+	movzwl	%ax, %eax
+	movl	%ecx, %esp
+	ret
+
+	/* Fill out the table so that bad values are predictable.  */
+E(L(load_table3), X86_RET_UNUSED14)
+	ud2
+E(L(load_table3), X86_RET_UNUSED15)
+	ud2
+
+L(UW31):
+	# cfi_endproc
+ENDF(C(ffi_closure_STDCALL))
+
 #if !FFI_NO_RAW_API
 
-#define raw_closure_S_FS	(16+16+12) 
- 
-	.balign	16 
-	.globl	C(ffi_closure_raw_SYSV) 
-	FFI_HIDDEN(C(ffi_closure_raw_SYSV)) 
-C(ffi_closure_raw_SYSV): 
-L(UW32): 
-	# cfi_startproc 
-	subl	$raw_closure_S_FS, %esp 
-L(UW33): 
-	# cfi_def_cfa_offset(raw_closure_S_FS + 4) 
-	movl	%ebx, raw_closure_S_FS-4(%esp) 
-L(UW34): 
-	# cfi_rel_offset(%ebx, raw_closure_S_FS-4) 
- 
-	movl	FFI_TRAMPOLINE_SIZE+8(%eax), %edx	/* load cl->user_data */ 
-	movl	%edx, 12(%esp) 
-	leal	raw_closure_S_FS+4(%esp), %edx		/* load raw_args */ 
-	movl	%edx, 8(%esp) 
-	leal	16(%esp), %edx				/* load &res */ 
-	movl	%edx, 4(%esp) 
-	movl	FFI_TRAMPOLINE_SIZE(%eax), %ebx		/* load cl->cif */ 
-	movl	%ebx, (%esp) 
-	call	*FFI_TRAMPOLINE_SIZE+4(%eax)		/* call cl->fun */ 
- 
-	movl	20(%ebx), %eax				/* load cif->flags */ 
-	andl	$X86_RET_TYPE_MASK, %eax 
-#ifdef __PIC__ 
-	call	C(__x86.get_pc_thunk.bx) 
-L(pc4): 
-	leal	L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx 
+#define raw_closure_S_FS	(16+16+12)
+
+	.balign	16
+	.globl	C(ffi_closure_raw_SYSV)
+	FFI_HIDDEN(C(ffi_closure_raw_SYSV))
+C(ffi_closure_raw_SYSV):
+L(UW32):
+	# cfi_startproc
+	subl	$raw_closure_S_FS, %esp
+L(UW33):
+	# cfi_def_cfa_offset(raw_closure_S_FS + 4)
+	movl	%ebx, raw_closure_S_FS-4(%esp)
+L(UW34):
+	# cfi_rel_offset(%ebx, raw_closure_S_FS-4)
+
+	movl	FFI_TRAMPOLINE_SIZE+8(%eax), %edx	/* load cl->user_data */
+	movl	%edx, 12(%esp)
+	leal	raw_closure_S_FS+4(%esp), %edx		/* load raw_args */
+	movl	%edx, 8(%esp)
+	leal	16(%esp), %edx				/* load &res */
+	movl	%edx, 4(%esp)
+	movl	FFI_TRAMPOLINE_SIZE(%eax), %ebx		/* load cl->cif */
+	movl	%ebx, (%esp)
+	call	*FFI_TRAMPOLINE_SIZE+4(%eax)		/* call cl->fun */
+
+	movl	20(%ebx), %eax				/* load cif->flags */
+	andl	$X86_RET_TYPE_MASK, %eax
+#ifdef __PIC__
+	call	C(__x86.get_pc_thunk.bx)
+L(pc4):
+	leal	L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx
 #else
-	leal	L(load_table4)(,%eax, 8), %ecx 
+	leal	L(load_table4)(,%eax, 8), %ecx
 #endif
-	movl	raw_closure_S_FS-4(%esp), %ebx 
-L(UW35): 
-	# cfi_restore(%ebx) 
-	movl	16(%esp), %eax				/* Optimistic load */ 
-	jmp	*%ecx 
-
-	.balign	8 
-L(load_table4): 
-E(L(load_table4), X86_RET_FLOAT) 
-	flds	16(%esp) 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_DOUBLE) 
-	fldl	16(%esp) 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_LDOUBLE) 
-	fldt	16(%esp) 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_SINT8) 
-	movsbl	%al, %eax 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_SINT16) 
-	movswl	%ax, %eax 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_UINT8) 
-	movzbl	%al, %eax 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_UINT16) 
-	movzwl	%ax, %eax 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_INT64) 
-	movl	16+4(%esp), %edx 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_INT32) 
-	nop 
-	/* fallthru */ 
-E(L(load_table4), X86_RET_VOID) 
-L(e4): 
-	addl	$raw_closure_S_FS, %esp 
-L(UW36): 
-	# cfi_adjust_cfa_offset(-raw_closure_S_FS) 
-	ret 
-L(UW37): 
-	# cfi_adjust_cfa_offset(raw_closure_S_FS) 
-E(L(load_table4), X86_RET_STRUCTPOP) 
-	addl	$raw_closure_S_FS, %esp 
-L(UW38): 
-	# cfi_adjust_cfa_offset(-raw_closure_S_FS) 
-	ret	$4 
-L(UW39): 
-	# cfi_adjust_cfa_offset(raw_closure_S_FS) 
-E(L(load_table4), X86_RET_STRUCTARG) 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_STRUCT_1B) 
-	movzbl	%al, %eax 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_STRUCT_2B) 
-	movzwl	%ax, %eax 
-	jmp	L(e4) 
-
-	/* Fill out the table so that bad values are predictable.  */ 
-E(L(load_table4), X86_RET_UNUSED14) 
-	ud2 
-E(L(load_table4), X86_RET_UNUSED15) 
-	ud2 
-
-L(UW40): 
-	# cfi_endproc 
-ENDF(C(ffi_closure_raw_SYSV)) 
- 
-#define raw_closure_T_FS	(16+16+8) 
- 
-	.balign	16 
-	.globl	C(ffi_closure_raw_THISCALL) 
-	FFI_HIDDEN(C(ffi_closure_raw_THISCALL)) 
-C(ffi_closure_raw_THISCALL): 
-L(UW41): 
-	# cfi_startproc 
-	/* Rearrange the stack such that %ecx is the first argument. 
-	   This means moving the return address.  */ 
-	popl	%edx 
-L(UW42): 
-	# cfi_def_cfa_offset(0) 
-	# cfi_register(%eip, %edx) 
-	pushl	%ecx 
-L(UW43): 
-	# cfi_adjust_cfa_offset(4) 
-	pushl	%edx 
-L(UW44): 
-	# cfi_adjust_cfa_offset(4) 
-	# cfi_rel_offset(%eip, 0) 
-	subl	$raw_closure_T_FS, %esp 
-L(UW45): 
-	# cfi_adjust_cfa_offset(raw_closure_T_FS) 
-	movl	%ebx, raw_closure_T_FS-4(%esp) 
-L(UW46): 
-	# cfi_rel_offset(%ebx, raw_closure_T_FS-4) 
- 
-	movl	FFI_TRAMPOLINE_SIZE+8(%eax), %edx	/* load cl->user_data */ 
-	movl	%edx, 12(%esp) 
-	leal	raw_closure_T_FS+4(%esp), %edx		/* load raw_args */ 
-	movl	%edx, 8(%esp) 
-	leal	16(%esp), %edx				/* load &res */ 
-	movl	%edx, 4(%esp) 
-	movl	FFI_TRAMPOLINE_SIZE(%eax), %ebx		/* load cl->cif */ 
-	movl	%ebx, (%esp) 
-	call	*FFI_TRAMPOLINE_SIZE+4(%eax)		/* call cl->fun */ 
- 
-	movl	20(%ebx), %eax				/* load cif->flags */ 
-	andl	$X86_RET_TYPE_MASK, %eax 
-#ifdef __PIC__ 
-	call	C(__x86.get_pc_thunk.bx) 
-L(pc5): 
-	leal	L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx 
-#else 
-	leal	L(load_table5)(,%eax, 8), %ecx 
+	movl	raw_closure_S_FS-4(%esp), %ebx
+L(UW35):
+	# cfi_restore(%ebx)
+	movl	16(%esp), %eax				/* Optimistic load */
+	jmp	*%ecx
+
+	.balign	8
+L(load_table4):
+E(L(load_table4), X86_RET_FLOAT)
+	flds	16(%esp)
+	jmp	L(e4)
+E(L(load_table4), X86_RET_DOUBLE)
+	fldl	16(%esp)
+	jmp	L(e4)
+E(L(load_table4), X86_RET_LDOUBLE)
+	fldt	16(%esp)
+	jmp	L(e4)
+E(L(load_table4), X86_RET_SINT8)
+	movsbl	%al, %eax
+	jmp	L(e4)
+E(L(load_table4), X86_RET_SINT16)
+	movswl	%ax, %eax
+	jmp	L(e4)
+E(L(load_table4), X86_RET_UINT8)
+	movzbl	%al, %eax
+	jmp	L(e4)
+E(L(load_table4), X86_RET_UINT16)
+	movzwl	%ax, %eax
+	jmp	L(e4)
+E(L(load_table4), X86_RET_INT64)
+	movl	16+4(%esp), %edx
+	jmp	L(e4)
+E(L(load_table4), X86_RET_INT32)
+	nop
+	/* fallthru */
+E(L(load_table4), X86_RET_VOID)
+L(e4):
+	addl	$raw_closure_S_FS, %esp
+L(UW36):
+	# cfi_adjust_cfa_offset(-raw_closure_S_FS)
+	ret
+L(UW37):
+	# cfi_adjust_cfa_offset(raw_closure_S_FS)
+E(L(load_table4), X86_RET_STRUCTPOP)
+	addl	$raw_closure_S_FS, %esp
+L(UW38):
+	# cfi_adjust_cfa_offset(-raw_closure_S_FS)
+	ret	$4
+L(UW39):
+	# cfi_adjust_cfa_offset(raw_closure_S_FS)
+E(L(load_table4), X86_RET_STRUCTARG)
+	jmp	L(e4)
+E(L(load_table4), X86_RET_STRUCT_1B)
+	movzbl	%al, %eax
+	jmp	L(e4)
+E(L(load_table4), X86_RET_STRUCT_2B)
+	movzwl	%ax, %eax
+	jmp	L(e4)
+
+	/* Fill out the table so that bad values are predictable.  */
+E(L(load_table4), X86_RET_UNUSED14)
+	ud2
+E(L(load_table4), X86_RET_UNUSED15)
+	ud2
+
+L(UW40):
+	# cfi_endproc
+ENDF(C(ffi_closure_raw_SYSV))
+
+#define raw_closure_T_FS	(16+16+8)
+
+	.balign	16
+	.globl	C(ffi_closure_raw_THISCALL)
+	FFI_HIDDEN(C(ffi_closure_raw_THISCALL))
+C(ffi_closure_raw_THISCALL):
+L(UW41):
+	# cfi_startproc
+	/* Rearrange the stack such that %ecx is the first argument.
+	   This means moving the return address.  */
+	popl	%edx
+L(UW42):
+	# cfi_def_cfa_offset(0)
+	# cfi_register(%eip, %edx)
+	pushl	%ecx
+L(UW43):
+	# cfi_adjust_cfa_offset(4)
+	pushl	%edx
+L(UW44):
+	# cfi_adjust_cfa_offset(4)
+	# cfi_rel_offset(%eip, 0)
+	subl	$raw_closure_T_FS, %esp
+L(UW45):
+	# cfi_adjust_cfa_offset(raw_closure_T_FS)
+	movl	%ebx, raw_closure_T_FS-4(%esp)
+L(UW46):
+	# cfi_rel_offset(%ebx, raw_closure_T_FS-4)
+
+	movl	FFI_TRAMPOLINE_SIZE+8(%eax), %edx	/* load cl->user_data */
+	movl	%edx, 12(%esp)
+	leal	raw_closure_T_FS+4(%esp), %edx		/* load raw_args */
+	movl	%edx, 8(%esp)
+	leal	16(%esp), %edx				/* load &res */
+	movl	%edx, 4(%esp)
+	movl	FFI_TRAMPOLINE_SIZE(%eax), %ebx		/* load cl->cif */
+	movl	%ebx, (%esp)
+	call	*FFI_TRAMPOLINE_SIZE+4(%eax)		/* call cl->fun */
+
+	movl	20(%ebx), %eax				/* load cif->flags */
+	andl	$X86_RET_TYPE_MASK, %eax
+#ifdef __PIC__
+	call	C(__x86.get_pc_thunk.bx)
+L(pc5):
+	leal	L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx
+#else
+	leal	L(load_table5)(,%eax, 8), %ecx
 #endif
-	movl	raw_closure_T_FS-4(%esp), %ebx 
-L(UW47): 
-	# cfi_restore(%ebx) 
-	movl	16(%esp), %eax				/* Optimistic load */ 
-	jmp	*%ecx 
-
-	.balign	8 
-L(load_table5): 
-E(L(load_table5), X86_RET_FLOAT) 
-	flds	16(%esp) 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_DOUBLE) 
-	fldl	16(%esp) 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_LDOUBLE) 
-	fldt	16(%esp) 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_SINT8) 
-	movsbl	%al, %eax 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_SINT16) 
-	movswl	%ax, %eax 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_UINT8) 
-	movzbl	%al, %eax 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_UINT16) 
-	movzwl	%ax, %eax 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_INT64) 
-	movl	16+4(%esp), %edx 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_INT32) 
-	nop 
-	/* fallthru */ 
-E(L(load_table5), X86_RET_VOID) 
-L(e5): 
-	addl	$raw_closure_T_FS, %esp 
-L(UW48): 
-	# cfi_adjust_cfa_offset(-raw_closure_T_FS) 
-	/* Remove the extra %ecx argument we pushed.  */ 
-	ret	$4 
-L(UW49): 
-	# cfi_adjust_cfa_offset(raw_closure_T_FS) 
-E(L(load_table5), X86_RET_STRUCTPOP) 
-	addl	$raw_closure_T_FS, %esp 
-L(UW50): 
-	# cfi_adjust_cfa_offset(-raw_closure_T_FS) 
-	ret	$8 
-L(UW51): 
-	# cfi_adjust_cfa_offset(raw_closure_T_FS) 
-E(L(load_table5), X86_RET_STRUCTARG) 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_STRUCT_1B) 
-	movzbl	%al, %eax 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_STRUCT_2B) 
-	movzwl	%ax, %eax 
-	jmp	L(e5) 
-
-	/* Fill out the table so that bad values are predictable.  */ 
-E(L(load_table5), X86_RET_UNUSED14) 
-	ud2 
-E(L(load_table5), X86_RET_UNUSED15) 
-	ud2 
- 
-L(UW52): 
-	# cfi_endproc 
-ENDF(C(ffi_closure_raw_THISCALL)) 
- 
-#endif /* !FFI_NO_RAW_API */ 
- 
-#ifdef X86_DARWIN 
-# define COMDAT(X)							\ 
-        .section __TEXT,__text,coalesced,pure_instructions;		\ 
-        .weak_definition X;						\ 
-        FFI_HIDDEN(X) 
-#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__)) 
-# define COMDAT(X)							\ 
-	.section .text.X,"axG",@progbits,X,comdat;			\ 
-	.globl	X;							\ 
-	FFI_HIDDEN(X) 
+	movl	raw_closure_T_FS-4(%esp), %ebx
+L(UW47):
+	# cfi_restore(%ebx)
+	movl	16(%esp), %eax				/* Optimistic load */
+	jmp	*%ecx
+
+	.balign	8
+L(load_table5):
+E(L(load_table5), X86_RET_FLOAT)
+	flds	16(%esp)
+	jmp	L(e5)
+E(L(load_table5), X86_RET_DOUBLE)
+	fldl	16(%esp)
+	jmp	L(e5)
+E(L(load_table5), X86_RET_LDOUBLE)
+	fldt	16(%esp)
+	jmp	L(e5)
+E(L(load_table5), X86_RET_SINT8)
+	movsbl	%al, %eax
+	jmp	L(e5)
+E(L(load_table5), X86_RET_SINT16)
+	movswl	%ax, %eax
+	jmp	L(e5)
+E(L(load_table5), X86_RET_UINT8)
+	movzbl	%al, %eax
+	jmp	L(e5)
+E(L(load_table5), X86_RET_UINT16)
+	movzwl	%ax, %eax
+	jmp	L(e5)
+E(L(load_table5), X86_RET_INT64)
+	movl	16+4(%esp), %edx
+	jmp	L(e5)
+E(L(load_table5), X86_RET_INT32)
+	nop
+	/* fallthru */
+E(L(load_table5), X86_RET_VOID)
+L(e5):
+	addl	$raw_closure_T_FS, %esp
+L(UW48):
+	# cfi_adjust_cfa_offset(-raw_closure_T_FS)
+	/* Remove the extra %ecx argument we pushed.  */
+	ret	$4
+L(UW49):
+	# cfi_adjust_cfa_offset(raw_closure_T_FS)
+E(L(load_table5), X86_RET_STRUCTPOP)
+	addl	$raw_closure_T_FS, %esp
+L(UW50):
+	# cfi_adjust_cfa_offset(-raw_closure_T_FS)
+	ret	$8
+L(UW51):
+	# cfi_adjust_cfa_offset(raw_closure_T_FS)
+E(L(load_table5), X86_RET_STRUCTARG)
+	jmp	L(e5)
+E(L(load_table5), X86_RET_STRUCT_1B)
+	movzbl	%al, %eax
+	jmp	L(e5)
+E(L(load_table5), X86_RET_STRUCT_2B)
+	movzwl	%ax, %eax
+	jmp	L(e5)
+
+	/* Fill out the table so that bad values are predictable.  */
+E(L(load_table5), X86_RET_UNUSED14)
+	ud2
+E(L(load_table5), X86_RET_UNUSED15)
+	ud2
+
+L(UW52):
+	# cfi_endproc
+ENDF(C(ffi_closure_raw_THISCALL))
+
+#endif /* !FFI_NO_RAW_API */
+
+#ifdef X86_DARWIN
+# define COMDAT(X)							\
+        .section __TEXT,__text,coalesced,pure_instructions;		\
+        .weak_definition X;						\
+        FFI_HIDDEN(X)
+#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__))
+# define COMDAT(X)							\
+	.section .text.X,"axG",@progbits,X,comdat;			\
+	.globl	X;							\
+	FFI_HIDDEN(X)
 #else
-# define COMDAT(X) 
+# define COMDAT(X)
 #endif
 
-#if defined(__PIC__) 
-	COMDAT(C(__x86.get_pc_thunk.bx)) 
-C(__x86.get_pc_thunk.bx): 
-	movl	(%esp), %ebx 
-	ret 
-ENDF(C(__x86.get_pc_thunk.bx)) 
-# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE 
-	COMDAT(C(__x86.get_pc_thunk.dx)) 
-C(__x86.get_pc_thunk.dx): 
-	movl	(%esp), %edx 
-	ret 
-ENDF(C(__x86.get_pc_thunk.dx)) 
-#endif /* DARWIN || HIDDEN */ 
-#endif /* __PIC__ */ 
- 
-/* Sadly, OSX cctools-as doesn't understand .cfi directives at all.  */ 
- 
-#ifdef __APPLE__ 
-.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support 
-EHFrame0: 
-#elif defined(X86_WIN32) 
-.section .eh_frame,"r" 
-#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) 
-.section .eh_frame,EH_FRAME_FLAGS,@unwind 
+#if defined(__PIC__)
+	COMDAT(C(__x86.get_pc_thunk.bx))
+C(__x86.get_pc_thunk.bx):
+	movl	(%esp), %ebx
+	ret
+ENDF(C(__x86.get_pc_thunk.bx))
+# if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
+	COMDAT(C(__x86.get_pc_thunk.dx))
+C(__x86.get_pc_thunk.dx):
+	movl	(%esp), %edx
+	ret
+ENDF(C(__x86.get_pc_thunk.dx))
+#endif /* DARWIN || HIDDEN */
+#endif /* __PIC__ */
+
+/* Sadly, OSX cctools-as doesn't understand .cfi directives at all.  */
+
+#ifdef __APPLE__
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EHFrame0:
+#elif defined(X86_WIN32)
+.section .eh_frame,"r"
+#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
+.section .eh_frame,EH_FRAME_FLAGS,@unwind
 #else
-.section .eh_frame,EH_FRAME_FLAGS,@progbits 
+.section .eh_frame,EH_FRAME_FLAGS,@progbits
 #endif
- 
-#ifdef HAVE_AS_X86_PCREL 
-# define PCREL(X)	X - . 
+
+#ifdef HAVE_AS_X86_PCREL
+# define PCREL(X)	X - .
 #else
-# define PCREL(X)	X@rel 
+# define PCREL(X)	X@rel
 #endif
- 
-/* Simplify advancing between labels.  Assume DW_CFA_advance_loc1 fits.  */ 
-#define ADV(N, P)	.byte 2, L(N)-L(P) 
- 
-	.balign 4 
-L(CIE): 
-	.set	L(set0),L(ECIE)-L(SCIE) 
-	.long	L(set0)			/* CIE Length */ 
-L(SCIE): 
-	.long	0			/* CIE Identifier Tag */ 
-	.byte	1			/* CIE Version */ 
-	.ascii	"zR\0"			/* CIE Augmentation */ 
-	.byte	1			/* CIE Code Alignment Factor */ 
-	.byte	0x7c			/* CIE Data Alignment Factor */ 
-	.byte	0x8			/* CIE RA Column */ 
-	.byte	1			/* Augmentation size */ 
-	.byte	0x1b			/* FDE Encoding (pcrel sdata4) */ 
-	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp offset 4 */ 
-	.byte	0x80+8, 1		/* DW_CFA_offset, %eip offset 1*-4 */ 
-	.balign 4 
-L(ECIE): 
- 
-	.set	L(set1),L(EFDE1)-L(SFDE1) 
-	.long	L(set1)			/* FDE Length */ 
-L(SFDE1): 
-	.long	L(SFDE1)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW0))		/* Initial location */ 
-	.long	L(UW5)-L(UW0)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW1, UW0) 
-	.byte	0xc, 5, 8		/* DW_CFA_def_cfa, %ebp 8 */ 
-	.byte	0x80+5, 2		/* DW_CFA_offset, %ebp 2*-4 */ 
-	ADV(UW2, UW1) 
-	.byte	0x80+3, 0		/* DW_CFA_offset, %ebx 0*-4 */ 
-	ADV(UW3, UW2) 
-	.byte	0xa			/* DW_CFA_remember_state */ 
-	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp 4 */ 
-	.byte	0xc0+3			/* DW_CFA_restore, %ebx */ 
-	.byte	0xc0+5			/* DW_CFA_restore, %ebp */ 
-	ADV(UW4, UW3) 
-	.byte	0xb			/* DW_CFA_restore_state */ 
-	.balign	4 
-L(EFDE1): 
- 
-	.set	L(set2),L(EFDE2)-L(SFDE2) 
-	.long	L(set2)			/* FDE Length */ 
-L(SFDE2): 
-	.long	L(SFDE2)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW6))		/* Initial location */ 
-	.long	L(UW8)-L(UW6)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW7, UW6) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-	.balign	4 
-L(EFDE2): 
- 
-	.set	L(set3),L(EFDE3)-L(SFDE3) 
-	.long	L(set3)			/* FDE Length */ 
-L(SFDE3): 
-	.long	L(SFDE3)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW9))		/* Initial location */ 
-	.long	L(UW11)-L(UW9)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW10, UW9) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-	.balign	4 
-L(EFDE3): 
- 
-	.set	L(set4),L(EFDE4)-L(SFDE4) 
-	.long	L(set4)			/* FDE Length */ 
-L(SFDE4): 
-	.long	L(SFDE4)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW12))		/* Initial location */ 
-	.long	L(UW20)-L(UW12)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW13, UW12) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX 
-	ADV(UW14, UW13) 
-	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */ 
-	ADV(UW15, UW14) 
-	.byte	0xc0+3			/* DW_CFA_restore %ebx */ 
-	ADV(UW16, UW15) 
+
+/* Simplify advancing between labels.  Assume DW_CFA_advance_loc1 fits.  */
+#define ADV(N, P)	.byte 2, L(N)-L(P)
+
+	.balign 4
+L(CIE):
+	.set	L(set0),L(ECIE)-L(SCIE)
+	.long	L(set0)			/* CIE Length */
+L(SCIE):
+	.long	0			/* CIE Identifier Tag */
+	.byte	1			/* CIE Version */
+	.ascii	"zR\0"			/* CIE Augmentation */
+	.byte	1			/* CIE Code Alignment Factor */
+	.byte	0x7c			/* CIE Data Alignment Factor */
+	.byte	0x8			/* CIE RA Column */
+	.byte	1			/* Augmentation size */
+	.byte	0x1b			/* FDE Encoding (pcrel sdata4) */
+	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp offset 4 */
+	.byte	0x80+8, 1		/* DW_CFA_offset, %eip offset 1*-4 */
+	.balign 4
+L(ECIE):
+
+	.set	L(set1),L(EFDE1)-L(SFDE1)
+	.long	L(set1)			/* FDE Length */
+L(SFDE1):
+	.long	L(SFDE1)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW0))		/* Initial location */
+	.long	L(UW5)-L(UW0)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW1, UW0)
+	.byte	0xc, 5, 8		/* DW_CFA_def_cfa, %ebp 8 */
+	.byte	0x80+5, 2		/* DW_CFA_offset, %ebp 2*-4 */
+	ADV(UW2, UW1)
+	.byte	0x80+3, 0		/* DW_CFA_offset, %ebx 0*-4 */
+	ADV(UW3, UW2)
+	.byte	0xa			/* DW_CFA_remember_state */
+	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp 4 */
+	.byte	0xc0+3			/* DW_CFA_restore, %ebx */
+	.byte	0xc0+5			/* DW_CFA_restore, %ebp */
+	ADV(UW4, UW3)
+	.byte	0xb			/* DW_CFA_restore_state */
+	.balign	4
+L(EFDE1):
+
+	.set	L(set2),L(EFDE2)-L(SFDE2)
+	.long	L(set2)			/* FDE Length */
+L(SFDE2):
+	.long	L(SFDE2)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW6))		/* Initial location */
+	.long	L(UW8)-L(UW6)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW7, UW6)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+	.balign	4
+L(EFDE2):
+
+	.set	L(set3),L(EFDE3)-L(SFDE3)
+	.long	L(set3)			/* FDE Length */
+L(SFDE3):
+	.long	L(SFDE3)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW9))		/* Initial location */
+	.long	L(UW11)-L(UW9)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW10, UW9)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+	.balign	4
+L(EFDE3):
+
+	.set	L(set4),L(EFDE4)-L(SFDE4)
+	.long	L(set4)			/* FDE Length */
+L(SFDE4):
+	.long	L(SFDE4)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW12))		/* Initial location */
+	.long	L(UW20)-L(UW12)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW13, UW12)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
+	ADV(UW14, UW13)
+	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */
+	ADV(UW15, UW14)
+	.byte	0xc0+3			/* DW_CFA_restore %ebx */
+	ADV(UW16, UW15)
 #else
-	ADV(UW16, UW13) 
+	ADV(UW16, UW13)
 #endif
-	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */ 
-	ADV(UW17, UW16) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-	ADV(UW18, UW17) 
-	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */ 
-	ADV(UW19, UW18) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-	.balign	4 
-L(EFDE4): 
- 
-	.set	L(set5),L(EFDE5)-L(SFDE5) 
-	.long	L(set5)			/* FDE Length */ 
-L(SFDE5): 
-	.long	L(SFDE5)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW21))		/* Initial location */ 
-	.long	L(UW23)-L(UW21)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW22, UW21) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-	.balign	4 
-L(EFDE5): 
- 
-	.set	L(set6),L(EFDE6)-L(SFDE6) 
-	.long	L(set6)			/* FDE Length */ 
-L(SFDE6): 
-	.long	L(SFDE6)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW24))		/* Initial location */ 
-	.long	L(UW26)-L(UW24)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */ 
-	.byte	0x80+8, 2		/* DW_CFA_offset %eip, 2*-4 */ 
-	ADV(UW25, UW24) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-	.balign	4 
-L(EFDE6): 
- 
-	.set	L(set7),L(EFDE7)-L(SFDE7) 
-	.long	L(set7)			/* FDE Length */ 
-L(SFDE7): 
-	.long	L(SFDE7)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW27))		/* Initial location */ 
-	.long	L(UW31)-L(UW27)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW28, UW27) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX 
-	ADV(UW29, UW28) 
-	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */ 
-	ADV(UW30, UW29) 
-	.byte	0xc0+3			/* DW_CFA_restore %ebx */ 
+	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
+	ADV(UW17, UW16)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+	ADV(UW18, UW17)
+	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
+	ADV(UW19, UW18)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+	.balign	4
+L(EFDE4):
+
+	.set	L(set5),L(EFDE5)-L(SFDE5)
+	.long	L(set5)			/* FDE Length */
+L(SFDE5):
+	.long	L(SFDE5)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW21))		/* Initial location */
+	.long	L(UW23)-L(UW21)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW22, UW21)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+	.balign	4
+L(EFDE5):
+
+	.set	L(set6),L(EFDE6)-L(SFDE6)
+	.long	L(set6)			/* FDE Length */
+L(SFDE6):
+	.long	L(SFDE6)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW24))		/* Initial location */
+	.long	L(UW26)-L(UW24)		/* Address range */
+	.byte	0			/* Augmentation size */
+	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
+	.byte	0x80+8, 2		/* DW_CFA_offset %eip, 2*-4 */
+	ADV(UW25, UW24)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+	.balign	4
+L(EFDE6):
+
+	.set	L(set7),L(EFDE7)-L(SFDE7)
+	.long	L(set7)			/* FDE Length */
+L(SFDE7):
+	.long	L(SFDE7)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW27))		/* Initial location */
+	.long	L(UW31)-L(UW27)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW28, UW27)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
+	ADV(UW29, UW28)
+	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */
+	ADV(UW30, UW29)
+	.byte	0xc0+3			/* DW_CFA_restore %ebx */
 #endif
-	.balign	4 
-L(EFDE7): 
+	.balign	4
+L(EFDE7):
 
 #if !FFI_NO_RAW_API
-	.set	L(set8),L(EFDE8)-L(SFDE8) 
-	.long	L(set8)			/* FDE Length */ 
-L(SFDE8): 
-	.long	L(SFDE8)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW32))		/* Initial location */ 
-	.long	L(UW40)-L(UW32)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW33, UW32) 
-	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */ 
-	ADV(UW34, UW33) 
-	.byte	0x80+3, 2		/* DW_CFA_offset %ebx 2*-4 */ 
-	ADV(UW35, UW34) 
-	.byte	0xc0+3			/* DW_CFA_restore %ebx */ 
-	ADV(UW36, UW35) 
-	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */ 
-	ADV(UW37, UW36) 
-	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */ 
-	ADV(UW38, UW37) 
-	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */ 
-	ADV(UW39, UW38) 
-	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */ 
-	.balign	4 
-L(EFDE8): 
-
-	.set	L(set9),L(EFDE9)-L(SFDE9) 
-	.long	L(set9)			/* FDE Length */ 
-L(SFDE9): 
-	.long	L(SFDE9)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW41))		/* Initial location */ 
-	.long	L(UW52)-L(UW41)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW42, UW41) 
-	.byte	0xe, 0			/* DW_CFA_def_cfa_offset */ 
-	.byte	0x9, 8, 2		/* DW_CFA_register %eip, %edx */ 
-	ADV(UW43, UW42) 
-	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */ 
-	ADV(UW44, UW43) 
-	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */ 
-	.byte	0x80+8, 2		/* DW_CFA_offset %eip 2*-4 */ 
-	ADV(UW45, UW44) 
-	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */ 
-	ADV(UW46, UW45) 
-	.byte	0x80+3, 3		/* DW_CFA_offset %ebx 3*-4 */ 
-	ADV(UW47, UW46) 
-	.byte	0xc0+3			/* DW_CFA_restore %ebx */ 
-	ADV(UW48, UW47) 
-	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */ 
-	ADV(UW49, UW48) 
-	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */ 
-	ADV(UW50, UW49) 
-	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */ 
-	ADV(UW51, UW50) 
-	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */ 
-	.balign	4 
-L(EFDE9): 
-#endif /* !FFI_NO_RAW_API */ 
-
-#ifdef _WIN32 
-	.def	 @feat.00; 
-	.scl	3; 
-	.type	0; 
-	.endef 
-	.globl	@feat.00 
-@feat.00 = 1 
+	.set	L(set8),L(EFDE8)-L(SFDE8)
+	.long	L(set8)			/* FDE Length */
+L(SFDE8):
+	.long	L(SFDE8)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW32))		/* Initial location */
+	.long	L(UW40)-L(UW32)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW33, UW32)
+	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
+	ADV(UW34, UW33)
+	.byte	0x80+3, 2		/* DW_CFA_offset %ebx 2*-4 */
+	ADV(UW35, UW34)
+	.byte	0xc0+3			/* DW_CFA_restore %ebx */
+	ADV(UW36, UW35)
+	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
+	ADV(UW37, UW36)
+	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
+	ADV(UW38, UW37)
+	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
+	ADV(UW39, UW38)
+	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
+	.balign	4
+L(EFDE8):
+
+	.set	L(set9),L(EFDE9)-L(SFDE9)
+	.long	L(set9)			/* FDE Length */
+L(SFDE9):
+	.long	L(SFDE9)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW41))		/* Initial location */
+	.long	L(UW52)-L(UW41)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW42, UW41)
+	.byte	0xe, 0			/* DW_CFA_def_cfa_offset */
+	.byte	0x9, 8, 2		/* DW_CFA_register %eip, %edx */
+	ADV(UW43, UW42)
+	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
+	ADV(UW44, UW43)
+	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
+	.byte	0x80+8, 2		/* DW_CFA_offset %eip 2*-4 */
+	ADV(UW45, UW44)
+	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
+	ADV(UW46, UW45)
+	.byte	0x80+3, 3		/* DW_CFA_offset %ebx 3*-4 */
+	ADV(UW47, UW46)
+	.byte	0xc0+3			/* DW_CFA_restore %ebx */
+	ADV(UW48, UW47)
+	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
+	ADV(UW49, UW48)
+	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
+	ADV(UW50, UW49)
+	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
+	ADV(UW51, UW50)
+	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
+	.balign	4
+L(EFDE9):
+#endif /* !FFI_NO_RAW_API */
+
+#ifdef _WIN32
+	.def	 @feat.00;
+	.scl	3;
+	.type	0;
+	.endef
+	.globl	@feat.00
+@feat.00 = 1
 #endif
 
-#ifdef __APPLE__ 
-    .subsections_via_symbols 
-    .section __LD,__compact_unwind,regular,debug 
-
-    /* compact unwind for ffi_call_i386 */ 
-    .long    C(ffi_call_i386) 
-    .set     L1,L(UW5)-L(UW0) 
-    .long    L1 
-    .long    0x04000000 /* use dwarf unwind info */ 
-    .long    0 
-    .long    0 
- 
-    /* compact unwind for ffi_go_closure_EAX */ 
-    .long    C(ffi_go_closure_EAX) 
-    .set     L2,L(UW8)-L(UW6) 
-    .long    L2 
-    .long    0x04000000 /* use dwarf unwind info */ 
-    .long    0 
-    .long    0 
- 
-    /* compact unwind for ffi_go_closure_ECX */ 
-    .long    C(ffi_go_closure_ECX) 
-    .set     L3,L(UW11)-L(UW9) 
-    .long    L3 
-    .long    0x04000000 /* use dwarf unwind info */ 
-    .long    0 
-    .long    0 
- 
-    /* compact unwind for ffi_closure_i386 */ 
-    .long    C(ffi_closure_i386) 
-    .set     L4,L(UW20)-L(UW12) 
-    .long    L4 
-    .long    0x04000000 /* use dwarf unwind info */ 
-    .long    0 
-    .long    0 
- 
-    /* compact unwind for ffi_go_closure_STDCALL */ 
-    .long    C(ffi_go_closure_STDCALL) 
-    .set     L5,L(UW23)-L(UW21) 
-    .long    L5 
-    .long    0x04000000 /* use dwarf unwind info */ 
-    .long    0 
-    .long    0 
- 
-    /* compact unwind for ffi_closure_REGISTER */ 
-    .long    C(ffi_closure_REGISTER) 
-    .set     L6,L(UW26)-L(UW24) 
-    .long    L6 
-    .long    0x04000000 /* use dwarf unwind info */ 
-    .long    0 
-    .long    0 
- 
-    /* compact unwind for ffi_closure_STDCALL */ 
-    .long    C(ffi_closure_STDCALL) 
-    .set     L7,L(UW31)-L(UW27) 
-    .long    L7 
-    .long    0x04000000 /* use dwarf unwind info */ 
-    .long    0 
-    .long    0 
- 
-    /* compact unwind for ffi_closure_raw_SYSV */ 
-    .long    C(ffi_closure_raw_SYSV) 
-    .set     L8,L(UW40)-L(UW32) 
-    .long    L8 
-    .long    0x04000000 /* use dwarf unwind info */ 
-    .long    0 
-    .long    0 
- 
-    /* compact unwind for ffi_closure_raw_THISCALL */ 
-    .long    C(ffi_closure_raw_THISCALL) 
-    .set     L9,L(UW52)-L(UW41) 
-    .long    L9 
-    .long    0x04000000 /* use dwarf unwind info */ 
-    .long    0 
-    .long    0 
-#endif /* __APPLE__ */ 
- 
-#endif /* ifndef _MSC_VER */ 
-#endif /* ifdef __i386__ */ 
- 
+#ifdef __APPLE__
+    .subsections_via_symbols
+    .section __LD,__compact_unwind,regular,debug
+
+    /* compact unwind for ffi_call_i386 */
+    .long    C(ffi_call_i386)
+    .set     L1,L(UW5)-L(UW0)
+    .long    L1
+    .long    0x04000000 /* use dwarf unwind info */
+    .long    0
+    .long    0
+
+    /* compact unwind for ffi_go_closure_EAX */
+    .long    C(ffi_go_closure_EAX)
+    .set     L2,L(UW8)-L(UW6)
+    .long    L2
+    .long    0x04000000 /* use dwarf unwind info */
+    .long    0
+    .long    0
+
+    /* compact unwind for ffi_go_closure_ECX */
+    .long    C(ffi_go_closure_ECX)
+    .set     L3,L(UW11)-L(UW9)
+    .long    L3
+    .long    0x04000000 /* use dwarf unwind info */
+    .long    0
+    .long    0
+
+    /* compact unwind for ffi_closure_i386 */
+    .long    C(ffi_closure_i386)
+    .set     L4,L(UW20)-L(UW12)
+    .long    L4
+    .long    0x04000000 /* use dwarf unwind info */
+    .long    0
+    .long    0
+
+    /* compact unwind for ffi_go_closure_STDCALL */
+    .long    C(ffi_go_closure_STDCALL)
+    .set     L5,L(UW23)-L(UW21)
+    .long    L5
+    .long    0x04000000 /* use dwarf unwind info */
+    .long    0
+    .long    0
+
+    /* compact unwind for ffi_closure_REGISTER */
+    .long    C(ffi_closure_REGISTER)
+    .set     L6,L(UW26)-L(UW24)
+    .long    L6
+    .long    0x04000000 /* use dwarf unwind info */
+    .long    0
+    .long    0
+
+    /* compact unwind for ffi_closure_STDCALL */
+    .long    C(ffi_closure_STDCALL)
+    .set     L7,L(UW31)-L(UW27)
+    .long    L7
+    .long    0x04000000 /* use dwarf unwind info */
+    .long    0
+    .long    0
+
+    /* compact unwind for ffi_closure_raw_SYSV */
+    .long    C(ffi_closure_raw_SYSV)
+    .set     L8,L(UW40)-L(UW32)
+    .long    L8
+    .long    0x04000000 /* use dwarf unwind info */
+    .long    0
+    .long    0
+
+    /* compact unwind for ffi_closure_raw_THISCALL */
+    .long    C(ffi_closure_raw_THISCALL)
+    .set     L9,L(UW52)-L(UW41)
+    .long    L9
+    .long    0x04000000 /* use dwarf unwind info */
+    .long    0
+    .long    0
+#endif /* __APPLE__ */
+
+#endif /* ifndef _MSC_VER */
+#endif /* ifdef __i386__ */
+
 #if defined __ELF__ && defined __linux__
 	.section	.note.GNU-stack,"",@progbits
 #endif
diff --git a/contrib/restricted/libffi/src/x86/sysv_intel.S b/contrib/restricted/libffi/src/x86/sysv_intel.S
index 5fc9ff21fa..3cafd71ce1 100644
--- a/contrib/restricted/libffi/src/x86/sysv_intel.S
+++ b/contrib/restricted/libffi/src/x86/sysv_intel.S
@@ -1,995 +1,995 @@
-/* ----------------------------------------------------------------------- 
-   sysv.S - Copyright (c) 2017  Anthony Green 
-          - Copyright (c) 2013  The Written Word, Inc. 
-          - Copyright (c) 1996,1998,2001-2003,2005,2008,2010  Red Hat, Inc. 
-    
-   X86 Foreign Function Interface  
- 
-   Permission is hereby granted, free of charge, to any person obtaining 
-   a copy of this software and associated documentation files (the 
-   ``Software''), to deal in the Software without restriction, including 
-   without limitation the rights to use, copy, modify, merge, publish, 
-   distribute, sublicense, and/or sell copies of the Software, and to 
-   permit persons to whom the Software is furnished to do so, subject to 
-   the following conditions: 
- 
-   The above copyright notice and this permission notice shall be included 
-   in all copies or substantial portions of the Software. 
- 
-   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, 
-   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 
-   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
-   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 
-   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
-   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 
-   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
-   DEALINGS IN THE SOFTWARE. 
-   ----------------------------------------------------------------------- */ 
- 
-#ifndef __x86_64__ 
-#ifdef _MSC_VER 
- 
-#define LIBFFI_ASM	 
-#include <fficonfig.h> 
-#include <ffi.h> 
-#include <ffi_cfi.h> 
-#include "internal.h"  
- 
-#define C2(X, Y)  X ## Y 
-#define C1(X, Y)  C2(X, Y) 
-#define L(X)     C1(L, X) 
-# define ENDF(X) X ENDP 
- 
-/* This macro allows the safe creation of jump tables without an 
-   actual table.  The entry points into the table are all 8 bytes. 
-   The use of ORG asserts that we're at the correct location.  */ 
-/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */ 
-#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) 
-# define E(BASE, X)	ALIGN 8 
-#else 
-# define E(BASE, X)	ALIGN 8; ORG BASE + X * 8 
-#endif 
- 
-    .686P 
-    .MODEL FLAT 
- 
-EXTRN	@ffi_closure_inner@8:PROC 
-_TEXT SEGMENT 
- 
-/* This is declared as 
- 
-   void ffi_call_i386(struct call_frame *frame, char *argp) 
-        __attribute__((fastcall)); 
- 
-   Thus the arguments are present in 
- 
-        ecx: frame 
-        edx: argp 
-*/ 
- 
-ALIGN 16 
-PUBLIC @ffi_call_i386@8 
-@ffi_call_i386@8 PROC 
-L(UW0): 
-	cfi_startproc 
- #if !HAVE_FASTCALL 
-	mov	    ecx, [esp+4] 
-	mov 	edx, [esp+8] 
- #endif 
-	mov	    eax, [esp]		/* move the return address */ 
-	mov	    [ecx], ebp		/* store ebp into local frame */ 
-	mov 	[ecx+4], eax	/* store retaddr into local frame */ 
- 
-	/* New stack frame based off ebp.  This is a itty bit of unwind 
-	   trickery in that the CFA *has* changed.  There is no easy way 
-	   to describe it correctly on entry to the function.  Fortunately, 
-	   it doesn't matter too much since at all points we can correctly 
-	   unwind back to ffi_call.  Note that the location to which we 
-	   moved the return address is (the new) CFA-4, so from the 
-	   perspective of the unwind info, it hasn't moved.  */ 
-	mov 	ebp, ecx 
-L(UW1): 
-	// cfi_def_cfa(%ebp, 8) 
-	// cfi_rel_offset(%ebp, 0) 
- 
-	mov 	esp, edx		/* set outgoing argument stack */ 
-	mov 	eax, [20+R_EAX*4+ebp]	/* set register arguments */ 
-	mov 	edx, [20+R_EDX*4+ebp] 
-	mov	    ecx, [20+R_ECX*4+ebp] 
- 
-	call	dword ptr [ebp+8] 
- 
-	mov	    ecx, [12+ebp]		/* load return type code */ 
-	mov 	[ebp+8], ebx		/* preserve %ebx */ 
-L(UW2): 
-	// cfi_rel_offset(%ebx, 8) 
- 
-	and 	ecx, X86_RET_TYPE_MASK 
-	lea 	ebx, [L(store_table) + ecx * 8] 
-	mov 	ecx, [ebp+16]		/* load result address */ 
-	jmp	    ebx 
- 
-	ALIGN	8 
-L(store_table): 
-E(L(store_table), X86_RET_FLOAT) 
-	fstp	DWORD PTR [ecx] 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_DOUBLE) 
-	fstp	QWORD PTR [ecx] 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_LDOUBLE) 
-	fstp	QWORD PTR [ecx] 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_SINT8) 
-	movsx	eax, al 
-	mov	[ecx], eax 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_SINT16) 
-	movsx	eax, ax 
-	mov	[ecx], eax 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_UINT8) 
-	movzx	eax, al 
-	mov	[ecx], eax 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_UINT16) 
-	movzx	eax, ax 
-	mov	[ecx], eax 
-	jmp	L(e1) 
-E(L(store_table), X86_RET_INT64) 
-	mov	[ecx+4], edx 
-	/* fallthru */ 
-E(L(store_table), X86_RET_int 32) 
-	mov	[ecx], eax 
-	/* fallthru */ 
-E(L(store_table), X86_RET_VOID) 
-L(e1): 
-	mov	    ebx, [ebp+8] 
-	mov	    esp, ebp 
-	pop 	ebp 
-L(UW3): 
-	// cfi_remember_state 
-	// cfi_def_cfa(%esp, 4) 
-	// cfi_restore(%ebx) 
-	// cfi_restore(%ebp) 
-	ret 
-L(UW4): 
-	// cfi_restore_state 
- 
-E(L(store_table), X86_RET_STRUCTPOP) 
-	jmp	    L(e1) 
-E(L(store_table), X86_RET_STRUCTARG) 
-	jmp	    L(e1) 
-E(L(store_table), X86_RET_STRUCT_1B) 
-	mov 	[ecx], al 
-	jmp	    L(e1) 
-E(L(store_table), X86_RET_STRUCT_2B) 
-	mov 	[ecx], ax 
-	jmp	    L(e1) 
- 
-	/* Fill out the table so that bad values are predictable.  */ 
-E(L(store_table), X86_RET_UNUSED14) 
-	int 3 
-E(L(store_table), X86_RET_UNUSED15) 
-	int 3 
- 
-L(UW5): 
-	// cfi_endproc 
-ENDF(@ffi_call_i386@8) 
- 
-/* The inner helper is declared as 
- 
-   void ffi_closure_inner(struct closure_frame *frame, char *argp) 
-	__attribute_((fastcall)) 
- 
-   Thus the arguments are placed in 
- 
-	ecx:	frame 
-	edx:	argp 
-*/ 
- 
-/* Macros to help setting up the closure_data structure.  */ 
- 
-#if HAVE_FASTCALL 
-# define closure_FS	(40 + 4) 
-# define closure_CF	0 
-#else 
-# define closure_FS	(8 + 40 + 12) 
-# define closure_CF	8 
-#endif 
- 
-FFI_CLOSURE_SAVE_REGS MACRO 
-	mov 	[esp + closure_CF+16+R_EAX*4], eax 
-	mov 	[esp + closure_CF+16+R_EDX*4], edx 
-	mov 	[esp + closure_CF+16+R_ECX*4], ecx 
-ENDM 
- 
-FFI_CLOSURE_COPY_TRAMP_DATA MACRO 
-	mov 	edx, [eax+FFI_TRAMPOLINE_SIZE]      /* copy cif */ 
-	mov 	ecx, [eax+FFI_TRAMPOLINE_SIZE+4]    /* copy fun */ 
-	mov 	eax, [eax+FFI_TRAMPOLINE_SIZE+8];   /* copy user_data */ 
-	mov 	[esp+closure_CF+28], edx 
-	mov 	[esp+closure_CF+32], ecx 
-	mov 	[esp+closure_CF+36], eax 
-ENDM 
- 
-#if HAVE_FASTCALL 
-FFI_CLOSURE_PREP_CALL MACRO 
-	mov	    ecx, esp                    /* load closure_data */ 
-	lea 	edx, [esp+closure_FS+4]     /* load incoming stack */ 
-ENDM 
-#else 
-FFI_CLOSURE_PREP_CALL MACRO 
-	lea 	ecx, [esp+closure_CF]       /* load closure_data */ 
-	lea 	edx, [esp+closure_FS+4]     /* load incoming stack */ 
-	mov 	[esp], ecx 
-	mov 	[esp+4], edx 
-ENDM 
-#endif 
- 
-FFI_CLOSURE_CALL_INNER MACRO UWN 
-	call	@ffi_closure_inner@8 
-ENDM 
- 
-FFI_CLOSURE_MASK_AND_JUMP MACRO LABEL 
-	and	    eax, X86_RET_TYPE_MASK 
-	lea 	edx, [LABEL+eax*8] 
-	mov 	eax, [esp+closure_CF]       /* optimiztic load */ 
-	jmp	    edx 
-ENDM 
- 
-ALIGN 16 
-PUBLIC ffi_go_closure_EAX 
-ffi_go_closure_EAX PROC C 
-L(UW6): 
-	// cfi_startproc 
-	sub	esp, closure_FS 
-L(UW7): 
-	// cfi_def_cfa_offset(closure_FS + 4) 
-	FFI_CLOSURE_SAVE_REGS 
-	mov     edx, [eax+4]			/* copy cif */ 
-	mov 	ecx, [eax +8]			/* copy fun */ 
-	mov 	[esp+closure_CF+28], edx 
-	mov 	[esp+closure_CF+32], ecx 
-	mov 	[esp+closure_CF+36], eax	/* closure is user_data */ 
-	jmp	L(do_closure_i386) 
-L(UW8): 
-	// cfi_endproc 
-ENDF(ffi_go_closure_EAX) 
- 
-ALIGN 16 
-PUBLIC ffi_go_closure_ECX 
-ffi_go_closure_ECX PROC C 
-L(UW9): 
-	// cfi_startproc 
-	sub 	esp, closure_FS 
-L(UW10): 
-	// cfi_def_cfa_offset(closure_FS + 4) 
-	FFI_CLOSURE_SAVE_REGS 
-	mov 	edx, [ecx+4]			/* copy cif */ 
-	mov 	eax, [ecx+8]			/* copy fun */ 
-	mov 	[esp+closure_CF+28], edx 
-	mov 	[esp+closure_CF+32], eax 
-	mov 	[esp+closure_CF+36], ecx	/* closure is user_data */ 
-	jmp	L(do_closure_i386) 
-L(UW11): 
-	// cfi_endproc 
-ENDF(ffi_go_closure_ECX) 
- 
-/* The closure entry points are reached from the ffi_closure trampoline. 
-   On entry, %eax contains the address of the ffi_closure.  */ 
- 
-ALIGN 16 
-PUBLIC ffi_closure_i386 
-ffi_closure_i386 PROC C 
-L(UW12): 
-	// cfi_startproc 
-	sub	    esp, closure_FS 
-L(UW13): 
-	// cfi_def_cfa_offset(closure_FS + 4) 
- 
-	FFI_CLOSURE_SAVE_REGS 
-	FFI_CLOSURE_COPY_TRAMP_DATA 
- 
-	/* Entry point from preceeding Go closures.  */ 
-L(do_closure_i386):: 
- 
-	FFI_CLOSURE_PREP_CALL 
-	FFI_CLOSURE_CALL_INNER(14) 
-	FFI_CLOSURE_MASK_AND_JUMP L(C1(load_table,2)) 
- 
-    ALIGN 8 
-L(load_table2): 
-E(L(load_table2), X86_RET_FLOAT) 
-	fld 	dword ptr [esp+closure_CF] 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_DOUBLE) 
-	fld 	qword ptr [esp+closure_CF] 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_LDOUBLE) 
-	fld 	qword ptr [esp+closure_CF] 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_SINT8) 
-	movsx	eax, al 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_SINT16) 
-	movsx	eax, ax 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_UINT8) 
-	movzx	eax, al 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_UINT16) 
-	movzx	eax, ax 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_INT64) 
-	mov 	edx, [esp+closure_CF+4] 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_INT32) 
-	nop 
-	/* fallthru */ 
-E(L(load_table2), X86_RET_VOID) 
-L(e2): 
-	add 	esp, closure_FS 
-L(UW16): 
-	// cfi_adjust_cfa_offset(-closure_FS) 
-	ret 
-L(UW17): 
-	// cfi_adjust_cfa_offset(closure_FS) 
-E(L(load_table2), X86_RET_STRUCTPOP) 
-	add 	esp, closure_FS 
-L(UW18): 
-	// cfi_adjust_cfa_offset(-closure_FS) 
-	ret	4 
-L(UW19): 
-	// cfi_adjust_cfa_offset(closure_FS) 
-E(L(load_table2), X86_RET_STRUCTARG) 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_STRUCT_1B) 
-	movzx	eax, al 
-	jmp	L(e2) 
-E(L(load_table2), X86_RET_STRUCT_2B) 
-	movzx	eax, ax 
-	jmp	L(e2) 
- 
-	/* Fill out the table so that bad values are predictable.  */ 
-E(L(load_table2), X86_RET_UNUSED14) 
-	int 3 
-E(L(load_table2), X86_RET_UNUSED15) 
-	int 3 
- 
-L(UW20): 
-	// cfi_endproc 
-ENDF(ffi_closure_i386) 
- 
-ALIGN 16 
-PUBLIC	ffi_go_closure_STDCALL 
-ffi_go_closure_STDCALL PROC C 
-L(UW21): 
-	// cfi_startproc 
-	sub 	esp, closure_FS 
-L(UW22): 
-	// cfi_def_cfa_offset(closure_FS + 4) 
-	FFI_CLOSURE_SAVE_REGS 
-	mov 	edx, [ecx+4]			/* copy cif */ 
-	mov 	eax, [ecx+8]			/* copy fun */ 
-	mov 	[esp+closure_CF+28], edx 
-	mov 	[esp+closure_CF+32], eax 
-	mov 	[esp+closure_CF+36], ecx	/* closure is user_data */ 
-	jmp	L(do_closure_STDCALL) 
-L(UW23): 
-	// cfi_endproc 
-ENDF(ffi_go_closure_STDCALL) 
- 
-/* For REGISTER, we have no available parameter registers, and so we 
-   enter here having pushed the closure onto the stack.  */ 
- 
-ALIGN 16 
-PUBLIC ffi_closure_REGISTER 
-ffi_closure_REGISTER PROC C 
-L(UW24): 
-	// cfi_startproc 
-	// cfi_def_cfa(%esp, 8) 
-	// cfi_offset(%eip, -8) 
-	sub 	esp, closure_FS-4 
-L(UW25): 
-	// cfi_def_cfa_offset(closure_FS + 4) 
-	FFI_CLOSURE_SAVE_REGS 
-	mov	ecx, [esp+closure_FS-4] 	/* load retaddr */ 
-	mov	eax, [esp+closure_FS]		/* load closure */ 
-	mov	[esp+closure_FS], ecx		/* move retaddr */ 
-	jmp	L(do_closure_REGISTER) 
-L(UW26): 
-	// cfi_endproc 
-ENDF(ffi_closure_REGISTER) 
- 
-/* For STDCALL (and others), we need to pop N bytes of arguments off 
-   the stack following the closure.  The amount needing to be popped 
-   is returned to us from ffi_closure_inner.  */ 
- 
-ALIGN 16 
-PUBLIC ffi_closure_STDCALL 
-ffi_closure_STDCALL PROC C 
-L(UW27): 
-	// cfi_startproc 
-	sub 	esp, closure_FS 
-L(UW28): 
-	// cfi_def_cfa_offset(closure_FS + 4) 
- 
-	FFI_CLOSURE_SAVE_REGS 
- 
-	/* Entry point from ffi_closure_REGISTER.  */ 
-L(do_closure_REGISTER):: 
- 
-	FFI_CLOSURE_COPY_TRAMP_DATA 
- 
-	/* Entry point from preceeding Go closure.  */ 
-L(do_closure_STDCALL):: 
- 
-	FFI_CLOSURE_PREP_CALL 
-	FFI_CLOSURE_CALL_INNER(29) 
- 
-	mov 	ecx, eax 
-	shr 	ecx, X86_RET_POP_SHIFT	    /* isolate pop count */ 
-	lea 	ecx, [esp+closure_FS+ecx]	/* compute popped esp */ 
-	mov 	edx, [esp+closure_FS]		/* move return address */ 
-	mov 	[ecx], edx 
- 
-	/* From this point on, the value of %esp upon return is %ecx+4, 
-	   and we've copied the return address to %ecx to make return easy. 
-	   There's no point in representing this in the unwind info, as 
-	   there is always a window between the mov and the ret which 
-	   will be wrong from one point of view or another.  */ 
- 
-	FFI_CLOSURE_MASK_AND_JUMP  L(C1(load_table,3)) 
- 
-    ALIGN 8 
-L(load_table3): 
-E(L(load_table3), X86_RET_FLOAT) 
-	fld    DWORD PTR [esp+closure_CF] 
-	mov     esp, ecx 
-	ret 
-E(L(load_table3), X86_RET_DOUBLE) 
-	fld    QWORD PTR [esp+closure_CF] 
-	mov     esp, ecx 
-	ret 
-E(L(load_table3), X86_RET_LDOUBLE) 
-	fld    QWORD PTR [esp+closure_CF] 
-	mov     esp, ecx 
-	ret 
-E(L(load_table3), X86_RET_SINT8) 
-	movsx   eax, al 
-	mov     esp, ecx 
-	ret 
-E(L(load_table3), X86_RET_SINT16) 
-	movsx   eax, ax 
-	mov     esp, ecx 
-	ret 
-E(L(load_table3), X86_RET_UINT8) 
-	movzx   eax, al 
-	mov     esp, ecx 
-	ret 
-E(L(load_table3), X86_RET_UINT16) 
-	movzx   eax, ax 
-	mov     esp, ecx 
-	ret 
-E(L(load_table3), X86_RET_INT64) 
-	mov 	edx, [esp+closure_CF+4] 
-	mov     esp, ecx 
-	ret 
-E(L(load_table3), X86_RET_int 32) 
-	mov     esp, ecx 
-	ret 
-E(L(load_table3), X86_RET_VOID) 
-	mov     esp, ecx 
-	ret 
-E(L(load_table3), X86_RET_STRUCTPOP) 
-	mov     esp, ecx 
-	ret 
-E(L(load_table3), X86_RET_STRUCTARG) 
-	mov 	esp, ecx 
-	ret 
-E(L(load_table3), X86_RET_STRUCT_1B) 
-	movzx	eax, al 
-	mov 	esp, ecx 
-	ret 
-E(L(load_table3), X86_RET_STRUCT_2B) 
-	movzx	eax, ax 
-	mov 	esp, ecx 
-	ret 
- 
-	/* Fill out the table so that bad values are predictable.  */ 
-E(L(load_table3), X86_RET_UNUSED14) 
-	int 3 
-E(L(load_table3), X86_RET_UNUSED15) 
-	int 3 
- 
-L(UW31): 
-	// cfi_endproc 
-ENDF(ffi_closure_STDCALL) 
- 
-#if !FFI_NO_RAW_API 
- 
-#define raw_closure_S_FS	(16+16+12) 
- 
-ALIGN 16 
-PUBLIC ffi_closure_raw_SYSV 
-ffi_closure_raw_SYSV PROC C 
-L(UW32): 
-	// cfi_startproc 
-	sub 	esp, raw_closure_S_FS 
-L(UW33): 
-	// cfi_def_cfa_offset(raw_closure_S_FS + 4) 
-	mov 	[esp+raw_closure_S_FS-4], ebx 
-L(UW34): 
-	// cfi_rel_offset(%ebx, raw_closure_S_FS-4) 
- 
-	mov 	edx, [eax+FFI_TRAMPOLINE_SIZE+8]	/* load cl->user_data */ 
-	mov 	[esp+12], edx 
-	lea 	edx, [esp+raw_closure_S_FS+4]		/* load raw_args */ 
-	mov 	[esp+8], edx 
-	lea 	edx, [esp+16]				/* load &res */ 
-	mov 	[esp+4], edx 
-	mov 	ebx, [eax+FFI_TRAMPOLINE_SIZE]		/* load cl->cif */ 
-	mov 	[esp], ebx 
-	call	DWORD PTR [eax+FFI_TRAMPOLINE_SIZE+4]		/* call cl->fun */ 
- 
-	mov 	eax, [ebx+20]			/* load cif->flags */ 
-	and 	eax, X86_RET_TYPE_MASK 
-// #ifdef __PIC__ 
-// 	call	__x86.get_pc_thunk.bx 
-// L(pc4): 
-// 	lea 	ecx, L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx 
-// #else 
-	lea 	ecx, [L(load_table4)+eax+8] 
-// #endif 
-	mov 	ebx, [esp+raw_closure_S_FS-4] 
-L(UW35): 
-	// cfi_restore(%ebx) 
-	mov 	eax, [esp+16]				/* Optimistic load */ 
-	jmp	    dword ptr [ecx] 
- 
-	ALIGN 8 
-L(load_table4): 
-E(L(load_table4), X86_RET_FLOAT) 
-	fld 	DWORD PTR [esp +16] 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_DOUBLE) 
-	fld 	QWORD PTR [esp +16] 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_LDOUBLE) 
-	fld 	QWORD PTR [esp +16] 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_SINT8) 
-	movsx	eax, al 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_SINT16) 
-	movsx	eax, ax 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_UINT8) 
-	movzx	eax, al 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_UINT16) 
-	movzx	eax, ax 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_INT64) 
-	mov 	edx, [esp+16+4] 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_int 32) 
-	nop 
-	/* fallthru */ 
-E(L(load_table4), X86_RET_VOID) 
-L(e4): 
-	add 	esp, raw_closure_S_FS 
-L(UW36): 
-	// cfi_adjust_cfa_offset(-raw_closure_S_FS) 
-	ret 
-L(UW37): 
-	// cfi_adjust_cfa_offset(raw_closure_S_FS) 
-E(L(load_table4), X86_RET_STRUCTPOP) 
-	add 	esp, raw_closure_S_FS 
-L(UW38): 
-	// cfi_adjust_cfa_offset(-raw_closure_S_FS) 
-	ret	4 
-L(UW39): 
-	// cfi_adjust_cfa_offset(raw_closure_S_FS) 
-E(L(load_table4), X86_RET_STRUCTARG) 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_STRUCT_1B) 
-	movzx	eax, al 
-	jmp	L(e4) 
-E(L(load_table4), X86_RET_STRUCT_2B) 
-	movzx	eax, ax 
-	jmp	L(e4) 
- 
-	/* Fill out the table so that bad values are predictable.  */ 
-E(L(load_table4), X86_RET_UNUSED14) 
-	int 3 
-E(L(load_table4), X86_RET_UNUSED15) 
-	int 3 
- 
-L(UW40): 
-	// cfi_endproc 
-ENDF(ffi_closure_raw_SYSV) 
- 
-#define raw_closure_T_FS	(16+16+8) 
- 
-ALIGN 16 
-PUBLIC ffi_closure_raw_THISCALL 
-ffi_closure_raw_THISCALL PROC C 
-L(UW41): 
-	// cfi_startproc 
-	/* Rearrange the stack such that %ecx is the first argument. 
-	   This means moving the return address.  */ 
-	pop 	edx 
-L(UW42): 
-	// cfi_def_cfa_offset(0) 
-	// cfi_register(%eip, %edx) 
-	push	ecx 
-L(UW43): 
-	// cfi_adjust_cfa_offset(4) 
-	push 	edx 
-L(UW44): 
-	// cfi_adjust_cfa_offset(4) 
-	// cfi_rel_offset(%eip, 0) 
-	sub 	esp, raw_closure_T_FS 
-L(UW45): 
-	// cfi_adjust_cfa_offset(raw_closure_T_FS) 
-	mov 	[esp+raw_closure_T_FS-4], ebx 
-L(UW46): 
-	// cfi_rel_offset(%ebx, raw_closure_T_FS-4) 
- 
-	mov 	edx, [eax+FFI_TRAMPOLINE_SIZE+8]	/* load cl->user_data */ 
-	mov 	[esp+12], edx 
-	lea 	edx, [esp+raw_closure_T_FS+4]		/* load raw_args */ 
-	mov 	[esp+8], edx 
-	lea 	edx, [esp+16]				/* load &res */ 
-	mov 	[esp+4], edx 
-	mov 	ebx, [eax+FFI_TRAMPOLINE_SIZE]		/* load cl->cif */ 
-	mov 	[esp], ebx 
-	call	DWORD PTR [eax+FFI_TRAMPOLINE_SIZE+4]		/* call cl->fun */ 
- 
-	mov 	eax, [ebx+20]				/* load cif->flags */ 
-	and 	eax, X86_RET_TYPE_MASK 
-// #ifdef __PIC__ 
-// 	call	__x86.get_pc_thunk.bx 
-// L(pc5): 
-// 	leal	L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx 
-// #else 
-	lea 	ecx, [L(load_table5)+eax*8] 
-//#endif 
-	mov 	ebx, [esp+raw_closure_T_FS-4] 
-L(UW47): 
-	// cfi_restore(%ebx) 
-	mov 	eax, [esp+16]				/* Optimistic load */ 
-	jmp	    DWORD PTR [ecx] 
- 
-	AlIGN 4 
-L(load_table5): 
-E(L(load_table5), X86_RET_FLOAT) 
-	fld	DWORD PTR [esp +16] 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_DOUBLE) 
-	fld	QWORD PTR [esp +16] 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_LDOUBLE) 
-	fld	QWORD PTR [esp+16] 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_SINT8) 
-	movsx	eax, al 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_SINT16) 
-	movsx	eax, ax 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_UINT8) 
-	movzx	eax, al 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_UINT16) 
-	movzx	eax, ax 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_INT64) 
-	mov 	edx, [esp+16+4] 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_int 32) 
-	nop 
-	/* fallthru */ 
-E(L(load_table5), X86_RET_VOID) 
-L(e5): 
-	add 	esp, raw_closure_T_FS 
-L(UW48): 
-	// cfi_adjust_cfa_offset(-raw_closure_T_FS) 
-	/* Remove the extra %ecx argument we pushed.  */ 
-	ret	4 
-L(UW49): 
-	// cfi_adjust_cfa_offset(raw_closure_T_FS) 
-E(L(load_table5), X86_RET_STRUCTPOP) 
-	add 	esp, raw_closure_T_FS 
-L(UW50): 
-	// cfi_adjust_cfa_offset(-raw_closure_T_FS) 
-	ret	8 
-L(UW51): 
-	// cfi_adjust_cfa_offset(raw_closure_T_FS) 
-E(L(load_table5), X86_RET_STRUCTARG) 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_STRUCT_1B) 
-	movzx	eax, al 
-	jmp	L(e5) 
-E(L(load_table5), X86_RET_STRUCT_2B) 
-	movzx	eax, ax 
-	jmp	L(e5) 
- 
-	/* Fill out the table so that bad values are predictable.  */ 
-E(L(load_table5), X86_RET_UNUSED14) 
-	int 3 
-E(L(load_table5), X86_RET_UNUSED15) 
-	int 3 
- 
-L(UW52): 
-	// cfi_endproc 
-ENDF(ffi_closure_raw_THISCALL) 
- 
-#endif /* !FFI_NO_RAW_API */ 
- 
-#ifdef X86_DARWIN 
-# define COMDAT(X)							\ 
-        .section __TEXT,__text,coalesced,pure_instructions;		\ 
-        .weak_definition X;						\ 
-        FFI_HIDDEN(X) 
-#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__)) 
-# define COMDAT(X)							\ 
-	.section .text.X,"axG",@progbits,X,comdat;			\ 
-	PUBLIC	X;							\ 
-	FFI_HIDDEN(X) 
-#else 
-# define COMDAT(X) 
-#endif 
- 
-// #if defined(__PIC__) 
-// 	COMDAT(C(__x86.get_pc_thunk.bx)) 
-// C(__x86.get_pc_thunk.bx): 
-// 	movl	(%esp), %ebx 
-// 	ret 
-// ENDF(C(__x86.get_pc_thunk.bx)) 
-// # if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE 
-// 	COMDAT(C(__x86.get_pc_thunk.dx)) 
-// C(__x86.get_pc_thunk.dx): 
-// 	movl	(%esp), %edx 
-// 	ret 
-// ENDF(C(__x86.get_pc_thunk.dx)) 
-// #endif /* DARWIN || HIDDEN */ 
-// #endif /* __PIC__ */ 
- 
-#if 0 
-/* Sadly, OSX cctools-as doesn't understand .cfi directives at all.  */ 
- 
-#ifdef __APPLE__ 
-.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support 
-EHFrame0: 
-#elif defined(X86_WIN32) 
-.section .eh_frame,"r" 
-#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) 
-.section .eh_frame,EH_FRAME_FLAGS,@unwind 
-#else 
-.section .eh_frame,EH_FRAME_FLAGS,@progbits 
-#endif 
- 
-#ifdef HAVE_AS_X86_PCREL 
-# define PCREL(X)	X - . 
-#else 
-# define PCREL(X)	X@rel 
-#endif 
- 
-/* Simplify advancing between labels.  Assume DW_CFA_advance_loc1 fits.  */ 
-#define ADV(N, P)	.byte 2, L(N)-L(P) 
- 
-	.balign 4 
-L(CIE): 
-	.set	L(set0),L(ECIE)-L(SCIE) 
-	.long	L(set0)			/* CIE Length */ 
-L(SCIE): 
-	.long	0			/* CIE Identifier Tag */ 
-	.byte	1			/* CIE Version */ 
-	.ascii	"zR\0"			/* CIE Augmentation */ 
-	.byte	1			/* CIE Code Alignment Factor */ 
-	.byte	0x7c			/* CIE Data Alignment Factor */ 
-	.byte	0x8			/* CIE RA Column */ 
-	.byte	1			/* Augmentation size */ 
-	.byte	0x1b			/* FDE Encoding (pcrel sdata4) */ 
-	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp offset 4 */ 
-	.byte	0x80+8, 1		/* DW_CFA_offset, %eip offset 1*-4 */ 
-	.balign 4 
-L(ECIE): 
- 
-	.set	L(set1),L(EFDE1)-L(SFDE1) 
-	.long	L(set1)			/* FDE Length */ 
-L(SFDE1): 
-	.long	L(SFDE1)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW0))		/* Initial location */ 
-	.long	L(UW5)-L(UW0)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW1, UW0) 
-	.byte	0xc, 5, 8		/* DW_CFA_def_cfa, %ebp 8 */ 
-	.byte	0x80+5, 2		/* DW_CFA_offset, %ebp 2*-4 */ 
-	ADV(UW2, UW1) 
-	.byte	0x80+3, 0		/* DW_CFA_offset, %ebx 0*-4 */ 
-	ADV(UW3, UW2) 
-	.byte	0xa			/* DW_CFA_remember_state */ 
-	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp 4 */ 
-	.byte	0xc0+3			/* DW_CFA_restore, %ebx */ 
-	.byte	0xc0+5			/* DW_CFA_restore, %ebp */ 
-	ADV(UW4, UW3) 
-	.byte	0xb			/* DW_CFA_restore_state */ 
-	.balign	4 
-L(EFDE1): 
- 
-	.set	L(set2),L(EFDE2)-L(SFDE2) 
-	.long	L(set2)			/* FDE Length */ 
-L(SFDE2): 
-	.long	L(SFDE2)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW6))		/* Initial location */ 
-	.long	L(UW8)-L(UW6)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW7, UW6) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-	.balign	4 
-L(EFDE2): 
- 
-	.set	L(set3),L(EFDE3)-L(SFDE3) 
-	.long	L(set3)			/* FDE Length */ 
-L(SFDE3): 
-	.long	L(SFDE3)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW9))		/* Initial location */ 
-	.long	L(UW11)-L(UW9)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW10, UW9) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-	.balign	4 
-L(EFDE3): 
- 
-	.set	L(set4),L(EFDE4)-L(SFDE4) 
-	.long	L(set4)			/* FDE Length */ 
-L(SFDE4): 
-	.long	L(SFDE4)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW12))		/* Initial location */ 
-	.long	L(UW20)-L(UW12)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW13, UW12) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX 
-	ADV(UW14, UW13) 
-	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */ 
-	ADV(UW15, UW14) 
-	.byte	0xc0+3			/* DW_CFA_restore %ebx */ 
-	ADV(UW16, UW15) 
-#else 
-	ADV(UW16, UW13) 
-#endif 
-	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */ 
-	ADV(UW17, UW16) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-	ADV(UW18, UW17) 
-	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */ 
-	ADV(UW19, UW18) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-	.balign	4 
-L(EFDE4): 
- 
-	.set	L(set5),L(EFDE5)-L(SFDE5) 
-	.long	L(set5)			/* FDE Length */ 
-L(SFDE5): 
-	.long	L(SFDE5)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW21))		/* Initial location */ 
-	.long	L(UW23)-L(UW21)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW22, UW21) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-	.balign	4 
-L(EFDE5): 
- 
-	.set	L(set6),L(EFDE6)-L(SFDE6) 
-	.long	L(set6)			/* FDE Length */ 
-L(SFDE6): 
-	.long	L(SFDE6)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW24))		/* Initial location */ 
-	.long	L(UW26)-L(UW24)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */ 
-	.byte	0x80+8, 2		/* DW_CFA_offset %eip, 2*-4 */ 
-	ADV(UW25, UW24) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-	.balign	4 
-L(EFDE6): 
- 
-	.set	L(set7),L(EFDE7)-L(SFDE7) 
-	.long	L(set7)			/* FDE Length */ 
-L(SFDE7): 
-	.long	L(SFDE7)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW27))		/* Initial location */ 
-	.long	L(UW31)-L(UW27)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW28, UW27) 
-	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */ 
-#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX 
-	ADV(UW29, UW28) 
-	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */ 
-	ADV(UW30, UW29) 
-	.byte	0xc0+3			/* DW_CFA_restore %ebx */ 
-#endif 
-	.balign	4 
-L(EFDE7): 
- 
-#if !FFI_NO_RAW_API 
-	.set	L(set8),L(EFDE8)-L(SFDE8) 
-	.long	L(set8)			/* FDE Length */ 
-L(SFDE8): 
-	.long	L(SFDE8)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW32))		/* Initial location */ 
-	.long	L(UW40)-L(UW32)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW33, UW32) 
-	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */ 
-	ADV(UW34, UW33) 
-	.byte	0x80+3, 2		/* DW_CFA_offset %ebx 2*-4 */ 
-	ADV(UW35, UW34) 
-	.byte	0xc0+3			/* DW_CFA_restore %ebx */ 
-	ADV(UW36, UW35) 
-	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */ 
-	ADV(UW37, UW36) 
-	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */ 
-	ADV(UW38, UW37) 
-	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */ 
-	ADV(UW39, UW38) 
-	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */ 
-	.balign	4 
-L(EFDE8): 
- 
-	.set	L(set9),L(EFDE9)-L(SFDE9) 
-	.long	L(set9)			/* FDE Length */ 
-L(SFDE9): 
-	.long	L(SFDE9)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW41))		/* Initial location */ 
-	.long	L(UW52)-L(UW41)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW42, UW41) 
-	.byte	0xe, 0			/* DW_CFA_def_cfa_offset */ 
-	.byte	0x9, 8, 2		/* DW_CFA_register %eip, %edx */ 
-	ADV(UW43, UW42) 
-	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */ 
-	ADV(UW44, UW43) 
-	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */ 
-	.byte	0x80+8, 2		/* DW_CFA_offset %eip 2*-4 */ 
-	ADV(UW45, UW44) 
-	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */ 
-	ADV(UW46, UW45) 
-	.byte	0x80+3, 3		/* DW_CFA_offset %ebx 3*-4 */ 
-	ADV(UW47, UW46) 
-	.byte	0xc0+3			/* DW_CFA_restore %ebx */ 
-	ADV(UW48, UW47) 
-	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */ 
-	ADV(UW49, UW48) 
-	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */ 
-	ADV(UW50, UW49) 
-	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */ 
-	ADV(UW51, UW50) 
-	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */ 
-	.balign	4 
-L(EFDE9): 
-#endif /* !FFI_NO_RAW_API */ 
- 
-#ifdef _WIN32 
-	.def	 @feat.00; 
-	.scl	3; 
-	.type	0; 
-	.endef 
-	PUBLIC	@feat.00 
-@feat.00 = 1 
-#endif 
- 
-#endif /* ifndef _MSC_VER */ 
-#endif /* ifndef __x86_64__ */ 
- 
-#if defined __ELF__ && defined __linux__ 
-	.section	.note.GNU-stack,"",@progbits 
-#endif 
-#endif 
- 
-END 
-\ No newline at end of file
+/* -----------------------------------------------------------------------
+   sysv.S - Copyright (c) 2017  Anthony Green
+          - Copyright (c) 2013  The Written Word, Inc.
+          - Copyright (c) 1996,1998,2001-2003,2005,2008,2010  Red Hat, Inc.
+   
+   X86 Foreign Function Interface 
+
+   Permission is hereby granted, free of charge, to any person obtaining
+   a copy of this software and associated documentation files (the
+   ``Software''), to deal in the Software without restriction, including
+   without limitation the rights to use, copy, modify, merge, publish,
+   distribute, sublicense, and/or sell copies of the Software, and to
+   permit persons to whom the Software is furnished to do so, subject to
+   the following conditions:
+
+   The above copyright notice and this permission notice shall be included
+   in all copies or substantial portions of the Software.
+
+   THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+   NONINFRINGEMENT.  IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+   HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+   WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS IN THE SOFTWARE.
+   ----------------------------------------------------------------------- */
+
+#ifndef __x86_64__
+#ifdef _MSC_VER
+
+#define LIBFFI_ASM	
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+#include "internal.h" 
+
+#define C2(X, Y)  X ## Y	/* paste the two tokens together */
+#define C1(X, Y)  C2(X, Y)	/* extra level so macro arguments are expanded before pasting */
+#define L(X)     C1(L, X)	/* local label name: the argument prefixed with 'L' */
+# define ENDF(X) X ENDP	/* emit "X ENDP" to close the procedure opened with "X PROC" */
+
+/* This macro allows the safe creation of jump tables without an
+   actual table.  The entry points into the table are all 8 bytes.
+   The use of ORG asserts that we're at the correct location.  */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X)	ALIGN 8	/* alignment only: no check that the entry sits at BASE + X * 8 */
+#else
+# define E(BASE, X)	ALIGN 8; ORG BASE + X * 8	/* NOTE(review): MASM presumably treats ';' as a comment start, so the ORG check may never be assembled - confirm */
+#endif
+
+    .686P
+    .MODEL FLAT
+
+EXTRN	@ffi_closure_inner@8:PROC
+_TEXT SEGMENT
+
+/* This is declared as
+
+   void ffi_call_i386(struct call_frame *frame, char *argp)
+        __attribute__((fastcall));
+
+   Thus the arguments are present in
+
+        ecx: frame
+        edx: argp
+*/
+
+ALIGN 16
+PUBLIC @ffi_call_i386@8
+@ffi_call_i386@8 PROC
+L(UW0):
+	// cfi_startproc
+ #if !HAVE_FASTCALL
+	mov	    ecx, [esp+4]		/* no fastcall: fetch frame from the stack */
+	mov 	edx, [esp+8]		/* no fastcall: fetch argp from the stack */
+ #endif
+	mov	    eax, [esp]		/* move the return address */
+	mov	    [ecx], ebp		/* store ebp into local frame */
+	mov 	[ecx+4], eax	/* store retaddr into local frame */
+
+	/* New stack frame based off ebp.  This is an itty bit of unwind
+	   trickery in that the CFA *has* changed.  There is no easy way
+	   to describe it correctly on entry to the function.  Fortunately,
+	   it doesn't matter too much since at all points we can correctly
+	   unwind back to ffi_call.  Note that the location to which we
+	   moved the return address is (the new) CFA-4, so from the
+	   perspective of the unwind info, it hasn't moved.  */
+	mov 	ebp, ecx
+L(UW1):
+	// cfi_def_cfa(%ebp, 8)
+	// cfi_rel_offset(%ebp, 0)
+
+	mov 	esp, edx		/* set outgoing argument stack */
+	mov 	eax, [20+R_EAX*4+ebp]	/* set register arguments */
+	mov 	edx, [20+R_EDX*4+ebp]
+	mov	    ecx, [20+R_ECX*4+ebp]
+
+	call	dword ptr [ebp+8]	/* call through the pointer stored at frame+8 */
+
+	mov	    ecx, [12+ebp]		/* load return type code */
+	mov 	[ebp+8], ebx		/* preserve %ebx in the slot that held the call target */
+L(UW2):
+	// cfi_rel_offset(%ebx, 8)
+
+	and 	ecx, X86_RET_TYPE_MASK
+	lea 	ebx, [L(store_table) + ecx * 8]	/* table entries are 8 bytes apart */
+	mov 	ecx, [ebp+16]		/* load result address */
+	jmp	    ebx			/* dispatch on the masked return type code */
+
+	ALIGN	8
+L(store_table):
+E(L(store_table), X86_RET_FLOAT)
+	fstp	DWORD PTR [ecx]		/* pop the x87 result as a 32-bit value */
+	jmp	L(e1)
+E(L(store_table), X86_RET_DOUBLE)
+	fstp	QWORD PTR [ecx]		/* pop the x87 result as a 64-bit value */
+	jmp	L(e1)
+E(L(store_table), X86_RET_LDOUBLE)
+	fstp	QWORD PTR [ecx]		/* stored as 64 bits here, same as DOUBLE */
+	jmp	L(e1)
+E(L(store_table), X86_RET_SINT8)
+	movsx	eax, al			/* sign-extend, then store a full 32-bit word */
+	mov	[ecx], eax
+	jmp	L(e1)
+E(L(store_table), X86_RET_SINT16)
+	movsx	eax, ax
+	mov	[ecx], eax
+	jmp	L(e1)
+E(L(store_table), X86_RET_UINT8)
+	movzx	eax, al			/* zero-extend, then store a full 32-bit word */
+	mov	[ecx], eax
+	jmp	L(e1)
+E(L(store_table), X86_RET_UINT16)
+	movzx	eax, ax
+	mov	[ecx], eax
+	jmp	L(e1)
+E(L(store_table), X86_RET_INT64)
+	mov	[ecx+4], edx		/* high half; low half is stored by the next entry */
+	/* fallthru */
+E(L(store_table), X86_RET_INT32)
+	mov	[ecx], eax
+	/* fallthru */
+E(L(store_table), X86_RET_VOID)
+L(e1):
+	mov	    ebx, [ebp+8]		/* restore %ebx saved after the call */
+	mov	    esp, ebp		/* drop the outgoing argument area */
+	pop 	ebp			/* reload the ebp stored at frame+0 on entry */
+L(UW3):
+	// cfi_remember_state
+	// cfi_def_cfa(%esp, 4)
+	// cfi_restore(%ebx)
+	// cfi_restore(%ebp)
+	ret				/* return via the retaddr copied to frame+4 on entry */
+L(UW4):
+	// cfi_restore_state
+
+E(L(store_table), X86_RET_STRUCTPOP)
+	jmp	    L(e1)		/* no store is performed for this code */
+E(L(store_table), X86_RET_STRUCTARG)
+	jmp	    L(e1)		/* no store is performed for this code */
+E(L(store_table), X86_RET_STRUCT_1B)
+	mov 	[ecx], al		/* store exactly one byte */
+	jmp	    L(e1)
+E(L(store_table), X86_RET_STRUCT_2B)
+	mov 	[ecx], ax		/* store exactly two bytes */
+	jmp	    L(e1)
+
+	/* Fill out the table so that bad values are predictable.  */
+E(L(store_table), X86_RET_UNUSED14)
+	int 3
+E(L(store_table), X86_RET_UNUSED15)
+	int 3
+
+L(UW5):
+	// cfi_endproc
+ENDF(@ffi_call_i386@8)
+
+/* The inner helper is declared as
+
+   void ffi_closure_inner(struct closure_frame *frame, char *argp)
+	__attribute__((fastcall))
+
+   Thus the arguments are placed in
+
+	ecx:	frame
+	edx:	argp
+*/
+
+/* Macros to help setting up the closure_data structure.  */
+
+#if HAVE_FASTCALL
+# define closure_FS	(40 + 4)	/* bytes subtracted from esp on closure entry */
+# define closure_CF	0		/* offset of closure_data from esp */
+#else
+# define closure_FS	(8 + 40 + 12)	/* as above; [esp] and [esp+4] also carry the helper's two arguments */
+# define closure_CF	8		/* closure_data sits above those two argument slots */
+#endif
+
+FFI_CLOSURE_SAVE_REGS MACRO		/* spill eax/edx/ecx into closure_data at +16, slots indexed by R_EAX/R_EDX/R_ECX */
+	mov 	[esp + closure_CF+16+R_EAX*4], eax
+	mov 	[esp + closure_CF+16+R_EDX*4], edx
+	mov 	[esp + closure_CF+16+R_ECX*4], ecx
+ENDM
+
+FFI_CLOSURE_COPY_TRAMP_DATA MACRO	/* in: eax = ffi_closure; copies cif/fun/user_data to closure_data +28/+32/+36 */
+	mov 	edx, [eax+FFI_TRAMPOLINE_SIZE]      /* copy cif */
+	mov 	ecx, [eax+FFI_TRAMPOLINE_SIZE+4]    /* copy fun */
+	mov 	eax, [eax+FFI_TRAMPOLINE_SIZE+8]    /* copy user_data; must be last, it overwrites the base */
+	mov 	[esp+closure_CF+28], edx
+	mov 	[esp+closure_CF+32], ecx
+	mov 	[esp+closure_CF+36], eax
+ENDM
+
+#if HAVE_FASTCALL
+FFI_CLOSURE_PREP_CALL MACRO		/* fastcall: the helper's two arguments go in ecx/edx only */
+	mov	    ecx, esp                    /* load closure_data */
+	lea 	edx, [esp+closure_FS+4]     /* load incoming stack */
+ENDM
+#else
+FFI_CLOSURE_PREP_CALL MACRO		/* otherwise: also store both arguments at [esp] and [esp+4] */
+	lea 	ecx, [esp+closure_CF]       /* load closure_data */
+	lea 	edx, [esp+closure_FS+4]     /* load incoming stack */
+	mov 	[esp], ecx                  /* first stacked argument: closure_data */
+	mov 	[esp+4], edx                /* second stacked argument: incoming stack */
+ENDM
+#endif /* HAVE_FASTCALL */
+
+FFI_CLOSURE_CALL_INNER MACRO UWN	/* UWN is accepted but not referenced in the body */
+	call	@ffi_closure_inner@8
+ENDM
+
+FFI_CLOSURE_MASK_AND_JUMP MACRO LABEL	/* in: eax = value returned by the inner helper; jumps to LABEL + type*8 */
+	and	    eax, X86_RET_TYPE_MASK      /* keep only the return type code */
+	lea 	edx, [LABEL+eax*8]          /* table entries are 8 bytes apart */
+	mov 	eax, [esp+closure_CF]       /* optimistic load */
+	jmp	    edx
+ENDM
+
+ALIGN 16
+PUBLIC ffi_go_closure_EAX
+ffi_go_closure_EAX PROC C		/* Go closure entry; the closure pointer is read from eax */
+L(UW6):
+	// cfi_startproc
+	sub	esp, closure_FS			/* allocate the closure frame */
+L(UW7):
+	// cfi_def_cfa_offset(closure_FS + 4)
+	FFI_CLOSURE_SAVE_REGS
+	mov     edx, [eax+4]			/* copy cif */
+	mov 	ecx, [eax +8]			/* copy fun */
+	mov 	[esp+closure_CF+28], edx
+	mov 	[esp+closure_CF+32], ecx
+	mov 	[esp+closure_CF+36], eax	/* closure is user_data */
+	jmp	L(do_closure_i386)		/* continue at the shared label inside ffi_closure_i386 */
+L(UW8):
+	// cfi_endproc
+ENDF(ffi_go_closure_EAX)
+
+ALIGN 16
+PUBLIC ffi_go_closure_ECX
+ffi_go_closure_ECX PROC C		/* Go closure entry; the closure pointer is read from ecx */
+L(UW9):
+	// cfi_startproc
+	sub 	esp, closure_FS			/* allocate the closure frame */
+L(UW10):
+	// cfi_def_cfa_offset(closure_FS + 4)
+	FFI_CLOSURE_SAVE_REGS
+	mov 	edx, [ecx+4]			/* copy cif */
+	mov 	eax, [ecx+8]			/* copy fun */
+	mov 	[esp+closure_CF+28], edx
+	mov 	[esp+closure_CF+32], eax
+	mov 	[esp+closure_CF+36], ecx	/* closure is user_data */
+	jmp	L(do_closure_i386)		/* continue at the shared label inside ffi_closure_i386 */
+L(UW11):
+	// cfi_endproc
+ENDF(ffi_go_closure_ECX)
+
+/* The closure entry points are reached from the ffi_closure trampoline.
+   On entry, %eax contains the address of the ffi_closure.  */
+
+ALIGN 16
+PUBLIC ffi_closure_i386
+ffi_closure_i386 PROC C
+L(UW12):
+	// cfi_startproc
+	sub	    esp, closure_FS		/* allocate the closure frame */
+L(UW13):
+	// cfi_def_cfa_offset(closure_FS + 4)
+
+	FFI_CLOSURE_SAVE_REGS
+	FFI_CLOSURE_COPY_TRAMP_DATA
+
+	/* Entry point from preceding Go closures.  */
+L(do_closure_i386)::
+
+	FFI_CLOSURE_PREP_CALL
+	FFI_CLOSURE_CALL_INNER(14)
+	FFI_CLOSURE_MASK_AND_JUMP L(C1(load_table,2))	/* dispatch on the return type code */
+
+    ALIGN 8
+L(load_table2):
+E(L(load_table2), X86_RET_FLOAT)
+	fld 	dword ptr [esp+closure_CF]	/* push the 32-bit result onto the x87 stack */
+	jmp	L(e2)
+E(L(load_table2), X86_RET_DOUBLE)
+	fld 	qword ptr [esp+closure_CF]	/* push the 64-bit result onto the x87 stack */
+	jmp	L(e2)
+E(L(load_table2), X86_RET_LDOUBLE)
+	fld 	qword ptr [esp+closure_CF]	/* loaded as a 64-bit value here, same as DOUBLE */
+	jmp	L(e2)
+E(L(load_table2), X86_RET_SINT8)
+	movsx	eax, al				/* sign-extend the low byte */
+	jmp	L(e2)
+E(L(load_table2), X86_RET_SINT16)
+	movsx	eax, ax				/* sign-extend the low word */
+	jmp	L(e2)
+E(L(load_table2), X86_RET_UINT8)
+	movzx	eax, al				/* zero-extend the low byte */
+	jmp	L(e2)
+E(L(load_table2), X86_RET_UINT16)
+	movzx	eax, ax				/* zero-extend the low word */
+	jmp	L(e2)
+E(L(load_table2), X86_RET_INT64)
+	mov 	edx, [esp+closure_CF+4]		/* high half; eax was loaded before the jump */
+	jmp	L(e2)
+E(L(load_table2), X86_RET_INT32)
+	nop
+	/* fallthru */
+E(L(load_table2), X86_RET_VOID)
+L(e2):
+	add 	esp, closure_FS			/* release the closure frame */
+L(UW16):
+	// cfi_adjust_cfa_offset(-closure_FS)
+	ret
+L(UW17):
+	// cfi_adjust_cfa_offset(closure_FS)
+E(L(load_table2), X86_RET_STRUCTPOP)
+	add 	esp, closure_FS
+L(UW18):
+	// cfi_adjust_cfa_offset(-closure_FS)
+	ret	4				/* pops 4 extra bytes (presumably the struct-return pointer) */
+L(UW19):
+	// cfi_adjust_cfa_offset(closure_FS)
+E(L(load_table2), X86_RET_STRUCTARG)
+	jmp	L(e2)
+E(L(load_table2), X86_RET_STRUCT_1B)
+	movzx	eax, al
+	jmp	L(e2)
+E(L(load_table2), X86_RET_STRUCT_2B)
+	movzx	eax, ax
+	jmp	L(e2)
+
+	/* Fill out the table so that bad values are predictable.  */
+E(L(load_table2), X86_RET_UNUSED14)
+	int 3
+E(L(load_table2), X86_RET_UNUSED15)
+	int 3
+
+L(UW20):
+	// cfi_endproc
+ENDF(ffi_closure_i386)
+
+ALIGN 16
+PUBLIC	ffi_go_closure_STDCALL
+ffi_go_closure_STDCALL PROC C		/* Go closure entry; the closure pointer is read from ecx */
+L(UW21):
+	// cfi_startproc
+	sub 	esp, closure_FS			/* allocate the closure frame */
+L(UW22):
+	// cfi_def_cfa_offset(closure_FS + 4)
+	FFI_CLOSURE_SAVE_REGS
+	mov 	edx, [ecx+4]			/* copy cif */
+	mov 	eax, [ecx+8]			/* copy fun */
+	mov 	[esp+closure_CF+28], edx
+	mov 	[esp+closure_CF+32], eax
+	mov 	[esp+closure_CF+36], ecx	/* closure is user_data */
+	jmp	L(do_closure_STDCALL)		/* continue at the shared label inside ffi_closure_STDCALL */
+L(UW23):
+	// cfi_endproc
+ENDF(ffi_go_closure_STDCALL)
+
+/* For REGISTER, we have no available parameter registers, and so we
+   enter here having pushed the closure onto the stack.  */
+
+ALIGN 16
+PUBLIC ffi_closure_REGISTER
+ffi_closure_REGISTER PROC C
+L(UW24):
+	// cfi_startproc
+	// cfi_def_cfa(%esp, 8)
+	// cfi_offset(%eip, -8)
+	sub 	esp, closure_FS-4		/* 4 bytes less: the pushed closure word already occupies one slot */
+L(UW25):
+	// cfi_def_cfa_offset(closure_FS + 4)
+	FFI_CLOSURE_SAVE_REGS
+	mov	ecx, [esp+closure_FS-4] 	/* load retaddr */
+	mov	eax, [esp+closure_FS]		/* load closure */
+	mov	[esp+closure_FS], ecx		/* move retaddr */
+	jmp	L(do_closure_REGISTER)		/* continue at the shared label inside ffi_closure_STDCALL */
+L(UW26):
+	// cfi_endproc
+ENDF(ffi_closure_REGISTER)
+
+/* For STDCALL (and others), we need to pop N bytes of arguments off
+   the stack following the closure.  The amount needing to be popped
+   is returned to us from ffi_closure_inner.  */
+
+ALIGN 16
+PUBLIC ffi_closure_STDCALL
+ffi_closure_STDCALL PROC C
+L(UW27):
+	// cfi_startproc
+	sub 	esp, closure_FS			/* allocate the closure frame */
+L(UW28):
+	// cfi_def_cfa_offset(closure_FS + 4)
+
+	FFI_CLOSURE_SAVE_REGS
+
+	/* Entry point from ffi_closure_REGISTER.  */
+L(do_closure_REGISTER)::
+
+	FFI_CLOSURE_COPY_TRAMP_DATA
+
+	/* Entry point from preceding Go closure.  */
+L(do_closure_STDCALL)::
+
+	FFI_CLOSURE_PREP_CALL
+	FFI_CLOSURE_CALL_INNER(29)
+
+	mov 	ecx, eax			/* keep eax intact for the type dispatch below */
+	shr 	ecx, X86_RET_POP_SHIFT	    /* isolate pop count */
+	lea 	ecx, [esp+closure_FS+ecx]	/* compute popped esp */
+	mov 	edx, [esp+closure_FS]		/* move return address */
+	mov 	[ecx], edx
+
+	/* From this point on, the value of %esp upon return is %ecx+4,
+	   and we've copied the return address to %ecx to make return easy.
+	   There's no point in representing this in the unwind info, as
+	   there is always a window between the mov and the ret which
+	   will be wrong from one point of view or another.  */
+
+	FFI_CLOSURE_MASK_AND_JUMP  L(C1(load_table,3))
+
+    ALIGN 8
+L(load_table3):
+E(L(load_table3), X86_RET_FLOAT)
+	fld    DWORD PTR [esp+closure_CF]
+	mov     esp, ecx			/* esp -> relocated return address */
+	ret
+E(L(load_table3), X86_RET_DOUBLE)
+	fld    QWORD PTR [esp+closure_CF]
+	mov     esp, ecx
+	ret
+E(L(load_table3), X86_RET_LDOUBLE)
+	fld    QWORD PTR [esp+closure_CF]
+	mov     esp, ecx
+	ret
+E(L(load_table3), X86_RET_SINT8)
+	movsx   eax, al
+	mov     esp, ecx
+	ret
+E(L(load_table3), X86_RET_SINT16)
+	movsx   eax, ax
+	mov     esp, ecx
+	ret
+E(L(load_table3), X86_RET_UINT8)
+	movzx   eax, al
+	mov     esp, ecx
+	ret
+E(L(load_table3), X86_RET_UINT16)
+	movzx   eax, ax
+	mov     esp, ecx
+	ret
+E(L(load_table3), X86_RET_INT64)
+	mov 	edx, [esp+closure_CF+4]		/* high half; eax was loaded before the jump */
+	mov     esp, ecx
+	ret
+E(L(load_table3), X86_RET_INT32)
+	mov     esp, ecx
+	ret
+E(L(load_table3), X86_RET_VOID)
+	mov     esp, ecx
+	ret
+E(L(load_table3), X86_RET_STRUCTPOP)
+	mov     esp, ecx
+	ret
+E(L(load_table3), X86_RET_STRUCTARG)
+	mov 	esp, ecx
+	ret
+E(L(load_table3), X86_RET_STRUCT_1B)
+	movzx	eax, al
+	mov 	esp, ecx
+	ret
+E(L(load_table3), X86_RET_STRUCT_2B)
+	movzx	eax, ax
+	mov 	esp, ecx
+	ret
+
+	/* Fill out the table so that bad values are predictable.  */
+E(L(load_table3), X86_RET_UNUSED14)
+	int 3
+E(L(load_table3), X86_RET_UNUSED15)
+	int 3
+
+L(UW31):
+	// cfi_endproc
+ENDF(ffi_closure_STDCALL)
+
+#if !FFI_NO_RAW_API
+
+#define raw_closure_S_FS	(16+16+12)	/* 16 outgoing-argument bytes, result at +16, top word saves ebx */
+
+ALIGN 16
+PUBLIC ffi_closure_raw_SYSV
+ffi_closure_raw_SYSV PROC C
+L(UW32):
+	// cfi_startproc
+	sub 	esp, raw_closure_S_FS
+L(UW33):
+	// cfi_def_cfa_offset(raw_closure_S_FS + 4)
+	mov 	[esp+raw_closure_S_FS-4], ebx	/* ebx is reused below to hold cl->cif */
+L(UW34):
+	// cfi_rel_offset(%ebx, raw_closure_S_FS-4)
+
+	mov 	edx, [eax+FFI_TRAMPOLINE_SIZE+8]	/* load cl->user_data */
+	mov 	[esp+12], edx
+	lea 	edx, [esp+raw_closure_S_FS+4]		/* load raw_args */
+	mov 	[esp+8], edx
+	lea 	edx, [esp+16]				/* load &res */
+	mov 	[esp+4], edx
+	mov 	ebx, [eax+FFI_TRAMPOLINE_SIZE]		/* load cl->cif */
+	mov 	[esp], ebx
+	call	DWORD PTR [eax+FFI_TRAMPOLINE_SIZE+4]		/* call cl->fun */
+
+	mov 	eax, [ebx+20]			/* load cif->flags */
+	and 	eax, X86_RET_TYPE_MASK
+// #ifdef __PIC__
+// 	call	__x86.get_pc_thunk.bx
+// L(pc4):
+// 	lea 	ecx, L(load_table4)-L(pc4)(%ebx, %eax, 8), %ecx
+// #else
+	lea 	ecx, [L(load_table4)+eax*8]	/* entry address: table slots are 8 bytes apart */
+// #endif
+	mov 	ebx, [esp+raw_closure_S_FS-4]
+L(UW35):
+	// cfi_restore(%ebx)
+	mov 	eax, [esp+16]				/* Optimistic load */
+	jmp	    ecx				/* jump to the entry itself; ecx is not a pointer to an address */
+
+	ALIGN 8
+L(load_table4):
+E(L(load_table4), X86_RET_FLOAT)
+	fld 	DWORD PTR [esp +16]
+	jmp	L(e4)
+E(L(load_table4), X86_RET_DOUBLE)
+	fld 	QWORD PTR [esp +16]
+	jmp	L(e4)
+E(L(load_table4), X86_RET_LDOUBLE)
+	fld 	QWORD PTR [esp +16]
+	jmp	L(e4)
+E(L(load_table4), X86_RET_SINT8)
+	movsx	eax, al
+	jmp	L(e4)
+E(L(load_table4), X86_RET_SINT16)
+	movsx	eax, ax
+	jmp	L(e4)
+E(L(load_table4), X86_RET_UINT8)
+	movzx	eax, al
+	jmp	L(e4)
+E(L(load_table4), X86_RET_UINT16)
+	movzx	eax, ax
+	jmp	L(e4)
+E(L(load_table4), X86_RET_INT64)
+	mov 	edx, [esp+16+4]			/* high half; low half is already in eax */
+	jmp	L(e4)
+E(L(load_table4), X86_RET_INT32)
+	nop
+	/* fallthru */
+E(L(load_table4), X86_RET_VOID)
+L(e4):
+	add 	esp, raw_closure_S_FS
+L(UW36):
+	// cfi_adjust_cfa_offset(-raw_closure_S_FS)
+	ret
+L(UW37):
+	// cfi_adjust_cfa_offset(raw_closure_S_FS)
+E(L(load_table4), X86_RET_STRUCTPOP)
+	add 	esp, raw_closure_S_FS
+L(UW38):
+	// cfi_adjust_cfa_offset(-raw_closure_S_FS)
+	ret	4
+L(UW39):
+	// cfi_adjust_cfa_offset(raw_closure_S_FS)
+E(L(load_table4), X86_RET_STRUCTARG)
+	jmp	L(e4)
+E(L(load_table4), X86_RET_STRUCT_1B)
+	movzx	eax, al
+	jmp	L(e4)
+E(L(load_table4), X86_RET_STRUCT_2B)
+	movzx	eax, ax
+	jmp	L(e4)
+
+	/* Fill out the table so that bad values are predictable.  */
+E(L(load_table4), X86_RET_UNUSED14)
+	int 3
+E(L(load_table4), X86_RET_UNUSED15)
+	int 3
+
+L(UW40):
+	// cfi_endproc
+ENDF(ffi_closure_raw_SYSV)
+
+#define raw_closure_T_FS	(16+16+8)	/* 16 outgoing-argument bytes, result at +16, top word saves ebx */
+
+ALIGN 16
+PUBLIC ffi_closure_raw_THISCALL
+ffi_closure_raw_THISCALL PROC C
+L(UW41):
+	// cfi_startproc
+	/* Rearrange the stack such that %ecx is the first argument.
+	   This means moving the return address.  */
+	pop 	edx
+L(UW42):
+	// cfi_def_cfa_offset(0)
+	// cfi_register(%eip, %edx)
+	push	ecx
+L(UW43):
+	// cfi_adjust_cfa_offset(4)
+	push 	edx
+L(UW44):
+	// cfi_adjust_cfa_offset(4)
+	// cfi_rel_offset(%eip, 0)
+	sub 	esp, raw_closure_T_FS
+L(UW45):
+	// cfi_adjust_cfa_offset(raw_closure_T_FS)
+	mov 	[esp+raw_closure_T_FS-4], ebx	/* ebx is reused below to hold cl->cif */
+L(UW46):
+	// cfi_rel_offset(%ebx, raw_closure_T_FS-4)
+
+	mov 	edx, [eax+FFI_TRAMPOLINE_SIZE+8]	/* load cl->user_data */
+	mov 	[esp+12], edx
+	lea 	edx, [esp+raw_closure_T_FS+4]		/* load raw_args */
+	mov 	[esp+8], edx
+	lea 	edx, [esp+16]				/* load &res */
+	mov 	[esp+4], edx
+	mov 	ebx, [eax+FFI_TRAMPOLINE_SIZE]		/* load cl->cif */
+	mov 	[esp], ebx
+	call	DWORD PTR [eax+FFI_TRAMPOLINE_SIZE+4]		/* call cl->fun */
+
+	mov 	eax, [ebx+20]				/* load cif->flags */
+	and 	eax, X86_RET_TYPE_MASK
+// #ifdef __PIC__
+// 	call	__x86.get_pc_thunk.bx
+// L(pc5):
+// 	leal	L(load_table5)-L(pc5)(%ebx, %eax, 8), %ecx
+// #else
+	lea 	ecx, [L(load_table5)+eax*8]	/* entry address: table slots are 8 bytes apart */
+//#endif
+	mov 	ebx, [esp+raw_closure_T_FS-4]
+L(UW47):
+	// cfi_restore(%ebx)
+	mov 	eax, [esp+16]				/* Optimistic load */
+	jmp	    ecx				/* jump to the entry itself; ecx is not a pointer to an address */
+
+	ALIGN 8
+L(load_table5):
+E(L(load_table5), X86_RET_FLOAT)
+	fld	DWORD PTR [esp +16]
+	jmp	L(e5)
+E(L(load_table5), X86_RET_DOUBLE)
+	fld	QWORD PTR [esp +16]
+	jmp	L(e5)
+E(L(load_table5), X86_RET_LDOUBLE)
+	fld	QWORD PTR [esp+16]
+	jmp	L(e5)
+E(L(load_table5), X86_RET_SINT8)
+	movsx	eax, al
+	jmp	L(e5)
+E(L(load_table5), X86_RET_SINT16)
+	movsx	eax, ax
+	jmp	L(e5)
+E(L(load_table5), X86_RET_UINT8)
+	movzx	eax, al
+	jmp	L(e5)
+E(L(load_table5), X86_RET_UINT16)
+	movzx	eax, ax
+	jmp	L(e5)
+E(L(load_table5), X86_RET_INT64)
+	mov 	edx, [esp+16+4]			/* high half; low half is already in eax */
+	jmp	L(e5)
+E(L(load_table5), X86_RET_INT32)
+	nop
+	/* fallthru */
+E(L(load_table5), X86_RET_VOID)
+L(e5):
+	add 	esp, raw_closure_T_FS
+L(UW48):
+	// cfi_adjust_cfa_offset(-raw_closure_T_FS)
+	/* Remove the extra %ecx argument we pushed.  */
+	ret	4
+L(UW49):
+	// cfi_adjust_cfa_offset(raw_closure_T_FS)
+E(L(load_table5), X86_RET_STRUCTPOP)
+	add 	esp, raw_closure_T_FS
+L(UW50):
+	// cfi_adjust_cfa_offset(-raw_closure_T_FS)
+	ret	8
+L(UW51):
+	// cfi_adjust_cfa_offset(raw_closure_T_FS)
+E(L(load_table5), X86_RET_STRUCTARG)
+	jmp	L(e5)
+E(L(load_table5), X86_RET_STRUCT_1B)
+	movzx	eax, al
+	jmp	L(e5)
+E(L(load_table5), X86_RET_STRUCT_2B)
+	movzx	eax, ax
+	jmp	L(e5)
+
+	/* Fill out the table so that bad values are predictable.  */
+E(L(load_table5), X86_RET_UNUSED14)
+	int 3
+E(L(load_table5), X86_RET_UNUSED15)
+	int 3
+
+L(UW52):
+	// cfi_endproc
+ENDF(ffi_closure_raw_THISCALL)
+
+#endif /* !FFI_NO_RAW_API */
+
+#ifdef X86_DARWIN
+# define COMDAT(X)							\
+        .section __TEXT,__text,coalesced,pure_instructions;		\
+        .weak_definition X;						\
+        FFI_HIDDEN(X)
+#elif defined __ELF__ && !(defined(__sun__) && defined(__svr4__))
+# define COMDAT(X)							\
+	.section .text.X,"axG",@progbits,X,comdat;			\
+	PUBLIC	X;							\
+	FFI_HIDDEN(X)
+#else
+# define COMDAT(X)	/* all other targets: expands to nothing */
+#endif /* COMDAT */
+
+// #if defined(__PIC__)
+// 	COMDAT(C(__x86.get_pc_thunk.bx))
+// C(__x86.get_pc_thunk.bx):
+// 	movl	(%esp), %ebx
+// 	ret
+// ENDF(C(__x86.get_pc_thunk.bx))
+// # if defined X86_DARWIN || defined HAVE_HIDDEN_VISIBILITY_ATTRIBUTE
+// 	COMDAT(C(__x86.get_pc_thunk.dx))
+// C(__x86.get_pc_thunk.dx):
+// 	movl	(%esp), %edx
+// 	ret
+// ENDF(C(__x86.get_pc_thunk.dx))
+// #endif /* DARWIN || HIDDEN */
+// #endif /* __PIC__ */
+
+#if 0
+/* Sadly, OSX cctools-as doesn't understand .cfi directives at all.  */
+
+#ifdef __APPLE__
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EHFrame0:
+#elif defined(X86_WIN32)
+.section .eh_frame,"r"
+#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
+.section .eh_frame,EH_FRAME_FLAGS,@unwind
+#else
+.section .eh_frame,EH_FRAME_FLAGS,@progbits
+#endif
+
+#ifdef HAVE_AS_X86_PCREL
+# define PCREL(X)	X - .
+#else
+# define PCREL(X)	X@rel
+#endif
+
+/* Simplify advancing between labels.  Assume DW_CFA_advance_loc1 fits.  */
+#define ADV(N, P)	.byte 2, L(N)-L(P)
+
+	.balign 4
+L(CIE):
+	.set	L(set0),L(ECIE)-L(SCIE)
+	.long	L(set0)			/* CIE Length */
+L(SCIE):
+	.long	0			/* CIE Identifier Tag */
+	.byte	1			/* CIE Version */
+	.ascii	"zR\0"			/* CIE Augmentation */
+	.byte	1			/* CIE Code Alignment Factor */
+	.byte	0x7c			/* CIE Data Alignment Factor */
+	.byte	0x8			/* CIE RA Column */
+	.byte	1			/* Augmentation size */
+	.byte	0x1b			/* FDE Encoding (pcrel sdata4) */
+	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp offset 4 */
+	.byte	0x80+8, 1		/* DW_CFA_offset, %eip offset 1*-4 */
+	.balign 4
+L(ECIE):
+
+	.set	L(set1),L(EFDE1)-L(SFDE1)
+	.long	L(set1)			/* FDE Length */
+L(SFDE1):
+	.long	L(SFDE1)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW0))		/* Initial location */
+	.long	L(UW5)-L(UW0)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW1, UW0)
+	.byte	0xc, 5, 8		/* DW_CFA_def_cfa, %ebp 8 */
+	.byte	0x80+5, 2		/* DW_CFA_offset, %ebp 2*-4 */
+	ADV(UW2, UW1)
+	.byte	0x80+3, 0		/* DW_CFA_offset, %ebx 0*-4 */
+	ADV(UW3, UW2)
+	.byte	0xa			/* DW_CFA_remember_state */
+	.byte	0xc, 4, 4		/* DW_CFA_def_cfa, %esp 4 */
+	.byte	0xc0+3			/* DW_CFA_restore, %ebx */
+	.byte	0xc0+5			/* DW_CFA_restore, %ebp */
+	ADV(UW4, UW3)
+	.byte	0xb			/* DW_CFA_restore_state */
+	.balign	4
+L(EFDE1):
+
+	.set	L(set2),L(EFDE2)-L(SFDE2)
+	.long	L(set2)			/* FDE Length */
+L(SFDE2):
+	.long	L(SFDE2)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW6))		/* Initial location */
+	.long	L(UW8)-L(UW6)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW7, UW6)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+	.balign	4
+L(EFDE2):
+
+	.set	L(set3),L(EFDE3)-L(SFDE3)
+	.long	L(set3)			/* FDE Length */
+L(SFDE3):
+	.long	L(SFDE3)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW9))		/* Initial location */
+	.long	L(UW11)-L(UW9)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW10, UW9)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+	.balign	4
+L(EFDE3):
+
+	.set	L(set4),L(EFDE4)-L(SFDE4)
+	.long	L(set4)			/* FDE Length */
+L(SFDE4):
+	.long	L(SFDE4)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW12))		/* Initial location */
+	.long	L(UW20)-L(UW12)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW13, UW12)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
+	ADV(UW14, UW13)
+	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */
+	ADV(UW15, UW14)
+	.byte	0xc0+3			/* DW_CFA_restore %ebx */
+	ADV(UW16, UW15)
+#else
+	ADV(UW16, UW13)
+#endif
+	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
+	ADV(UW17, UW16)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+	ADV(UW18, UW17)
+	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
+	ADV(UW19, UW18)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+	.balign	4
+L(EFDE4):
+
+	.set	L(set5),L(EFDE5)-L(SFDE5)
+	.long	L(set5)			/* FDE Length */
+L(SFDE5):
+	.long	L(SFDE5)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW21))		/* Initial location */
+	.long	L(UW23)-L(UW21)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW22, UW21)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+	.balign	4
+L(EFDE5):
+
+	.set	L(set6),L(EFDE6)-L(SFDE6)
+	.long	L(set6)			/* FDE Length */
+L(SFDE6):
+	.long	L(SFDE6)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW24))		/* Initial location */
+	.long	L(UW26)-L(UW24)		/* Address range */
+	.byte	0			/* Augmentation size */
+	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
+	.byte	0x80+8, 2		/* DW_CFA_offset %eip, 2*-4 */
+	ADV(UW25, UW24)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+	.balign	4
+L(EFDE6):
+
+	.set	L(set7),L(EFDE7)-L(SFDE7)
+	.long	L(set7)			/* FDE Length */
+L(SFDE7):
+	.long	L(SFDE7)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW27))		/* Initial location */
+	.long	L(UW31)-L(UW27)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW28, UW27)
+	.byte	0xe, closure_FS+4	/* DW_CFA_def_cfa_offset */
+#ifdef FFI_CLOSURE_CALL_INNER_SAVE_EBX
+	ADV(UW29, UW28)
+	.byte	0x80+3, (40-(closure_FS+4))/-4  /* DW_CFA_offset %ebx */
+	ADV(UW30, UW29)
+	.byte	0xc0+3			/* DW_CFA_restore %ebx */
+#endif
+	.balign	4
+L(EFDE7):
+
+#if !FFI_NO_RAW_API
+	.set	L(set8),L(EFDE8)-L(SFDE8)
+	.long	L(set8)			/* FDE Length */
+L(SFDE8):
+	.long	L(SFDE8)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW32))		/* Initial location */
+	.long	L(UW40)-L(UW32)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW33, UW32)
+	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
+	ADV(UW34, UW33)
+	.byte	0x80+3, 2		/* DW_CFA_offset %ebx 2*-4 */
+	ADV(UW35, UW34)
+	.byte	0xc0+3			/* DW_CFA_restore %ebx */
+	ADV(UW36, UW35)
+	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
+	ADV(UW37, UW36)
+	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
+	ADV(UW38, UW37)
+	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
+	ADV(UW39, UW38)
+	.byte	0xe, raw_closure_S_FS+4	/* DW_CFA_def_cfa_offset */
+	.balign	4
+L(EFDE8):
+
+	.set	L(set9),L(EFDE9)-L(SFDE9)
+	.long	L(set9)			/* FDE Length */
+L(SFDE9):
+	.long	L(SFDE9)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW41))		/* Initial location */
+	.long	L(UW52)-L(UW41)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW42, UW41)
+	.byte	0xe, 0			/* DW_CFA_def_cfa_offset */
+	.byte	0x9, 8, 2		/* DW_CFA_register %eip, %edx */
+	ADV(UW43, UW42)
+	.byte	0xe, 4			/* DW_CFA_def_cfa_offset */
+	ADV(UW44, UW43)
+	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
+	.byte	0x80+8, 2		/* DW_CFA_offset %eip 2*-4 */
+	ADV(UW45, UW44)
+	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
+	ADV(UW46, UW45)
+	.byte	0x80+3, 3		/* DW_CFA_offset %ebx 3*-4 */
+	ADV(UW47, UW46)
+	.byte	0xc0+3			/* DW_CFA_restore %ebx */
+	ADV(UW48, UW47)
+	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
+	ADV(UW49, UW48)
+	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
+	ADV(UW50, UW49)
+	.byte	0xe, 8			/* DW_CFA_def_cfa_offset */
+	ADV(UW51, UW50)
+	.byte	0xe, raw_closure_T_FS+8	/* DW_CFA_def_cfa_offset */
+	.balign	4
+L(EFDE9):
+#endif /* !FFI_NO_RAW_API */
+
+#ifdef _WIN32
+	.def	 @feat.00;
+	.scl	3;
+	.type	0;
+	.endef
+	PUBLIC	@feat.00
+@feat.00 = 1
+#endif
+
+#endif /* 0: hand-written .eh_frame data, disabled */
+#endif /* ifndef __x86_64__ */
+
+#if defined __ELF__ && defined __linux__
+	.section	.note.GNU-stack,"",@progbits
+#endif /* __ELF__ && __linux__ */
+#endif
+
+END
+\ No newline at end of file
diff --git a/contrib/restricted/libffi/src/x86/unix64.S b/contrib/restricted/libffi/src/x86/unix64.S
index ae81f77d09..41563f5c60 100644
--- a/contrib/restricted/libffi/src/x86/unix64.S
+++ b/contrib/restricted/libffi/src/x86/unix64.S
@@ -30,21 +30,21 @@
 #define LIBFFI_ASM	
 #include <fficonfig.h>
 #include <ffi.h>
-#include "internal64.h" 
-#include "asmnames.h" 
-
-	.text 
-
-/* This macro allows the safe creation of jump tables without an 
-   actual table.  The entry points into the table are all 8 bytes. 
-   The use of ORG asserts that we're at the correct location.  */ 
-/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */ 
-#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) 
-# define E(BASE, X)	.balign 8 
-#else 
-# define E(BASE, X)	.balign 8; .org BASE + X * 8 
-#endif 
- 
+#include "internal64.h"
+#include "asmnames.h"
+
+	.text
+
+/* This macro allows the safe creation of jump tables without an
+   actual table.  The entry points into the table are all 8 bytes.
+   The use of ORG asserts that we're at the correct location.  */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X)	.balign 8
+#else
+# define E(BASE, X)	.balign 8; .org BASE + X * 8
+#endif
+
 /* ffi_call_unix64 (void *args, unsigned long bytes, unsigned flags,
 	            void *raddr, void (*fnaddr)(void));
 
@@ -52,12 +52,12 @@
    for this function.  This has been allocated by ffi_call.  We also
    deallocate some of the stack that has been alloca'd.  */
 
-	.balign	8 
-	.globl	C(ffi_call_unix64) 
-	FFI_HIDDEN(C(ffi_call_unix64)) 
+	.balign	8
+	.globl	C(ffi_call_unix64)
+	FFI_HIDDEN(C(ffi_call_unix64))
 
-C(ffi_call_unix64): 
-L(UW0): 
+C(ffi_call_unix64):
+L(UW0):
 	movq	(%rsp), %r10		/* Load return address.  */
 	leaq	(%rdi, %rsi), %rax	/* Find local stack base.  */
 	movq	%rdx, (%rax)		/* Save flags.  */
@@ -65,37 +65,37 @@ L(UW0):
 	movq	%rbp, 16(%rax)		/* Save old frame pointer.  */
 	movq	%r10, 24(%rax)		/* Relocate return address.  */
 	movq	%rax, %rbp		/* Finalize local stack frame.  */
- 
-	/* New stack frame based off rbp.  This is a itty bit of unwind 
-	   trickery in that the CFA *has* changed.  There is no easy way 
-	   to describe it correctly on entry to the function.  Fortunately, 
-	   it doesn't matter too much since at all points we can correctly 
-	   unwind back to ffi_call.  Note that the location to which we 
-	   moved the return address is (the new) CFA-8, so from the 
-	   perspective of the unwind info, it hasn't moved.  */ 
-L(UW1): 
-	/* cfi_def_cfa(%rbp, 32) */ 
-	/* cfi_rel_offset(%rbp, 16) */ 
- 
+
+	/* New stack frame based off rbp.  This is a itty bit of unwind
+	   trickery in that the CFA *has* changed.  There is no easy way
+	   to describe it correctly on entry to the function.  Fortunately,
+	   it doesn't matter too much since at all points we can correctly
+	   unwind back to ffi_call.  Note that the location to which we
+	   moved the return address is (the new) CFA-8, so from the
+	   perspective of the unwind info, it hasn't moved.  */
+L(UW1):
+	/* cfi_def_cfa(%rbp, 32) */
+	/* cfi_rel_offset(%rbp, 16) */
+
 	movq	%rdi, %r10		/* Save a copy of the register area. */
 	movq	%r8, %r11		/* Save a copy of the target fn.  */
 	movl	%r9d, %eax		/* Set number of SSE registers.  */
 
 	/* Load up all argument registers.  */
 	movq	(%r10), %rdi
-	movq	0x08(%r10), %rsi 
-	movq	0x10(%r10), %rdx 
-	movq	0x18(%r10), %rcx 
-	movq	0x20(%r10), %r8 
-	movq	0x28(%r10), %r9 
-	movl	0xb0(%r10), %eax 
+	movq	0x08(%r10), %rsi
+	movq	0x10(%r10), %rdx
+	movq	0x18(%r10), %rcx
+	movq	0x20(%r10), %r8
+	movq	0x28(%r10), %r9
+	movl	0xb0(%r10), %eax
 	testl	%eax, %eax
-	jnz	L(load_sse) 
-L(ret_from_load_sse): 
+	jnz	L(load_sse)
+L(ret_from_load_sse):
 
-	/* Deallocate the reg arg area, except for r10, then load via pop.  */ 
-	leaq	0xb8(%r10), %rsp 
-	popq	%r10 
+	/* Deallocate the reg arg area, except for r10, then load via pop.  */
+	leaq	0xb8(%r10), %rsp
+	popq	%r10
 
 	/* Call the user function.  */
 	call	*%r11
@@ -106,460 +106,460 @@ L(ret_from_load_sse):
 	movq	0(%rbp), %rcx		/* Reload flags.  */
 	movq	8(%rbp), %rdi		/* Reload raddr.  */
 	movq	16(%rbp), %rbp		/* Reload old frame pointer.  */
-L(UW2): 
-	/* cfi_remember_state */ 
-	/* cfi_def_cfa(%rsp, 8) */ 
-	/* cfi_restore(%rbp) */ 
+L(UW2):
+	/* cfi_remember_state */
+	/* cfi_def_cfa(%rsp, 8) */
+	/* cfi_restore(%rbp) */
 
 	/* The first byte of the flags contains the FFI_TYPE.  */
-	cmpb	$UNIX64_RET_LAST, %cl 
+	cmpb	$UNIX64_RET_LAST, %cl
 	movzbl	%cl, %r10d
-	leaq	L(store_table)(%rip), %r11 
-	ja	L(sa) 
-	leaq	(%r11, %r10, 8), %r10 
- 
-	/* Prep for the structure cases: scratch area in redzone.  */ 
-	leaq	-20(%rsp), %rsi 
+	leaq	L(store_table)(%rip), %r11
+	ja	L(sa)
+	leaq	(%r11, %r10, 8), %r10
+
+	/* Prep for the structure cases: scratch area in redzone.  */
+	leaq	-20(%rsp), %rsi
 	jmp	*%r10
 
-	.balign	8 
-L(store_table): 
-E(L(store_table), UNIX64_RET_VOID) 
+	.balign	8
+L(store_table):
+E(L(store_table), UNIX64_RET_VOID)
 	ret
-E(L(store_table), UNIX64_RET_UINT8) 
-	movzbl	%al, %eax 
+E(L(store_table), UNIX64_RET_UINT8)
+	movzbl	%al, %eax
 	movq	%rax, (%rdi)
 	ret
-E(L(store_table), UNIX64_RET_UINT16) 
-	movzwl	%ax, %eax 
-	movq	%rax, (%rdi) 
-	ret 
-E(L(store_table), UNIX64_RET_UINT32) 
-	movl	%eax, %eax 
-	movq	%rax, (%rdi) 
-	ret 
-E(L(store_table), UNIX64_RET_SINT8) 
+E(L(store_table), UNIX64_RET_UINT16)
+	movzwl	%ax, %eax
+	movq	%rax, (%rdi)
+	ret
+E(L(store_table), UNIX64_RET_UINT32)
+	movl	%eax, %eax
+	movq	%rax, (%rdi)
+	ret
+E(L(store_table), UNIX64_RET_SINT8)
 	movsbq	%al, %rax
 	movq	%rax, (%rdi)
 	ret
-E(L(store_table), UNIX64_RET_SINT16) 
+E(L(store_table), UNIX64_RET_SINT16)
 	movswq	%ax, %rax
 	movq	%rax, (%rdi)
 	ret
-E(L(store_table), UNIX64_RET_SINT32) 
+E(L(store_table), UNIX64_RET_SINT32)
 	cltq
 	movq	%rax, (%rdi)
 	ret
-E(L(store_table), UNIX64_RET_INT64) 
+E(L(store_table), UNIX64_RET_INT64)
 	movq	%rax, (%rdi)
 	ret
-E(L(store_table), UNIX64_RET_XMM32) 
-	movd	%xmm0, (%rdi) 
+E(L(store_table), UNIX64_RET_XMM32)
+	movd	%xmm0, (%rdi)
+	ret
+E(L(store_table), UNIX64_RET_XMM64)
+	movq	%xmm0, (%rdi)
 	ret
-E(L(store_table), UNIX64_RET_XMM64) 
-	movq	%xmm0, (%rdi) 
+E(L(store_table), UNIX64_RET_X87)
+	fstpt	(%rdi)
 	ret
-E(L(store_table), UNIX64_RET_X87) 
+E(L(store_table), UNIX64_RET_X87_2)
 	fstpt	(%rdi)
+	fstpt	16(%rdi)
 	ret
-E(L(store_table), UNIX64_RET_X87_2) 
-	fstpt	(%rdi) 
-	fstpt	16(%rdi) 
-	ret 
-E(L(store_table), UNIX64_RET_ST_XMM0_RAX) 
-	movq	%rax, 8(%rsi) 
-	jmp	L(s3) 
-E(L(store_table), UNIX64_RET_ST_RAX_XMM0) 
-	movq	%xmm0, 8(%rsi) 
-	jmp	L(s2) 
-E(L(store_table), UNIX64_RET_ST_XMM0_XMM1) 
-	movq	%xmm1, 8(%rsi) 
-	jmp	L(s3) 
-E(L(store_table), UNIX64_RET_ST_RAX_RDX) 
-	movq	%rdx, 8(%rsi) 
-L(s2): 
+E(L(store_table), UNIX64_RET_ST_XMM0_RAX)
+	movq	%rax, 8(%rsi)
+	jmp	L(s3)
+E(L(store_table), UNIX64_RET_ST_RAX_XMM0)
+	movq	%xmm0, 8(%rsi)
+	jmp	L(s2)
+E(L(store_table), UNIX64_RET_ST_XMM0_XMM1)
+	movq	%xmm1, 8(%rsi)
+	jmp	L(s3)
+E(L(store_table), UNIX64_RET_ST_RAX_RDX)
+	movq	%rdx, 8(%rsi)
+L(s2):
 	movq	%rax, (%rsi)
-	shrl	$UNIX64_SIZE_SHIFT, %ecx 
+	shrl	$UNIX64_SIZE_SHIFT, %ecx
+	rep movsb
+	ret
+	.balign 8
+L(s3):
+	movq	%xmm0, (%rsi)
+	shrl	$UNIX64_SIZE_SHIFT, %ecx
 	rep movsb
 	ret
-	.balign 8 
-L(s3): 
-	movq	%xmm0, (%rsi) 
-	shrl	$UNIX64_SIZE_SHIFT, %ecx 
-	rep movsb 
-	ret 
-
-L(sa):	call	PLT(C(abort)) 
- 
+
+L(sa):	call	PLT(C(abort))
+
 	/* Many times we can avoid loading any SSE registers at all.
 	   It's not worth an indirect jump to load the exact set of
 	   SSE registers needed; zero or all is a good compromise.  */
-	.balign 2 
-L(UW3): 
-	/* cfi_restore_state */ 
-L(load_sse): 
-	movdqa	0x30(%r10), %xmm0 
-	movdqa	0x40(%r10), %xmm1 
-	movdqa	0x50(%r10), %xmm2 
-	movdqa	0x60(%r10), %xmm3 
-	movdqa	0x70(%r10), %xmm4 
-	movdqa	0x80(%r10), %xmm5 
-	movdqa	0x90(%r10), %xmm6 
-	movdqa	0xa0(%r10), %xmm7 
-	jmp	L(ret_from_load_sse) 
-
-L(UW4): 
-ENDF(C(ffi_call_unix64)) 
-
-/* 6 general registers, 8 vector registers, 
-   32 bytes of rvalue, 8 bytes of alignment.  */ 
-#define ffi_closure_OFS_G	0 
-#define ffi_closure_OFS_V	(6*8) 
-#define ffi_closure_OFS_RVALUE	(ffi_closure_OFS_V + 8*16) 
-#define ffi_closure_FS		(ffi_closure_OFS_RVALUE + 32 + 8) 
-
-/* The location of rvalue within the red zone after deallocating the frame.  */ 
-#define ffi_closure_RED_RVALUE	(ffi_closure_OFS_RVALUE - ffi_closure_FS) 
-
-	.balign	2 
-	.globl	C(ffi_closure_unix64_sse) 
-	FFI_HIDDEN(C(ffi_closure_unix64_sse)) 
-
-C(ffi_closure_unix64_sse): 
-L(UW5): 
-	subq	$ffi_closure_FS, %rsp 
-L(UW6): 
-	/* cfi_adjust_cfa_offset(ffi_closure_FS) */ 
- 
-	movdqa	%xmm0, ffi_closure_OFS_V+0x00(%rsp) 
-	movdqa	%xmm1, ffi_closure_OFS_V+0x10(%rsp) 
-	movdqa	%xmm2, ffi_closure_OFS_V+0x20(%rsp) 
-	movdqa	%xmm3, ffi_closure_OFS_V+0x30(%rsp) 
-	movdqa	%xmm4, ffi_closure_OFS_V+0x40(%rsp) 
-	movdqa	%xmm5, ffi_closure_OFS_V+0x50(%rsp) 
-	movdqa	%xmm6, ffi_closure_OFS_V+0x60(%rsp) 
-	movdqa	%xmm7, ffi_closure_OFS_V+0x70(%rsp) 
-	jmp	L(sse_entry1) 
- 
-L(UW7): 
-ENDF(C(ffi_closure_unix64_sse)) 
- 
-	.balign	2 
-	.globl	C(ffi_closure_unix64) 
-	FFI_HIDDEN(C(ffi_closure_unix64)) 
- 
-C(ffi_closure_unix64): 
-L(UW8): 
-	subq	$ffi_closure_FS, %rsp 
-L(UW9): 
-	/* cfi_adjust_cfa_offset(ffi_closure_FS) */ 
-L(sse_entry1): 
-	movq	%rdi, ffi_closure_OFS_G+0x00(%rsp) 
-	movq    %rsi, ffi_closure_OFS_G+0x08(%rsp) 
-	movq    %rdx, ffi_closure_OFS_G+0x10(%rsp) 
-	movq    %rcx, ffi_closure_OFS_G+0x18(%rsp) 
-	movq    %r8,  ffi_closure_OFS_G+0x20(%rsp) 
-	movq    %r9,  ffi_closure_OFS_G+0x28(%rsp) 
- 
-#ifdef __ILP32__ 
-	movl	FFI_TRAMPOLINE_SIZE(%r10), %edi		/* Load cif */ 
-	movl	FFI_TRAMPOLINE_SIZE+4(%r10), %esi	/* Load fun */ 
-	movl	FFI_TRAMPOLINE_SIZE+8(%r10), %edx	/* Load user_data */ 
-#else 
-	movq	FFI_TRAMPOLINE_SIZE(%r10), %rdi		/* Load cif */ 
-	movq	FFI_TRAMPOLINE_SIZE+8(%r10), %rsi	/* Load fun */ 
-	movq	FFI_TRAMPOLINE_SIZE+16(%r10), %rdx	/* Load user_data */ 
-#endif 
-L(do_closure): 
-	leaq	ffi_closure_OFS_RVALUE(%rsp), %rcx	/* Load rvalue */ 
-	movq	%rsp, %r8				/* Load reg_args */ 
-	leaq	ffi_closure_FS+8(%rsp), %r9		/* Load argp */ 
-	call	PLT(C(ffi_closure_unix64_inner)) 
- 
+	.balign 2
+L(UW3):
+	/* cfi_restore_state */
+L(load_sse):
+	movdqa	0x30(%r10), %xmm0
+	movdqa	0x40(%r10), %xmm1
+	movdqa	0x50(%r10), %xmm2
+	movdqa	0x60(%r10), %xmm3
+	movdqa	0x70(%r10), %xmm4
+	movdqa	0x80(%r10), %xmm5
+	movdqa	0x90(%r10), %xmm6
+	movdqa	0xa0(%r10), %xmm7
+	jmp	L(ret_from_load_sse)
+
+L(UW4):
+ENDF(C(ffi_call_unix64))
+
+/* 6 general registers, 8 vector registers,
+   32 bytes of rvalue, 8 bytes of alignment.  */
+#define ffi_closure_OFS_G	0
+#define ffi_closure_OFS_V	(6*8)
+#define ffi_closure_OFS_RVALUE	(ffi_closure_OFS_V + 8*16)
+#define ffi_closure_FS		(ffi_closure_OFS_RVALUE + 32 + 8)
+
+/* The location of rvalue within the red zone after deallocating the frame.  */
+#define ffi_closure_RED_RVALUE	(ffi_closure_OFS_RVALUE - ffi_closure_FS)
+
+	.balign	2
+	.globl	C(ffi_closure_unix64_sse)
+	FFI_HIDDEN(C(ffi_closure_unix64_sse))
+
+C(ffi_closure_unix64_sse):
+L(UW5):
+	subq	$ffi_closure_FS, %rsp
+L(UW6):
+	/* cfi_adjust_cfa_offset(ffi_closure_FS) */
+
+	movdqa	%xmm0, ffi_closure_OFS_V+0x00(%rsp)
+	movdqa	%xmm1, ffi_closure_OFS_V+0x10(%rsp)
+	movdqa	%xmm2, ffi_closure_OFS_V+0x20(%rsp)
+	movdqa	%xmm3, ffi_closure_OFS_V+0x30(%rsp)
+	movdqa	%xmm4, ffi_closure_OFS_V+0x40(%rsp)
+	movdqa	%xmm5, ffi_closure_OFS_V+0x50(%rsp)
+	movdqa	%xmm6, ffi_closure_OFS_V+0x60(%rsp)
+	movdqa	%xmm7, ffi_closure_OFS_V+0x70(%rsp)
+	jmp	L(sse_entry1)
+
+L(UW7):
+ENDF(C(ffi_closure_unix64_sse))
+
+	.balign	2
+	.globl	C(ffi_closure_unix64)
+	FFI_HIDDEN(C(ffi_closure_unix64))
+
+C(ffi_closure_unix64):
+L(UW8):
+	subq	$ffi_closure_FS, %rsp
+L(UW9):
+	/* cfi_adjust_cfa_offset(ffi_closure_FS) */
+L(sse_entry1):
+	movq	%rdi, ffi_closure_OFS_G+0x00(%rsp)
+	movq    %rsi, ffi_closure_OFS_G+0x08(%rsp)
+	movq    %rdx, ffi_closure_OFS_G+0x10(%rsp)
+	movq    %rcx, ffi_closure_OFS_G+0x18(%rsp)
+	movq    %r8,  ffi_closure_OFS_G+0x20(%rsp)
+	movq    %r9,  ffi_closure_OFS_G+0x28(%rsp)
+
+#ifdef __ILP32__
+	movl	FFI_TRAMPOLINE_SIZE(%r10), %edi		/* Load cif */
+	movl	FFI_TRAMPOLINE_SIZE+4(%r10), %esi	/* Load fun */
+	movl	FFI_TRAMPOLINE_SIZE+8(%r10), %edx	/* Load user_data */
+#else
+	movq	FFI_TRAMPOLINE_SIZE(%r10), %rdi		/* Load cif */
+	movq	FFI_TRAMPOLINE_SIZE+8(%r10), %rsi	/* Load fun */
+	movq	FFI_TRAMPOLINE_SIZE+16(%r10), %rdx	/* Load user_data */
+#endif
+L(do_closure):
+	leaq	ffi_closure_OFS_RVALUE(%rsp), %rcx	/* Load rvalue */
+	movq	%rsp, %r8				/* Load reg_args */
+	leaq	ffi_closure_FS+8(%rsp), %r9		/* Load argp */
+	call	PLT(C(ffi_closure_unix64_inner))
+
 	/* Deallocate stack frame early; return value is now in redzone.  */
-	addq	$ffi_closure_FS, %rsp 
-L(UW10): 
-	/* cfi_adjust_cfa_offset(-ffi_closure_FS) */ 
+	addq	$ffi_closure_FS, %rsp
+L(UW10):
+	/* cfi_adjust_cfa_offset(-ffi_closure_FS) */
 
 	/* The first byte of the return value contains the FFI_TYPE.  */
-	cmpb	$UNIX64_RET_LAST, %al 
+	cmpb	$UNIX64_RET_LAST, %al
 	movzbl	%al, %r10d
-	leaq	L(load_table)(%rip), %r11 
-	ja	L(la) 
-	leaq	(%r11, %r10, 8), %r10 
-	leaq	ffi_closure_RED_RVALUE(%rsp), %rsi 
+	leaq	L(load_table)(%rip), %r11
+	ja	L(la)
+	leaq	(%r11, %r10, 8), %r10
+	leaq	ffi_closure_RED_RVALUE(%rsp), %rsi
 	jmp	*%r10
 
-	.balign	8 
-L(load_table): 
-E(L(load_table), UNIX64_RET_VOID) 
+	.balign	8
+L(load_table):
+E(L(load_table), UNIX64_RET_VOID)
 	ret
-E(L(load_table), UNIX64_RET_UINT8) 
-	movzbl	(%rsi), %eax 
+E(L(load_table), UNIX64_RET_UINT8)
+	movzbl	(%rsi), %eax
 	ret
-E(L(load_table), UNIX64_RET_UINT16) 
-	movzwl	(%rsi), %eax 
+E(L(load_table), UNIX64_RET_UINT16)
+	movzwl	(%rsi), %eax
 	ret
-E(L(load_table), UNIX64_RET_UINT32) 
-	movl	(%rsi), %eax 
+E(L(load_table), UNIX64_RET_UINT32)
+	movl	(%rsi), %eax
 	ret
-E(L(load_table), UNIX64_RET_SINT8) 
-	movsbl	(%rsi), %eax 
+E(L(load_table), UNIX64_RET_SINT8)
+	movsbl	(%rsi), %eax
 	ret
-E(L(load_table), UNIX64_RET_SINT16) 
-	movswl	(%rsi), %eax 
+E(L(load_table), UNIX64_RET_SINT16)
+	movswl	(%rsi), %eax
 	ret
-E(L(load_table), UNIX64_RET_SINT32) 
-	movl	(%rsi), %eax 
+E(L(load_table), UNIX64_RET_SINT32)
+	movl	(%rsi), %eax
 	ret
-E(L(load_table), UNIX64_RET_INT64) 
-	movq	(%rsi), %rax 
+E(L(load_table), UNIX64_RET_INT64)
+	movq	(%rsi), %rax
 	ret
-E(L(load_table), UNIX64_RET_XMM32) 
-	movd	(%rsi), %xmm0 
+E(L(load_table), UNIX64_RET_XMM32)
+	movd	(%rsi), %xmm0
 	ret
-E(L(load_table), UNIX64_RET_XMM64) 
-	movq	(%rsi), %xmm0 
-	ret 
-E(L(load_table), UNIX64_RET_X87) 
-	fldt	(%rsi) 
-	ret 
-E(L(load_table), UNIX64_RET_X87_2) 
-	fldt	16(%rsi) 
-	fldt	(%rsi) 
-	ret 
-E(L(load_table), UNIX64_RET_ST_XMM0_RAX) 
-	movq	8(%rsi), %rax 
-	jmp	L(l3) 
-E(L(load_table), UNIX64_RET_ST_RAX_XMM0) 
-	movq	8(%rsi), %xmm0 
-	jmp	L(l2) 
-E(L(load_table), UNIX64_RET_ST_XMM0_XMM1) 
-	movq	8(%rsi), %xmm1 
-	jmp	L(l3) 
-E(L(load_table), UNIX64_RET_ST_RAX_RDX) 
-	movq	8(%rsi), %rdx 
-L(l2): 
-	movq	(%rsi), %rax 
-	ret 
-	.balign	8 
-L(l3): 
-	movq	(%rsi), %xmm0 
-	ret 
-
-L(la):	call	PLT(C(abort)) 
-
-L(UW11): 
-ENDF(C(ffi_closure_unix64)) 
-
-	.balign	2 
-	.globl	C(ffi_go_closure_unix64_sse) 
-	FFI_HIDDEN(C(ffi_go_closure_unix64_sse)) 
-
-C(ffi_go_closure_unix64_sse): 
-L(UW12): 
-	subq	$ffi_closure_FS, %rsp 
-L(UW13): 
-	/* cfi_adjust_cfa_offset(ffi_closure_FS) */ 
- 
-	movdqa	%xmm0, ffi_closure_OFS_V+0x00(%rsp) 
-	movdqa	%xmm1, ffi_closure_OFS_V+0x10(%rsp) 
-	movdqa	%xmm2, ffi_closure_OFS_V+0x20(%rsp) 
-	movdqa	%xmm3, ffi_closure_OFS_V+0x30(%rsp) 
-	movdqa	%xmm4, ffi_closure_OFS_V+0x40(%rsp) 
-	movdqa	%xmm5, ffi_closure_OFS_V+0x50(%rsp) 
-	movdqa	%xmm6, ffi_closure_OFS_V+0x60(%rsp) 
-	movdqa	%xmm7, ffi_closure_OFS_V+0x70(%rsp) 
-	jmp	L(sse_entry2) 
- 
-L(UW14): 
-ENDF(C(ffi_go_closure_unix64_sse)) 
- 
-	.balign	2 
-	.globl	C(ffi_go_closure_unix64) 
-	FFI_HIDDEN(C(ffi_go_closure_unix64)) 
- 
-C(ffi_go_closure_unix64): 
-L(UW15): 
-	subq	$ffi_closure_FS, %rsp 
-L(UW16): 
-	/* cfi_adjust_cfa_offset(ffi_closure_FS) */ 
-L(sse_entry2): 
-	movq	%rdi, ffi_closure_OFS_G+0x00(%rsp) 
-	movq    %rsi, ffi_closure_OFS_G+0x08(%rsp) 
-	movq    %rdx, ffi_closure_OFS_G+0x10(%rsp) 
-	movq    %rcx, ffi_closure_OFS_G+0x18(%rsp) 
-	movq    %r8,  ffi_closure_OFS_G+0x20(%rsp) 
-	movq    %r9,  ffi_closure_OFS_G+0x28(%rsp) 
- 
-#ifdef __ILP32__ 
-	movl	4(%r10), %edi		/* Load cif */ 
-	movl	8(%r10), %esi		/* Load fun */ 
-	movl	%r10d, %edx		/* Load closure (user_data) */ 
+E(L(load_table), UNIX64_RET_XMM64)
+	movq	(%rsi), %xmm0
+	ret
+E(L(load_table), UNIX64_RET_X87)
+	fldt	(%rsi)
+	ret
+E(L(load_table), UNIX64_RET_X87_2)
+	fldt	16(%rsi)
+	fldt	(%rsi)
+	ret
+E(L(load_table), UNIX64_RET_ST_XMM0_RAX)
+	movq	8(%rsi), %rax
+	jmp	L(l3)
+E(L(load_table), UNIX64_RET_ST_RAX_XMM0)
+	movq	8(%rsi), %xmm0
+	jmp	L(l2)
+E(L(load_table), UNIX64_RET_ST_XMM0_XMM1)
+	movq	8(%rsi), %xmm1
+	jmp	L(l3)
+E(L(load_table), UNIX64_RET_ST_RAX_RDX)
+	movq	8(%rsi), %rdx
+L(l2):
+	movq	(%rsi), %rax
+	ret
+	.balign	8
+L(l3):
+	movq	(%rsi), %xmm0
+	ret
+
+L(la):	call	PLT(C(abort))
+
+L(UW11):
+ENDF(C(ffi_closure_unix64))
+
+	.balign	2
+	.globl	C(ffi_go_closure_unix64_sse)
+	FFI_HIDDEN(C(ffi_go_closure_unix64_sse))
+
+C(ffi_go_closure_unix64_sse):
+L(UW12):
+	subq	$ffi_closure_FS, %rsp
+L(UW13):
+	/* cfi_adjust_cfa_offset(ffi_closure_FS) */
+
+	movdqa	%xmm0, ffi_closure_OFS_V+0x00(%rsp)
+	movdqa	%xmm1, ffi_closure_OFS_V+0x10(%rsp)
+	movdqa	%xmm2, ffi_closure_OFS_V+0x20(%rsp)
+	movdqa	%xmm3, ffi_closure_OFS_V+0x30(%rsp)
+	movdqa	%xmm4, ffi_closure_OFS_V+0x40(%rsp)
+	movdqa	%xmm5, ffi_closure_OFS_V+0x50(%rsp)
+	movdqa	%xmm6, ffi_closure_OFS_V+0x60(%rsp)
+	movdqa	%xmm7, ffi_closure_OFS_V+0x70(%rsp)
+	jmp	L(sse_entry2)
+
+L(UW14):
+ENDF(C(ffi_go_closure_unix64_sse))
+
+	.balign	2
+	.globl	C(ffi_go_closure_unix64)
+	FFI_HIDDEN(C(ffi_go_closure_unix64))
+
+C(ffi_go_closure_unix64):
+L(UW15):
+	subq	$ffi_closure_FS, %rsp
+L(UW16):
+	/* cfi_adjust_cfa_offset(ffi_closure_FS) */
+L(sse_entry2):
+	movq	%rdi, ffi_closure_OFS_G+0x00(%rsp)
+	movq    %rsi, ffi_closure_OFS_G+0x08(%rsp)
+	movq    %rdx, ffi_closure_OFS_G+0x10(%rsp)
+	movq    %rcx, ffi_closure_OFS_G+0x18(%rsp)
+	movq    %r8,  ffi_closure_OFS_G+0x20(%rsp)
+	movq    %r9,  ffi_closure_OFS_G+0x28(%rsp)
+
+#ifdef __ILP32__
+	movl	4(%r10), %edi		/* Load cif */
+	movl	8(%r10), %esi		/* Load fun */
+	movl	%r10d, %edx		/* Load closure (user_data) */
+#else
+	movq	8(%r10), %rdi		/* Load cif */
+	movq	16(%r10), %rsi		/* Load fun */
+	movq	%r10, %rdx		/* Load closure (user_data) */
+#endif
+	jmp	L(do_closure)
+
+L(UW17):
+ENDF(C(ffi_go_closure_unix64))
+
+/* Sadly, OSX cctools-as doesn't understand .cfi directives at all.  */
+
+#ifdef __APPLE__
+.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support
+EHFrame0:
+#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE)
+.section .eh_frame,"a",@unwind
 #else
-	movq	8(%r10), %rdi		/* Load cif */ 
-	movq	16(%r10), %rsi		/* Load fun */ 
-	movq	%r10, %rdx		/* Load closure (user_data) */ 
+.section .eh_frame,"a",@progbits
 #endif
-	jmp	L(do_closure) 
- 
-L(UW17): 
-ENDF(C(ffi_go_closure_unix64)) 
- 
-/* Sadly, OSX cctools-as doesn't understand .cfi directives at all.  */ 
- 
-#ifdef __APPLE__ 
-.section __TEXT,__eh_frame,coalesced,no_toc+strip_static_syms+live_support 
-EHFrame0: 
-#elif defined(HAVE_AS_X86_64_UNWIND_SECTION_TYPE) 
-.section .eh_frame,"a",@unwind 
-#else 
-.section .eh_frame,"a",@progbits 
-#endif 
- 
-#ifdef HAVE_AS_X86_PCREL 
-# define PCREL(X)	X - . 
-#else 
-# define PCREL(X)	X@rel 
-#endif 
- 
-/* Simplify advancing between labels.  Assume DW_CFA_advance_loc1 fits.  */ 
-#define ADV(N, P)	.byte 2, L(N)-L(P) 
- 
-	.balign 8 
-L(CIE): 
-	.set	L(set0),L(ECIE)-L(SCIE) 
-	.long	L(set0)			/* CIE Length */ 
-L(SCIE): 
+
+#ifdef HAVE_AS_X86_PCREL
+# define PCREL(X)	X - .
+#else
+# define PCREL(X)	X@rel
+#endif
+
+/* Simplify advancing between labels.  Assume DW_CFA_advance_loc1 fits.  */
+#define ADV(N, P)	.byte 2, L(N)-L(P)
+
+	.balign 8
+L(CIE):
+	.set	L(set0),L(ECIE)-L(SCIE)
+	.long	L(set0)			/* CIE Length */
+L(SCIE):
 	.long	0			/* CIE Identifier Tag */
 	.byte	1			/* CIE Version */
-	.ascii	"zR\0"			/* CIE Augmentation */ 
-	.byte	1			/* CIE Code Alignment Factor */ 
-	.byte	0x78			/* CIE Data Alignment Factor */ 
+	.ascii	"zR\0"			/* CIE Augmentation */
+	.byte	1			/* CIE Code Alignment Factor */
+	.byte	0x78			/* CIE Data Alignment Factor */
 	.byte	0x10			/* CIE RA Column */
-	.byte	1			/* Augmentation size */ 
+	.byte	1			/* Augmentation size */
 	.byte	0x1b			/* FDE Encoding (pcrel sdata4) */
-	.byte	0xc, 7, 8		/* DW_CFA_def_cfa, %rsp offset 8 */ 
-	.byte	0x80+16, 1		/* DW_CFA_offset, %rip offset 1*-8 */ 
-	.balign 8 
-L(ECIE): 
-
-	.set	L(set1),L(EFDE1)-L(SFDE1) 
-	.long	L(set1)			/* FDE Length */ 
-L(SFDE1): 
-	.long	L(SFDE1)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW0))		/* Initial location */ 
-	.long	L(UW4)-L(UW0)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW1, UW0) 
-	.byte	0xc, 6, 32		/* DW_CFA_def_cfa, %rbp 32 */ 
-	.byte	0x80+6, 2		/* DW_CFA_offset, %rbp 2*-8 */ 
-	ADV(UW2, UW1) 
+	.byte	0xc, 7, 8		/* DW_CFA_def_cfa, %rsp offset 8 */
+	.byte	0x80+16, 1		/* DW_CFA_offset, %rip offset 1*-8 */
+	.balign 8
+L(ECIE):
+
+	.set	L(set1),L(EFDE1)-L(SFDE1)
+	.long	L(set1)			/* FDE Length */
+L(SFDE1):
+	.long	L(SFDE1)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW0))		/* Initial location */
+	.long	L(UW4)-L(UW0)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW1, UW0)
+	.byte	0xc, 6, 32		/* DW_CFA_def_cfa, %rbp 32 */
+	.byte	0x80+6, 2		/* DW_CFA_offset, %rbp 2*-8 */
+	ADV(UW2, UW1)
 	.byte	0xa			/* DW_CFA_remember_state */
-	.byte	0xc, 7, 8		/* DW_CFA_def_cfa, %rsp 8 */ 
+	.byte	0xc, 7, 8		/* DW_CFA_def_cfa, %rsp 8 */
 	.byte	0xc0+6			/* DW_CFA_restore, %rbp */
-	ADV(UW3, UW2) 
+	ADV(UW3, UW2)
 	.byte	0xb			/* DW_CFA_restore_state */
-	.balign	8 
-L(EFDE1): 
-
-	.set	L(set2),L(EFDE2)-L(SFDE2) 
-	.long	L(set2)			/* FDE Length */ 
-L(SFDE2): 
-	.long	L(SFDE2)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW5))		/* Initial location */ 
-	.long	L(UW7)-L(UW5)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW6, UW5) 
-	.byte	0xe			/* DW_CFA_def_cfa_offset */ 
-	.byte	ffi_closure_FS + 8, 1	/* uleb128, assuming 128 <= FS < 255 */ 
-	.balign	8 
-L(EFDE2): 
-
-	.set	L(set3),L(EFDE3)-L(SFDE3) 
-	.long	L(set3)			/* FDE Length */ 
-L(SFDE3): 
-	.long	L(SFDE3)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW8))		/* Initial location */ 
-	.long	L(UW11)-L(UW8)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW9, UW8) 
+	.balign	8
+L(EFDE1):
+
+	.set	L(set2),L(EFDE2)-L(SFDE2)
+	.long	L(set2)			/* FDE Length */
+L(SFDE2):
+	.long	L(SFDE2)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW5))		/* Initial location */
+	.long	L(UW7)-L(UW5)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW6, UW5)
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	ffi_closure_FS + 8, 1	/* uleb128, assuming 128 <= FS < 255 */
+	.balign	8
+L(EFDE2):
+
+	.set	L(set3),L(EFDE3)-L(SFDE3)
+	.long	L(set3)			/* FDE Length */
+L(SFDE3):
+	.long	L(SFDE3)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW8))		/* Initial location */
+	.long	L(UW11)-L(UW8)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW9, UW8)
+	.byte	0xe			/* DW_CFA_def_cfa_offset */
+	.byte	ffi_closure_FS + 8, 1	/* uleb128, assuming 128 <= FS < 255 */
+	ADV(UW10, UW9)
+	.byte	0xe, 8			/* DW_CFA_def_cfa_offset 8 */
+L(EFDE3):
+
+	.set	L(set4),L(EFDE4)-L(SFDE4)
+	.long	L(set4)			/* FDE Length */
+L(SFDE4):
+	.long	L(SFDE4)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW12))		/* Initial location */
+	.long	L(UW14)-L(UW12)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW13, UW12)
 	.byte	0xe			/* DW_CFA_def_cfa_offset */
-	.byte	ffi_closure_FS + 8, 1	/* uleb128, assuming 128 <= FS < 255 */ 
-	ADV(UW10, UW9) 
-	.byte	0xe, 8			/* DW_CFA_def_cfa_offset 8 */ 
-L(EFDE3): 
-
-	.set	L(set4),L(EFDE4)-L(SFDE4) 
-	.long	L(set4)			/* FDE Length */ 
-L(SFDE4): 
-	.long	L(SFDE4)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW12))		/* Initial location */ 
-	.long	L(UW14)-L(UW12)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW13, UW12) 
+	.byte	ffi_closure_FS + 8, 1	/* uleb128, assuming 128 <= FS < 255 */
+	.balign	8
+L(EFDE4):
+
+	.set	L(set5),L(EFDE5)-L(SFDE5)
+	.long	L(set5)			/* FDE Length */
+L(SFDE5):
+	.long	L(SFDE5)-L(CIE)		/* FDE CIE offset */
+	.long	PCREL(L(UW15))		/* Initial location */
+	.long	L(UW17)-L(UW15)		/* Address range */
+	.byte	0			/* Augmentation size */
+	ADV(UW16, UW15)
 	.byte	0xe			/* DW_CFA_def_cfa_offset */
-	.byte	ffi_closure_FS + 8, 1	/* uleb128, assuming 128 <= FS < 255 */ 
-	.balign	8 
-L(EFDE4): 
-
-	.set	L(set5),L(EFDE5)-L(SFDE5) 
-	.long	L(set5)			/* FDE Length */ 
-L(SFDE5): 
-	.long	L(SFDE5)-L(CIE)		/* FDE CIE offset */ 
-	.long	PCREL(L(UW15))		/* Initial location */ 
-	.long	L(UW17)-L(UW15)		/* Address range */ 
-	.byte	0			/* Augmentation size */ 
-	ADV(UW16, UW15) 
-	.byte	0xe			/* DW_CFA_def_cfa_offset */ 
-	.byte	ffi_closure_FS + 8, 1	/* uleb128, assuming 128 <= FS < 255 */ 
-	.balign	8 
-L(EFDE5): 
-#ifdef __APPLE__ 
-	.subsections_via_symbols 
-	.section __LD,__compact_unwind,regular,debug 
-
-	/* compact unwind for ffi_call_unix64 */ 
-	.quad    C(ffi_call_unix64) 
-	.set     L1,L(UW4)-L(UW0) 
-	.long    L1 
-	.long    0x04000000 /* use dwarf unwind info */ 
-	.quad    0 
-	.quad    0 
-
-	/* compact unwind for ffi_closure_unix64_sse */ 
-	.quad    C(ffi_closure_unix64_sse) 
-	.set     L2,L(UW7)-L(UW5) 
-	.long    L2 
-	.long    0x04000000 /* use dwarf unwind info */ 
-	.quad    0 
-	.quad    0 
- 
-	/* compact unwind for ffi_closure_unix64 */ 
-	.quad    C(ffi_closure_unix64) 
-	.set     L3,L(UW11)-L(UW8) 
-	.long    L3 
-	.long    0x04000000 /* use dwarf unwind info */ 
-	.quad    0 
-	.quad    0 
- 
-	/* compact unwind for ffi_go_closure_unix64_sse */ 
-	.quad    C(ffi_go_closure_unix64_sse) 
-	.set     L4,L(UW14)-L(UW12) 
-	.long    L4 
-	.long    0x04000000 /* use dwarf unwind info */ 
-	.quad    0 
-	.quad    0 
- 
-	/* compact unwind for ffi_go_closure_unix64 */ 
-	.quad    C(ffi_go_closure_unix64) 
-	.set     L5,L(UW17)-L(UW15) 
-	.long    L5 
-	.long    0x04000000 /* use dwarf unwind info */ 
-	.quad    0 
-	.quad    0 
-#endif 
- 
+	.byte	ffi_closure_FS + 8, 1	/* uleb128, assuming 128 <= FS < 255 */
+	.balign	8
+L(EFDE5):
+#ifdef __APPLE__
+	.subsections_via_symbols
+	.section __LD,__compact_unwind,regular,debug
+
+	/* compact unwind for ffi_call_unix64 */
+	.quad    C(ffi_call_unix64)
+	.set     L1,L(UW4)-L(UW0)
+	.long    L1
+	.long    0x04000000 /* use dwarf unwind info */
+	.quad    0
+	.quad    0
+
+	/* compact unwind for ffi_closure_unix64_sse */
+	.quad    C(ffi_closure_unix64_sse)
+	.set     L2,L(UW7)-L(UW5)
+	.long    L2
+	.long    0x04000000 /* use dwarf unwind info */
+	.quad    0
+	.quad    0
+
+	/* compact unwind for ffi_closure_unix64 */
+	.quad    C(ffi_closure_unix64)
+	.set     L3,L(UW11)-L(UW8)
+	.long    L3
+	.long    0x04000000 /* use dwarf unwind info */
+	.quad    0
+	.quad    0
+
+	/* compact unwind for ffi_go_closure_unix64_sse */
+	.quad    C(ffi_go_closure_unix64_sse)
+	.set     L4,L(UW14)-L(UW12)
+	.long    L4
+	.long    0x04000000 /* use dwarf unwind info */
+	.quad    0
+	.quad    0
+
+	/* compact unwind for ffi_go_closure_unix64 */
+	.quad    C(ffi_go_closure_unix64)
+	.set     L5,L(UW17)-L(UW15)
+	.long    L5
+	.long    0x04000000 /* use dwarf unwind info */
+	.quad    0
+	.quad    0
+#endif
+
 #endif /* __x86_64__ */
 #if defined __ELF__ && defined __linux__
 	.section	.note.GNU-stack,"",@progbits
diff --git a/contrib/restricted/libffi/src/x86/win64.S b/contrib/restricted/libffi/src/x86/win64.S
index 13b89acbdd..2c334c82f9 100644
--- a/contrib/restricted/libffi/src/x86/win64.S
+++ b/contrib/restricted/libffi/src/x86/win64.S
@@ -1,237 +1,237 @@
-#ifdef __x86_64__ 
+#ifdef __x86_64__
 #define LIBFFI_ASM
 #include <fficonfig.h>
 #include <ffi.h>
-#include <ffi_cfi.h> 
-#include "asmnames.h" 
-
-#if defined(HAVE_AS_CFI_PSEUDO_OP) 
-        .cfi_sections   .debug_frame 
-#endif 
-
-#ifdef X86_WIN64 
-#define SEH(...) __VA_ARGS__ 
-#define arg0	%rcx 
-#define arg1	%rdx 
-#define arg2	%r8 
-#define arg3	%r9 
+#include <ffi_cfi.h>
+#include "asmnames.h"
+
+#if defined(HAVE_AS_CFI_PSEUDO_OP)
+        .cfi_sections   .debug_frame
+#endif
+
+#ifdef X86_WIN64
+#define SEH(...) __VA_ARGS__
+#define arg0	%rcx
+#define arg1	%rdx
+#define arg2	%r8
+#define arg3	%r9
 #else
-#define SEH(...) 
-#define arg0	%rdi 
-#define arg1	%rsi 
-#define arg2	%rdx 
-#define arg3	%rcx 
-#endif 
-
-/* This macro allows the safe creation of jump tables without an 
-   actual table.  The entry points into the table are all 8 bytes. 
-   The use of ORG asserts that we're at the correct location.  */ 
-/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */ 
-#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) 
-# define E(BASE, X)	.balign 8 
+#define SEH(...)
+#define arg0	%rdi
+#define arg1	%rsi
+#define arg2	%rdx
+#define arg3	%rcx
+#endif
+
+/* This macro allows the safe creation of jump tables without an
+   actual table.  The entry points into the table are all 8 bytes.
+   The use of ORG asserts that we're at the correct location.  */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X)	.balign 8
 #else
-# define E(BASE, X)	.balign 8; .org BASE + X * 8 
+# define E(BASE, X)	.balign 8; .org BASE + X * 8
 #endif
 
-	.text 
-
-/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10) 
-
-   Bit o trickiness here -- FRAME is the base of the stack frame 
-   for this function.  This has been allocated by ffi_call.  We also 
-   deallocate some of the stack that has been alloca'd.  */ 
-
-	.align	8 
-	.globl	C(ffi_call_win64) 
-	FFI_HIDDEN(C(ffi_call_win64)) 
-
-	SEH(.seh_proc ffi_call_win64) 
-C(ffi_call_win64): 
-	cfi_startproc 
-	/* Set up the local stack frame and install it in rbp/rsp.  */ 
-	movq	(%rsp), %rax 
-	movq	%rbp, (arg1) 
-	movq	%rax, 8(arg1) 
-	movq	arg1, %rbp 
-	cfi_def_cfa(%rbp, 16) 
-	cfi_rel_offset(%rbp, 0) 
-	SEH(.seh_pushreg %rbp) 
-	SEH(.seh_setframe %rbp, 0) 
-	SEH(.seh_endprologue) 
-	movq	arg0, %rsp 
-
-	movq	arg2, %r10 
-
-	/* Load all slots into both general and xmm registers.  */ 
-	movq	(%rsp), %rcx 
-	movsd	(%rsp), %xmm0 
-	movq	8(%rsp), %rdx 
-	movsd	8(%rsp), %xmm1 
-	movq	16(%rsp), %r8 
-	movsd	16(%rsp), %xmm2 
-	movq	24(%rsp), %r9 
-	movsd	24(%rsp), %xmm3 
-
-	call	*16(%rbp) 
-
-	movl	24(%rbp), %ecx 
-	movq	32(%rbp), %r8 
-	leaq	0f(%rip), %r10 
-	cmpl	$FFI_TYPE_SMALL_STRUCT_4B, %ecx 
-	leaq	(%r10, %rcx, 8), %r10 
-	ja	99f 
-	jmp	*%r10 
-
-/* Below, we're space constrained most of the time.  Thus we eschew the 
-   modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes).  */ 
-.macro epilogue 
-	leaveq 
-	cfi_remember_state 
-	cfi_def_cfa(%rsp, 8) 
-	cfi_restore(%rbp) 
-	ret 
-	cfi_restore_state 
-.endm 
-
-	.align	8 
-0: 
-E(0b, FFI_TYPE_VOID) 
-	epilogue 
-E(0b, FFI_TYPE_INT) 
-	movslq	%eax, %rax 
-	movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_FLOAT) 
-	movss	%xmm0, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_DOUBLE) 
-	movsd	%xmm0, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_LONGDOUBLE) 
-	call	PLT(C(abort)) 
-E(0b, FFI_TYPE_UINT8) 
-	movzbl	%al, %eax 
-	movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_SINT8) 
-	movsbq	%al, %rax 
-	jmp	98f 
-E(0b, FFI_TYPE_UINT16) 
-	movzwl	%ax, %eax 
-	movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_SINT16) 
-	movswq	%ax, %rax 
-	jmp	98f 
-E(0b, FFI_TYPE_UINT32) 
-	movl	%eax, %eax 
-	movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_SINT32) 
-	movslq	%eax, %rax 
-	movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_UINT64) 
-98:	movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_SINT64) 
-	movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_STRUCT) 
-	epilogue 
-E(0b, FFI_TYPE_POINTER) 
-	movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_COMPLEX) 
-	call	PLT(C(abort)) 
-E(0b, FFI_TYPE_SMALL_STRUCT_1B) 
-	movb	%al, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_SMALL_STRUCT_2B) 
-	movw	%ax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_SMALL_STRUCT_4B) 
-	movl	%eax, (%r8) 
-	epilogue 
-
-	.align	8 
-99:	call	PLT(C(abort)) 
-
-	epilogue 
-
-	cfi_endproc 
-	SEH(.seh_endproc) 
-
-
-/* 32 bytes of outgoing register stack space, 8 bytes of alignment, 
-   16 bytes of result, 32 bytes of xmm registers.  */ 
-#define ffi_clo_FS	(32+8+16+32) 
-#define ffi_clo_OFF_R	(32+8) 
-#define ffi_clo_OFF_X	(32+8+16) 
-
-	.align	8 
-	.globl	C(ffi_go_closure_win64) 
-	FFI_HIDDEN(C(ffi_go_closure_win64)) 
-
-	SEH(.seh_proc ffi_go_closure_win64) 
-C(ffi_go_closure_win64): 
-	cfi_startproc 
-	/* Save all integer arguments into the incoming reg stack space.  */ 
-	movq	%rcx, 8(%rsp) 
-	movq	%rdx, 16(%rsp) 
-	movq	%r8, 24(%rsp) 
-	movq	%r9, 32(%rsp) 
-
-	movq	8(%r10), %rcx			/* load cif */ 
-	movq	16(%r10), %rdx			/* load fun */ 
-	movq	%r10, %r8			/* closure is user_data */ 
-	jmp	0f 
-	cfi_endproc 
-	SEH(.seh_endproc) 
-
-	.align	8 
-	.globl	C(ffi_closure_win64) 
-	FFI_HIDDEN(C(ffi_closure_win64)) 
-
-	SEH(.seh_proc ffi_closure_win64) 
-C(ffi_closure_win64): 
-	cfi_startproc 
-	/* Save all integer arguments into the incoming reg stack space.  */ 
-	movq	%rcx, 8(%rsp) 
-	movq	%rdx, 16(%rsp) 
-	movq	%r8, 24(%rsp) 
-	movq	%r9, 32(%rsp) 
-
-	movq	FFI_TRAMPOLINE_SIZE(%r10), %rcx		/* load cif */ 
-	movq	FFI_TRAMPOLINE_SIZE+8(%r10), %rdx	/* load fun */ 
-	movq	FFI_TRAMPOLINE_SIZE+16(%r10), %r8	/* load user_data */ 
-0: 
-	subq	$ffi_clo_FS, %rsp 
-	cfi_adjust_cfa_offset(ffi_clo_FS) 
-	SEH(.seh_stackalloc ffi_clo_FS) 
-	SEH(.seh_endprologue) 
-
-	/* Save all sse arguments into the stack frame.  */ 
-	movsd	%xmm0, ffi_clo_OFF_X(%rsp) 
-	movsd	%xmm1, ffi_clo_OFF_X+8(%rsp) 
-	movsd	%xmm2, ffi_clo_OFF_X+16(%rsp) 
-	movsd	%xmm3, ffi_clo_OFF_X+24(%rsp) 
-
-	leaq	ffi_clo_OFF_R(%rsp), %r9 
-	call	PLT(C(ffi_closure_win64_inner)) 
-
-	/* Load the result into both possible result registers.  */ 
-	movq    ffi_clo_OFF_R(%rsp), %rax 
-	movsd   ffi_clo_OFF_R(%rsp), %xmm0 
-
-	addq	$ffi_clo_FS, %rsp 
-	cfi_adjust_cfa_offset(-ffi_clo_FS) 
-	ret 
-
-	cfi_endproc 
-	SEH(.seh_endproc) 
-#endif /* __x86_64__ */ 
-
-#if defined __ELF__ && defined __linux__ 
-	.section	.note.GNU-stack,"",@progbits 
-#endif 
+	.text
+
+/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10)
+
+   Bit o trickiness here -- FRAME is the base of the stack frame
+   for this function.  This has been allocated by ffi_call.  We also
+   deallocate some of the stack that has been alloca'd.  */
+
+	.align	8
+	.globl	C(ffi_call_win64)
+	FFI_HIDDEN(C(ffi_call_win64))
+
+	SEH(.seh_proc ffi_call_win64)
+C(ffi_call_win64):
+	cfi_startproc
+	/* Set up the local stack frame and install it in rbp/rsp.  */
+	movq	(%rsp), %rax
+	movq	%rbp, (arg1)
+	movq	%rax, 8(arg1)
+	movq	arg1, %rbp
+	cfi_def_cfa(%rbp, 16)
+	cfi_rel_offset(%rbp, 0)
+	SEH(.seh_pushreg %rbp)
+	SEH(.seh_setframe %rbp, 0)
+	SEH(.seh_endprologue)
+	movq	arg0, %rsp
+
+	movq	arg2, %r10
+
+	/* Load all slots into both general and xmm registers.  */
+	movq	(%rsp), %rcx
+	movsd	(%rsp), %xmm0
+	movq	8(%rsp), %rdx
+	movsd	8(%rsp), %xmm1
+	movq	16(%rsp), %r8
+	movsd	16(%rsp), %xmm2
+	movq	24(%rsp), %r9
+	movsd	24(%rsp), %xmm3
+
+	call	*16(%rbp)
+
+	movl	24(%rbp), %ecx
+	movq	32(%rbp), %r8
+	leaq	0f(%rip), %r10
+	cmpl	$FFI_TYPE_SMALL_STRUCT_4B, %ecx
+	leaq	(%r10, %rcx, 8), %r10
+	ja	99f
+	jmp	*%r10
+
+/* Below, we're space constrained most of the time.  Thus we eschew the
+   modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes).  */
+.macro epilogue
+	leaveq
+	cfi_remember_state
+	cfi_def_cfa(%rsp, 8)
+	cfi_restore(%rbp)
+	ret
+	cfi_restore_state
+.endm
+
+	.align	8
+0:
+E(0b, FFI_TYPE_VOID)
+	epilogue
+E(0b, FFI_TYPE_INT)
+	movslq	%eax, %rax
+	movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_FLOAT)
+	movss	%xmm0, (%r8)
+	epilogue
+E(0b, FFI_TYPE_DOUBLE)
+	movsd	%xmm0, (%r8)
+	epilogue
+E(0b, FFI_TYPE_LONGDOUBLE)
+	call	PLT(C(abort))
+E(0b, FFI_TYPE_UINT8)
+	movzbl	%al, %eax
+	movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_SINT8)
+	movsbq	%al, %rax
+	jmp	98f
+E(0b, FFI_TYPE_UINT16)
+	movzwl	%ax, %eax
+	movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_SINT16)
+	movswq	%ax, %rax
+	jmp	98f
+E(0b, FFI_TYPE_UINT32)
+	movl	%eax, %eax
+	movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_SINT32)
+	movslq	%eax, %rax
+	movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_UINT64)
+98:	movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_SINT64)
+	movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_STRUCT)
+	epilogue
+E(0b, FFI_TYPE_POINTER)
+	movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_COMPLEX)
+	call	PLT(C(abort))
+E(0b, FFI_TYPE_SMALL_STRUCT_1B)
+	movb	%al, (%r8)
+	epilogue
+E(0b, FFI_TYPE_SMALL_STRUCT_2B)
+	movw	%ax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_SMALL_STRUCT_4B)
+	movl	%eax, (%r8)
+	epilogue
+
+	.align	8
+99:	call	PLT(C(abort))
+
+	epilogue
+
+	cfi_endproc
+	SEH(.seh_endproc)
+
+
+/* 32 bytes of outgoing register stack space, 8 bytes of alignment,
+   16 bytes of result, 32 bytes of xmm registers.  */
+#define ffi_clo_FS	(32+8+16+32)
+#define ffi_clo_OFF_R	(32+8)
+#define ffi_clo_OFF_X	(32+8+16)
+
+	.align	8
+	.globl	C(ffi_go_closure_win64)
+	FFI_HIDDEN(C(ffi_go_closure_win64))
+
+	SEH(.seh_proc ffi_go_closure_win64)
+C(ffi_go_closure_win64):
+	cfi_startproc
+	/* Save all integer arguments into the incoming reg stack space.  */
+	movq	%rcx, 8(%rsp)
+	movq	%rdx, 16(%rsp)
+	movq	%r8, 24(%rsp)
+	movq	%r9, 32(%rsp)
+
+	movq	8(%r10), %rcx			/* load cif */
+	movq	16(%r10), %rdx			/* load fun */
+	movq	%r10, %r8			/* closure is user_data */
+	jmp	0f
+	cfi_endproc
+	SEH(.seh_endproc)
+
+	.align	8
+	.globl	C(ffi_closure_win64)
+	FFI_HIDDEN(C(ffi_closure_win64))
+
+	SEH(.seh_proc ffi_closure_win64)
+C(ffi_closure_win64):
+	cfi_startproc
+	/* Save all integer arguments into the incoming reg stack space.  */
+	movq	%rcx, 8(%rsp)
+	movq	%rdx, 16(%rsp)
+	movq	%r8, 24(%rsp)
+	movq	%r9, 32(%rsp)
+
+	movq	FFI_TRAMPOLINE_SIZE(%r10), %rcx		/* load cif */
+	movq	FFI_TRAMPOLINE_SIZE+8(%r10), %rdx	/* load fun */
+	movq	FFI_TRAMPOLINE_SIZE+16(%r10), %r8	/* load user_data */
+0:
+	subq	$ffi_clo_FS, %rsp
+	cfi_adjust_cfa_offset(ffi_clo_FS)
+	SEH(.seh_stackalloc ffi_clo_FS)
+	SEH(.seh_endprologue)
+
+	/* Save all sse arguments into the stack frame.  */
+	movsd	%xmm0, ffi_clo_OFF_X(%rsp)
+	movsd	%xmm1, ffi_clo_OFF_X+8(%rsp)
+	movsd	%xmm2, ffi_clo_OFF_X+16(%rsp)
+	movsd	%xmm3, ffi_clo_OFF_X+24(%rsp)
+
+	leaq	ffi_clo_OFF_R(%rsp), %r9
+	call	PLT(C(ffi_closure_win64_inner))
+
+	/* Load the result into both possible result registers.  */
+	movq    ffi_clo_OFF_R(%rsp), %rax
+	movsd   ffi_clo_OFF_R(%rsp), %xmm0
+
+	addq	$ffi_clo_FS, %rsp
+	cfi_adjust_cfa_offset(-ffi_clo_FS)
+	ret
+
+	cfi_endproc
+	SEH(.seh_endproc)
+#endif /* __x86_64__ */
+
+#if defined __ELF__ && defined __linux__
+	.section	.note.GNU-stack,"",@progbits
+#endif
diff --git a/contrib/restricted/libffi/src/x86/win64_intel.S b/contrib/restricted/libffi/src/x86/win64_intel.S
index fdf3e4aa74..7df78b30e4 100644
--- a/contrib/restricted/libffi/src/x86/win64_intel.S
+++ b/contrib/restricted/libffi/src/x86/win64_intel.S
@@ -1,237 +1,237 @@
-#define LIBFFI_ASM 
-#include <fficonfig.h> 
-#include <ffi.h> 
-#include <ffi_cfi.h> 
-#include "asmnames.h" 
- 
-#if defined(HAVE_AS_CFI_PSEUDO_OP) 
-        .cfi_sections   .debug_frame 
-#endif 
- 
-#ifdef X86_WIN64 
-#define SEH(...) __VA_ARGS__ 
-#define arg0	rcx 
-#define arg1	rdx 
-#define arg2	r8 
-#define arg3	r9 
-#else 
-#define SEH(...) 
-#define arg0	rdi 
-#define arg1	rsi 
-#define arg2	rdx 
-#define arg3	rcx 
-#endif 
- 
-/* This macro allows the safe creation of jump tables without an 
-   actual table.  The entry points into the table are all 8 bytes. 
-   The use of ORG asserts that we're at the correct location.  */ 
-/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */ 
-#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) 
-# define E(BASE, X)	ALIGN 8 
-#else 
-# define E(BASE, X)	ALIGN 8; ORG BASE + X * 8 
-#endif 
- 
-	.CODE 
-	extern PLT(C(abort)):near 
-	extern C(ffi_closure_win64_inner):near 
- 
-/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10) 
- 
-   Bit o trickiness here -- FRAME is the base of the stack frame 
-   for this function.  This has been allocated by ffi_call.  We also 
-   deallocate some of the stack that has been alloca'd.  */ 
- 
-	ALIGN	8 
-	PUBLIC	C(ffi_call_win64) 
- 
-	; SEH(.safesh ffi_call_win64) 
-C(ffi_call_win64) proc SEH(frame) 
-	cfi_startproc 
-	/* Set up the local stack frame and install it in rbp/rsp.  */ 
-	mov	RAX, [RSP] ; 	movq	(%rsp), %rax 
-	mov [arg1], RBP ; movq	%rbp, (arg1) 
-	mov [arg1 + 8], RAX;	movq	%rax, 8(arg1) 
-	mov	 RBP, arg1; movq	arg1, %rbp 
-	cfi_def_cfa(rbp, 16) 
-	cfi_rel_offset(rbp, 0) 
-	SEH(.pushreg rbp) 
-	SEH(.setframe rbp, 0) 
-	SEH(.endprolog) 
-	mov	RSP, arg0 ;	movq	arg0, %rsp 
- 
-	mov	R10, arg2 ; movq	arg2, %r10 
- 
-	/* Load all slots into both general and xmm registers.  */ 
-	mov	RCX, [RSP] ;	movq	(%rsp), %rcx 
-	movsd XMM0, qword ptr [RSP] ; movsd	(%rsp), %xmm0 
-	mov	RDX, [RSP + 8] ;movq	8(%rsp), %rdx 
-	movsd XMM1, qword ptr [RSP + 8];	movsd	8(%rsp), %xmm1 
-	mov R8, [RSP + 16] ; movq	16(%rsp), %r8 
-	movsd	XMM2, qword ptr [RSP + 16] ; movsd	16(%rsp), %xmm2 
-	mov	R9, [RSP + 24] ; movq	24(%rsp), %r9 
-	movsd	XMM3, qword ptr [RSP + 24] ;movsd	24(%rsp), %xmm3 
- 
-	CALL qword ptr [RBP + 16] ; call	*16(%rbp) 
- 
-	mov	 ECX, [RBP + 24] ; movl	24(%rbp), %ecx 
-	mov	R8, [RBP + 32] ; movq	32(%rbp), %r8 
-	LEA	R10, ffi_call_win64_tab ; leaq	0f(%rip), %r10 
-	CMP	ECX, FFI_TYPE_SMALL_STRUCT_4B ; cmpl	$FFI_TYPE_SMALL_STRUCT_4B, %ecx 
-	LEA	R10, [R10 + RCX*8] ; leaq	(%r10, %rcx, 8), %r10 
-	JA	L99 ; ja	99f 
-	JMP	R10 ; jmp	*%r10 
- 
-/* Below, we're space constrained most of the time.  Thus we eschew the 
-   modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes).  */ 
-epilogue macro 
-	LEAVE 
-	cfi_remember_state 
-	cfi_def_cfa(rsp, 8) 
-	cfi_restore(rbp) 
-	RET 
-	cfi_restore_state 
-endm 
- 
-	ALIGN 8 
-ffi_call_win64_tab LABEL NEAR 
-E(0b, FFI_TYPE_VOID) 
-	epilogue 
-E(0b, FFI_TYPE_INT) 
-	movsxd rax, eax ; movslq	%eax, %rax 
-	mov qword ptr [r8], rax; movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_FLOAT) 
-	movss dword ptr [r8], xmm0 ; movss	%xmm0, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_DOUBLE) 
-	movsd qword ptr[r8], xmm0; movsd	%xmm0, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_LONGDOUBLE) 
-	call	PLT(C(abort)) 
-E(0b, FFI_TYPE_UINT8) 
-	movzx eax, al ;movzbl	%al, %eax 
-	mov qword ptr[r8], rax; movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_SINT8) 
-	movsx rax, al ; movsbq	%al, %rax 
-	jmp	L98 
-E(0b, FFI_TYPE_UINT16) 
-	movzx eax, ax ; movzwl	%ax, %eax 
-	mov qword ptr[r8], rax; movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_SINT16) 
-	movsx rax, ax; movswq	%ax, %rax 
-	jmp	L98 
-E(0b, FFI_TYPE_UINT32) 
-	mov eax, eax; movl	%eax, %eax 
-	mov qword ptr[r8], rax ; movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_SINT32) 
-	movsxd rax, eax; movslq	%eax, %rax 
-	mov qword ptr [r8], rax; movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_UINT64) 
-L98 LABEL near 
-	mov qword ptr [r8], rax ; movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_SINT64) 
-	mov qword ptr [r8], rax;movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_STRUCT) 
-	epilogue 
-E(0b, FFI_TYPE_POINTER) 
-	mov qword ptr [r8], rax ;movq	%rax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_COMPLEX) 
-	call	PLT(C(abort)) 
-E(0b, FFI_TYPE_SMALL_STRUCT_1B) 
-	mov byte ptr [r8], al ; movb	%al, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_SMALL_STRUCT_2B) 
-	mov word ptr [r8], ax ; movw	%ax, (%r8) 
-	epilogue 
-E(0b, FFI_TYPE_SMALL_STRUCT_4B) 
-	mov dword ptr [r8], eax ; movl	%eax, (%r8) 
-	epilogue 
- 
-	align	8 
-L99 LABEL near 
-	call	PLT(C(abort)) 
- 
-	epilogue 
- 
-	cfi_endproc 
-	C(ffi_call_win64) endp 
- 
- 
-/* 32 bytes of outgoing register stack space, 8 bytes of alignment, 
-   16 bytes of result, 32 bytes of xmm registers.  */ 
-#define ffi_clo_FS	(32+8+16+32) 
-#define ffi_clo_OFF_R	(32+8) 
-#define ffi_clo_OFF_X	(32+8+16) 
- 
-	align	8 
-	PUBLIC	C(ffi_go_closure_win64) 
- 
-C(ffi_go_closure_win64) proc 
-	cfi_startproc 
-	/* Save all integer arguments into the incoming reg stack space.  */ 
-	mov qword ptr [rsp + 8], rcx; movq	%rcx, 8(%rsp) 
-	mov qword ptr [rsp + 16], rdx; movq	%rdx, 16(%rsp) 
-	mov qword ptr [rsp + 24], r8; movq	%r8, 24(%rsp) 
-	mov qword ptr [rsp + 32], r9 ;movq	%r9, 32(%rsp) 
- 
-	mov rcx, qword ptr [r10 + 8]; movq	8(%r10), %rcx			/* load cif */ 
-	mov rdx, qword ptr [r10 + 16];  movq	16(%r10), %rdx			/* load fun */ 
-	mov r8, r10 ; movq	%r10, %r8			/* closure is user_data */ 
-	jmp	ffi_closure_win64_2 
-	cfi_endproc 
-	C(ffi_go_closure_win64) endp 
- 
-	align	8 
-	 
-PUBLIC C(ffi_closure_win64) 
-C(ffi_closure_win64) PROC FRAME 
-	cfi_startproc 
-	/* Save all integer arguments into the incoming reg stack space.  */ 
-	mov qword ptr [rsp + 8], rcx; movq	%rcx, 8(%rsp) 
-	mov qword ptr [rsp + 16], rdx;	movq	%rdx, 16(%rsp) 
-	mov qword ptr [rsp + 24], r8; 	movq	%r8, 24(%rsp) 
-	mov qword ptr [rsp + 32], r9;	movq	%r9, 32(%rsp) 
- 
-	mov rcx, qword ptr [FFI_TRAMPOLINE_SIZE + r10]	;movq	FFI_TRAMPOLINE_SIZE(%r10), %rcx		/* load cif */ 
-	mov rdx, qword ptr [FFI_TRAMPOLINE_SIZE + 8 + r10] ;	movq	FFI_TRAMPOLINE_SIZE+8(%r10), %rdx	/* load fun */ 
-	mov r8, qword ptr [FFI_TRAMPOLINE_SIZE+16+r10] ;movq	FFI_TRAMPOLINE_SIZE+16(%r10), %r8	/* load user_data */ 
-ffi_closure_win64_2 LABEL near 
-	sub rsp, ffi_clo_FS ;subq	$ffi_clo_FS, %rsp 
-	cfi_adjust_cfa_offset(ffi_clo_FS) 
-	SEH(.allocstack ffi_clo_FS) 
-	SEH(.endprolog) 
- 
-	/* Save all sse arguments into the stack frame.  */ 
-	movsd qword ptr [ffi_clo_OFF_X + rsp], xmm0	; movsd	%xmm0, ffi_clo_OFF_X(%rsp) 
-	movsd qword ptr [ffi_clo_OFF_X+8+rsp], xmm1 ; movsd	%xmm1, ffi_clo_OFF_X+8(%rsp) 
-	movsd qword ptr [ffi_clo_OFF_X+16+rsp], xmm2 ; movsd %xmm2, ffi_clo_OFF_X+16(%rsp) 
-	movsd qword ptr [ffi_clo_OFF_X+24+rsp], xmm3 ; movsd %xmm3, ffi_clo_OFF_X+24(%rsp) 
- 
-	lea	r9, [ffi_clo_OFF_R + rsp] ; leaq	ffi_clo_OFF_R(%rsp), %r9 
-	call C(ffi_closure_win64_inner) 
- 
-	/* Load the result into both possible result registers.  */ 
-	 
-	mov rax, qword ptr [ffi_clo_OFF_R + rsp] ;movq    ffi_clo_OFF_R(%rsp), %rax 
-	movsd xmm0, qword ptr [rsp + ffi_clo_OFF_R] ;movsd   ffi_clo_OFF_R(%rsp), %xmm0 
- 
-	add rsp, ffi_clo_FS ;addq	$ffi_clo_FS, %rsp 
-	cfi_adjust_cfa_offset(-ffi_clo_FS) 
-	ret 
- 
-	cfi_endproc 
-	C(ffi_closure_win64) endp 
- 
-#if defined __ELF__ && defined __linux__ 
-	.section	.note.GNU-stack,"",@progbits 
-#endif 
-_text ends 
-end 
-\ No newline at end of file
+#define LIBFFI_ASM
+#include <fficonfig.h>
+#include <ffi.h>
+#include <ffi_cfi.h>
+#include "asmnames.h"
+
+#if defined(HAVE_AS_CFI_PSEUDO_OP)
+        .cfi_sections   .debug_frame
+#endif
+
+#ifdef X86_WIN64
+#define SEH(...) __VA_ARGS__
+#define arg0	rcx
+#define arg1	rdx
+#define arg2	r8
+#define arg3	r9
+#else
+#define SEH(...)
+#define arg0	rdi
+#define arg1	rsi
+#define arg2	rdx
+#define arg3	rcx
+#endif
+
+/* This macro allows the safe creation of jump tables without an
+   actual table.  The entry points into the table are all 8 bytes.
+   The use of ORG asserts that we're at the correct location.  */
+/* ??? The clang assembler doesn't handle .org with symbolic expressions.  */
+#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__))
+# define E(BASE, X)	ALIGN 8
+#else
+# define E(BASE, X)	ALIGN 8; ORG BASE + X * 8
+#endif
+
+	.CODE
+	extern PLT(C(abort)):near
+	extern C(ffi_closure_win64_inner):near
+
+/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10)
+
+   A bit of trickiness here -- FRAME is the base of the stack frame
+   for this function.  This has been allocated by ffi_call.  We also
+   deallocate some of the stack that has been alloca'd.  */
+
+	ALIGN	8
+	PUBLIC	C(ffi_call_win64)
+	/* Installs FRAME (arg1) as RBP and STACK (arg0) as RSP, calls the function at [RBP + 16], then stores the result through the pointer at [RBP + 32] according to the type code at [RBP + 24].  */
+	; SEH(.safesh ffi_call_win64)
+C(ffi_call_win64) proc SEH(frame)
+	cfi_startproc
+	/* Set up the local stack frame and install it in rbp/rsp: the old RBP goes to [arg1] and the quadword at [RSP] on entry (the return address) goes to [arg1 + 8].  */
+	mov	RAX, [RSP] ; 	movq	(%rsp), %rax
+	mov [arg1], RBP ; movq	%rbp, (arg1)
+	mov [arg1 + 8], RAX;	movq	%rax, 8(arg1)
+	mov	 RBP, arg1; movq	arg1, %rbp
+	cfi_def_cfa(rbp, 16)
+	cfi_rel_offset(rbp, 0)
+	SEH(.pushreg rbp)
+	SEH(.setframe rbp, 0)
+	SEH(.endprolog)
+	mov	RSP, arg0 ;	movq	arg0, %rsp
+
+	mov	R10, arg2 ; movq	arg2, %r10
+
+	/* Load all slots into both general and xmm registers: each of the first four stack quadwords is loaded into an integer register and the matching xmm register.  */
+	mov	RCX, [RSP] ;	movq	(%rsp), %rcx
+	movsd XMM0, qword ptr [RSP] ; movsd	(%rsp), %xmm0
+	mov	RDX, [RSP + 8] ;movq	8(%rsp), %rdx
+	movsd XMM1, qword ptr [RSP + 8];	movsd	8(%rsp), %xmm1
+	mov R8, [RSP + 16] ; movq	16(%rsp), %r8
+	movsd	XMM2, qword ptr [RSP + 16] ; movsd	16(%rsp), %xmm2
+	mov	R9, [RSP + 24] ; movq	24(%rsp), %r9
+	movsd	XMM3, qword ptr [RSP + 24] ;movsd	24(%rsp), %xmm3
+
+	CALL qword ptr [RBP + 16] ; call	*16(%rbp)
+
+	mov	 ECX, [RBP + 24] ; movl	24(%rbp), %ecx	/* type code selecting the table entry */
+	mov	R8, [RBP + 32] ; movq	32(%rbp), %r8	/* address the table entries store the result to */
+	LEA	R10, ffi_call_win64_tab ; leaq	0f(%rip), %r10
+	CMP	ECX, FFI_TYPE_SMALL_STRUCT_4B ; cmpl	$FFI_TYPE_SMALL_STRUCT_4B, %ecx
+	LEA	R10, [R10 + RCX*8] ; leaq	(%r10, %rcx, 8), %r10	/* LEA leaves the flags from CMP intact for the JA below */
+	JA	L99 ; ja	99f	/* code above FFI_TYPE_SMALL_STRUCT_4B: go to the abort call at L99 */
+	JMP	R10 ; jmp	*%r10
+
+/* Below, we're space constrained most of the time.  Thus we eschew the
+   modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes).  */
+epilogue macro
+	LEAVE
+	cfi_remember_state
+	cfi_def_cfa(rsp, 8)
+	cfi_restore(rbp)
+	RET
+	cfi_restore_state
+endm
+
+	ALIGN 8
+ffi_call_win64_tab LABEL NEAR
+E(0b, FFI_TYPE_VOID)
+	epilogue
+E(0b, FFI_TYPE_INT)
+	movsxd rax, eax ; movslq	%eax, %rax
+	mov qword ptr [r8], rax; movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_FLOAT)
+	movss dword ptr [r8], xmm0 ; movss	%xmm0, (%r8)
+	epilogue
+E(0b, FFI_TYPE_DOUBLE)
+	movsd qword ptr[r8], xmm0; movsd	%xmm0, (%r8)
+	epilogue
+E(0b, FFI_TYPE_LONGDOUBLE)
+	call	PLT(C(abort))
+E(0b, FFI_TYPE_UINT8)
+	movzx eax, al ;movzbl	%al, %eax
+	mov qword ptr[r8], rax; movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_SINT8)
+	movsx rax, al ; movsbq	%al, %rax
+	jmp	L98
+E(0b, FFI_TYPE_UINT16)
+	movzx eax, ax ; movzwl	%ax, %eax
+	mov qword ptr[r8], rax; movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_SINT16)
+	movsx rax, ax; movswq	%ax, %rax
+	jmp	L98
+E(0b, FFI_TYPE_UINT32)
+	mov eax, eax; movl	%eax, %eax
+	mov qword ptr[r8], rax ; movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_SINT32)
+	movsxd rax, eax; movslq	%eax, %rax
+	mov qword ptr [r8], rax; movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_UINT64)
+L98 LABEL near
+	mov qword ptr [r8], rax ; movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_SINT64)
+	mov qword ptr [r8], rax;movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_STRUCT)
+	epilogue
+E(0b, FFI_TYPE_POINTER)
+	mov qword ptr [r8], rax ;movq	%rax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_COMPLEX)
+	call	PLT(C(abort))
+E(0b, FFI_TYPE_SMALL_STRUCT_1B)
+	mov byte ptr [r8], al ; movb	%al, (%r8)
+	epilogue
+E(0b, FFI_TYPE_SMALL_STRUCT_2B)
+	mov word ptr [r8], ax ; movw	%ax, (%r8)
+	epilogue
+E(0b, FFI_TYPE_SMALL_STRUCT_4B)
+	mov dword ptr [r8], eax ; movl	%eax, (%r8)
+	epilogue
+
+	align	8
+L99 LABEL near
+	call	PLT(C(abort))
+
+	epilogue
+
+	cfi_endproc
+	C(ffi_call_win64) endp
+
+
+/* 32 bytes of outgoing register stack space, 8 bytes of alignment,
+   16 bytes of result, 32 bytes of xmm registers.  */
+#define ffi_clo_FS	(32+8+16+32)
+#define ffi_clo_OFF_R	(32+8)
+#define ffi_clo_OFF_X	(32+8+16)
+
+	align	8
+	PUBLIC	C(ffi_go_closure_win64)
+	/* ffi_go_closure_win64: reads cif and fun at offsets 8 and 16 of the object in r10, passes r10 itself as user_data, then jumps to ffi_closure_win64_2 inside ffi_closure_win64.  */
+C(ffi_go_closure_win64) proc
+	cfi_startproc
+	/* Save all integer arguments (rcx, rdx, r8, r9) into the incoming reg stack space at [rsp + 8]..[rsp + 32].  */
+	mov qword ptr [rsp + 8], rcx; movq	%rcx, 8(%rsp)
+	mov qword ptr [rsp + 16], rdx; movq	%rdx, 16(%rsp)
+	mov qword ptr [rsp + 24], r8; movq	%r8, 24(%rsp)
+	mov qword ptr [rsp + 32], r9 ;movq	%r9, 32(%rsp)
+
+	mov rcx, qword ptr [r10 + 8]; movq	8(%r10), %rcx			/* load cif */
+	mov rdx, qword ptr [r10 + 16];  movq	16(%r10), %rdx			/* load fun */
+	mov r8, r10 ; movq	%r10, %r8			/* closure is user_data */
+	jmp	ffi_closure_win64_2
+	cfi_endproc
+	C(ffi_go_closure_win64) endp
+
+	align	8
+	/* ffi_closure_win64: loads cif, fun and user_data from the three quadwords at [FFI_TRAMPOLINE_SIZE + r10], spills the register arguments, calls ffi_closure_win64_inner and returns the result slot in both rax and xmm0.  ffi_go_closure_win64 enters at ffi_closure_win64_2.  */
+PUBLIC C(ffi_closure_win64)
+C(ffi_closure_win64) PROC FRAME
+	cfi_startproc
+	/* Save all integer arguments (rcx, rdx, r8, r9) into the incoming reg stack space at [rsp + 8]..[rsp + 32].  */
+	mov qword ptr [rsp + 8], rcx; movq	%rcx, 8(%rsp)
+	mov qword ptr [rsp + 16], rdx;	movq	%rdx, 16(%rsp)
+	mov qword ptr [rsp + 24], r8; 	movq	%r8, 24(%rsp)
+	mov qword ptr [rsp + 32], r9;	movq	%r9, 32(%rsp)
+
+	mov rcx, qword ptr [FFI_TRAMPOLINE_SIZE + r10]	;movq	FFI_TRAMPOLINE_SIZE(%r10), %rcx		/* load cif */
+	mov rdx, qword ptr [FFI_TRAMPOLINE_SIZE + 8 + r10] ;	movq	FFI_TRAMPOLINE_SIZE+8(%r10), %rdx	/* load fun */
+	mov r8, qword ptr [FFI_TRAMPOLINE_SIZE+16+r10] ;movq	FFI_TRAMPOLINE_SIZE+16(%r10), %r8	/* load user_data */
+ffi_closure_win64_2 LABEL near
+	sub rsp, ffi_clo_FS ;subq	$ffi_clo_FS, %rsp
+	cfi_adjust_cfa_offset(ffi_clo_FS)
+	SEH(.allocstack ffi_clo_FS)
+	SEH(.endprolog)
+
+	/* Save all sse arguments (xmm0..xmm3) into the stack frame, 8 bytes each starting at ffi_clo_OFF_X.  */
+	movsd qword ptr [ffi_clo_OFF_X + rsp], xmm0	; movsd	%xmm0, ffi_clo_OFF_X(%rsp)
+	movsd qword ptr [ffi_clo_OFF_X+8+rsp], xmm1 ; movsd	%xmm1, ffi_clo_OFF_X+8(%rsp)
+	movsd qword ptr [ffi_clo_OFF_X+16+rsp], xmm2 ; movsd %xmm2, ffi_clo_OFF_X+16(%rsp)
+	movsd qword ptr [ffi_clo_OFF_X+24+rsp], xmm3 ; movsd %xmm3, ffi_clo_OFF_X+24(%rsp)
+
+	lea	r9, [ffi_clo_OFF_R + rsp] ; leaq	ffi_clo_OFF_R(%rsp), %r9	/* r9 = address of the result slot in this frame */
+	call C(ffi_closure_win64_inner)
+
+	/* Load the result into both possible result registers (rax and xmm0), since either may be the one the caller reads.  */
+	
+	mov rax, qword ptr [ffi_clo_OFF_R + rsp] ;movq    ffi_clo_OFF_R(%rsp), %rax
+	movsd xmm0, qword ptr [rsp + ffi_clo_OFF_R] ;movsd   ffi_clo_OFF_R(%rsp), %xmm0
+
+	add rsp, ffi_clo_FS ;addq	$ffi_clo_FS, %rsp
+	cfi_adjust_cfa_offset(-ffi_clo_FS)
+	ret
+
+	cfi_endproc
+	C(ffi_closure_win64) endp
+
+#if defined __ELF__ && defined __linux__
+	.section	.note.GNU-stack,"",@progbits
+#endif
+_text ends
+end
+\ No newline at end of file
author	Mikhail Borisov <borisov.mikhail@gmail.com>	2022-02-10 16:45:40 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:45:40 +0300
commit	5d50718e66d9c037dc587a0211110b7d25a66185 (patch)
tree	e98df59de24d2ef7c77baed9f41e4875a2fef972 /contrib/restricted/libffi/src
parent	a6a92afe03e02795227d2641b49819b687f088f8 (diff)
download	ydb-5d50718e66d9c037dc587a0211110b7d25a66185.tar.gz