diff options
author | Mikhail Borisov <borisov.mikhail@gmail.com> | 2022-02-10 16:45:39 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:39 +0300 |
commit | a6a92afe03e02795227d2641b49819b687f088f8 (patch) | |
tree | f6984a1d27d5a7ec88a6fdd6e20cd5b7693b6ece /contrib/restricted/libffi/src/x86/win64.S | |
parent | c6dc8b8bd530985bc4cce0137e9a5de32f1087cb (diff) | |
download | ydb-a6a92afe03e02795227d2641b49819b687f088f8.tar.gz |
Restoring authorship annotation for Mikhail Borisov <borisov.mikhail@gmail.com>. Commit 1 of 2.
Diffstat (limited to 'contrib/restricted/libffi/src/x86/win64.S')
-rw-r--r-- | contrib/restricted/libffi/src/x86/win64.S | 460 |
1 files changed, 230 insertions, 230 deletions
diff --git a/contrib/restricted/libffi/src/x86/win64.S b/contrib/restricted/libffi/src/x86/win64.S index 2c334c82f9..13b89acbdd 100644 --- a/contrib/restricted/libffi/src/x86/win64.S +++ b/contrib/restricted/libffi/src/x86/win64.S @@ -1,237 +1,237 @@ -#ifdef __x86_64__ +#ifdef __x86_64__ #define LIBFFI_ASM #include <fficonfig.h> #include <ffi.h> -#include <ffi_cfi.h> -#include "asmnames.h" - -#if defined(HAVE_AS_CFI_PSEUDO_OP) - .cfi_sections .debug_frame -#endif - -#ifdef X86_WIN64 -#define SEH(...) __VA_ARGS__ -#define arg0 %rcx -#define arg1 %rdx -#define arg2 %r8 -#define arg3 %r9 +#include <ffi_cfi.h> +#include "asmnames.h" + +#if defined(HAVE_AS_CFI_PSEUDO_OP) + .cfi_sections .debug_frame +#endif + +#ifdef X86_WIN64 +#define SEH(...) __VA_ARGS__ +#define arg0 %rcx +#define arg1 %rdx +#define arg2 %r8 +#define arg3 %r9 #else -#define SEH(...) -#define arg0 %rdi -#define arg1 %rsi -#define arg2 %rdx -#define arg3 %rcx -#endif - -/* This macro allows the safe creation of jump tables without an - actual table. The entry points into the table are all 8 bytes. - The use of ORG asserts that we're at the correct location. */ -/* ??? The clang assembler doesn't handle .org with symbolic expressions. */ -#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) -# define E(BASE, X) .balign 8 +#define SEH(...) +#define arg0 %rdi +#define arg1 %rsi +#define arg2 %rdx +#define arg3 %rcx +#endif + +/* This macro allows the safe creation of jump tables without an + actual table. The entry points into the table are all 8 bytes. + The use of ORG asserts that we're at the correct location. */ +/* ??? The clang assembler doesn't handle .org with symbolic expressions. */ +#if defined(__clang__) || defined(__APPLE__) || (defined (__sun__) && defined(__svr4__)) +# define E(BASE, X) .balign 8 #else -# define E(BASE, X) .balign 8; .org BASE + X * 8 +# define E(BASE, X) .balign 8; .org BASE + X * 8 #endif - .text - -/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10) - - Bit o trickiness here -- FRAME is the base of the stack frame - for this function. This has been allocated by ffi_call. We also - deallocate some of the stack that has been alloca'd. */ - - .align 8 - .globl C(ffi_call_win64) - FFI_HIDDEN(C(ffi_call_win64)) - - SEH(.seh_proc ffi_call_win64) -C(ffi_call_win64): - cfi_startproc - /* Set up the local stack frame and install it in rbp/rsp. */ - movq (%rsp), %rax - movq %rbp, (arg1) - movq %rax, 8(arg1) - movq arg1, %rbp - cfi_def_cfa(%rbp, 16) - cfi_rel_offset(%rbp, 0) - SEH(.seh_pushreg %rbp) - SEH(.seh_setframe %rbp, 0) - SEH(.seh_endprologue) - movq arg0, %rsp - - movq arg2, %r10 - - /* Load all slots into both general and xmm registers. */ - movq (%rsp), %rcx - movsd (%rsp), %xmm0 - movq 8(%rsp), %rdx - movsd 8(%rsp), %xmm1 - movq 16(%rsp), %r8 - movsd 16(%rsp), %xmm2 - movq 24(%rsp), %r9 - movsd 24(%rsp), %xmm3 - - call *16(%rbp) - - movl 24(%rbp), %ecx - movq 32(%rbp), %r8 - leaq 0f(%rip), %r10 - cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx - leaq (%r10, %rcx, 8), %r10 - ja 99f - jmp *%r10 - -/* Below, we're space constrained most of the time. Thus we eschew the - modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */ -.macro epilogue - leaveq - cfi_remember_state - cfi_def_cfa(%rsp, 8) - cfi_restore(%rbp) - ret - cfi_restore_state -.endm - - .align 8 -0: -E(0b, FFI_TYPE_VOID) - epilogue -E(0b, FFI_TYPE_INT) - movslq %eax, %rax - movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_FLOAT) - movss %xmm0, (%r8) - epilogue -E(0b, FFI_TYPE_DOUBLE) - movsd %xmm0, (%r8) - epilogue -E(0b, FFI_TYPE_LONGDOUBLE) - call PLT(C(abort)) -E(0b, FFI_TYPE_UINT8) - movzbl %al, %eax - movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_SINT8) - movsbq %al, %rax - jmp 98f -E(0b, FFI_TYPE_UINT16) - movzwl %ax, %eax - movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_SINT16) - movswq %ax, %rax - jmp 98f -E(0b, FFI_TYPE_UINT32) - movl %eax, %eax - movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_SINT32) - movslq %eax, %rax - movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_UINT64) -98: movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_SINT64) - movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_STRUCT) - epilogue -E(0b, FFI_TYPE_POINTER) - movq %rax, (%r8) - epilogue -E(0b, FFI_TYPE_COMPLEX) - call PLT(C(abort)) -E(0b, FFI_TYPE_SMALL_STRUCT_1B) - movb %al, (%r8) - epilogue -E(0b, FFI_TYPE_SMALL_STRUCT_2B) - movw %ax, (%r8) - epilogue -E(0b, FFI_TYPE_SMALL_STRUCT_4B) - movl %eax, (%r8) - epilogue - - .align 8 -99: call PLT(C(abort)) - - epilogue - - cfi_endproc - SEH(.seh_endproc) - - -/* 32 bytes of outgoing register stack space, 8 bytes of alignment, - 16 bytes of result, 32 bytes of xmm registers. */ -#define ffi_clo_FS (32+8+16+32) -#define ffi_clo_OFF_R (32+8) -#define ffi_clo_OFF_X (32+8+16) - - .align 8 - .globl C(ffi_go_closure_win64) - FFI_HIDDEN(C(ffi_go_closure_win64)) - - SEH(.seh_proc ffi_go_closure_win64) -C(ffi_go_closure_win64): - cfi_startproc - /* Save all integer arguments into the incoming reg stack space. */ - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %r8, 24(%rsp) - movq %r9, 32(%rsp) - - movq 8(%r10), %rcx /* load cif */ - movq 16(%r10), %rdx /* load fun */ - movq %r10, %r8 /* closure is user_data */ - jmp 0f - cfi_endproc - SEH(.seh_endproc) - - .align 8 - .globl C(ffi_closure_win64) - FFI_HIDDEN(C(ffi_closure_win64)) - - SEH(.seh_proc ffi_closure_win64) -C(ffi_closure_win64): - cfi_startproc - /* Save all integer arguments into the incoming reg stack space. */ - movq %rcx, 8(%rsp) - movq %rdx, 16(%rsp) - movq %r8, 24(%rsp) - movq %r9, 32(%rsp) - - movq FFI_TRAMPOLINE_SIZE(%r10), %rcx /* load cif */ - movq FFI_TRAMPOLINE_SIZE+8(%r10), %rdx /* load fun */ - movq FFI_TRAMPOLINE_SIZE+16(%r10), %r8 /* load user_data */ -0: - subq $ffi_clo_FS, %rsp - cfi_adjust_cfa_offset(ffi_clo_FS) - SEH(.seh_stackalloc ffi_clo_FS) - SEH(.seh_endprologue) - - /* Save all sse arguments into the stack frame. */ - movsd %xmm0, ffi_clo_OFF_X(%rsp) - movsd %xmm1, ffi_clo_OFF_X+8(%rsp) - movsd %xmm2, ffi_clo_OFF_X+16(%rsp) - movsd %xmm3, ffi_clo_OFF_X+24(%rsp) - - leaq ffi_clo_OFF_R(%rsp), %r9 - call PLT(C(ffi_closure_win64_inner)) - - /* Load the result into both possible result registers. */ - movq ffi_clo_OFF_R(%rsp), %rax - movsd ffi_clo_OFF_R(%rsp), %xmm0 - - addq $ffi_clo_FS, %rsp - cfi_adjust_cfa_offset(-ffi_clo_FS) - ret - - cfi_endproc - SEH(.seh_endproc) -#endif /* __x86_64__ */ - -#if defined __ELF__ && defined __linux__ - .section .note.GNU-stack,"",@progbits -#endif + .text + +/* ffi_call_win64 (void *stack, struct win64_call_frame *frame, void *r10) + + Bit o trickiness here -- FRAME is the base of the stack frame + for this function. This has been allocated by ffi_call. We also + deallocate some of the stack that has been alloca'd. */ + + .align 8 + .globl C(ffi_call_win64) + FFI_HIDDEN(C(ffi_call_win64)) + + SEH(.seh_proc ffi_call_win64) +C(ffi_call_win64): + cfi_startproc + /* Set up the local stack frame and install it in rbp/rsp. */ + movq (%rsp), %rax + movq %rbp, (arg1) + movq %rax, 8(arg1) + movq arg1, %rbp + cfi_def_cfa(%rbp, 16) + cfi_rel_offset(%rbp, 0) + SEH(.seh_pushreg %rbp) + SEH(.seh_setframe %rbp, 0) + SEH(.seh_endprologue) + movq arg0, %rsp + + movq arg2, %r10 + + /* Load all slots into both general and xmm registers. */ + movq (%rsp), %rcx + movsd (%rsp), %xmm0 + movq 8(%rsp), %rdx + movsd 8(%rsp), %xmm1 + movq 16(%rsp), %r8 + movsd 16(%rsp), %xmm2 + movq 24(%rsp), %r9 + movsd 24(%rsp), %xmm3 + + call *16(%rbp) + + movl 24(%rbp), %ecx + movq 32(%rbp), %r8 + leaq 0f(%rip), %r10 + cmpl $FFI_TYPE_SMALL_STRUCT_4B, %ecx + leaq (%r10, %rcx, 8), %r10 + ja 99f + jmp *%r10 + +/* Below, we're space constrained most of the time. Thus we eschew the + modern "mov, pop, ret" sequence (5 bytes) for "leave, ret" (2 bytes). */ +.macro epilogue + leaveq + cfi_remember_state + cfi_def_cfa(%rsp, 8) + cfi_restore(%rbp) + ret + cfi_restore_state +.endm + + .align 8 +0: +E(0b, FFI_TYPE_VOID) + epilogue +E(0b, FFI_TYPE_INT) + movslq %eax, %rax + movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_FLOAT) + movss %xmm0, (%r8) + epilogue +E(0b, FFI_TYPE_DOUBLE) + movsd %xmm0, (%r8) + epilogue +E(0b, FFI_TYPE_LONGDOUBLE) + call PLT(C(abort)) +E(0b, FFI_TYPE_UINT8) + movzbl %al, %eax + movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_SINT8) + movsbq %al, %rax + jmp 98f +E(0b, FFI_TYPE_UINT16) + movzwl %ax, %eax + movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_SINT16) + movswq %ax, %rax + jmp 98f +E(0b, FFI_TYPE_UINT32) + movl %eax, %eax + movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_SINT32) + movslq %eax, %rax + movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_UINT64) +98: movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_SINT64) + movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_STRUCT) + epilogue +E(0b, FFI_TYPE_POINTER) + movq %rax, (%r8) + epilogue +E(0b, FFI_TYPE_COMPLEX) + call PLT(C(abort)) +E(0b, FFI_TYPE_SMALL_STRUCT_1B) + movb %al, (%r8) + epilogue +E(0b, FFI_TYPE_SMALL_STRUCT_2B) + movw %ax, (%r8) + epilogue +E(0b, FFI_TYPE_SMALL_STRUCT_4B) + movl %eax, (%r8) + epilogue + + .align 8 +99: call PLT(C(abort)) + + epilogue + + cfi_endproc + SEH(.seh_endproc) + + +/* 32 bytes of outgoing register stack space, 8 bytes of alignment, + 16 bytes of result, 32 bytes of xmm registers. */ +#define ffi_clo_FS (32+8+16+32) +#define ffi_clo_OFF_R (32+8) +#define ffi_clo_OFF_X (32+8+16) + + .align 8 + .globl C(ffi_go_closure_win64) + FFI_HIDDEN(C(ffi_go_closure_win64)) + + SEH(.seh_proc ffi_go_closure_win64) +C(ffi_go_closure_win64): + cfi_startproc + /* Save all integer arguments into the incoming reg stack space. */ + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %r8, 24(%rsp) + movq %r9, 32(%rsp) + + movq 8(%r10), %rcx /* load cif */ + movq 16(%r10), %rdx /* load fun */ + movq %r10, %r8 /* closure is user_data */ + jmp 0f + cfi_endproc + SEH(.seh_endproc) + + .align 8 + .globl C(ffi_closure_win64) + FFI_HIDDEN(C(ffi_closure_win64)) + + SEH(.seh_proc ffi_closure_win64) +C(ffi_closure_win64): + cfi_startproc + /* Save all integer arguments into the incoming reg stack space. */ + movq %rcx, 8(%rsp) + movq %rdx, 16(%rsp) + movq %r8, 24(%rsp) + movq %r9, 32(%rsp) + + movq FFI_TRAMPOLINE_SIZE(%r10), %rcx /* load cif */ + movq FFI_TRAMPOLINE_SIZE+8(%r10), %rdx /* load fun */ + movq FFI_TRAMPOLINE_SIZE+16(%r10), %r8 /* load user_data */ +0: + subq $ffi_clo_FS, %rsp + cfi_adjust_cfa_offset(ffi_clo_FS) + SEH(.seh_stackalloc ffi_clo_FS) + SEH(.seh_endprologue) + + /* Save all sse arguments into the stack frame. */ + movsd %xmm0, ffi_clo_OFF_X(%rsp) + movsd %xmm1, ffi_clo_OFF_X+8(%rsp) + movsd %xmm2, ffi_clo_OFF_X+16(%rsp) + movsd %xmm3, ffi_clo_OFF_X+24(%rsp) + + leaq ffi_clo_OFF_R(%rsp), %r9 + call PLT(C(ffi_closure_win64_inner)) + + /* Load the result into both possible result registers. */ + movq ffi_clo_OFF_R(%rsp), %rax + movsd ffi_clo_OFF_R(%rsp), %xmm0 + + addq $ffi_clo_FS, %rsp + cfi_adjust_cfa_offset(-ffi_clo_FS) + ret + + cfi_endproc + SEH(.seh_endproc) +#endif /* __x86_64__ */ + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif |