diff options
author | Maxim Yurchuk <maxim-yurchuk@ydb.tech> | 2024-10-18 20:31:38 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-10-18 20:31:38 +0300 |
commit | 2a74bac2d2d3bccb4e10120f1ead805640ec9dd0 (patch) | |
tree | 047e4818ced5aaf73f58517629e5260b5291f9f0 /contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm | |
parent | 2d9656823e9521d8c29ea4c9a1d0eab78391abfc (diff) | |
parent | 3d834a1923bbf9403cd4a448e7f32b670aa4124f (diff) | |
download | ydb-2a74bac2d2d3bccb4e10120f1ead805640ec9dd0.tar.gz |
Merge pull request #10502 from ydb-platform/mergelibs-241016-1210
Library import 241016-1210
Diffstat (limited to 'contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm')
-rw-r--r-- | contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm | 41 |
1 file changed, 13 insertions, 28 deletions
```diff
diff --git a/contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm b/contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm
index 16a90eb2af..fcd919367d 100644
--- a/contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm
+++ b/contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm
@@ -44,8 +44,6 @@
 %define arg5 r9
 %define tmp r11
- %define tmp.w r11d
- %define tmp.b r11b
 %define tmp2 r10
 %define tmp3 r13 ; must be saved and restored
 %define tmp4 r12 ; must be saved and restored
@@ -53,7 +51,7 @@
 %define PS 8
 %define LOG_PS 3
- %define func(x) x:
+ %define func(x) x: endbranch
 %macro FUNC_SAVE 0
 push r12
 push r13
@@ -73,15 +71,13 @@
 %define arg4 r12 ; must be saved, loaded and restored
 %define arg5 r15 ; must be saved and restored
 %define tmp r11
- %define tmp.w r11d
- %define tmp.b r11b
 %define tmp2 r10
 %define tmp3 r13 ; must be saved and restored
 %define tmp4 r14 ; must be saved and restored
 %define return rax
 %define PS 8
 %define LOG_PS 3
- %define stack_size 9*16 + 5*8 ; must be an odd multiple of 8
+ %define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
 %define arg(x) [rsp + stack_size + PS + PS*x]
 %define func(x) proc_frame x
@@ -93,13 +89,10 @@
 vmovdqa [rsp + 3*16], xmm9
 vmovdqa [rsp + 4*16], xmm10
 vmovdqa [rsp + 5*16], xmm11
- vmovdqa [rsp + 6*16], xmm12
- vmovdqa [rsp + 7*16], xmm13
- vmovdqa [rsp + 8*16], xmm14
- save_reg r12, 9*16 + 0*8
- save_reg r13, 9*16 + 1*8
- save_reg r14, 9*16 + 2*8
- save_reg r15, 9*16 + 3*8
+ save_reg r12, 6*16 + 0*8
+ save_reg r13, 6*16 + 1*8
+ save_reg r14, 6*16 + 2*8
+ save_reg r15, 6*16 + 3*8
 end_prolog
 mov arg4, arg(4)
 %endmacro
@@ -111,13 +104,10 @@
 vmovdqa xmm9, [rsp + 3*16]
 vmovdqa xmm10, [rsp + 4*16]
 vmovdqa xmm11, [rsp + 5*16]
- vmovdqa xmm12, [rsp + 6*16]
- vmovdqa xmm13, [rsp + 7*16]
- vmovdqa xmm14, [rsp + 8*16]
- mov r12, [rsp + 9*16 + 0*8]
- mov r13, [rsp + 9*16 + 1*8]
- mov r14, [rsp + 9*16 + 2*8]
- mov r15, [rsp + 9*16 + 3*8]
+ mov r12, [rsp + 6*16 + 0*8]
+ mov r13, [rsp + 6*16 + 1*8]
+ mov r14, [rsp + 6*16 + 2*8]
+ mov r15, [rsp + 6*16 + 3*8]
 add rsp, stack_size
 %endmacro
 %endif
@@ -142,8 +132,8 @@
 %else
 ;;; Use Non-temporal load/stor
 %ifdef NO_NT_LDST
- %define XLDR vmovdqa
- %define XSTR vmovdqa
+ %define XLDR vmovdqa64
+ %define XSTR vmovdqa64
 %else
 %define XLDR vmovntdqa
 %define XSTR vmovntdq
@@ -173,13 +163,8 @@ default rel
 section .text
 align 16
-global gf_3vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION
+global gf_3vect_dot_prod_avx512, function
 func(gf_3vect_dot_prod_avx512)
-%ifidn __OUTPUT_FORMAT__, macho64
-global _gf_3vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION
-func(_gf_3vect_dot_prod_avx512)
-%endif
-
 FUNC_SAVE
 sub len, 64
 jl .return_fail
```