about | summary | refs | log | tree | commit | diff | stats
path: root/contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm
diff options
context:
space:
mode:
author: Maxim Yurchuk <maxim-yurchuk@ydb.tech>, 2024-10-18 20:31:38 +0300
committer: GitHub <noreply@github.com>, 2024-10-18 20:31:38 +0300
commit: 2a74bac2d2d3bccb4e10120f1ead805640ec9dd0 (patch)
tree: 047e4818ced5aaf73f58517629e5260b5291f9f0 /contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm
parent: 2d9656823e9521d8c29ea4c9a1d0eab78391abfc (diff)
parent: 3d834a1923bbf9403cd4a448e7f32b670aa4124f (diff)
download: ydb-2a74bac2d2d3bccb4e10120f1ead805640ec9dd0.tar.gz
Merge pull request #10502 from ydb-platform/mergelibs-241016-1210
Library import 241016-1210
Diffstat (limited to 'contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm')
-rw-r--r-- contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm | 41
1 files changed, 13 insertions, 28 deletions
diff --git a/contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm b/contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm
index 16a90eb2af..fcd919367d 100644
--- a/contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm
+++ b/contrib/libs/isa-l/erasure_code/gf_3vect_dot_prod_avx512.asm
@@ -44,8 +44,6 @@
%define arg5 r9
%define tmp r11
- %define tmp.w r11d
- %define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r12 ; must be saved and restored
@@ -53,7 +51,7 @@
%define PS 8
%define LOG_PS 3
- %define func(x) x:
+ %define func(x) x: endbranch
%macro FUNC_SAVE 0
push r12
push r13
@@ -73,15 +71,13 @@
%define arg4 r12 ; must be saved, loaded and restored
%define arg5 r15 ; must be saved and restored
%define tmp r11
- %define tmp.w r11d
- %define tmp.b r11b
%define tmp2 r10
%define tmp3 r13 ; must be saved and restored
%define tmp4 r14 ; must be saved and restored
%define return rax
%define PS 8
%define LOG_PS 3
- %define stack_size 9*16 + 5*8 ; must be an odd multiple of 8
+ %define stack_size 6*16 + 5*8 ; must be an odd multiple of 8
%define arg(x) [rsp + stack_size + PS + PS*x]
%define func(x) proc_frame x
@@ -93,13 +89,10 @@
vmovdqa [rsp + 3*16], xmm9
vmovdqa [rsp + 4*16], xmm10
vmovdqa [rsp + 5*16], xmm11
- vmovdqa [rsp + 6*16], xmm12
- vmovdqa [rsp + 7*16], xmm13
- vmovdqa [rsp + 8*16], xmm14
- save_reg r12, 9*16 + 0*8
- save_reg r13, 9*16 + 1*8
- save_reg r14, 9*16 + 2*8
- save_reg r15, 9*16 + 3*8
+ save_reg r12, 6*16 + 0*8
+ save_reg r13, 6*16 + 1*8
+ save_reg r14, 6*16 + 2*8
+ save_reg r15, 6*16 + 3*8
end_prolog
mov arg4, arg(4)
%endmacro
@@ -111,13 +104,10 @@
vmovdqa xmm9, [rsp + 3*16]
vmovdqa xmm10, [rsp + 4*16]
vmovdqa xmm11, [rsp + 5*16]
- vmovdqa xmm12, [rsp + 6*16]
- vmovdqa xmm13, [rsp + 7*16]
- vmovdqa xmm14, [rsp + 8*16]
- mov r12, [rsp + 9*16 + 0*8]
- mov r13, [rsp + 9*16 + 1*8]
- mov r14, [rsp + 9*16 + 2*8]
- mov r15, [rsp + 9*16 + 3*8]
+ mov r12, [rsp + 6*16 + 0*8]
+ mov r13, [rsp + 6*16 + 1*8]
+ mov r14, [rsp + 6*16 + 2*8]
+ mov r15, [rsp + 6*16 + 3*8]
add rsp, stack_size
%endmacro
%endif
@@ -142,8 +132,8 @@
%else
;;; Use Non-temporal load/stor
%ifdef NO_NT_LDST
- %define XLDR vmovdqa
- %define XSTR vmovdqa
+ %define XLDR vmovdqa64
+ %define XSTR vmovdqa64
%else
%define XLDR vmovntdqa
%define XSTR vmovntdq
@@ -173,13 +163,8 @@ default rel
section .text
align 16
-global gf_3vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION
+global gf_3vect_dot_prod_avx512, function
func(gf_3vect_dot_prod_avx512)
-%ifidn __OUTPUT_FORMAT__, macho64
-global _gf_3vect_dot_prod_avx512:ISAL_SYM_TYPE_FUNCTION
-func(_gf_3vect_dot_prod_avx512)
-%endif
-
FUNC_SAVE
sub len, 64
jl .return_fail