about summary refs log tree commit diff stats
path: root/libavcodec/x86
diff options
context:
space:
mode:
author Henrik Gramner <henrik@gramner.com> 2024-03-16 16:39:37 +0100
committer Henrik Gramner <henrik@gramner.com> 2024-03-24 14:53:57 +0100
commit afa471d0efed1df5dca6eeeb2fcdd211ae4cad4e (patch)
tree db94cecfd1fe1cb5951773461f0d7d93526f0d10 /libavcodec/x86
parent 782c4df28dc91a2b5160fe7a35ad18541e8c5029 (diff)
download ffmpeg-afa471d0efed1df5dca6eeeb2fcdd211ae4cad4e.tar.gz
x86: Update x86inc.asm
Make things up-to-date with upstream. https://code.videolan.org/videolan/x86inc.asm
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- libavcodec/x86/celt_pvq_search.asm 4
-rw-r--r-- libavcodec/x86/h264_chromamc.asm 10
-rw-r--r-- libavcodec/x86/h264_idct.asm 6
-rw-r--r-- libavcodec/x86/h264_intrapred.asm 15
-rw-r--r-- libavcodec/x86/hevc_mc.asm 16
-rw-r--r-- libavcodec/x86/rv40dsp.asm 12
-rw-r--r-- libavcodec/x86/sbrdsp.asm 6
-rw-r--r-- libavcodec/x86/vp8dsp.asm 30
-rw-r--r-- libavcodec/x86/vp9itxfm.asm 2
-rw-r--r-- libavcodec/x86/vp9itxfm_16bpp.asm 12
10 files changed, 57 insertions, 56 deletions
diff --git a/libavcodec/x86/celt_pvq_search.asm b/libavcodec/x86/celt_pvq_search.asm
index 5c1e6d6174..e9bff02650 100644
--- a/libavcodec/x86/celt_pvq_search.asm
+++ b/libavcodec/x86/celt_pvq_search.asm
@@ -74,7 +74,7 @@ SECTION .text
; "movaps m0, [r5 + r4]" if PIC is enabled
; "movaps m0, [constant_name + r4]" if texrel are used
%macro SET_PIC_BASE 3; reg, const_label
-%ifdef PIC
+%if PIC
%{1} %2, [%3] ; lea r5, [rip+const]
%define pic_base_%3 %2
%else
@@ -195,7 +195,7 @@ align 16
; PIC relative addressing. Use this
; to count it in cglobal
;
-%ifdef PIC
+%if PIC
%define num_pic_regs 1
%else
%define num_pic_regs 0
diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm
index e70bc492b2..ec6288d48e 100644
--- a/libavcodec/x86/h264_chromamc.asm
+++ b/libavcodec/x86/h264_chromamc.asm
@@ -91,7 +91,7 @@ SECTION .text
%macro chroma_mc8_mmx_func 2-3
%ifidn %2, rv40
-%ifdef PIC
+%if PIC
%define rnd_1d_rv40 r8
%define rnd_2d_rv40 r8
%define extra_regs 2
@@ -147,7 +147,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
or r4d, r5d ; x + y
%ifidn %2, rv40
-%ifdef PIC
+%if PIC
lea r8, [rnd_rv40_1d_tbl]
%endif
%if ARCH_X86_64 == 0
@@ -198,7 +198,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
movd m4, r4d ; x
movd m6, r5d ; y
%ifidn %2, rv40
-%ifdef PIC
+%if PIC
lea r8, [rnd_rv40_2d_tbl]
%endif
%if ARCH_X86_64 == 0
@@ -283,7 +283,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
%macro chroma_mc4_mmx_func 2
%define extra_regs 0
%ifidn %2, rv40
-%ifdef PIC
+%if PIC
%define extra_regs 1
%endif ; PIC
%endif ; rv40
@@ -301,7 +301,7 @@ cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0
psubw m5, m3
%ifidn %2, rv40
-%ifdef PIC
+%if PIC
lea r6, [rnd_rv40_2d_tbl]
%define rnd_2d_rv40 r6
%else
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index 1f86e51d82..b29ddde200 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -42,7 +42,7 @@ scan8_mem: db 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8
db 6+11*8, 7+11*8, 6+12*8, 7+12*8
db 4+13*8, 5+13*8, 4+14*8, 5+14*8
db 6+13*8, 7+13*8, 6+14*8, 7+14*8
-%ifdef PIC
+%if PIC
%define npicregs 1
%define scan8 picregq
%else
@@ -322,7 +322,7 @@ INIT_XMM sse2
cglobal h264_idct8_add4_8, 5, 8 + npicregs, 10, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
movsxdifnidn r3, r3d
xor r5, r5
-%ifdef PIC
+%if PIC
lea picregq, [scan8_mem]
%endif
.nextblock:
@@ -398,7 +398,7 @@ h264_idct_add8_mmx_plane:
cglobal h264_idct_add8_422_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
; dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
movsxdifnidn r3, r3d
-%ifdef PIC
+%if PIC
lea picregq, [scan8_mem]
%endif
%if ARCH_X86_64
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm
index ea46bc595d..a8a630dbe6 100644
--- a/libavcodec/x86/h264_intrapred.asm
+++ b/libavcodec/x86/h264_intrapred.asm
@@ -1311,10 +1311,7 @@ PRED8x8L_DOWN_RIGHT
;-----------------------------------------------------------------------------
%macro PRED8x8L_VERTICAL_RIGHT 0
-cglobal pred8x8l_vertical_right_8, 4,5,7
- ; manually spill XMM registers for Win64 because
- ; the code here is initialized with INIT_MMX
- WIN64_SPILL_XMM 7
+cglobal pred8x8l_vertical_right_8, 4,5,6
sub r0, r3
lea r4, [r0+r3*2]
movq mm0, [r0+r3*1-8]
@@ -1384,7 +1381,6 @@ cglobal pred8x8l_vertical_right_8, 4,5,7
movq2dq xmm4, mm6
pslldq xmm4, 8
por xmm0, xmm4
- movdqa xmm6, [pw_ff00]
movdqa xmm1, xmm0
lea r2, [r1+r3*2]
movdqa xmm2, xmm0
@@ -1394,15 +1390,16 @@ cglobal pred8x8l_vertical_right_8, 4,5,7
pavgb xmm2, xmm0
INIT_XMM cpuname
PRED4x4_LOWPASS xmm4, xmm3, xmm1, xmm0, xmm5
- pandn xmm6, xmm4
+ movdqa xmm0, [pw_ff00]
+ pandn xmm0, xmm4
movdqa xmm5, xmm4
psrlw xmm4, 8
- packuswb xmm6, xmm4
- movhlps xmm4, xmm6
+ packuswb xmm0, xmm4
+ movhlps xmm4, xmm0
movhps [r0+r3*2], xmm5
movhps [r0+r3*1], xmm2
psrldq xmm5, 4
- movss xmm5, xmm6
+ movss xmm5, xmm0
psrldq xmm2, 4
movss xmm2, xmm4
lea r0, [r2+r3*2]
diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm
index 5489701e44..b3b589b271 100644
--- a/libavcodec/x86/hevc_mc.asm
+++ b/libavcodec/x86/hevc_mc.asm
@@ -180,7 +180,7 @@ SECTION .text
%macro EPEL_FILTER 5 ; bit depth, filter index, xmma, xmmb, gprtmp
%if cpuflag(avx2)
%assign %%offset 32
-%ifdef PIC
+%if PIC
lea %5q, [hevc_epel_filters_avx2_%1]
%define FILTER %5q
%else
@@ -188,7 +188,7 @@ SECTION .text
%endif
%else
%assign %%offset 16
-%ifdef PIC
+%if PIC
lea %5q, [hevc_epel_filters_sse4_%1]
%define FILTER %5q
%else
@@ -216,7 +216,7 @@ SECTION .text
%define %%table hevc_epel_filters_sse4_%1
%endif
-%ifdef PIC
+%if PIC
lea r3srcq, [%%table]
%define FILTER r3srcq
%else
@@ -234,7 +234,7 @@ SECTION .text
%else
%define %%table hevc_epel_filters_sse4_10
%endif
-%ifdef PIC
+%if PIC
lea r3srcq, [%%table]
%define FILTER r3srcq
%else
@@ -257,7 +257,7 @@ SECTION .text
%define %%table hevc_qpel_filters_sse4_%1
%endif
-%ifdef PIC
+%if PIC
lea rfilterq, [%%table]
%else
%define rfilterq %%table
@@ -576,7 +576,7 @@ SECTION .text
%define %%table hevc_qpel_filters_sse4_%2
%endif
-%ifdef PIC
+%if PIC
lea rfilterq, [%%table]
%else
%define rfilterq %%table
@@ -1288,7 +1288,7 @@ HEVC_PUT_HEVC_QPEL_HV 16, 10
%assign %%offset 4
dec %2q
shl %2q, 3
-%ifdef PIC
+%if PIC
lea %5q, [%%table]
%define FILTER %5q
%else
@@ -1365,7 +1365,7 @@ cglobal hevc_put_hevc_qpel_hv%1_%2, 6, 7, 27, dst, src, srcstride, height, mx, m
sub myq, 1
shl myq, 5
%define %%table hevc_qpel_filters_avx512icl_v_%1
-%ifdef PIC
+%if PIC
lea tmpq, [%%table]
%define FILTER tmpq
%else
diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm
index e3c37dd297..dc520dbeb4 100644
--- a/libavcodec/x86/rv40dsp.asm
+++ b/libavcodec/x86/rv40dsp.asm
@@ -51,7 +51,7 @@ sixtap_filter_v_m: times 8 dw 1
times 8 dw 20
times 8 dw 52
-%ifdef PIC
+%if PIC
%define sixtap_filter_hw picregq
%define sixtap_filter_hb picregq
%define sixtap_filter_v picregq
@@ -84,7 +84,7 @@ SECTION .text
%if WIN64
movsxd %1q, %1d
%endif
-%ifdef PIC
+%if PIC
add %1q, picregq
%else
add %1q, %2
@@ -104,7 +104,7 @@ SECTION .text
%macro FILTER_V 1
cglobal %1_rv40_qpel_v, 6,6+npicregs,12, dst, dststride, src, srcstride, height, my, picreg
-%ifdef PIC
+%if PIC
lea picregq, [sixtap_filter_v_m]
%endif
pxor m7, m7
@@ -175,7 +175,7 @@ cglobal %1_rv40_qpel_v, 6,6+npicregs,12, dst, dststride, src, srcstride, height,
%macro FILTER_H 1
cglobal %1_rv40_qpel_h, 6, 6+npicregs, 12, dst, dststride, src, srcstride, height, mx, picreg
-%ifdef PIC
+%if PIC
lea picregq, [sixtap_filter_v_m]
%endif
pxor m7, m7
@@ -238,7 +238,7 @@ FILTER_V avg
%macro FILTER_SSSE3 1
cglobal %1_rv40_qpel_v, 6,6+npicregs,8, dst, dststride, src, srcstride, height, my, picreg
-%ifdef PIC
+%if PIC
lea picregq, [sixtap_filter_hb_m]
%endif
@@ -283,7 +283,7 @@ cglobal %1_rv40_qpel_v, 6,6+npicregs,8, dst, dststride, src, srcstride, height,
RET
cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height, mx, picreg
-%ifdef PIC
+%if PIC
lea picregq, [sixtap_filter_hb_m]
%endif
mova m3, [filter_h6_shuf2]
diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm
index d02f70d704..63e9f0d33a 100644
--- a/libavcodec/x86/sbrdsp.asm
+++ b/libavcodec/x86/sbrdsp.asm
@@ -308,7 +308,7 @@ cglobal sbr_qmf_pre_shuffle, 1,4,6,z
movq [r2q], m2
RET
-%ifdef PIC
+%if PIC
%define NREGS 1
%if UNIX64
%define NOISE_TABLE r6q ; r5q is m_max
@@ -321,7 +321,7 @@ cglobal sbr_qmf_pre_shuffle, 1,4,6,z
%endif
%macro LOAD_NST 1
-%ifdef PIC
+%if PIC
lea NOISE_TABLE, [%1]
mova m0, [kxq + NOISE_TABLE]
%else
@@ -371,7 +371,7 @@ apply_noise_main:
movsxdifnidn noiseq, noised
dec noiseq
shl countd, 2
-%ifdef PIC
+%if PIC
lea NOISE_TABLE, [sbr_noise_table]
%endif
lea Yq, [Yq + 2*countq]
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 6ac5a7721b..231c21ea0d 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -114,7 +114,7 @@ bilinear_filter_vb_m: times 8 db 7, 1
times 8 db 2, 6
times 8 db 1, 7
-%ifdef PIC
+%if PIC
%define fourtap_filter_hw picregq
%define sixtap_filter_hw picregq
%define fourtap_filter_hb picregq
@@ -166,7 +166,7 @@ cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, h
lea mxd, [mxq*3]
mova m3, [filter_h6_shuf2]
mova m4, [filter_h6_shuf3]
-%ifdef PIC
+%if PIC
lea picregq, [sixtap_filter_hb_m]
%endif
mova m5, [sixtap_filter_hb+mxq*8-48] ; set up 6tap filter in bytes
@@ -207,7 +207,7 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h
mova m2, [pw_256]
mova m3, [filter_h2_shuf]
mova m4, [filter_h4_shuf]
-%ifdef PIC
+%if PIC
lea picregq, [fourtap_filter_hb_m]
%endif
mova m5, [fourtap_filter_hb+mxq-16] ; set up 4tap filter in bytes
@@ -234,7 +234,7 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h
cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
-%ifdef PIC
+%if PIC
lea picregq, [fourtap_filter_hb_m]
%endif
mova m5, [fourtap_filter_hb+myq-16]
@@ -272,7 +272,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr
cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
lea myd, [myq*3]
-%ifdef PIC
+%if PIC
lea picregq, [sixtap_filter_hb_m]
%endif
lea myq, [sixtap_filter_hb+myq*8]
@@ -326,7 +326,7 @@ FILTER_SSSE3 8
INIT_MMX mmxext
cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4
-%ifdef PIC
+%if PIC
lea picregq, [fourtap_filter_hw_m]
%endif
movq mm4, [fourtap_filter_hw+mxq-16] ; set up 4tap filter in words
@@ -374,7 +374,7 @@ cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
INIT_MMX mmxext
cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg
lea mxd, [mxq*3]
-%ifdef PIC
+%if PIC
lea picregq, [sixtap_filter_hw_m]
%endif
movq mm4, [sixtap_filter_hw+mxq*8-48] ; set up 4tap filter in words
@@ -431,7 +431,7 @@ cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
INIT_XMM sse2
cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 5
-%ifdef PIC
+%if PIC
lea picregq, [fourtap_filter_v_m]
%endif
lea mxq, [fourtap_filter_v+mxq-32]
@@ -480,7 +480,7 @@ INIT_XMM sse2
cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, height, mx, picreg
lea mxd, [mxq*3]
shl mxd, 4
-%ifdef PIC
+%if PIC
lea picregq, [sixtap_filter_v_m]
%endif
lea mxq, [sixtap_filter_v+mxq-96]
@@ -543,7 +543,7 @@ cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, h
; 4x4 block, V-only 4-tap filter
cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
shl myd, 5
-%ifdef PIC
+%if PIC
lea picregq, [fourtap_filter_v_m]
%endif
lea myq, [fourtap_filter_v+myq-32]
@@ -597,7 +597,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr
cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
lea myq, [myq*3]
-%ifdef PIC
+%if PIC
lea picregq, [sixtap_filter_v_m]
%endif
lea myq, [sixtap_filter_v+myq-96]
@@ -667,7 +667,7 @@ FILTER_V 8
%if cpuflag(ssse3)
cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
-%ifdef PIC
+%if PIC
lea picregq, [bilinear_filter_vb_m]
%endif
pxor m4, m4
@@ -697,7 +697,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, p
%else ; cpuflag(ssse3)
cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my
shl myd, 4
-%ifdef PIC
+%if PIC
lea picregq, [bilinear_filter_vw_m]
%endif
pxor m6, m6
@@ -743,7 +743,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p
%if cpuflag(ssse3)
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4
-%ifdef PIC
+%if PIC
lea picregq, [bilinear_filter_vb_m]
%endif
pxor m4, m4
@@ -773,7 +773,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride
%else ; cpuflag(ssse3)
cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg
shl mxd, 4
-%ifdef PIC
+%if PIC
lea picregq, [bilinear_filter_vw_m]
%endif
pxor m6, m6
diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm
index 2c63fe514a..2f290f2f88 100644
--- a/libavcodec/x86/vp9itxfm.asm
+++ b/libavcodec/x86/vp9itxfm.asm
@@ -330,7 +330,9 @@ IDCT_4x4_FN ssse3
INIT_MMX %5
cglobal vp9_%1_%3_4x4_add, 3, 3, 0, dst, stride, block, eob
%if WIN64 && notcpuflag(ssse3)
+INIT_XMM cpuname
WIN64_SPILL_XMM 8
+INIT_MMX cpuname
%endif
movdqa xmm5, [pd_8192]
mova m0, [blockq+ 0]
diff --git a/libavcodec/x86/vp9itxfm_16bpp.asm b/libavcodec/x86/vp9itxfm_16bpp.asm
index 902685edf6..ebe6222285 100644
--- a/libavcodec/x86/vp9itxfm_16bpp.asm
+++ b/libavcodec/x86/vp9itxfm_16bpp.asm
@@ -303,7 +303,9 @@ IDCT4_10_FN
%macro IADST4_FN 4
cglobal vp9_%1_%3_4x4_add_10, 3, 3, 0, dst, stride, block, eob
%if WIN64 && notcpuflag(ssse3)
+INIT_XMM cpuname
WIN64_SPILL_XMM 8
+INIT_MMX cpuname
%endif
movdqa xmm5, [pd_8192]
mova m0, [blockq+0*16+0]
@@ -672,7 +674,7 @@ cglobal vp9_idct_idct_8x8_add_10, 4, 6 + ARCH_X86_64, 14, \
mov dstbakq, dstq
movsxd cntq, cntd
%endif
-%ifdef PIC
+%if PIC
lea ptrq, [default_8x8]
movzx cntd, byte [ptrq+cntq-1]
%else
@@ -921,7 +923,7 @@ cglobal vp9_%1_%3_8x8_add_10, 4, 6 + ARCH_X86_64, 16, \
mov dstbakq, dstq
movsxd cntq, cntd
%endif
-%ifdef PIC
+%if PIC
lea ptrq, [%5_8x8]
movzx cntd, byte [ptrq+cntq-1]
%else
@@ -1128,7 +1130,7 @@ cglobal vp9_idct_idct_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \
mov dstbakq, dstq
movsxd cntq, cntd
%endif
-%ifdef PIC
+%if PIC
lea ptrq, [default_16x16]
movzx cntd, byte [ptrq+cntq-1]
%else
@@ -1445,7 +1447,7 @@ cglobal vp9_%1_%4_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \
mov dstbakq, dstq
movsxd cntq, cntd
%endif
-%ifdef PIC
+%if PIC
lea ptrq, [%7_16x16]
movzx cntd, byte [ptrq+cntq-1]
%else
@@ -1958,7 +1960,7 @@ cglobal vp9_idct_idct_32x32_add_10, 4, 6 + ARCH_X86_64, 16, \
mov dstbakq, dstq
movsxd cntq, cntd
%endif
-%ifdef PIC
+%if PIC
lea ptrq, [default_32x32]
movzx cntd, byte [ptrq+cntq-1]
%else