diff options
author | Henrik Gramner <henrik@gramner.com> | 2024-03-16 16:39:37 +0100 |
---|---|---|
committer | Henrik Gramner <henrik@gramner.com> | 2024-03-24 14:53:57 +0100 |
commit | afa471d0efed1df5dca6eeeb2fcdd211ae4cad4e (patch) | |
tree | db94cecfd1fe1cb5951773461f0d7d93526f0d10 /libavcodec/x86 | |
parent | 782c4df28dc91a2b5160fe7a35ad18541e8c5029 (diff) | |
download | ffmpeg-afa471d0efed1df5dca6eeeb2fcdd211ae4cad4e.tar.gz |
x86: Update x86inc.asm
Make things up-to-date with upstream.
https://code.videolan.org/videolan/x86inc.asm
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/celt_pvq_search.asm | 4 | ||||
-rw-r--r-- | libavcodec/x86/h264_chromamc.asm | 10 | ||||
-rw-r--r-- | libavcodec/x86/h264_idct.asm | 6 | ||||
-rw-r--r-- | libavcodec/x86/h264_intrapred.asm | 15 | ||||
-rw-r--r-- | libavcodec/x86/hevc_mc.asm | 16 | ||||
-rw-r--r-- | libavcodec/x86/rv40dsp.asm | 12 | ||||
-rw-r--r-- | libavcodec/x86/sbrdsp.asm | 6 | ||||
-rw-r--r-- | libavcodec/x86/vp8dsp.asm | 30 | ||||
-rw-r--r-- | libavcodec/x86/vp9itxfm.asm | 2 | ||||
-rw-r--r-- | libavcodec/x86/vp9itxfm_16bpp.asm | 12 |
10 files changed, 57 insertions, 56 deletions
diff --git a/libavcodec/x86/celt_pvq_search.asm b/libavcodec/x86/celt_pvq_search.asm index 5c1e6d6174..e9bff02650 100644 --- a/libavcodec/x86/celt_pvq_search.asm +++ b/libavcodec/x86/celt_pvq_search.asm @@ -74,7 +74,7 @@ SECTION .text ; "movaps m0, [r5 + r4]" if PIC is enabled ; "movaps m0, [constant_name + r4]" if texrel are used %macro SET_PIC_BASE 3; reg, const_label -%ifdef PIC +%if PIC %{1} %2, [%3] ; lea r5, [rip+const] %define pic_base_%3 %2 %else @@ -195,7 +195,7 @@ align 16 ; PIC relative addressing. Use this ; to count it in cglobal ; -%ifdef PIC +%if PIC %define num_pic_regs 1 %else %define num_pic_regs 0 diff --git a/libavcodec/x86/h264_chromamc.asm b/libavcodec/x86/h264_chromamc.asm index e70bc492b2..ec6288d48e 100644 --- a/libavcodec/x86/h264_chromamc.asm +++ b/libavcodec/x86/h264_chromamc.asm @@ -91,7 +91,7 @@ SECTION .text %macro chroma_mc8_mmx_func 2-3 %ifidn %2, rv40 -%ifdef PIC +%if PIC %define rnd_1d_rv40 r8 %define rnd_2d_rv40 r8 %define extra_regs 2 @@ -147,7 +147,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0 or r4d, r5d ; x + y %ifidn %2, rv40 -%ifdef PIC +%if PIC lea r8, [rnd_rv40_1d_tbl] %endif %if ARCH_X86_64 == 0 @@ -198,7 +198,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0 movd m4, r4d ; x movd m6, r5d ; y %ifidn %2, rv40 -%ifdef PIC +%if PIC lea r8, [rnd_rv40_2d_tbl] %endif %if ARCH_X86_64 == 0 @@ -283,7 +283,7 @@ cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0 %macro chroma_mc4_mmx_func 2 %define extra_regs 0 %ifidn %2, rv40 -%ifdef PIC +%if PIC %define extra_regs 1 %endif ; PIC %endif ; rv40 @@ -301,7 +301,7 @@ cglobal %1_%2_chroma_mc4, 6, 6 + extra_regs, 0 psubw m5, m3 %ifidn %2, rv40 -%ifdef PIC +%if PIC lea r6, [rnd_rv40_2d_tbl] %define rnd_2d_rv40 r6 %else diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm index 1f86e51d82..b29ddde200 100644 --- a/libavcodec/x86/h264_idct.asm +++ b/libavcodec/x86/h264_idct.asm @@ -42,7 +42,7 @@ scan8_mem: db 4+ 1*8, 5+ 1*8, 4+ 2*8, 5+ 2*8 db 6+11*8, 7+11*8, 6+12*8, 7+12*8 db 4+13*8, 5+13*8, 4+14*8, 5+14*8 db 6+13*8, 7+13*8, 6+14*8, 7+14*8 -%ifdef PIC +%if PIC %define npicregs 1 %define scan8 picregq %else @@ -322,7 +322,7 @@ INIT_XMM sse2 cglobal h264_idct8_add4_8, 5, 8 + npicregs, 10, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg movsxdifnidn r3, r3d xor r5, r5 -%ifdef PIC +%if PIC lea picregq, [scan8_mem] %endif .nextblock: @@ -398,7 +398,7 @@ h264_idct_add8_mmx_plane: cglobal h264_idct_add8_422_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg ; dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg movsxdifnidn r3, r3d -%ifdef PIC +%if PIC lea picregq, [scan8_mem] %endif %if ARCH_X86_64 diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm index ea46bc595d..a8a630dbe6 100644 --- a/libavcodec/x86/h264_intrapred.asm +++ b/libavcodec/x86/h264_intrapred.asm @@ -1311,10 +1311,7 @@ PRED8x8L_DOWN_RIGHT ;----------------------------------------------------------------------------- %macro PRED8x8L_VERTICAL_RIGHT 0 -cglobal pred8x8l_vertical_right_8, 4,5,7 - ; manually spill XMM registers for Win64 because - ; the code here is initialized with INIT_MMX - WIN64_SPILL_XMM 7 +cglobal pred8x8l_vertical_right_8, 4,5,6 sub r0, r3 lea r4, [r0+r3*2] movq mm0, [r0+r3*1-8] @@ -1384,7 +1381,6 @@ cglobal pred8x8l_vertical_right_8, 4,5,7 movq2dq xmm4, mm6 pslldq xmm4, 8 por xmm0, xmm4 - movdqa xmm6, [pw_ff00] movdqa xmm1, xmm0 lea r2, [r1+r3*2] movdqa xmm2, xmm0 @@ -1394,15 +1390,16 @@ cglobal pred8x8l_vertical_right_8, 4,5,7 pavgb xmm2, xmm0 INIT_XMM cpuname PRED4x4_LOWPASS xmm4, xmm3, xmm1, xmm0, xmm5 - pandn xmm6, xmm4 + movdqa xmm0, [pw_ff00] + pandn xmm0, xmm4 movdqa xmm5, xmm4 psrlw xmm4, 8 - packuswb xmm6, xmm4 - movhlps xmm4, xmm6 + packuswb xmm0, xmm4 + movhlps xmm4, xmm0 movhps [r0+r3*2], xmm5 movhps [r0+r3*1], xmm2 psrldq xmm5, 4 - movss xmm5, xmm6 + movss xmm5, xmm0 psrldq xmm2, 4 movss xmm2, xmm4 lea r0, [r2+r3*2] diff --git a/libavcodec/x86/hevc_mc.asm b/libavcodec/x86/hevc_mc.asm index 5489701e44..b3b589b271 100644 --- a/libavcodec/x86/hevc_mc.asm +++ b/libavcodec/x86/hevc_mc.asm @@ -180,7 +180,7 @@ SECTION .text %macro EPEL_FILTER 5 ; bit depth, filter index, xmma, xmmb, gprtmp %if cpuflag(avx2) %assign %%offset 32 -%ifdef PIC +%if PIC lea %5q, [hevc_epel_filters_avx2_%1] %define FILTER %5q %else @@ -188,7 +188,7 @@ SECTION .text %endif %else %assign %%offset 16 -%ifdef PIC +%if PIC lea %5q, [hevc_epel_filters_sse4_%1] %define FILTER %5q %else @@ -216,7 +216,7 @@ SECTION .text %define %%table hevc_epel_filters_sse4_%1 %endif -%ifdef PIC +%if PIC lea r3srcq, [%%table] %define FILTER r3srcq %else @@ -234,7 +234,7 @@ SECTION .text %else %define %%table hevc_epel_filters_sse4_10 %endif -%ifdef PIC +%if PIC lea r3srcq, [%%table] %define FILTER r3srcq %else @@ -257,7 +257,7 @@ SECTION .text %define %%table hevc_qpel_filters_sse4_%1 %endif -%ifdef PIC +%if PIC lea rfilterq, [%%table] %else %define rfilterq %%table @@ -576,7 +576,7 @@ SECTION .text %define %%table hevc_qpel_filters_sse4_%2 %endif -%ifdef PIC +%if PIC lea rfilterq, [%%table] %else %define rfilterq %%table @@ -1288,7 +1288,7 @@ HEVC_PUT_HEVC_QPEL_HV 16, 10 %assign %%offset 4 dec %2q shl %2q, 3 -%ifdef PIC +%if PIC lea %5q, [%%table] %define FILTER %5q %else @@ -1365,7 +1365,7 @@ cglobal hevc_put_hevc_qpel_hv%1_%2, 6, 7, 27, dst, src, srcstride, height, mx, m sub myq, 1 shl myq, 5 %define %%table hevc_qpel_filters_avx512icl_v_%1 -%ifdef PIC +%if PIC lea tmpq, [%%table] %define FILTER tmpq %else diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm index e3c37dd297..dc520dbeb4 100644 --- a/libavcodec/x86/rv40dsp.asm +++ b/libavcodec/x86/rv40dsp.asm @@ -51,7 +51,7 @@ sixtap_filter_v_m: times 8 dw 1 times 8 dw 20 times 8 dw 52 -%ifdef PIC +%if PIC %define sixtap_filter_hw picregq %define sixtap_filter_hb picregq %define sixtap_filter_v picregq @@ -84,7 +84,7 @@ SECTION .text %if WIN64 movsxd %1q, %1d %endif -%ifdef PIC +%if PIC add %1q, picregq %else add %1q, %2 @@ -104,7 +104,7 @@ SECTION .text %macro FILTER_V 1 cglobal %1_rv40_qpel_v, 6,6+npicregs,12, dst, dststride, src, srcstride, height, my, picreg -%ifdef PIC +%if PIC lea picregq, [sixtap_filter_v_m] %endif pxor m7, m7 @@ -175,7 +175,7 @@ cglobal %1_rv40_qpel_v, 6,6+npicregs,12, dst, dststride, src, srcstride, height, %macro FILTER_H 1 cglobal %1_rv40_qpel_h, 6, 6+npicregs, 12, dst, dststride, src, srcstride, height, mx, picreg -%ifdef PIC +%if PIC lea picregq, [sixtap_filter_v_m] %endif pxor m7, m7 @@ -238,7 +238,7 @@ FILTER_V avg %macro FILTER_SSSE3 1 cglobal %1_rv40_qpel_v, 6,6+npicregs,8, dst, dststride, src, srcstride, height, my, picreg -%ifdef PIC +%if PIC lea picregq, [sixtap_filter_hb_m] %endif @@ -283,7 +283,7 @@ cglobal %1_rv40_qpel_v, 6,6+npicregs,8, dst, dststride, src, srcstride, height, RET cglobal %1_rv40_qpel_h, 6,6+npicregs,8, dst, dststride, src, srcstride, height, mx, picreg -%ifdef PIC +%if PIC lea picregq, [sixtap_filter_hb_m] %endif mova m3, [filter_h6_shuf2] diff --git a/libavcodec/x86/sbrdsp.asm b/libavcodec/x86/sbrdsp.asm index d02f70d704..63e9f0d33a 100644 --- a/libavcodec/x86/sbrdsp.asm +++ b/libavcodec/x86/sbrdsp.asm @@ -308,7 +308,7 @@ cglobal sbr_qmf_pre_shuffle, 1,4,6,z movq [r2q], m2 RET -%ifdef PIC +%if PIC %define NREGS 1 %if UNIX64 %define NOISE_TABLE r6q ; r5q is m_max @@ -321,7 +321,7 @@ cglobal sbr_qmf_pre_shuffle, 1,4,6,z %endif %macro LOAD_NST 1 -%ifdef PIC +%if PIC lea NOISE_TABLE, [%1] mova m0, [kxq + NOISE_TABLE] %else @@ -371,7 +371,7 @@ apply_noise_main: movsxdifnidn noiseq, noised dec noiseq shl countd, 2 -%ifdef PIC +%if PIC lea NOISE_TABLE, [sbr_noise_table] %endif lea Yq, [Yq + 2*countq] diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm index 6ac5a7721b..231c21ea0d 100644 --- a/libavcodec/x86/vp8dsp.asm +++ b/libavcodec/x86/vp8dsp.asm @@ -114,7 +114,7 @@ bilinear_filter_vb_m: times 8 db 7, 1 times 8 db 2, 6 times 8 db 1, 7 -%ifdef PIC +%if PIC %define fourtap_filter_hw picregq %define sixtap_filter_hw picregq %define fourtap_filter_hb picregq @@ -166,7 +166,7 @@ cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, h lea mxd, [mxq*3] mova m3, [filter_h6_shuf2] mova m4, [filter_h6_shuf3] -%ifdef PIC +%if PIC lea picregq, [sixtap_filter_hb_m] %endif mova m5, [sixtap_filter_hb+mxq*8-48] ; set up 6tap filter in bytes @@ -207,7 +207,7 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h mova m2, [pw_256] mova m3, [filter_h2_shuf] mova m4, [filter_h4_shuf] -%ifdef PIC +%if PIC lea picregq, [fourtap_filter_hb_m] %endif mova m5, [fourtap_filter_hb+mxq-16] ; set up 4tap filter in bytes @@ -234,7 +234,7 @@ cglobal put_vp8_epel%1_h4, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, h cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my shl myd, 4 -%ifdef PIC +%if PIC lea picregq, [fourtap_filter_hb_m] %endif mova m5, [fourtap_filter_hb+myq-16] @@ -272,7 +272,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my lea myd, [myq*3] -%ifdef PIC +%if PIC lea picregq, [sixtap_filter_hb_m] %endif lea myq, [sixtap_filter_hb+myq*8] @@ -326,7 +326,7 @@ FILTER_SSSE3 8 INIT_MMX mmxext cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg shl mxd, 4 -%ifdef PIC +%if PIC lea picregq, [fourtap_filter_hw_m] %endif movq mm4, [fourtap_filter_hw+mxq-16] ; set up 4tap filter in words @@ -374,7 +374,7 @@ cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he INIT_MMX mmxext cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, height, mx, picreg lea mxd, [mxq*3] -%ifdef PIC +%if PIC lea picregq, [sixtap_filter_hw_m] %endif movq mm4, [sixtap_filter_hw+mxq*8-48] ; set up 4tap filter in words @@ -431,7 +431,7 @@ cglobal put_vp8_epel4_h6, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he INIT_XMM sse2 cglobal put_vp8_epel8_h4, 6, 6 + npicregs, 10, dst, dststride, src, srcstride, height, mx, picreg shl mxd, 5 -%ifdef PIC +%if PIC lea picregq, [fourtap_filter_v_m] %endif lea mxq, [fourtap_filter_v+mxq-32] @@ -480,7 +480,7 @@ INIT_XMM sse2 cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, height, mx, picreg lea mxd, [mxq*3] shl mxd, 4 -%ifdef PIC +%if PIC lea picregq, [sixtap_filter_v_m] %endif lea mxq, [sixtap_filter_v+mxq-96] @@ -543,7 +543,7 @@ cglobal put_vp8_epel8_h6, 6, 6 + npicregs, 14, dst, dststride, src, srcstride, h ; 4x4 block, V-only 4-tap filter cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my shl myd, 5 -%ifdef PIC +%if PIC lea picregq, [fourtap_filter_v_m] %endif lea myq, [fourtap_filter_v+myq-32] @@ -597,7 +597,7 @@ cglobal put_vp8_epel%1_v4, 7, 7, 8, dst, dststride, src, srcstride, height, picr cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picreg, my shl myd, 4 lea myq, [myq*3] -%ifdef PIC +%if PIC lea picregq, [sixtap_filter_v_m] %endif lea myq, [sixtap_filter_v+myq-96] @@ -667,7 +667,7 @@ FILTER_V 8 %if cpuflag(ssse3) cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, picreg, my shl myd, 4 -%ifdef PIC +%if PIC lea picregq, [bilinear_filter_vb_m] %endif pxor m4, m4 @@ -697,7 +697,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 5, dst, dststride, src, srcstride, height, p %else ; cpuflag(ssse3) cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, picreg, my shl myd, 4 -%ifdef PIC +%if PIC lea picregq, [bilinear_filter_vw_m] %endif pxor m6, m6 @@ -743,7 +743,7 @@ cglobal put_vp8_bilinear%1_v, 7, 7, 7, dst, dststride, src, srcstride, height, p %if cpuflag(ssse3) cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride, height, mx, picreg shl mxd, 4 -%ifdef PIC +%if PIC lea picregq, [bilinear_filter_vb_m] %endif pxor m4, m4 @@ -773,7 +773,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 5, dst, dststride, src, srcstride %else ; cpuflag(ssse3) cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride, height, mx, picreg shl mxd, 4 -%ifdef PIC +%if PIC lea picregq, [bilinear_filter_vw_m] %endif pxor m6, m6 diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm index 2c63fe514a..2f290f2f88 100644 --- a/libavcodec/x86/vp9itxfm.asm +++ b/libavcodec/x86/vp9itxfm.asm @@ -330,7 +330,9 @@ IDCT_4x4_FN ssse3 INIT_MMX %5 cglobal vp9_%1_%3_4x4_add, 3, 3, 0, dst, stride, block, eob %if WIN64 && notcpuflag(ssse3) +INIT_XMM cpuname WIN64_SPILL_XMM 8 +INIT_MMX cpuname %endif movdqa xmm5, [pd_8192] mova m0, [blockq+ 0] diff --git a/libavcodec/x86/vp9itxfm_16bpp.asm b/libavcodec/x86/vp9itxfm_16bpp.asm index 902685edf6..ebe6222285 100644 --- a/libavcodec/x86/vp9itxfm_16bpp.asm +++ b/libavcodec/x86/vp9itxfm_16bpp.asm @@ -303,7 +303,9 @@ IDCT4_10_FN %macro IADST4_FN 4 cglobal vp9_%1_%3_4x4_add_10, 3, 3, 0, dst, stride, block, eob %if WIN64 && notcpuflag(ssse3) +INIT_XMM cpuname WIN64_SPILL_XMM 8 +INIT_MMX cpuname %endif movdqa xmm5, [pd_8192] mova m0, [blockq+0*16+0] @@ -672,7 +674,7 @@ cglobal vp9_idct_idct_8x8_add_10, 4, 6 + ARCH_X86_64, 14, \ mov dstbakq, dstq movsxd cntq, cntd %endif -%ifdef PIC +%if PIC lea ptrq, [default_8x8] movzx cntd, byte [ptrq+cntq-1] %else @@ -921,7 +923,7 @@ cglobal vp9_%1_%3_8x8_add_10, 4, 6 + ARCH_X86_64, 16, \ mov dstbakq, dstq movsxd cntq, cntd %endif -%ifdef PIC +%if PIC lea ptrq, [%5_8x8] movzx cntd, byte [ptrq+cntq-1] %else @@ -1128,7 +1130,7 @@ cglobal vp9_idct_idct_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \ mov dstbakq, dstq movsxd cntq, cntd %endif -%ifdef PIC +%if PIC lea ptrq, [default_16x16] movzx cntd, byte [ptrq+cntq-1] %else @@ -1445,7 +1447,7 @@ cglobal vp9_%1_%4_16x16_add_10, 4, 6 + ARCH_X86_64, 16, \ mov dstbakq, dstq movsxd cntq, cntd %endif -%ifdef PIC +%if PIC lea ptrq, [%7_16x16] movzx cntd, byte [ptrq+cntq-1] %else @@ -1958,7 +1960,7 @@ cglobal vp9_idct_idct_32x32_add_10, 4, 6 + ARCH_X86_64, 16, \ mov dstbakq, dstq movsxd cntq, cntd %endif -%ifdef PIC +%if PIC lea ptrq, [default_32x32] movzx cntd, byte [ptrq+cntq-1] %else |