diff options
author | Andreas Rheinhardt <andreas.rheinhardt@outlook.com> | 2022-06-10 20:28:06 +0200 |
---|---|---|
committer | Andreas Rheinhardt <andreas.rheinhardt@outlook.com> | 2022-06-22 13:33:27 +0200 |
commit | bfb28b5ce89f3e950214b67ea95b45e3355c2caf (patch) | |
tree | fdeb2fc466ea1118d2f1bb41611cf3df28efaad4 /libavcodec/x86 | |
parent | b2437a45af58b0a9d726f1ee082e7d2809175b99 (diff) | |
download | ffmpeg-bfb28b5ce89f3e950214b67ea95b45e3355c2caf.tar.gz |
avcodec/x86/idctdsp: Remove obsolete MMX(EXT) functions
x64 always has MMX, MMXEXT, SSE and SSE2 and this means
that some functions for MMX, MMXEXT and 3dnow are always
overridden by other functions (unless one e.g. explicitly
disables SSE2) for x64. So given that the only systems that
benefit from these functions are truly ancient 32-bit x86s
they are removed.
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
Diffstat (limited to 'libavcodec/x86')
-rw-r--r-- | libavcodec/x86/idctdsp.asm | 79 | ||||
-rw-r--r-- | libavcodec/x86/idctdsp.h | 6 | ||||
-rw-r--r-- | libavcodec/x86/idctdsp_init.c | 11 | ||||
-rw-r--r-- | libavcodec/x86/simple_idct.asm | 22 |
4 files changed, 10 insertions, 108 deletions
diff --git a/libavcodec/x86/idctdsp.asm b/libavcodec/x86/idctdsp.asm index 089425a9ab..1cfdb5419d 100644 --- a/libavcodec/x86/idctdsp.asm +++ b/libavcodec/x86/idctdsp.asm @@ -37,47 +37,24 @@ SECTION .text %macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1 mova m1, [blockq+mmsize*0+%1] mova m2, [blockq+mmsize*2+%1] -%if mmsize == 8 - mova m3, [blockq+mmsize*4+%1] - mova m4, [blockq+mmsize*6+%1] -%endif packsswb m1, [blockq+mmsize*1+%1] packsswb m2, [blockq+mmsize*3+%1] -%if mmsize == 8 - packsswb m3, [blockq+mmsize*5+%1] - packsswb m4, [blockq+mmsize*7+%1] -%endif paddb m1, m0 paddb m2, m0 -%if mmsize == 8 - paddb m3, m0 - paddb m4, m0 - movq [pixelsq+lsizeq*0], m1 - movq [pixelsq+lsizeq*1], m2 - movq [pixelsq+lsizeq*2], m3 - movq [pixelsq+lsize3q ], m4 -%else movq [pixelsq+lsizeq*0], m1 movhps [pixelsq+lsizeq*1], m1 movq [pixelsq+lsizeq*2], m2 movhps [pixelsq+lsize3q ], m2 -%endif %endmacro -%macro PUT_SIGNED_PIXELS_CLAMPED 1 -cglobal put_signed_pixels_clamped, 3, 4, %1, block, pixels, lsize, lsize3 +INIT_XMM sse2 +cglobal put_signed_pixels_clamped, 3, 4, 3, block, pixels, lsize, lsize3 mova m0, [pb_80] lea lsize3q, [lsizeq*3] PUT_SIGNED_PIXELS_CLAMPED_HALF 0 lea pixelsq, [pixelsq+lsizeq*4] PUT_SIGNED_PIXELS_CLAMPED_HALF 64 RET -%endmacro - -INIT_MMX mmx -PUT_SIGNED_PIXELS_CLAMPED 0 -INIT_XMM sse2 -PUT_SIGNED_PIXELS_CLAMPED 3 ;-------------------------------------------------------------------------- ; void ff_put_pixels_clamped(const int16_t *block, uint8_t *pixels, @@ -87,40 +64,21 @@ PUT_SIGNED_PIXELS_CLAMPED 3 %macro PUT_PIXELS_CLAMPED_HALF 1 mova m0, [blockq+mmsize*0+%1] mova m1, [blockq+mmsize*2+%1] -%if mmsize == 8 - mova m2, [blockq+mmsize*4+%1] - mova m3, [blockq+mmsize*6+%1] -%endif packuswb m0, [blockq+mmsize*1+%1] packuswb m1, [blockq+mmsize*3+%1] -%if mmsize == 8 - packuswb m2, [blockq+mmsize*5+%1] - packuswb m3, [blockq+mmsize*7+%1] - movq [pixelsq], m0 - movq [lsizeq+pixelsq], m1 - movq [2*lsizeq+pixelsq], m2 - movq [lsize3q+pixelsq], m3 -%else movq [pixelsq], 
m0 movhps [lsizeq+pixelsq], m0 movq [2*lsizeq+pixelsq], m1 movhps [lsize3q+pixelsq], m1 -%endif %endmacro -%macro PUT_PIXELS_CLAMPED 0 +INIT_XMM sse2 cglobal put_pixels_clamped, 3, 4, 2, block, pixels, lsize, lsize3 lea lsize3q, [lsizeq*3] PUT_PIXELS_CLAMPED_HALF 0 lea pixelsq, [pixelsq+lsizeq*4] PUT_PIXELS_CLAMPED_HALF 64 RET -%endmacro - -INIT_MMX mmx -PUT_PIXELS_CLAMPED -INIT_XMM sse2 -PUT_PIXELS_CLAMPED ;-------------------------------------------------------------------------- ; void ff_add_pixels_clamped(const int16_t *block, uint8_t *pixels, @@ -130,41 +88,18 @@ PUT_PIXELS_CLAMPED %macro ADD_PIXELS_CLAMPED 1 mova m0, [blockq+mmsize*0+%1] mova m1, [blockq+mmsize*1+%1] -%if mmsize == 8 - mova m5, [blockq+mmsize*2+%1] - mova m6, [blockq+mmsize*3+%1] -%endif movq m2, [pixelsq] movq m3, [pixelsq+lsizeq] -%if mmsize == 8 - mova m7, m2 - punpcklbw m2, m4 - punpckhbw m7, m4 - paddsw m0, m2 - paddsw m1, m7 - mova m7, m3 - punpcklbw m3, m4 - punpckhbw m7, m4 - paddsw m5, m3 - paddsw m6, m7 -%else punpcklbw m2, m4 punpcklbw m3, m4 paddsw m0, m2 paddsw m1, m3 -%endif packuswb m0, m1 -%if mmsize == 8 - packuswb m5, m6 - movq [pixelsq], m0 - movq [pixelsq+lsizeq], m5 -%else movq [pixelsq], m0 movhps [pixelsq+lsizeq], m0 -%endif %endmacro -%macro ADD_PIXELS_CLAMPED 0 +INIT_XMM sse2 cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize pxor m4, m4 ADD_PIXELS_CLAMPED 0 @@ -175,9 +110,3 @@ cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize lea pixelsq, [pixelsq+lsizeq*2] ADD_PIXELS_CLAMPED 96 RET -%endmacro - -INIT_MMX mmx -ADD_PIXELS_CLAMPED -INIT_XMM sse2 -ADD_PIXELS_CLAMPED diff --git a/libavcodec/x86/idctdsp.h b/libavcodec/x86/idctdsp.h index 0d0bdb5f57..738e4e36e4 100644 --- a/libavcodec/x86/idctdsp.h +++ b/libavcodec/x86/idctdsp.h @@ -22,16 +22,10 @@ #include <stddef.h> #include <stdint.h> -void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, - ptrdiff_t line_size); void ff_add_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels, 
ptrdiff_t line_size); -void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, - ptrdiff_t line_size); void ff_put_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size); -void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, - ptrdiff_t line_size); void ff_put_signed_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size); diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c index 9103b92ce7..f28a1ad744 100644 --- a/libavcodec/x86/idctdsp_init.c +++ b/libavcodec/x86/idctdsp_init.c @@ -63,28 +63,24 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, { int cpu_flags = av_get_cpu_flags(); +#if ARCH_X86_32 if (EXTERNAL_MMX(cpu_flags)) { - c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; - c->put_pixels_clamped = ff_put_pixels_clamped_mmx; - c->add_pixels_clamped = ff_add_pixels_clamped_mmx; - if (!high_bit_depth && avctx->lowres == 0 && (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLEAUTO || avctx->idct_algo == FF_IDCT_SIMPLEMMX)) { - c->idct_put = ff_simple_idct_put_mmx; - c->idct_add = ff_simple_idct_add_mmx; c->idct = ff_simple_idct_mmx; - c->perm_type = FF_IDCT_PERM_SIMPLE; } } +#endif if (EXTERNAL_SSE2(cpu_flags)) { c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2; c->put_pixels_clamped = ff_put_pixels_clamped_sse2; c->add_pixels_clamped = ff_add_pixels_clamped_sse2; +#if ARCH_X86_32 if (!high_bit_depth && avctx->lowres == 0 && (avctx->idct_algo == FF_IDCT_AUTO || @@ -94,6 +90,7 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, c->idct_add = ff_simple_idct_add_sse2; c->perm_type = FF_IDCT_PERM_SIMPLE; } +#endif if (ARCH_X86_64 && !high_bit_depth && diff --git a/libavcodec/x86/simple_idct.asm b/libavcodec/x86/simple_idct.asm index 6fedbb5784..dcf0da6df1 100644 --- a/libavcodec/x86/simple_idct.asm +++ b/libavcodec/x86/simple_idct.asm @@ -25,6 +25,7 @@ 
%include "libavutil/x86/x86util.asm" +%if ARCH_X86_32 SECTION_RODATA cextern pb_80 @@ -846,26 +847,6 @@ cglobal simple_idct, 1, 2, 8, 128, block, t0 IDCT RET -cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0 - IDCT - lea lsize3q, [lsizeq*3] - PUT_PIXELS_CLAMPED_HALF 0 - lea pixelsq, [pixelsq+lsizeq*4] - PUT_PIXELS_CLAMPED_HALF 64 -RET - -cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0 - IDCT - pxor m4, m4 - ADD_PIXELS_CLAMPED 0 - lea pixelsq, [pixelsq+lsizeq*2] - ADD_PIXELS_CLAMPED 32 - lea pixelsq, [pixelsq+lsizeq*2] - ADD_PIXELS_CLAMPED 64 - lea pixelsq, [pixelsq+lsizeq*2] - ADD_PIXELS_CLAMPED 96 -RET - INIT_XMM sse2 cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0 @@ -887,3 +868,4 @@ cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0 lea pixelsq, [pixelsq+lsizeq*2] ADD_PIXELS_CLAMPED 96 RET +%endif |