diff options
author | Andreas Rheinhardt <[email protected]> | 2025-09-29 00:11:47 +0200 |
---|---|---|
committer | Andreas Rheinhardt <[email protected]> | 2025-10-04 07:06:33 +0200 |
commit | 279b6f3cf56dbfcfb3980a1d955d66628e80aa24 (patch) | |
tree | e1cf1887e460a34d058f7f9539e62226a0469ac1 | |
parent | e340f31b898e51d12ec1ced52fcbf03fb4635533 (diff) |
avcodec/fpel: Avoid loop in ff_avg_pixels4_mmxext()
The function is only used by h264_qpel.c, and only with a height of
four. Four rows are unrolled, and the loop exists solely to handle
heights that are larger multiples of four. Remove the loop and the height
parameter and move the function to h264_qpel_8bit.asm.
This leads to a bit of code duplication, but this is simpler
than all the %if checks necessary to achieve the same outcome
in fpel.asm.
Reviewed-by: James Almer <[email protected]>
Signed-off-by: Andreas Rheinhardt <[email protected]>
-rw-r--r-- | libavcodec/x86/fpel.asm | 1 | ||||
-rw-r--r-- | libavcodec/x86/fpel.h | 2 | ||||
-rw-r--r-- | libavcodec/x86/h264_qpel.c | 21 | ||||
-rw-r--r-- | libavcodec/x86/h264_qpel_8bit.asm | 20 |
4 files changed, 28 insertions, 16 deletions
diff --git a/libavcodec/x86/fpel.asm b/libavcodec/x86/fpel.asm index 477caa8b44..8ca684efa9 100644 --- a/libavcodec/x86/fpel.asm +++ b/libavcodec/x86/fpel.asm @@ -63,7 +63,6 @@ INIT_MMX mmx OP_PIXELS put, 8 INIT_MMX mmxext -OP_PIXELS avg, 4 OP_PIXELS avg, 8 INIT_XMM sse2 diff --git a/libavcodec/x86/fpel.h b/libavcodec/x86/fpel.h index 851a70b99f..dc69e1cd83 100644 --- a/libavcodec/x86/fpel.h +++ b/libavcodec/x86/fpel.h @@ -22,8 +22,6 @@ #include <stddef.h> #include <stdint.h> -void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels, - ptrdiff_t line_size, int h); void ff_avg_pixels8_mmxext(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index 43e68d2d97..649bfabda8 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -30,6 +30,7 @@ #include "fpel.h" #if HAVE_X86ASM +void ff_avg_pixels4_mmxext(uint8_t *dst, const uint8_t *src, ptrdiff_t stride); void ff_put_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, ptrdiff_t stride); void ff_avg_pixels4_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, @@ -52,7 +53,6 @@ void ff_avg_pixels16_l2_mmxext(uint8_t *dst, const uint8_t *src1, const uint8_t #define ff_put_pixels16_l2_sse2 ff_put_pixels16_l2_mmxext #define ff_avg_pixels16_l2_sse2(dst, src1, src2, dststride, src1stride, h) \ ff_avg_pixels16_l2_mmxext((dst), (src1), (src2), (dststride), (src1stride)) -#define ff_put_pixels4_mmxext(...) 
#define DEF_QPEL(OPNAME)\ void ff_ ## OPNAME ## _h264_qpel4_h_lowpass_mmxext(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride);\ @@ -191,8 +191,7 @@ static av_always_inline void ff_ ## OPNAME ## h264_qpel16_hv_lowpass_ ## MMX(uin #define ff_put_h264_qpel8or16_hv2_lowpass_sse2 ff_put_h264_qpel8or16_hv2_lowpass_mmxext #define ff_avg_h264_qpel8or16_hv2_lowpass_sse2 ff_avg_h264_qpel8or16_hv2_lowpass_mmxext -#define H264_MC_C_V_H_HV(OPNAME, SIZE, MMX, ALIGN) \ -H264_MC_C(OPNAME, SIZE, MMX, ALIGN)\ +#define H264_MC_V_H_HV(OPNAME, SIZE, MMX, ALIGN) \ H264_MC_V(OPNAME, SIZE, MMX, ALIGN)\ H264_MC_H(OPNAME, SIZE, MMX, ALIGN)\ H264_MC_HV(OPNAME, SIZE, MMX, ALIGN)\ @@ -208,11 +207,11 @@ static void avg_h264_qpel16_mc00_sse2 (uint8_t *dst, const uint8_t *src, ff_avg_pixels16_sse2(dst, src, stride, 16); } -#define H264_MC_C(OPNAME, SIZE, MMX, ALIGN) \ -av_unused static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## MMX (uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ -{\ - ff_ ## OPNAME ## pixels ## SIZE ## _ ## MMX(dst, src, stride, SIZE);\ -}\ +static void avg_h264_qpel8_mc00_mmxext(uint8_t *dst, const uint8_t *src, + ptrdiff_t stride) +{ + ff_avg_pixels8_mmxext(dst, src, stride, 8); +} #define H264_MC_H(OPNAME, SIZE, MMX, ALIGN) \ static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## MMX(uint8_t *dst, const uint8_t *src, ptrdiff_t stride)\ @@ -346,8 +345,7 @@ QPEL_H264_H_XMM(avg_,AVG_MMXEXT_OP, ssse3) QPEL_H264_HV_XMM(put_, PUT_OP, ssse3) QPEL_H264_HV_XMM(avg_,AVG_MMXEXT_OP, ssse3) -H264_MC(H264_MC_C_V_H_HV, 4, mmxext, 8) -H264_MC_C(avg_, 8, mmxext, 8) +H264_MC(H264_MC_V_H_HV, 4, mmxext, 8) H264_MC_816(H264_MC_V, sse2) H264_MC_816(H264_MC_HV, sse2) H264_MC_816(H264_MC_H, ssse3) @@ -461,7 +459,8 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) if (!high_bit_depth) { SET_QPEL_FUNCS_1PP(put_h264_qpel, 2, 4, mmxext, ); c->avg_h264_qpel_pixels_tab[1][0] = avg_h264_qpel8_mc00_mmxext; - SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, mmxext, ); + 
SET_QPEL_FUNCS_1PP(avg_h264_qpel, 2, 4, mmxext, ); + c->avg_h264_qpel_pixels_tab[2][0] = ff_avg_pixels4_mmxext; } else if (bit_depth == 10) { SET_QPEL_FUNCS(put_h264_qpel, 2, 4, 10_mmxext, ff_); SET_QPEL_FUNCS(avg_h264_qpel, 2, 4, 10_mmxext, ff_); diff --git a/libavcodec/x86/h264_qpel_8bit.asm b/libavcodec/x86/h264_qpel_8bit.asm index 6a134ee5b4..07056f1215 100644 --- a/libavcodec/x86/h264_qpel_8bit.asm +++ b/libavcodec/x86/h264_qpel_8bit.asm @@ -25,14 +25,30 @@ %include "libavutil/x86/x86util.asm" -SECTION_RODATA 32 - cextern pw_16 cextern pw_5 cextern pb_0 SECTION .text +; void ff_avg_pixels4_mmxext(uint8_t *block, const uint8_t *pixels, +; ptrdiff_t line_size) +INIT_MMX mmxext +cglobal avg_pixels4, 3,4 + lea r3, [r2*3] + movh m0, [r1] + movh m1, [r1+r2] + movh m2, [r1+r2*2] + movh m3, [r1+r3] + pavgb m0, [r0] + pavgb m1, [r0+r2] + pavgb m2, [r0+r2*2] + pavgb m3, [r0+r3] + movh [r0], m0 + movh [r0+r2], m1 + movh [r0+r2*2], m2 + movh [r0+r3], m3 + RET %macro op_avgh 3 movh %3, %2 |