diff options
author | Andreas Rheinhardt <[email protected]> | 2025-09-29 23:08:11 +0200 |
---|---|---|
committer | Andreas Rheinhardt <[email protected]> | 2025-10-04 07:06:33 +0200 |
commit | 495c3d03ae7f72dc71f374b0ca91effcd747268e (patch) | |
tree | 76c08db7d9688cd91223d6cd8116dec2bc2aa225 | |
parent | 697da64c8eb208d33cd529d0d46bcabe25249759 (diff) |
avcodec/x86/h264_qpel_10bit: Remove SSE2 "cache64" duplicates
The horizontal 10-bit MC SSE2 functions are currently duplicated:
They exist both in ordinary form and with a "sse2_cache64"
suffix. A comment in ff_h264qpel_init_x86() indicates that this
is due to older processors not liking accesses that cross cache
lines, yet these functions are identical to the non-cache64
functions (apart from the unavoidable changes in the rip-offset).
The only difference between these functions and the ordinary ones
is that the cache64 ones are created via a special form of the
INIT_XMM macro: "INIT_XMM sse2, cache64". This affects the name
and apparently defines cpuflags_cache64, yet nothing checks for
this, so both versions are identical. So remove the cache64 ones
and treat the remaining ones like ordinary SSE2 functions.
Reviewed-by: James Almer <[email protected]>
Signed-off-by: Andreas Rheinhardt <[email protected]>
-rw-r--r-- | libavcodec/x86/h264_qpel.c | 21 | ||||
-rw-r--r-- | libavcodec/x86/h264_qpel_10bit.asm | 4 |
2 files changed, 3 insertions, 22 deletions
diff --git a/libavcodec/x86/h264_qpel.c b/libavcodec/x86/h264_qpel.c index 52a6bfd5bf..6b9b4f7bc6 100644 --- a/libavcodec/x86/h264_qpel.c +++ b/libavcodec/x86/h264_qpel.c @@ -384,13 +384,10 @@ LUMA_MC_4(10, mc33, mmxext) LUMA_MC_816(10, mc00, sse2) LUMA_MC_816(10, mc10, sse2) -LUMA_MC_816(10, mc10, sse2_cache64) LUMA_MC_816(10, mc10, ssse3_cache64) LUMA_MC_816(10, mc20, sse2) -LUMA_MC_816(10, mc20, sse2_cache64) LUMA_MC_816(10, mc20, ssse3_cache64) LUMA_MC_816(10, mc30, sse2) -LUMA_MC_816(10, mc30, sse2_cache64) LUMA_MC_816(10, mc30, ssse3_cache64) LUMA_MC_816(10, mc01, sse2) LUMA_MC_816(10, mc11, sse2) @@ -488,9 +485,9 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) SET_QPEL_FUNCS(put_h264_qpel, 1, 8, 10_sse2, ff_); SET_QPEL_FUNCS(avg_h264_qpel, 0, 16, 10_sse2, ff_); SET_QPEL_FUNCS(avg_h264_qpel, 1, 8, 10_sse2, ff_); - H264_QPEL_FUNCS_10(1, 0, sse2_cache64); - H264_QPEL_FUNCS_10(2, 0, sse2_cache64); - H264_QPEL_FUNCS_10(3, 0, sse2_cache64); + H264_QPEL_FUNCS_10(1, 0, sse2); + H264_QPEL_FUNCS_10(2, 0, sse2); + H264_QPEL_FUNCS_10(3, 0, sse2); } } @@ -516,17 +513,5 @@ av_cold void ff_h264qpel_init_x86(H264QpelContext *c, int bit_depth) H264_QPEL_FUNCS_10(3, 0, ssse3_cache64); } } - - if (EXTERNAL_AVX(cpu_flags)) { - /* AVX implies 64 byte cache lines without the need to avoid unaligned - * memory accesses that cross the boundary between two cache lines. - * TODO: Port X264_CPU_CACHELINE_32/64 detection from x264 to avoid - * having to treat SSE2 functions with such properties as AVX. 
*/ - if (bit_depth == 10) { - H264_QPEL_FUNCS_10(1, 0, sse2); - H264_QPEL_FUNCS_10(2, 0, sse2); - H264_QPEL_FUNCS_10(3, 0, sse2); - } - } #endif } diff --git a/libavcodec/x86/h264_qpel_10bit.asm b/libavcodec/x86/h264_qpel_10bit.asm index 80483b15ba..bad2d386eb 100644 --- a/libavcodec/x86/h264_qpel_10bit.asm +++ b/libavcodec/x86/h264_qpel_10bit.asm @@ -227,8 +227,6 @@ MC00 avg %define OP_MOV mova INIT_MMX mmxext %1 put, 4 -INIT_XMM sse2, cache64 -%1 put, 8 INIT_XMM ssse3, cache64 %1 put, 8 INIT_XMM sse2 @@ -237,8 +235,6 @@ INIT_XMM sse2 %define OP_MOV AVG_MOV INIT_MMX mmxext %1 avg, 4 -INIT_XMM sse2, cache64 -%1 avg, 8 INIT_XMM ssse3, cache64 %1 avg, 8 INIT_XMM sse2 |