diff options
author | Kyosuke Kawakami <kawakami150708@gmail.com> | 2024-11-15 03:25:34 +0900 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2024-11-15 13:45:52 -0500 |
commit | 61aa532e22668ae0ed943e4aee3ea69af648375e (patch) | |
tree | 280ac626eb5ec18424b637a7a6c90177c70e4044 /libavcodec/x86/diracdsp_init.c | |
parent | 8f683ee41775d5b15367f3b0d8316d894c07736a (diff) | |
download | ffmpeg-61aa532e22668ae0ed943e4aee3ea69af648375e.tar.gz |
avcodec/x86/diracdsp: migrate last remaining MMX function to SSE2
The add_dirac_obmc8_mmx function was the only MMX function left. This
patch migrates it to SSE2.
Here are the checkasm benchmark results:
diracdsp.add_dirac_obmc_8_c: 2299.1 ( 1.00x)
diracdsp.add_dirac_obmc_8_mmx: 237.6 ( 9.68x)
diracdsp.add_dirac_obmc_8_sse2: 109.1 (21.07x)
Signed-off-by: Kyosuke Kawakami <kawakami150708@gmail.com>
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Diffstat (limited to 'libavcodec/x86/diracdsp_init.c')
-rw-r--r-- | libavcodec/x86/diracdsp_init.c | 10 |
1 files changed, 3 insertions, 7 deletions
```diff
diff --git a/libavcodec/x86/diracdsp_init.c b/libavcodec/x86/diracdsp_init.c
index f678759dc0..08247133e1 100644
--- a/libavcodec/x86/diracdsp_init.c
+++ b/libavcodec/x86/diracdsp_init.c
@@ -24,8 +24,7 @@
 
 void ff_add_rect_clamped_sse2(uint8_t *, const uint16_t *, int, const int16_t *, int, int, int);
 
-void ff_add_dirac_obmc8_mmx(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
-
+void ff_add_dirac_obmc8_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
 void ff_add_dirac_obmc16_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
 void ff_add_dirac_obmc32_sse2(uint16_t *dst, const uint8_t *src, int stride, const uint8_t *obmc_weight, int yblen);
 
@@ -94,15 +93,12 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
 #if HAVE_X86ASM
     int mm_flags = av_get_cpu_flags();
 
-    if (EXTERNAL_MMX(mm_flags)) {
-        c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx;
-    }
-
     if (EXTERNAL_SSE2(mm_flags)) {
         c->dirac_hpel_filter = dirac_hpel_filter_sse2;
         c->add_rect_clamped = ff_add_rect_clamped_sse2;
         c->put_signed_rect_clamped[0] = (void *)ff_put_signed_rect_clamped_sse2;
 
+        c->add_dirac_obmc[0] = ff_add_dirac_obmc8_sse2;
         c->add_dirac_obmc[1] = ff_add_dirac_obmc16_sse2;
         c->add_dirac_obmc[2] = ff_add_dirac_obmc32_sse2;
 
@@ -116,5 +112,5 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
         c->dequant_subband[1] = ff_dequant_subband_32_sse4;
         c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
     }
-#endif
+#endif // HAVE_X86ASM
 }
```