diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2014-05-22 23:47:06 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-05-24 15:15:56 +0200 |
commit | 81aa0f4604f98da692f2689c84968f90354a92ea (patch) | |
tree | 243197f645e40a7dd5219df7992e4f8452fc73a9 /libavcodec/x86/hpeldsp_init.c | |
parent | 726316240bcc41cef6053dd6d1e46a3c57328498 (diff) | |
download | ffmpeg-81aa0f4604f98da692f2689c84968f90354a92ea.tar.gz |
x86: hpeldsp: implement SSSE3 version of _xy2
Loading pb_1 rather than pw_8192 was benchmarked to be more efficient.
Loading of the 2 yields no advantage. Loading of one saves ~11 cycles.
decicycles count:
put8: 3223(mmx) -> 2387
avg8: 2863(mmxext) -> 2125
put16: 4356(sse2) -> 3553
avg16: 4481(sse2) -> 3513
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/hpeldsp_init.c')
-rw-r--r-- | libavcodec/x86/hpeldsp_init.c | 22 |
1 files changed, 22 insertions, 0 deletions
diff --git a/libavcodec/x86/hpeldsp_init.c b/libavcodec/x86/hpeldsp_init.c index cda16dc722..42e33416eb 100644 --- a/libavcodec/x86/hpeldsp_init.c +++ b/libavcodec/x86/hpeldsp_init.c @@ -95,6 +95,15 @@ void ff_avg_approx_pixels8_xy2_mmxext(uint8_t *block, const uint8_t *pixels, void ff_avg_approx_pixels8_xy2_3dnow(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); +void ff_put_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_avg_pixels8_xy2_ssse3(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_put_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); +void ff_avg_pixels16_xy2_ssse3(uint8_t *block, const uint8_t *pixels, + ptrdiff_t line_size, int h); + #define avg_pixels8_mmx ff_avg_pixels8_mmx #define avg_pixels8_x2_mmx ff_avg_pixels8_x2_mmx #define avg_pixels16_mmx ff_avg_pixels16_mmx @@ -307,6 +316,16 @@ static void hpeldsp_init_sse2(HpelDSPContext *c, int flags, int cpu_flags) #endif /* HAVE_SSE2_EXTERNAL */ } +static void hpeldsp_init_ssse3(HpelDSPContext *c, int flags, int cpu_flags) +{ +#if HAVE_SSSE3_EXTERNAL + c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_ssse3; + c->avg_pixels_tab[0][3] = ff_avg_pixels16_xy2_ssse3; + c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_ssse3; + c->avg_pixels_tab[1][3] = ff_avg_pixels8_xy2_ssse3; +#endif +} + av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags) { int cpu_flags = av_get_cpu_flags(); @@ -322,4 +341,7 @@ av_cold void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags) if (EXTERNAL_SSE2(cpu_flags)) hpeldsp_init_sse2(c, flags, cpu_flags); + + if (EXTERNAL_SSSE3(cpu_flags)) + hpeldsp_init_ssse3(c, flags, cpu_flags); } |