diff options
author | James Almer <jamrial@gmail.com> | 2014-09-23 18:42:35 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2014-09-24 16:12:55 -0300 |
commit | 70277d1d234b33a80477f75435758a194fed5873 (patch) | |
tree | 1d795d1fd6bf7ca36882d729e67a5d03ece87678 /libavcodec/x86/videodsp_init.c | |
parent | 280ef183db554bd4eaeaa7fe487ad398ec5208fb (diff) | |
download | ffmpeg-70277d1d234b33a80477f75435758a194fed5873.tar.gz |
x86/videodsp: add ff_emu_edge_{hfix,hvar}_avx2
~15% faster than sse2.
Reviewed-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86/videodsp_init.c')
-rw-r--r-- | libavcodec/x86/videodsp_init.c | 36 |
1 files changed, 36 insertions, 0 deletions
diff --git a/libavcodec/x86/videodsp_init.c b/libavcodec/x86/videodsp_init.c index 602f141c05..885cdf1d8c 100644 --- a/libavcodec/x86/videodsp_init.c +++ b/libavcodec/x86/videodsp_init.c @@ -128,6 +128,23 @@ static emu_edge_hfix_func * const hfixtbl_sse2[11] = { ff_emu_edge_hfix20_sse2, ff_emu_edge_hfix22_sse2 }; extern emu_edge_hvar_func ff_emu_edge_hvar_sse2; +#if HAVE_AVX2_EXTERNAL +extern emu_edge_hfix_func ff_emu_edge_hfix8_avx2; +extern emu_edge_hfix_func ff_emu_edge_hfix10_avx2; +extern emu_edge_hfix_func ff_emu_edge_hfix12_avx2; +extern emu_edge_hfix_func ff_emu_edge_hfix14_avx2; +extern emu_edge_hfix_func ff_emu_edge_hfix16_avx2; +extern emu_edge_hfix_func ff_emu_edge_hfix18_avx2; +extern emu_edge_hfix_func ff_emu_edge_hfix20_avx2; +extern emu_edge_hfix_func ff_emu_edge_hfix22_avx2; +static emu_edge_hfix_func * const hfixtbl_avx2[11] = { + ff_emu_edge_hfix2_mmx, ff_emu_edge_hfix4_mmx, ff_emu_edge_hfix6_mmx, + ff_emu_edge_hfix8_avx2, ff_emu_edge_hfix10_avx2, ff_emu_edge_hfix12_avx2, + ff_emu_edge_hfix14_avx2, ff_emu_edge_hfix16_avx2, ff_emu_edge_hfix18_avx2, + ff_emu_edge_hfix20_avx2, ff_emu_edge_hfix22_avx2 +}; +extern emu_edge_hvar_func ff_emu_edge_hvar_avx2; +#endif static av_always_inline void emulated_edge_mc(uint8_t *dst, const uint8_t *src, ptrdiff_t dst_stride, @@ -238,6 +255,20 @@ static av_noinline void emulated_edge_mc_sse2(uint8_t *buf, const uint8_t *src, src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse, hfixtbl_sse2, &ff_emu_edge_hvar_sse2); } + +#if HAVE_AVX2_EXTERNAL +static av_noinline void emulated_edge_mc_avx2(uint8_t *buf, const uint8_t *src, + ptrdiff_t buf_stride, + ptrdiff_t src_stride, + int block_w, int block_h, + int src_x, int src_y, int w, + int h) +{ + emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h, + src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse, + hfixtbl_avx2, &ff_emu_edge_hvar_avx2); +} +#endif /* HAVE_AVX2_EXTERNAL */ #endif /* HAVE_YASM */ void ff_prefetch_mmxext(uint8_t *buf, ptrdiff_t stride, int h); @@ -267,5 +298,10 @@ av_cold void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc) if (EXTERNAL_SSE2(cpu_flags) && bpc <= 8) { ctx->emulated_edge_mc = emulated_edge_mc_sse2; } +#if HAVE_AVX2_EXTERNAL + if (EXTERNAL_AVX2(cpu_flags) && bpc <= 8) { + ctx->emulated_edge_mc = emulated_edge_mc_avx2; + } +#endif #endif /* HAVE_YASM */ } |