aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/videodsp_init.c
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com>, 2014-09-23 18:42:35 -0300
committer: James Almer <jamrial@gmail.com>, 2014-09-24 16:12:55 -0300
commit: 70277d1d234b33a80477f75435758a194fed5873 (patch)
tree: 1d795d1fd6bf7ca36882d729e67a5d03ece87678 /libavcodec/x86/videodsp_init.c
parent: 280ef183db554bd4eaeaa7fe487ad398ec5208fb (diff)
download: ffmpeg-70277d1d234b33a80477f75435758a194fed5873.tar.gz
x86/videodsp: add ff_emu_edge_{hfix,hvar}_avx2
~15% faster than sse2.

Reviewed-by: Ronald S. Bultje <rsbultje@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86/videodsp_init.c')
-rw-r--r-- libavcodec/x86/videodsp_init.c | 36
1 file changed, 36 insertions, 0 deletions
diff --git a/libavcodec/x86/videodsp_init.c b/libavcodec/x86/videodsp_init.c
index 602f141c05..885cdf1d8c 100644
--- a/libavcodec/x86/videodsp_init.c
+++ b/libavcodec/x86/videodsp_init.c
@@ -128,6 +128,23 @@ static emu_edge_hfix_func * const hfixtbl_sse2[11] = {
ff_emu_edge_hfix20_sse2, ff_emu_edge_hfix22_sse2
};
extern emu_edge_hvar_func ff_emu_edge_hvar_sse2;
+#if HAVE_AVX2_EXTERNAL
+extern emu_edge_hfix_func ff_emu_edge_hfix8_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix10_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix12_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix14_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix16_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix18_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix20_avx2;
+extern emu_edge_hfix_func ff_emu_edge_hfix22_avx2;
+static emu_edge_hfix_func * const hfixtbl_avx2[11] = {
+ ff_emu_edge_hfix2_mmx, ff_emu_edge_hfix4_mmx, ff_emu_edge_hfix6_mmx,
+ ff_emu_edge_hfix8_avx2, ff_emu_edge_hfix10_avx2, ff_emu_edge_hfix12_avx2,
+ ff_emu_edge_hfix14_avx2, ff_emu_edge_hfix16_avx2, ff_emu_edge_hfix18_avx2,
+ ff_emu_edge_hfix20_avx2, ff_emu_edge_hfix22_avx2
+};
+extern emu_edge_hvar_func ff_emu_edge_hvar_avx2;
+#endif
static av_always_inline void emulated_edge_mc(uint8_t *dst, const uint8_t *src,
ptrdiff_t dst_stride,
@@ -238,6 +255,20 @@ static av_noinline void emulated_edge_mc_sse2(uint8_t *buf, const uint8_t *src,
src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse,
hfixtbl_sse2, &ff_emu_edge_hvar_sse2);
}
+
+#if HAVE_AVX2_EXTERNAL
+static av_noinline void emulated_edge_mc_avx2(uint8_t *buf, const uint8_t *src,
+ ptrdiff_t buf_stride,
+ ptrdiff_t src_stride,
+ int block_w, int block_h,
+ int src_x, int src_y, int w,
+ int h)
+{
+ emulated_edge_mc(buf, src, buf_stride, src_stride, block_w, block_h,
+ src_x, src_y, w, h, vfixtbl_sse, &ff_emu_edge_vvar_sse,
+ hfixtbl_avx2, &ff_emu_edge_hvar_avx2);
+}
+#endif /* HAVE_AVX2_EXTERNAL */
#endif /* HAVE_YASM */
void ff_prefetch_mmxext(uint8_t *buf, ptrdiff_t stride, int h);
@@ -267,5 +298,10 @@ av_cold void ff_videodsp_init_x86(VideoDSPContext *ctx, int bpc)
if (EXTERNAL_SSE2(cpu_flags) && bpc <= 8) {
ctx->emulated_edge_mc = emulated_edge_mc_sse2;
}
+#if HAVE_AVX2_EXTERNAL
+ if (EXTERNAL_AVX2(cpu_flags) && bpc <= 8) {
+ ctx->emulated_edge_mc = emulated_edge_mc_avx2;
+ }
+#endif
#endif /* HAVE_YASM */
}