diff options
author | Mirage Abeysekara <mirage.12@cse.mrt.ac.lk> | 2017-03-19 01:20:53 +0530 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2017-03-20 09:45:42 -0400 |
commit | 5eb4f95bef2f95c4e776c60613b2825064d02ba9 (patch) | |
tree | 6565a5dc34ab32ae2fcb3ae5c84a3d7efdb71b6c /libavcodec/x86/h264_intrapred.asm | |
parent | f3cd2302a9c9724f57fda4afb5ad7a588fb8b304 (diff) | |
download | ffmpeg-5eb4f95bef2f95c4e776c60613b2825064d02ba9.tar.gz |
h264pred: added AVX2 implementation for tm_vp8 16x16.
checkasm --bench results with 5000 runs
pred16x16_tm_vp8_c: 302.8
pred16x16_tm_vp8_mmx: 101.4
pred16x16_tm_vp8_mmxext: 95.5
pred16x16_tm_vp8_sse2: 95.1
pred16x16_tm_vp8_avx2: 38.2
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
Diffstat (limited to 'libavcodec/x86/h264_intrapred.asm')
-rw-r--r-- | libavcodec/x86/h264_intrapred.asm | 37 |
1 files changed, 37 insertions, 0 deletions
diff --git a/libavcodec/x86/h264_intrapred.asm b/libavcodec/x86/h264_intrapred.asm index c88d91b49e..0f3b46287e 100644 --- a/libavcodec/x86/h264_intrapred.asm +++ b/libavcodec/x86/h264_intrapred.asm @@ -268,6 +268,43 @@ cglobal pred16x16_tm_vp8_8, 2,6,6 jg .loop REP_RET +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +cglobal pred16x16_tm_vp8_8, 2, 4, 5, dst, stride, stride3, iteration + sub dstq, strideq + pmovzxbw m0, [dstq] + vpbroadcastb xm1, [r0-1] + pmovzxbw m1, xm1 + psubw m0, m1 + mov iterationd, 4 + lea stride3q, [strideq*3] +.loop: + vpbroadcastb xm1, [dstq+strideq*1-1] + vpbroadcastb xm2, [dstq+strideq*2-1] + vpbroadcastb xm3, [dstq+stride3q-1] + vpbroadcastb xm4, [dstq+strideq*4-1] + pmovzxbw m1, xm1 + pmovzxbw m2, xm2 + pmovzxbw m3, xm3 + pmovzxbw m4, xm4 + paddw m1, m0 + paddw m2, m0 + paddw m3, m0 + paddw m4, m0 + vpackuswb m1, m1, m2 + vpackuswb m3, m3, m4 + vpermq m1, m1, q3120 + vpermq m3, m3, q3120 + movdqa [dstq+strideq*1], xm1 + vextracti128 [dstq+strideq*2], m1, 1 + movdqa [dstq+stride3q*1], xm3 + vextracti128 [dstq+strideq*4], m3, 1 + lea dstq, [dstq+strideq*4] + dec iterationd + jg .loop + REP_RET +%endif + ;----------------------------------------------------------------------------- ; void ff_pred16x16_plane_*_8(uint8_t *src, int stride) ;----------------------------------------------------------------------------- |