diff options
author | James Almer <jamrial@gmail.com> | 2014-05-26 03:49:35 -0300 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-05-27 05:55:11 +0200 |
commit | e64e079ece7d037686c4c0f97eac9c62af6300b1 (patch) | |
tree | 96227a1b784bbf67a6d3b6e9d4b9640b98ae6db6 | |
parent | a0c5cd3475fd93930604e4ec5ac1336f5732c04b (diff) | |
download | ffmpeg-e64e079ece7d037686c4c0f97eac9c62af6300b1.tar.gz |
x86/dsputilenc: implement SSE2 version of diff_pixels
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- | libavcodec/x86/dsputilenc.asm | 25 | ||||
-rw-r--r-- | libavcodec/x86/dsputilenc_mmx.c | 3 |
2 files changed, 28 insertions, 0 deletions
```diff
diff --git a/libavcodec/x86/dsputilenc.asm b/libavcodec/x86/dsputilenc.asm
index 7426c01dbb..ba8a1773c2 100644
--- a/libavcodec/x86/dsputilenc.asm
+++ b/libavcodec/x86/dsputilenc.asm
@@ -419,6 +419,31 @@ cglobal diff_pixels, 4,5
     jne .loop
     REP_RET
 
+INIT_XMM sse2
+cglobal diff_pixels, 4, 5, 5
+    movsxdifnidn r3, r3d
+    pxor m4, m4
+    add r0, 128
+    mov r4, -128
+.loop:
+    movh m0, [r1]
+    movh m2, [r2]
+    movh m1, [r1+r3]
+    movh m3, [r2+r3]
+    punpcklbw m0, m4
+    punpcklbw m1, m4
+    punpcklbw m2, m4
+    punpcklbw m3, m4
+    psubw m0, m2
+    psubw m1, m3
+    mova [r0+r4+0 ], m0
+    mova [r0+r4+16], m1
+    lea r1, [r1+r3*2]
+    lea r2, [r2+r3*2]
+    add r4, 32
+    jne .loop
+    RET
+
 INIT_MMX mmx
 ; int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
 cglobal pix_sum16, 2, 3
diff --git a/libavcodec/x86/dsputilenc_mmx.c b/libavcodec/x86/dsputilenc_mmx.c
index e63d510ab9..acff94702f 100644
--- a/libavcodec/x86/dsputilenc_mmx.c
+++ b/libavcodec/x86/dsputilenc_mmx.c
@@ -36,6 +36,8 @@ void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size);
 void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, int line_size);
 void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
                         int stride);
+void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
+                         int stride);
 int ff_pix_sum16_mmx(uint8_t *pix, int line_size);
 int ff_pix_norm1_mmx(uint8_t *pix, int line_size);
 int ff_sum_abs_dctelem_mmx(int16_t *block);
@@ -971,6 +973,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx,
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->sse[0] = ff_sse16_sse2;
         c->sum_abs_dctelem = ff_sum_abs_dctelem_sse2;
+        c->diff_pixels = ff_diff_pixels_sse2;
 
 #if HAVE_ALIGNED_STACK
         c->hadamard8_diff[0] = ff_hadamard8_diff16_sse2;
```