diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2014-01-20 03:51:21 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-01-20 04:06:46 +0100 |
commit | a493f8541de20e76073433f39f66da31f3834bc4 (patch) | |
tree | 0df302fd7ae6f808d9dd36240ee665f675509ea7 /libavcodec | |
parent | da0684820a58ce42a5a2953cbce417e06a54be8f (diff) | |
download | ffmpeg-a493f8541de20e76073433f39f66da31f3834bc4.tar.gz |
avcodec/x86/dsp: add_int16_mmx / add_int16_sse2
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/dsputil.asm | 65 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_init.c | 3 | ||||
-rw-r--r-- | libavcodec/x86/dsputil_x86.h | 2 |
3 files changed, 70 insertions, 0 deletions
diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm
index 77069e20f8..9450cd8fd6 100644
--- a/libavcodec/x86/dsputil.asm
+++ b/libavcodec/x86/dsputil.asm
@@ -465,6 +465,71 @@ cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left
 .src_unaligned:
     ADD_HFYU_LEFT_LOOP 0, 0
 
+
+%macro ADD_INT16_LOOP 1 ; %1 = is_aligned
+    movd      m4, maskq
+    punpcklwd m4, m4
+    punpcklwd m4, m4
+    punpcklwd m4, m4
+    add     wq, wq
+    test    wq, 2*mmsize - 1
+    jz %%.tomainloop
+%%.wordloop:
+    sub     wq, 2
+    mov     ax, [srcq+wq]
+    add     ax, [dstq+wq]
+    and     ax, maskw
+    mov     [dstq+wq], ax
+    test    wq, 2*mmsize - 1
+    jnz %%.wordloop
+%%.tomainloop:
+    add     srcq, wq
+    add     dstq, wq
+    neg     wq
+    jz      %%.end
+%%.loop:
+%if %1
+    mova    m0, [srcq+wq]
+    mova    m1, [dstq+wq]
+    mova    m2, [srcq+wq+mmsize]
+    mova    m3, [dstq+wq+mmsize]
+%else
+    movu    m0, [srcq+wq]
+    movu    m1, [dstq+wq]
+    movu    m2, [srcq+wq+mmsize]
+    movu    m3, [dstq+wq+mmsize]
+%endif
+    paddw   m0, m1
+    paddw   m2, m3
+    pand    m0, m4
+    pand    m2, m4
+%if %1
+    mova    [dstq+wq]       , m0
+    mova    [dstq+wq+mmsize], m2
+%else
+    movu    [dstq+wq]       , m0
+    movu    [dstq+wq+mmsize], m2
+%endif
+    add     wq, 2*mmsize
+    jl %%.loop
+%%.end:
+    RET
+%endmacro
+
+INIT_MMX mmx
+cglobal add_int16, 4,4,5, dst, src, mask, w
+    ADD_INT16_LOOP 1
+
+INIT_XMM sse2
+cglobal add_int16, 4,4,5, dst, src, mask, w
+    test srcq, mmsize-1
+    jnz .unaligned
+    test dstq, mmsize-1
+    jnz .unaligned
+    ADD_INT16_LOOP 1
+.unaligned:
+    ADD_INT16_LOOP 0
+
 ;-----------------------------------------------------------------------------
 ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min,
 ;                           int32_t max, unsigned int len)
diff --git a/libavcodec/x86/dsputil_init.c b/libavcodec/x86/dsputil_init.c
index e0b40410a7..08bd29720a 100644
--- a/libavcodec/x86/dsputil_init.c
+++ b/libavcodec/x86/dsputil_init.c
@@ -542,6 +542,7 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
 #endif /* HAVE_MMX_INLINE */
 
 #if HAVE_MMX_EXTERNAL
+    c->add_int16 = ff_add_int16_mmx;
     c->vector_clip_int32 = ff_vector_clip_int32_mmx;
 #endif /* HAVE_MMX_EXTERNAL */
 }
@@ -625,6 +626,8 @@ static av_cold void dsputil_init_sse2(DSPContext *c, AVCodecContext *avctx,
         c->vector_clip_int32 = ff_vector_clip_int32_sse2;
     }
     c->bswap_buf = ff_bswap32_buf_sse2;
+
+    c->add_int16 = ff_add_int16_sse2;
 #endif /* HAVE_SSE2_EXTERNAL */
 }
 
diff --git a/libavcodec/x86/dsputil_x86.h b/libavcodec/x86/dsputil_x86.h
index 356b2c142f..e707e55a59 100644
--- a/libavcodec/x86/dsputil_x86.h
+++ b/libavcodec/x86/dsputil_x86.h
@@ -116,6 +116,8 @@ void ff_clear_blocks_mmx(int16_t *blocks);
 void ff_clear_blocks_sse(int16_t *blocks);
 
 void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w);
+void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
+void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 
 void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top,
                                         const uint8_t *diff, int w,