author     Michael Niedermayer <michaelni@gmx.at>    2014-01-22 19:41:21 +0100
committer  Michael Niedermayer <michaelni@gmx.at>    2014-01-22 19:41:21 +0100
commit     631939bde6e29e29131a0ca389e5e8dea4c3d038 (patch)
tree       5a6390ac41514a2d4a8f8c6906cd1ab89fa1211b
parent     7b89e24151eaabc1b526cfd199dd2a67cdec6154 (diff)
avcodec/x86/lossless_videodsp: add diff_int16_mmx/sse2
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
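
For context, the new MMX/SSE2 routines compute an element-wise 16-bit difference masked to the active bit depth. Below is a minimal scalar sketch of that operation, written from reading the patch; the helper name is made up for illustration and is not part of the commit.

```c
#include <stdint.h>

/* Scalar sketch of what the new diff_int16 routines compute:
 * dst[i] = (src1[i] - src2[i]) & mask for w 16-bit elements.
 * The name diff_int16_scalar is only illustrative. */
static void diff_int16_scalar(uint16_t *dst, const uint16_t *src1,
                              const uint16_t *src2, unsigned mask, int w)
{
    for (int i = 0; i < w; i++)
        dst[i] = (src1[i] - src2[i]) & mask;
}
```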
-rw-r--r--  libavcodec/x86/lossless_videodsp.asm       | 66
-rw-r--r--  libavcodec/x86/lossless_videodsp_init.c    |  4
2 files changed, 70 insertions(+), 0 deletions(-)
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
index e496c804c8..37663d70df 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -93,6 +93,72 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
 .unaligned:
     ADD_INT16_LOOP 0
 
+%macro DIFF_INT16_LOOP 1 ; %1 = is_aligned
+    movd    m4, maskd
+    SPLATW  m4, m4
+    add     wq, wq
+    test    wq, 2*mmsize - 1
+    jz %%.tomainloop
+%%.wordloop:
+    sub     wq, 2
+    mov     ax, [src1q+wq]
+    sub     ax, [src2q+wq]
+    and     ax, maskw
+    mov     [dstq+wq], ax
+    test    wq, 2*mmsize - 1
+    jnz %%.wordloop
+%%.tomainloop:
+    add     src1q, wq
+    add     src2q, wq
+    add     dstq, wq
+    neg     wq
+    jz %%.end
+%%.loop:
+%if %1
+    mova    m0, [src1q+wq]
+    mova    m1, [src2q+wq]
+    mova    m2, [src1q+wq+mmsize]
+    mova    m3, [src2q+wq+mmsize]
+%else
+    movu    m0, [src1q+wq]
+    movu    m1, [src2q+wq]
+    movu    m2, [src1q+wq+mmsize]
+    movu    m3, [src2q+wq+mmsize]
+%endif
+    psubw   m0, m1
+    psubw   m2, m3
+    pand    m0, m4
+    pand    m2, m4
+%if %1
+    mova    [dstq+wq]       , m0
+    mova    [dstq+wq+mmsize], m2
+%else
+    movu    [dstq+wq]       , m0
+    movu    [dstq+wq+mmsize], m2
+%endif
+    add     wq, 2*mmsize
+    jl %%.loop
+%%.end:
+    RET
+%endmacro
+
+INIT_MMX mmx
+cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
+    DIFF_INT16_LOOP 1
+
+INIT_XMM sse2
+cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
+    test src1q, mmsize-1
+    jnz .unaligned
+    test src2q, mmsize-1
+    jnz .unaligned
+    test dstq, mmsize-1
+    jnz .unaligned
+    DIFF_INT16_LOOP 1
+.unaligned:
+    DIFF_INT16_LOOP 0
+
+
 %macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
     add     wq, wq
     add     srcq, wq
diff --git a/libavcodec/x86/lossless_videodsp_init.c b/libavcodec/x86/lossless_videodsp_init.c
index 88424ba1f9..9927ca38f3 100644
--- a/libavcodec/x86/lossless_videodsp_init.c
+++ b/libavcodec/x86/lossless_videodsp_init.c
@@ -23,6 +23,8 @@
 
 void ff_add_int16_mmx(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
 void ff_add_int16_sse2(uint16_t *dst, const uint16_t *src, unsigned mask, int w);
+void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
+void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
 int ff_add_hfyu_left_prediction_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
 int ff_add_hfyu_left_prediction_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
 
@@ -32,10 +34,12 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
 
     if (EXTERNAL_MMX(cpu_flags)) {
         c->add_int16 = ff_add_int16_mmx;
+        c->diff_int16 = ff_diff_int16_mmx;
     }
 
     if (EXTERNAL_SSE2(cpu_flags)) {
         c->add_int16 = ff_add_int16_sse2;
+        c->diff_int16 = ff_diff_int16_sse2;
     }
 
     if (EXTERNAL_SSSE3(cpu_flags)) {
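
If you want to sanity-check the SIMD path against the scalar sketch above, something along these lines should work, assuming the test is compiled into an SSE2-capable x86 build and linked against the FFmpeg objects that provide ff_diff_int16_sse2(); the buffer size and mask value are arbitrary choices for illustration.

```c
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Prototype as declared in lossless_videodsp_init.c */
void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1,
                        const uint16_t *src2, unsigned mask, int w);

int main(void)
{
    enum { W = 1024 };                      /* element count, arbitrary */
    static uint16_t src1[W], src2[W], out_simd[W], out_ref[W];
    unsigned mask = 0x03FF;                 /* e.g. 10-bit content      */

    for (int i = 0; i < W; i++) {
        src1[i]    = rand() & 0xFFFF;
        src2[i]    = rand() & 0xFFFF;
        out_ref[i] = (src1[i] - src2[i]) & mask;    /* scalar reference */
    }

    ff_diff_int16_sse2(out_simd, src1, src2, mask, W);

    for (int i = 0; i < W; i++) {
        if (out_simd[i] != out_ref[i]) {
            printf("mismatch at %d: %d vs %d\n", i, out_simd[i], out_ref[i]);
            return 1;
        }
    }
    printf("OK: %d elements match\n", W);
    return 0;
}
```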