diff options
author | James Almer <jamrial@gmail.com> | 2016-07-19 22:37:04 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2016-07-20 13:43:38 -0300 |
commit | 7a15cf42ee17955b22c9b13d83acdc70eb8983ab (patch) | |
tree | c18aad5419a42bb5ae6ac9b23a92b63c75367c3f | |
parent | 41d7642a7be5ad778788042e5f3769868da09d31 (diff) | |
download | ffmpeg-7a15cf42ee17955b22c9b13d83acdc70eb8983ab.tar.gz |
x86/diracdsp: make ff_put_signed_rect_clamped_10_sse4 work on x86_32
Reviewed-by: Rostislav Pehlivanov <atomnuker@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- | libavcodec/x86/diracdsp.asm | 37 |
-rw-r--r-- | libavcodec/x86/diracdsp_init.c | 4 |
2 files changed, 20 insertions, 21 deletions
diff --git a/libavcodec/x86/diracdsp.asm b/libavcodec/x86/diracdsp.asm index d86b5438c5..6b3f780e41 100644 --- a/libavcodec/x86/diracdsp.asm +++ b/libavcodec/x86/diracdsp.asm @@ -303,24 +303,30 @@ cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h RET -%if ARCH_X86_64 == 1 +INIT_XMM sse4 ; void put_signed_rect_clamped_10(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height) -cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w, h - mov r6, srcq - mov r7, dstq - mov r8, wq +%if ARCH_X86_64 +cglobal put_signed_rect_clamped_10, 6, 8, 5, dst, dst_stride, src, src_stride, w, h, t1, t2 +%else +cglobal put_signed_rect_clamped_10, 5, 7, 5, dst, dst_stride, src, src_stride, w, t1, t2 + %define hd r5mp +%endif + shl wd, 2 + add srcq, wq + neg wq + mov t2q, dstq + mov t1q, wq pxor m2, m2 mova m3, [clip_10bit] mova m4, [convert_to_unsigned_10bit] .loop_h: - mov srcq, r6 - mov dstq, r7 - mov wq, r8 + mov dstq, t2q + mov wq, t1q .loop_w: - movu m0, [srcq+0*mmsize] - movu m1, [srcq+1*mmsize] + movu m0, [srcq+wq+0*mmsize] + movu m1, [srcq+wq+1*mmsize] paddd m0, m4 paddd m1, m4 @@ -329,16 +335,13 @@ cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w movu [dstq], m0 - add srcq, 2*mmsize add dstq, 1*mmsize - sub wd, 8 - jg .loop_w + add wq, 2*mmsize + jl .loop_w - add r6, src_strideq - add r7, dst_strideq + add srcq, src_strideq + add t2q, dst_strideq sub hd, 1 jg .loop_h RET - -%endif diff --git a/libavcodec/x86/diracdsp_init.c b/libavcodec/x86/diracdsp_init.c index d7c7cd1657..b195113789 100644 --- a/libavcodec/x86/diracdsp_init.c +++ b/libavcodec/x86/diracdsp_init.c @@ -45,9 +45,7 @@ void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, i void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, 
int src_stride, int width, int height); void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); -#if ARCH_X86_64 void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height); -#endif void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h); @@ -192,8 +190,6 @@ void ff_diracdsp_init_x86(DiracDSPContext* c) if (EXTERNAL_SSE4(mm_flags)) { c->dequant_subband[1] = ff_dequant_subband_32_sse4; -#if ARCH_X86_64 c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4; -#endif } } |