about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com> 2016-07-19 22:37:04 -0300
committer: James Almer <jamrial@gmail.com> 2016-07-20 13:43:38 -0300
commit: 7a15cf42ee17955b22c9b13d83acdc70eb8983ab (patch)
tree: c18aad5419a42bb5ae6ac9b23a92b63c75367c3f
parent: 41d7642a7be5ad778788042e5f3769868da09d31 (diff)
download: ffmpeg-7a15cf42ee17955b22c9b13d83acdc70eb8983ab.tar.gz
x86/diracdsp: make ff_put_signed_rect_clamped_10_sse4 work on x86_32
Reviewed-by: Rostislav Pehlivanov <atomnuker@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- libavcodec/x86/diracdsp.asm 37
-rw-r--r-- libavcodec/x86/diracdsp_init.c 4
2 files changed, 20 insertions, 21 deletions
diff --git a/libavcodec/x86/diracdsp.asm b/libavcodec/x86/diracdsp.asm
index d86b5438c5..6b3f780e41 100644
--- a/libavcodec/x86/diracdsp.asm
+++ b/libavcodec/x86/diracdsp.asm
@@ -303,24 +303,30 @@ cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h
RET
-%if ARCH_X86_64 == 1
+INIT_XMM sse4
; void put_signed_rect_clamped_10(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height)
-cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w, h
- mov r6, srcq
- mov r7, dstq
- mov r8, wq
+%if ARCH_X86_64
+cglobal put_signed_rect_clamped_10, 6, 8, 5, dst, dst_stride, src, src_stride, w, h, t1, t2
+%else
+cglobal put_signed_rect_clamped_10, 5, 7, 5, dst, dst_stride, src, src_stride, w, t1, t2
+ %define hd r5mp
+%endif
+ shl wd, 2
+ add srcq, wq
+ neg wq
+ mov t2q, dstq
+ mov t1q, wq
pxor m2, m2
mova m3, [clip_10bit]
mova m4, [convert_to_unsigned_10bit]
.loop_h:
- mov srcq, r6
- mov dstq, r7
- mov wq, r8
+ mov dstq, t2q
+ mov wq, t1q
.loop_w:
- movu m0, [srcq+0*mmsize]
- movu m1, [srcq+1*mmsize]
+ movu m0, [srcq+wq+0*mmsize]
+ movu m1, [srcq+wq+1*mmsize]
paddd m0, m4
paddd m1, m4
@@ -329,16 +335,13 @@ cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w
movu [dstq], m0
- add srcq, 2*mmsize
add dstq, 1*mmsize
- sub wd, 8
- jg .loop_w
+ add wq, 2*mmsize
+ jl .loop_w
- add r6, src_strideq
- add r7, dst_strideq
+ add srcq, src_strideq
+ add t2q, dst_strideq
sub hd, 1
jg .loop_h
RET
-
-%endif
diff --git a/libavcodec/x86/diracdsp_init.c b/libavcodec/x86/diracdsp_init.c
index d7c7cd1657..b195113789 100644
--- a/libavcodec/x86/diracdsp_init.c
+++ b/libavcodec/x86/diracdsp_init.c
@@ -45,9 +45,7 @@ void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, i
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
-#if ARCH_X86_64
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height);
-#endif
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
@@ -192,8 +190,6 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
if (EXTERNAL_SSE4(mm_flags)) {
c->dequant_subband[1] = ff_dequant_subband_32_sse4;
-#if ARCH_X86_64
c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
-#endif
}
}