author     Clément Bœsch <u@pkh.me>    2014-01-25 11:59:39 +0100
committer  Clément Bœsch <u@pkh.me>    2014-01-25 14:27:37 +0100
commit     5267e850563d8c4cbb417fc5f98e140e758a51cf
tree       f1eeee5becf4d7e902a2efc68c80a67c048816f4 /libavcodec/x86
parent     cddbfd2a95540084fdf660b46fe755255f05d4df
download   ffmpeg-5267e850563d8c4cbb417fc5f98e140e758a51cf.tar.gz
x86/lossless_videodsp: use common macro for add and diff int16 loop.
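
The merge is possible because NASM substitutes multi-line macro parameters even when they are pasted into the middle of a token, so p%2w expands to either paddw or psubw, while %ifidn switches between the one-source (add) and two-source (sub) operand layouts. A minimal standalone sketch of that technique, in plain NASM without the x264asm framework the real file relies on; the int16_add/int16_sub labels and register assignments are illustrative only, not FFmpeg's:

; Sketch: one macro body yielding two instruction variants via parameter
; pasting. Assemble with: nasm -f elf64 sketch.asm
section .text

%macro INT16_OP 1               ; %1 = add/sub
global int16_%1                 ; %1 pastes into the label: int16_add/int16_sub
int16_%1:
%ifidn %1, add                  ; add variant: dst += src
    movq    mm0, [rdi]          ; rdi = dst
    movq    mm1, [rsi]          ; rsi = src
%else                           ; sub variant: dst = src1 - src2
    movq    mm0, [rsi]          ; rsi = src1
    movq    mm1, [rdx]          ; rdx = src2
%endif
    p%1w    mm0, mm1            ; expands to paddw / psubw
    movq    [rdi], mm0
    emms
    ret
%endmacro

INT16_OP add
INT16_OP sub
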
Diffstat (limited to 'libavcodec/x86')
-rw-r--r--  libavcodec/x86/lossless_videodsp.asm  72
1 file changed, 26 insertions(+), 46 deletions(-)
diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm
index 59ca733dda..0266728a6d 100644
--- a/libavcodec/x86/lossless_videodsp.asm
+++ b/libavcodec/x86/lossless_videodsp.asm
@@ -31,7 +31,7 @@ pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7
SECTION_TEXT
-%macro ADD_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned)
+%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub
movd m4, maskd
SPLATW m4, m4
add wq, wq
@@ -39,24 +39,41 @@ SECTION_TEXT
jz %%.tomainloop
%%.wordloop:
sub wq, 2
+%ifidn %2, add
mov ax, [srcq+wq]
add ax, [dstq+wq]
+%else
+ mov ax, [src1q+wq]
+ sub ax, [src2q+wq]
+%endif
and ax, maskw
mov [dstq+wq], ax
test wq, 2*mmsize - 1
jnz %%.wordloop
%%.tomainloop:
+%ifidn %2, add
add srcq, wq
+%else
+ add src1q, wq
+ add src2q, wq
+%endif
add dstq, wq
neg wq
jz %%.end
%%.loop:
+%ifidn %2, add
mov%1 m0, [srcq+wq]
mov%1 m1, [dstq+wq]
mov%1 m2, [srcq+wq+mmsize]
mov%1 m3, [dstq+wq+mmsize]
- paddw m0, m1
- paddw m2, m3
+%else
+ mov%1 m0, [src1q+wq]
+ mov%1 m1, [src2q+wq]
+ mov%1 m2, [src1q+wq+mmsize]
+ mov%1 m3, [src2q+wq+mmsize]
+%endif
+ p%2w m0, m1
+ p%2w m2, m3
pand m0, m4
pand m2, m4
mov%1 [dstq+wq] , m0
@@ -69,7 +86,7 @@ SECTION_TEXT
INIT_MMX mmx
cglobal add_int16, 4,4,5, dst, src, mask, w
- ADD_INT16_LOOP a
+ INT16_LOOP a, add
INIT_XMM sse2
cglobal add_int16, 4,4,5, dst, src, mask, w
@@ -77,50 +94,13 @@ cglobal add_int16, 4,4,5, dst, src, mask, w
jnz .unaligned
test dstq, mmsize-1
jnz .unaligned
- ADD_INT16_LOOP a
+ INT16_LOOP a, add
.unaligned:
- ADD_INT16_LOOP u
-
-%macro DIFF_INT16_LOOP 1 ; %1 = a/u (aligned/unaligned)
- movd m4, maskd
- SPLATW m4, m4
- add wq, wq
- test wq, 2*mmsize - 1
- jz %%.tomainloop
-%%.wordloop:
- sub wq, 2
- mov ax, [src1q+wq]
- sub ax, [src2q+wq]
- and ax, maskw
- mov [dstq+wq], ax
- test wq, 2*mmsize - 1
- jnz %%.wordloop
-%%.tomainloop:
- add src1q, wq
- add src2q, wq
- add dstq, wq
- neg wq
- jz %%.end
-%%.loop:
- mov%1 m0, [src1q+wq]
- mov%1 m1, [src2q+wq]
- mov%1 m2, [src1q+wq+mmsize]
- mov%1 m3, [src2q+wq+mmsize]
- psubw m0, m1
- psubw m2, m3
- pand m0, m4
- pand m2, m4
- mov%1 [dstq+wq] , m0
- mov%1 [dstq+wq+mmsize], m2
- add wq, 2*mmsize
- jl %%.loop
-%%.end:
- RET
-%endmacro
+ INT16_LOOP u, add
INIT_MMX mmx
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
- DIFF_INT16_LOOP a
+ INT16_LOOP a, sub
INIT_XMM sse2
cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
@@ -130,9 +110,9 @@ cglobal diff_int16, 5,5,5, dst, src1, src2, mask, w
jnz .unaligned
test dstq, mmsize-1
jnz .unaligned
- DIFF_INT16_LOOP a
+ INT16_LOOP a, sub
.unaligned:
- DIFF_INT16_LOOP u
+ INT16_LOOP u, sub
%macro ADD_HFYU_LEFT_LOOP_INT16 2 ; %1 = dst alignment (a/u), %2 = src alignment (a/u)
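
Worth noting for readers of the merged macro: both variants share one loop skeleton, where a scalar word loop first trims wq down to a multiple of 2*mmsize, after which the pointers are advanced past the end of the buffers and wq is negated so the main SIMD loop can count a single index up toward zero. A stripped-down illustration of that negative-index idiom, using a hypothetical copy_words16 helper in plain NASM and assuming the byte count is already a multiple of 16, which the word loop guarantees for the real code:

; Negative-index loop idiom from INT16_LOOP: point at the buffer end,
; negate the byte count, and index upward until it reaches zero.
; void copy_words16(uint16_t *dst, const uint16_t *src, size_t w);
section .text
global copy_words16
copy_words16:                   ; rdi = dst, rsi = src, rdx = w in elements
    add     rdx, rdx            ; bytes, as in the macro's "add wq, wq"
    add     rdi, rdx            ; advance both pointers to the end
    add     rsi, rdx
    neg     rdx                 ; rdx = -byte_count
    jz      .end                ; nothing to do for w == 0
.loop:
    movdqu  xmm0, [rsi+rdx]     ; negative offset walks forward through src
    movdqu  [rdi+rdx], xmm0
    add     rdx, 16             ; one XMM register per iteration
    jl      .loop               ; loop while the index is still negative
.end:
    ret
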