author     Henrik Gramner <henrik@gramner.com>      2015-07-16 00:10:27 +0200
committer  Luca Barbato <lu_zero@gentoo.org>        2015-07-17 20:02:28 +0200
commit     a344e5d094ebcf9a23acf3a27c56cbbbc829db42 (patch)
tree       c63923f571dd3ce2c776dbe4ad1a2b6184d783b1
parent     f5ee23004d1177ca6dd99b92cb4ff4b94b2eae09 (diff)
download   ffmpeg-a344e5d094ebcf9a23acf3a27c56cbbbc829db42.tar.gz
x86: bswapdsp: Don't treat 32-bit integers as 64-bit
The upper halves are not guaranteed to be zero in x86-64. Also use `test`
instead of `and` when the result is only used as a branch condition; this
allows some register moves to be eliminated.

Signed-off-by: Luca Barbato <lu_zero@gentoo.org>
 libavcodec/x86/bswapdsp.asm | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)
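As a minimal sketch of the first point (not part of the patch; r2/r2d stand for the
x264asm register names used in the diff below, with the usual cglobal prologue assumed):
the x86-64 ABI only defines the low 32 bits of a register that carries a 32-bit length
argument, so a 64-bit shift can fold garbage from the upper half into the result, while
the 32-bit form reads only the defined bits and zero-extends whatever it writes.

    sar     r2, 3           ; unsafe: bits 63:32 of r2 may hold garbage
    sar     r2d, 3          ; safe: uses only the defined low 32 bits
                            ; (writing r2d also zero-extends into r2)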
diff --git a/libavcodec/x86/bswapdsp.asm b/libavcodec/x86/bswapdsp.asm
index 17a6cb1be3..42580a392c 100644
--- a/libavcodec/x86/bswapdsp.asm
+++ b/libavcodec/x86/bswapdsp.asm
@@ -28,8 +28,8 @@ SECTION_TEXT
; %1 = aligned/unaligned
%macro BSWAP_LOOPS 1
- mov r3, r2
- sar r2, 3
+ mov r3d, r2d
+ sar r2d, 3
jz .left4_%1
.loop8_%1:
mov%1 m0, [r1 + 0]
@@ -57,11 +57,11 @@ SECTION_TEXT
%endif
add r0, 32
add r1, 32
- dec r2
+ dec r2d
jnz .loop8_%1
.left4_%1:
- mov r2, r3
- and r3, 4
+ mov r2d, r3d
+ test r3d, 4
jz .left
mov%1 m0, [r1]
%if cpuflag(ssse3)
@@ -84,13 +84,11 @@ SECTION_TEXT
%macro BSWAP32_BUF 0
%if cpuflag(ssse3)
cglobal bswap32_buf, 3,4,3
- mov r3, r1
mova m2, [pb_bswap32]
%else
cglobal bswap32_buf, 3,4,5
- mov r3, r1
%endif
- and r3, 15
+ test r1, 15
jz .start_align
BSWAP_LOOPS u
jmp .left
@@ -98,8 +96,7 @@ cglobal bswap32_buf, 3,4,5
BSWAP_LOOPS a
.left:
%if cpuflag(ssse3)
- mov r3, r2
- and r2, 2
+ test r2d, 2
jz .left1
movq m0, [r1]
pshufb m0, m2
@@ -107,13 +104,13 @@ cglobal bswap32_buf, 3,4,5
add r1, 8
add r0, 8
.left1:
- and r3, 1
+ test r2d, 1
jz .end
mov r2d, [r1]
bswap r2d
mov [r0], r2d
%else
- and r2, 3
+ and r2d, 3
jz .end
.loop2:
mov r3d, [r1]
@@ -121,7 +118,7 @@ cglobal bswap32_buf, 3,4,5
mov [r0], r3d
add r1, 4
add r0, 4
- dec r2
+ dec r2d
jnz .loop2
%endif
.end:
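To illustrate the `test` vs `and` point (sketch only, not part of the patch; register
names as above, .skip is a placeholder label): `and` writes its result back to the
destination, so the old value must first be copied to a scratch register if it is
still needed, whereas `test` performs the same bitwise AND but only updates the flags,
which is what lets the `mov r3, r2` copies above be dropped.

    mov     r3d, r2d        ; old form: scratch copy, because ...
    and     r3d, 4          ; ... `and` overwrites its destination
    jz      .skip

    test    r2d, 4          ; new form: flags only, r2d is left untouched
    jz      .skip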