diff options
author | Mans Rullgard <mans@mansr.com> | 2012-08-06 00:16:13 +0100 |
---|---|---|
committer | Mans Rullgard <mans@mansr.com> | 2012-08-07 15:21:20 +0100 |
commit | 2b140a3d091de2bd54687df1b2aa6608549fb6ee (patch) | |
tree | 94a81581441df6d8c905f5802884738ff13fef0f /libavcodec | |
parent | a3df4781f40869cd7766f46221412039cf54c1b7 (diff) | |
download | ffmpeg-2b140a3d091de2bd54687df1b2aa6608549fb6ee.tar.gz |
x86: use 32-bit source registers with movd instruction
yasm tolerates a mismatch between movd/movq and the size of the source
register, adjusting the instruction to match the register. nasm is
stricter, so use 32-bit source registers with movd.
Signed-off-by: Mans Rullgard <mans@mansr.com>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/x86/h264_deblock_10bit.asm | 12 | ||||
-rw-r--r-- | libavcodec/x86/rv34dsp.asm | 6 | ||||
-rw-r--r-- | libavcodec/x86/rv40dsp.asm | 4 |
3 files changed, 11 insertions, 11 deletions
diff --git a/libavcodec/x86/h264_deblock_10bit.asm b/libavcodec/x86/h264_deblock_10bit.asm index b18f7bcdbf..7b9316d2c0 100644 --- a/libavcodec/x86/h264_deblock_10bit.asm +++ b/libavcodec/x86/h264_deblock_10bit.asm @@ -165,7 +165,7 @@ cglobal deblock_v_luma_10, 5,5,8*(mmsize/16) SUB rsp, pad shl r2d, 2 shl r3d, 2 - LOAD_AB m4, m5, r2, r3 + LOAD_AB m4, m5, r2d, r3d mov r3, 32/mmsize mov r2, r0 sub r0, r1 @@ -222,7 +222,7 @@ cglobal deblock_h_luma_10, 5,6,8*(mmsize/16) SUB rsp, pad shl r2d, 2 shl r3d, 2 - LOAD_AB m4, m5, r2, r3 + LOAD_AB m4, m5, r2d, r3d mov r3, r1 mova am, m4 add r3, r1 @@ -351,7 +351,7 @@ cglobal deblock_v_luma_10, 5,5,15 %define mask2 m11 shl r2d, 2 shl r3d, 2 - LOAD_AB m12, m13, r2, r3 + LOAD_AB m12, m13, r2d, r3d mov r2, r0 sub r0, r1 sub r0, r1 @@ -379,7 +379,7 @@ cglobal deblock_v_luma_10, 5,5,15 cglobal deblock_h_luma_10, 5,7,15 shl r2d, 2 shl r3d, 2 - LOAD_AB m12, m13, r2, r3 + LOAD_AB m12, m13, r2d, r3d mov r2, r1 add r2, r1 add r2, r1 @@ -857,7 +857,7 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16) .loop: %endif CHROMA_V_LOAD r5 - LOAD_AB m4, m5, r2, r3 + LOAD_AB m4, m5, r2d, r3d LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4 pxor m4, m4 CHROMA_V_LOAD_TC m6, r4 @@ -891,7 +891,7 @@ cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16) .loop: %endif CHROMA_V_LOAD r4 - LOAD_AB m4, m5, r2, r3 + LOAD_AB m4, m5, r2d, r3d LOAD_MASK m0, m1, m2, m3, m4, m5, m7, m6, m4 CHROMA_DEBLOCK_P0_Q0_INTRA m1, m2, m0, m3, m7, m5, m6 CHROMA_V_STORE diff --git a/libavcodec/x86/rv34dsp.asm b/libavcodec/x86/rv34dsp.asm index c43b77abd2..78d8c92b0b 100644 --- a/libavcodec/x86/rv34dsp.asm +++ b/libavcodec/x86/rv34dsp.asm @@ -49,7 +49,7 @@ SECTION .text cglobal rv34_idct_%1, 1, 2, 0 movsx r1, word [r0] IDCT_DC r1 - movd m0, r1 + movd m0, r1d pshufw m0, m0, 0 movq [r0+ 0], m0 movq [r0+ 8], m0 @@ -70,7 +70,7 @@ cglobal rv34_idct_dc_add, 3, 3 ; calculate DC IDCT_DC_ROUND r2 pxor m1, m1 - movd m0, r2 + movd m0, r2d psubw m1, m0 packuswb m0, m0 
packuswb m1, m1 @@ -175,7 +175,7 @@ cglobal rv34_idct_dc_add, 3, 3, 6 pxor m1, m1 ; calculate DC - movd m0, r2 + movd m0, r2d lea r2, [r0+r1*2] movd m2, [r0] movd m3, [r0+r1] diff --git a/libavcodec/x86/rv40dsp.asm b/libavcodec/x86/rv40dsp.asm index ae740c213a..70c0c0400f 100644 --- a/libavcodec/x86/rv40dsp.asm +++ b/libavcodec/x86/rv40dsp.asm @@ -466,8 +466,8 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8 add r2, r6 neg r6 - movd m2, r3 - movd m3, r4 + movd m2, r3d + movd m3, r4d %ifidn %1,rnd %define RND 0 SPLATW m2, m2 |