aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/h264_idct.asm
diff options
context:
space:
mode:
authorDaniel Kang <daniel.d.kang@gmail.com>2011-05-16 17:18:22 -0400
committerDiego Biurrun <diego@biurrun.de>2011-05-17 20:44:48 +0200
commitd0005d347d0831c904630fe70408c9fd4eec18e8 (patch)
tree59ed93f9e798770a6bb99b786f647d5999c6deed /libavcodec/x86/h264_idct.asm
parent6f2309ed2e0c5c02ab417f3f0acad23e86411079 (diff)
downloadffmpeg-d0005d347d0831c904630fe70408c9fd4eec18e8.tar.gz
Modify x86util.asm to ease transitioning to 10-bit H.264 assembly.
Arguments for variable size instructions are added to many macros, along with other various changes. The x86util.asm code was ported from x264. Signed-off-by: Diego Biurrun <diego@biurrun.de>
Diffstat (limited to 'libavcodec/x86/h264_idct.asm')
-rw-r--r--libavcodec/x86/h264_idct.asm26
1 files changed, 13 insertions, 13 deletions
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index d237b3eefd..ae70a3049b 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -59,11 +59,11 @@ SECTION .text
movq m2, [%2+16]
movq m3, [%2+24]
- IDCT4_1D 0, 1, 2, 3, 4, 5
+ IDCT4_1D w, 0, 1, 2, 3, 4, 5
mova m6, [pw_32]
TRANSPOSE4x4W 0, 1, 2, 3, 4
paddw m0, m6
- IDCT4_1D 0, 1, 2, 3, 4, 5
+ IDCT4_1D w, 0, 1, 2, 3, 4, 5
pxor m7, m7
STORE_DIFFx2 m0, m1, m4, m5, m7, 6, %1, %3
@@ -118,13 +118,13 @@ cglobal h264_idct_add_mmx, 3, 3, 0
mova m2, %1
mova m5, %2
- SUMSUB_BA m5, m2
- SUMSUB_BA m6, m5
- SUMSUB_BA m4, m2
- SUMSUB_BA m7, m6
- SUMSUB_BA m0, m4
- SUMSUB_BA m3, m2
- SUMSUB_BA m1, m5
+ SUMSUB_BA w, 5, 2
+ SUMSUB_BA w, 6, 5
+ SUMSUB_BA w, 4, 2
+ SUMSUB_BA w, 7, 6
+ SUMSUB_BA w, 0, 4
+ SUMSUB_BA w, 3, 2
+ SUMSUB_BA w, 1, 5
SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
%endmacro
@@ -715,10 +715,10 @@ x264_add8x4_idct_sse2:
movhps m1, [r2+40]
movhps m2, [r2+48]
movhps m3, [r2+56]
- IDCT4_1D 0,1,2,3,4,5
+ IDCT4_1D w,0,1,2,3,4,5
TRANSPOSE2x4x4W 0,1,2,3,4
paddw m0, [pw_32]
- IDCT4_1D 0,1,2,3,4,5
+ IDCT4_1D w,0,1,2,3,4,5
pxor m7, m7
STORE_DIFFx2 m0, m1, m4, m5, m7, 6, r0, r3
lea r0, [r0+r3*2]
@@ -859,8 +859,8 @@ cglobal h264_idct_add8_sse2, 5, 7, 8
;void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul)
%macro WALSH4_1D 5
- SUMSUB_BADC m%4, m%3, m%2, m%1, m%5
- SUMSUB_BADC m%4, m%2, m%3, m%1, m%5
+ SUMSUB_BADC w, %4, %3, %2, %1, %5
+ SUMSUB_BADC w, %4, %2, %3, %1, %5
SWAP %1, %4, %3
%endmacro