diff options
author | Daniel Kang <daniel.d.kang@gmail.com> | 2011-05-16 17:18:22 -0400 |
---|---|---|
committer | Diego Biurrun <diego@biurrun.de> | 2011-05-17 20:44:48 +0200 |
commit | d0005d347d0831c904630fe70408c9fd4eec18e8 (patch) | |
tree | 59ed93f9e798770a6bb99b786f647d5999c6deed /libavcodec/x86/h264_idct.asm | |
parent | 6f2309ed2e0c5c02ab417f3f0acad23e86411079 (diff) | |
download | ffmpeg-d0005d347d0831c904630fe70408c9fd4eec18e8.tar.gz |
Modify x86util.asm to ease transitioning to 10-bit H.264 assembly.
Arguments for variable-size instructions are added to many macros, along
with various other changes. The x86util.asm code was ported from x264.
Signed-off-by: Diego Biurrun <diego@biurrun.de>
Diffstat (limited to 'libavcodec/x86/h264_idct.asm')
-rw-r--r-- | libavcodec/x86/h264_idct.asm | 26 |
1 files changed, 13 insertions, 13 deletions
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm index d237b3eefd..ae70a3049b 100644 --- a/libavcodec/x86/h264_idct.asm +++ b/libavcodec/x86/h264_idct.asm @@ -59,11 +59,11 @@ SECTION .text movq m2, [%2+16] movq m3, [%2+24] - IDCT4_1D 0, 1, 2, 3, 4, 5 + IDCT4_1D w, 0, 1, 2, 3, 4, 5 mova m6, [pw_32] TRANSPOSE4x4W 0, 1, 2, 3, 4 paddw m0, m6 - IDCT4_1D 0, 1, 2, 3, 4, 5 + IDCT4_1D w, 0, 1, 2, 3, 4, 5 pxor m7, m7 STORE_DIFFx2 m0, m1, m4, m5, m7, 6, %1, %3 @@ -118,13 +118,13 @@ cglobal h264_idct_add_mmx, 3, 3, 0 mova m2, %1 mova m5, %2 - SUMSUB_BA m5, m2 - SUMSUB_BA m6, m5 - SUMSUB_BA m4, m2 - SUMSUB_BA m7, m6 - SUMSUB_BA m0, m4 - SUMSUB_BA m3, m2 - SUMSUB_BA m1, m5 + SUMSUB_BA w, 5, 2 + SUMSUB_BA w, 6, 5 + SUMSUB_BA w, 4, 2 + SUMSUB_BA w, 7, 6 + SUMSUB_BA w, 0, 4 + SUMSUB_BA w, 3, 2 + SUMSUB_BA w, 1, 5 SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567 %endmacro @@ -715,10 +715,10 @@ x264_add8x4_idct_sse2: movhps m1, [r2+40] movhps m2, [r2+48] movhps m3, [r2+56] - IDCT4_1D 0,1,2,3,4,5 + IDCT4_1D w,0,1,2,3,4,5 TRANSPOSE2x4x4W 0,1,2,3,4 paddw m0, [pw_32] - IDCT4_1D 0,1,2,3,4,5 + IDCT4_1D w,0,1,2,3,4,5 pxor m7, m7 STORE_DIFFx2 m0, m1, m4, m5, m7, 6, r0, r3 lea r0, [r0+r3*2] @@ -859,8 +859,8 @@ cglobal h264_idct_add8_sse2, 5, 7, 8 ;void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul) %macro WALSH4_1D 5 - SUMSUB_BADC m%4, m%3, m%2, m%1, m%5 - SUMSUB_BADC m%4, m%2, m%3, m%1, m%5 + SUMSUB_BADC w, %4, %3, %2, %1, %5 + SUMSUB_BADC w, %4, %2, %3, %1, %5 SWAP %1, %4, %3 %endmacro |