about | summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/x86/h264_idct.asm
diff options
context:
space:
mode:
author Michael Niedermayer <michaelni@gmx.at> 2012-01-28 04:23:26 +0100
committer Michael Niedermayer <michaelni@gmx.at> 2012-01-28 07:53:34 +0100
commit e37f161e66e042d6c2c7470c4d9881df9427fc4a (patch)
tree 6400fd6453f0525a65724937532d5baa33deead3 /libavcodec/x86/h264_idct.asm
parent f21b6159cf3110a5f018d6addf7382840d427199 (diff)
parent e771e6dd63e837220aa5d959486546d2be972e83 (diff)
download ffmpeg-e37f161e66e042d6c2c7470c4d9881df9427fc4a.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master: (71 commits)
  movenc: Allow writing to a non-seekable output if using empty moov
  movenc: Support adding isml (smooth streaming live) metadata
  libavcodec: Don't crash in avcodec_encode_audio if time_base isn't set
  sunrast: Document the different Sun Raster file format types.
  sunrast: Add a check for experimental type.
  libspeexenc: use AVSampleFormat instead of deprecated/removed SampleFormat
  lavf: remove disabled FF_API_SET_PTS_INFO cruft
  lavf: remove disabled FF_API_OLD_INTERRUPT_CB cruft
  lavf: remove disabled FF_API_REORDER_PRIVATE cruft
  lavf: remove disabled FF_API_SEEK_PUBLIC cruft
  lavf: remove disabled FF_API_STREAM_COPY cruft
  lavf: remove disabled FF_API_PRELOAD cruft
  lavf: remove disabled FF_API_NEW_STREAM cruft
  lavf: remove disabled FF_API_RTSP_URL_OPTIONS cruft
  lavf: remove disabled FF_API_MUXRATE cruft
  lavf: remove disabled FF_API_FILESIZE cruft
  lavf: remove disabled FF_API_TIMESTAMP cruft
  lavf: remove disabled FF_API_LOOP_OUTPUT cruft
  lavf: remove disabled FF_API_LOOP_INPUT cruft
  lavf: remove disabled FF_API_AVSTREAM_QUALITY cruft
  ...

Conflicts:
	doc/APIchanges
	libavcodec/8bps.c
	libavcodec/avcodec.h
	libavcodec/libx264.c
	libavcodec/mjpegbdec.c
	libavcodec/options.c
	libavcodec/sunrast.c
	libavcodec/utils.c
	libavcodec/version.h
	libavcodec/x86/h264_deblock.asm
	libavdevice/libdc1394.c
	libavdevice/v4l2.c
	libavformat/avformat.h
	libavformat/avio.c
	libavformat/avio.h
	libavformat/aviobuf.c
	libavformat/dv.c
	libavformat/mov.c
	libavformat/utils.c
	libavformat/version.h
	libavformat/wtv.c
	libavutil/Makefile
	libavutil/file.c
	libswscale/x86/input.asm
	libswscale/x86/swscale_mmx.c
	libswscale/x86/swscale_template.c
	tests/ref/lavf/ffm

Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/h264_idct.asm')
-rw-r--r-- libavcodec/x86/h264_idct.asm 60
1 files changed, 30 insertions, 30 deletions
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm
index dd13bcd72f..15ba297ee9 100644
--- a/libavcodec/x86/h264_idct.asm
+++ b/libavcodec/x86/h264_idct.asm
@@ -198,14 +198,14 @@ cglobal h264_idct8_add_8_mmx, 3, 4, 0
; %1=uint8_t *dst, %2=int16_t *block, %3=int stride
%macro IDCT8_ADD_SSE 4
IDCT8_1D_FULL %2
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8
%else
TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%2], [%2+16]
%endif
paddw m0, [pw_32]
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
mova [%2 ], m0
mova [%2+16], m4
IDCT8_1D [%2], [%2+ 16]
@@ -225,7 +225,7 @@ cglobal h264_idct8_add_8_mmx, 3, 4, 0
STORE_DIFF m1, m6, m7, [%1+%3 ]
STORE_DIFF m2, m6, m7, [%1+%3*2]
STORE_DIFF m3, m6, m7, [%1+%4 ]
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
mova m0, [%2 ]
mova m1, [%2+16]
%else
@@ -371,7 +371,7 @@ cglobal h264_idct_add16_8_mmx2, 5, 7, 0
test r6, r6
jz .no_dc
DC_ADD_MMX2_INIT r2, r3, r6
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%define dst_reg r10
%define dst_regd r10d
%else
@@ -381,7 +381,7 @@ cglobal h264_idct_add16_8_mmx2, 5, 7, 0
mov dst_regd, dword [r1+r5*4]
lea dst_reg, [r0+dst_reg]
DC_ADD_MMX2_OP movh, dst_reg, r3, r6
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
mov r1, r1m
%endif
inc r5
@@ -448,7 +448,7 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0
test r6, r6
jz .skipblock
DC_ADD_MMX2_INIT r2, r3, r6
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%define dst_reg r10
%define dst_regd r10d
%else
@@ -458,7 +458,7 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0
mov dst_regd, dword [r1+r5*4]
add dst_reg, r0
DC_ADD_MMX2_OP movh, dst_reg, r3, r6
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
mov r1, r1m
%endif
.skipblock
@@ -489,7 +489,7 @@ cglobal h264_idct8_add4_8_mmx2, 5, 7, 0
test r6, r6
jz .no_dc
DC_ADD_MMX2_INIT r2, r3, r6
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%define dst_reg r10
%define dst_regd r10d
%else
@@ -501,7 +501,7 @@ cglobal h264_idct8_add4_8_mmx2, 5, 7, 0
DC_ADD_MMX2_OP mova, dst_reg, r3, r6
lea dst_reg, [dst_reg+r3*4]
DC_ADD_MMX2_OP mova, dst_reg, r3, r6
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
mov r1, r1m
%endif
add r5, 4
@@ -550,7 +550,7 @@ cglobal h264_idct8_add4_8_sse2, 5, 7, 10
jz .no_dc
INIT_MMX
DC_ADD_MMX2_INIT r2, r3, r6
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
%define dst_reg r10
%define dst_regd r10d
%else
@@ -562,7 +562,7 @@ INIT_MMX
DC_ADD_MMX2_OP mova, dst_reg, r3, r6
lea dst_reg, [dst_reg+r3*4]
DC_ADD_MMX2_OP mova, dst_reg, r3, r6
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
mov r1, r1m
%endif
add r5, 4
@@ -575,7 +575,7 @@ INIT_XMM
mov dst_regd, dword [r1+r5*4]
add dst_reg, r0
IDCT8_ADD_SSE dst_reg, r2, r3, r6
-%ifndef ARCH_X86_64
+%if ARCH_X86_64 == 0
mov r1, r1m
%endif
.skipblock
@@ -593,7 +593,7 @@ h264_idct_add8_mmx_plane:
or r6w, word [r2]
test r6, r6
jz .skipblock
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
mov r0d, dword [r1+r5*4]
add r0, [r10]
%else
@@ -617,13 +617,13 @@ cglobal h264_idct_add8_8_mmx, 5, 7, 0
%ifdef PIC
lea r11, [scan8_mem]
%endif
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
mov r10, r0
%endif
call h264_idct_add8_mmx_plane
mov r5, 32
add r2, 384
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
add r10, gprsize
%else
add r0mp, gprsize
@@ -637,7 +637,7 @@ h264_idct_add8_mmx2_plane
movzx r6, byte [r4+r6]
test r6, r6
jz .try_dc
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
mov r0d, dword [r1+r5*4]
add r0, [r10]
%else
@@ -656,7 +656,7 @@ h264_idct_add8_mmx2_plane
test r6, r6
jz .skipblock
DC_ADD_MMX2_INIT r2, r3, r6
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
mov r0d, dword [r1+r5*4]
add r0, [r10]
%else
@@ -677,7 +677,7 @@ h264_idct_add8_mmx2_plane
cglobal h264_idct_add8_8_mmx2, 5, 7, 0
mov r5, 16
add r2, 512
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
mov r10, r0
%endif
%ifdef PIC
@@ -686,7 +686,7 @@ cglobal h264_idct_add8_8_mmx2, 5, 7, 0
call h264_idct_add8_mmx2_plane
mov r5, 32
add r2, 384
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
add r10, gprsize
%else
add r0mp, gprsize
@@ -738,7 +738,7 @@ x264_add8x4_idct_sse2:
test r0, r0
jz .cycle%1end
mov r0d, dword [r1+%1*8]
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
add r0, r10
%else
add r0, r0m
@@ -753,7 +753,7 @@ x264_add8x4_idct_sse2:
; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add16_8_sse2, 5, 5, 8
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
mov r10, r0
%endif
; unrolling of the loop leads to an average performance gain of
@@ -773,7 +773,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8
test r0, r0
jz .try%1dc
mov r0d, dword [r1+%1*8]
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
add r0, r10
%else
add r0, r0m
@@ -785,7 +785,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8
or r0w, word [r2+32]
jz .cycle%1end
mov r0d, dword [r1+%1*8]
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
add r0, r10
%else
add r0, r0m
@@ -800,7 +800,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8
; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
mov r10, r0
%endif
add16intra_sse2_cycle 0, 0xc
@@ -817,7 +817,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
movzx r0, word [r4+%2]
test r0, r0
jz .try%1dc
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
add r0, [r10]
%else
@@ -831,7 +831,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
movsx r0, word [r2 ]
or r0w, word [r2+32]
jz .cycle%1end
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))]
add r0, [r10]
%else
@@ -852,12 +852,12 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add8_8_sse2, 5, 7, 8
add r2, 512
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
mov r10, r0
%endif
add8_sse2_cycle 0, 0x34
add8_sse2_cycle 1, 0x3c
-%ifdef ARCH_X86_64
+%if ARCH_X86_64
add r10, gprsize
%else
add r0mp, gprsize
@@ -977,11 +977,11 @@ cglobal h264_luma_dc_dequant_idct_%1, 3,4,%2
WALSH4_1D 0,1,2,3,4
; shift, tmp, output, qmul
-%ifdef WIN64
+%if WIN64
DECLARE_REG_TMP 0,3,1,2
; we can't avoid this, because r0 is the shift register (ecx) on win64
xchg r0, t2
-%elifdef ARCH_X86_64
+%elif ARCH_X86_64
DECLARE_REG_TMP 3,1,0,2
%else
DECLARE_REG_TMP 1,3,0,2