diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-01-28 04:23:26 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-01-28 07:53:34 +0100 |
commit | e37f161e66e042d6c2c7470c4d9881df9427fc4a (patch) | |
tree | 6400fd6453f0525a65724937532d5baa33deead3 /libavcodec/x86/h264_idct.asm | |
parent | f21b6159cf3110a5f018d6addf7382840d427199 (diff) | |
parent | e771e6dd63e837220aa5d959486546d2be972e83 (diff) | |
download | ffmpeg-e37f161e66e042d6c2c7470c4d9881df9427fc4a.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master: (71 commits)
movenc: Allow writing to a non-seekable output if using empty moov
movenc: Support adding isml (smooth streaming live) metadata
libavcodec: Don't crash in avcodec_encode_audio if time_base isn't set
sunrast: Document the different Sun Raster file format types.
sunrast: Add a check for experimental type.
libspeexenc: use AVSampleFormat instead of deprecated/removed SampleFormat
lavf: remove disabled FF_API_SET_PTS_INFO cruft
lavf: remove disabled FF_API_OLD_INTERRUPT_CB cruft
lavf: remove disabled FF_API_REORDER_PRIVATE cruft
lavf: remove disabled FF_API_SEEK_PUBLIC cruft
lavf: remove disabled FF_API_STREAM_COPY cruft
lavf: remove disabled FF_API_PRELOAD cruft
lavf: remove disabled FF_API_NEW_STREAM cruft
lavf: remove disabled FF_API_RTSP_URL_OPTIONS cruft
lavf: remove disabled FF_API_MUXRATE cruft
lavf: remove disabled FF_API_FILESIZE cruft
lavf: remove disabled FF_API_TIMESTAMP cruft
lavf: remove disabled FF_API_LOOP_OUTPUT cruft
lavf: remove disabled FF_API_LOOP_INPUT cruft
lavf: remove disabled FF_API_AVSTREAM_QUALITY cruft
...
Conflicts:
doc/APIchanges
libavcodec/8bps.c
libavcodec/avcodec.h
libavcodec/libx264.c
libavcodec/mjpegbdec.c
libavcodec/options.c
libavcodec/sunrast.c
libavcodec/utils.c
libavcodec/version.h
libavcodec/x86/h264_deblock.asm
libavdevice/libdc1394.c
libavdevice/v4l2.c
libavformat/avformat.h
libavformat/avio.c
libavformat/avio.h
libavformat/aviobuf.c
libavformat/dv.c
libavformat/mov.c
libavformat/utils.c
libavformat/version.h
libavformat/wtv.c
libavutil/Makefile
libavutil/file.c
libswscale/x86/input.asm
libswscale/x86/swscale_mmx.c
libswscale/x86/swscale_template.c
tests/ref/lavf/ffm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/h264_idct.asm')
-rw-r--r-- | libavcodec/x86/h264_idct.asm | 60 |
1 file changed, 30 insertions, 30 deletions
diff --git a/libavcodec/x86/h264_idct.asm b/libavcodec/x86/h264_idct.asm index dd13bcd72f..15ba297ee9 100644 --- a/libavcodec/x86/h264_idct.asm +++ b/libavcodec/x86/h264_idct.asm @@ -198,14 +198,14 @@ cglobal h264_idct8_add_8_mmx, 3, 4, 0 ; %1=uint8_t *dst, %2=int16_t *block, %3=int stride %macro IDCT8_ADD_SSE 4 IDCT8_1D_FULL %2 -%ifdef ARCH_X86_64 +%if ARCH_X86_64 TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, 8 %else TRANSPOSE8x8W 0, 1, 2, 3, 4, 5, 6, 7, [%2], [%2+16] %endif paddw m0, [pw_32] -%ifndef ARCH_X86_64 +%if ARCH_X86_64 == 0 mova [%2 ], m0 mova [%2+16], m4 IDCT8_1D [%2], [%2+ 16] @@ -225,7 +225,7 @@ cglobal h264_idct8_add_8_mmx, 3, 4, 0 STORE_DIFF m1, m6, m7, [%1+%3 ] STORE_DIFF m2, m6, m7, [%1+%3*2] STORE_DIFF m3, m6, m7, [%1+%4 ] -%ifndef ARCH_X86_64 +%if ARCH_X86_64 == 0 mova m0, [%2 ] mova m1, [%2+16] %else @@ -371,7 +371,7 @@ cglobal h264_idct_add16_8_mmx2, 5, 7, 0 test r6, r6 jz .no_dc DC_ADD_MMX2_INIT r2, r3, r6 -%ifdef ARCH_X86_64 +%if ARCH_X86_64 %define dst_reg r10 %define dst_regd r10d %else @@ -381,7 +381,7 @@ cglobal h264_idct_add16_8_mmx2, 5, 7, 0 mov dst_regd, dword [r1+r5*4] lea dst_reg, [r0+dst_reg] DC_ADD_MMX2_OP movh, dst_reg, r3, r6 -%ifndef ARCH_X86_64 +%if ARCH_X86_64 == 0 mov r1, r1m %endif inc r5 @@ -448,7 +448,7 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0 test r6, r6 jz .skipblock DC_ADD_MMX2_INIT r2, r3, r6 -%ifdef ARCH_X86_64 +%if ARCH_X86_64 %define dst_reg r10 %define dst_regd r10d %else @@ -458,7 +458,7 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0 mov dst_regd, dword [r1+r5*4] add dst_reg, r0 DC_ADD_MMX2_OP movh, dst_reg, r3, r6 -%ifndef ARCH_X86_64 +%if ARCH_X86_64 == 0 mov r1, r1m %endif .skipblock @@ -489,7 +489,7 @@ cglobal h264_idct8_add4_8_mmx2, 5, 7, 0 test r6, r6 jz .no_dc DC_ADD_MMX2_INIT r2, r3, r6 -%ifdef ARCH_X86_64 +%if ARCH_X86_64 %define dst_reg r10 %define dst_regd r10d %else @@ -501,7 +501,7 @@ cglobal h264_idct8_add4_8_mmx2, 5, 7, 0 DC_ADD_MMX2_OP mova, dst_reg, r3, r6 lea dst_reg, [dst_reg+r3*4] 
DC_ADD_MMX2_OP mova, dst_reg, r3, r6 -%ifndef ARCH_X86_64 +%if ARCH_X86_64 == 0 mov r1, r1m %endif add r5, 4 @@ -550,7 +550,7 @@ cglobal h264_idct8_add4_8_sse2, 5, 7, 10 jz .no_dc INIT_MMX DC_ADD_MMX2_INIT r2, r3, r6 -%ifdef ARCH_X86_64 +%if ARCH_X86_64 %define dst_reg r10 %define dst_regd r10d %else @@ -562,7 +562,7 @@ INIT_MMX DC_ADD_MMX2_OP mova, dst_reg, r3, r6 lea dst_reg, [dst_reg+r3*4] DC_ADD_MMX2_OP mova, dst_reg, r3, r6 -%ifndef ARCH_X86_64 +%if ARCH_X86_64 == 0 mov r1, r1m %endif add r5, 4 @@ -575,7 +575,7 @@ INIT_XMM mov dst_regd, dword [r1+r5*4] add dst_reg, r0 IDCT8_ADD_SSE dst_reg, r2, r3, r6 -%ifndef ARCH_X86_64 +%if ARCH_X86_64 == 0 mov r1, r1m %endif .skipblock @@ -593,7 +593,7 @@ h264_idct_add8_mmx_plane: or r6w, word [r2] test r6, r6 jz .skipblock -%ifdef ARCH_X86_64 +%if ARCH_X86_64 mov r0d, dword [r1+r5*4] add r0, [r10] %else @@ -617,13 +617,13 @@ cglobal h264_idct_add8_8_mmx, 5, 7, 0 %ifdef PIC lea r11, [scan8_mem] %endif -%ifdef ARCH_X86_64 +%if ARCH_X86_64 mov r10, r0 %endif call h264_idct_add8_mmx_plane mov r5, 32 add r2, 384 -%ifdef ARCH_X86_64 +%if ARCH_X86_64 add r10, gprsize %else add r0mp, gprsize @@ -637,7 +637,7 @@ h264_idct_add8_mmx2_plane movzx r6, byte [r4+r6] test r6, r6 jz .try_dc -%ifdef ARCH_X86_64 +%if ARCH_X86_64 mov r0d, dword [r1+r5*4] add r0, [r10] %else @@ -656,7 +656,7 @@ h264_idct_add8_mmx2_plane test r6, r6 jz .skipblock DC_ADD_MMX2_INIT r2, r3, r6 -%ifdef ARCH_X86_64 +%if ARCH_X86_64 mov r0d, dword [r1+r5*4] add r0, [r10] %else @@ -677,7 +677,7 @@ h264_idct_add8_mmx2_plane cglobal h264_idct_add8_8_mmx2, 5, 7, 0 mov r5, 16 add r2, 512 -%ifdef ARCH_X86_64 +%if ARCH_X86_64 mov r10, r0 %endif %ifdef PIC @@ -686,7 +686,7 @@ cglobal h264_idct_add8_8_mmx2, 5, 7, 0 call h264_idct_add8_mmx2_plane mov r5, 32 add r2, 384 -%ifdef ARCH_X86_64 +%if ARCH_X86_64 add r10, gprsize %else add r0mp, gprsize @@ -738,7 +738,7 @@ x264_add8x4_idct_sse2: test r0, r0 jz .cycle%1end mov r0d, dword [r1+%1*8] -%ifdef ARCH_X86_64 +%if ARCH_X86_64 
add r0, r10 %else add r0, r0m @@ -753,7 +753,7 @@ x264_add8x4_idct_sse2: ; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) cglobal h264_idct_add16_8_sse2, 5, 5, 8 -%ifdef ARCH_X86_64 +%if ARCH_X86_64 mov r10, r0 %endif ; unrolling of the loop leads to an average performance gain of @@ -773,7 +773,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8 test r0, r0 jz .try%1dc mov r0d, dword [r1+%1*8] -%ifdef ARCH_X86_64 +%if ARCH_X86_64 add r0, r10 %else add r0, r0m @@ -785,7 +785,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8 or r0w, word [r2+32] jz .cycle%1end mov r0d, dword [r1+%1*8] -%ifdef ARCH_X86_64 +%if ARCH_X86_64 add r0, r10 %else add r0, r0m @@ -800,7 +800,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5, 8 ; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) cglobal h264_idct_add16intra_8_sse2, 5, 7, 8 -%ifdef ARCH_X86_64 +%if ARCH_X86_64 mov r10, r0 %endif add16intra_sse2_cycle 0, 0xc @@ -817,7 +817,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8 movzx r0, word [r4+%2] test r0, r0 jz .try%1dc -%ifdef ARCH_X86_64 +%if ARCH_X86_64 mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))] add r0, [r10] %else @@ -831,7 +831,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8 movsx r0, word [r2 ] or r0w, word [r2+32] jz .cycle%1end -%ifdef ARCH_X86_64 +%if ARCH_X86_64 mov r0d, dword [r1+(%1&1)*8+64*(1+(%1>>1))] add r0, [r10] %else @@ -852,12 +852,12 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7, 8 ; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) cglobal h264_idct_add8_8_sse2, 5, 7, 8 add r2, 512 -%ifdef ARCH_X86_64 +%if ARCH_X86_64 mov r10, r0 %endif add8_sse2_cycle 0, 0x34 add8_sse2_cycle 1, 0x3c -%ifdef ARCH_X86_64 +%if ARCH_X86_64 add r10, gprsize %else add r0mp, gprsize @@ -977,11 +977,11 @@ cglobal h264_luma_dc_dequant_idct_%1, 3,4,%2 WALSH4_1D 0,1,2,3,4 ; shift, tmp, output, qmul -%ifdef WIN64 +%if WIN64 DECLARE_REG_TMP 0,3,1,2 ; we can't 
avoid this, because r0 is the shift register (ecx) on win64 xchg r0, t2 -%elifdef ARCH_X86_64 +%elif ARCH_X86_64 DECLARE_REG_TMP 3,1,0,2 %else DECLARE_REG_TMP 1,3,0,2 |