diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-07-29 01:56:31 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-07-29 02:16:26 +0200 |
commit | 706bd8ea19a6f723795547885714033ac68a4d74 (patch) | |
tree | 39f7933c9432b35801c81903fa7dc352de92fe95 /libavcodec/x86/vp3dsp.asm | |
parent | 0f8f9248471bbee5649c8efdc52d02c1cf93bba1 (diff) | |
parent | c83f44dba11930744e167856b48fbc24a8ff0e63 (diff) | |
download | ffmpeg-706bd8ea19a6f723795547885714033ac68a4d74.tar.gz |
Merge remote-tracking branch 'qatar/master'
* qatar/master: (35 commits)
h264_idct_10bit: port x86 assembly to cpuflags.
x86inc: clip num_args to 7 on x86-32.
x86inc: sync to latest version from x264.
fft: rename "z" to "zc" to prevent name collision.
wv: return meaningful error codes.
wv: return AVERROR_EOF on EOF, not EIO.
mp3dec: forward errors for av_get_packet().
mp3dec: remove a pointless local variable.
mp3dec: remove commented out cruft.
lavfi: bump minor to mark stabilizing the ABI.
FATE: add tests for yadif.
FATE: add a test for delogo video filter.
FATE: add a test for amix audio filter.
audiogen: allow specifying random seed as a commandline parameter.
vc1dec: Override invalid macroblock quantizer
vc1: avoid reading beyond the last line in vc1_draw_sprites()
vc1dec: check that coded slice positions and interlacing match.
vc1dec: Do not ignore ff_vc1_parse_frame_header_adv return value
configure: Move parts that should not be user-selectable to CONFIG_EXTRA
lavf: remove commented out cruft in avformat_find_stream_info()
...
Conflicts:
Makefile
configure
libavcodec/vc1dec.c
libavcodec/x86/h264_deblock.asm
libavcodec/x86/h264_deblock_10bit.asm
libavcodec/x86/h264dsp_mmx.c
libavfilter/version.h
libavformat/mp3dec.c
libavformat/utils.c
libavformat/wv.c
libavutil/x86/x86inc.asm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/vp3dsp.asm')
-rw-r--r-- | libavcodec/x86/vp3dsp.asm | 97 |
1 files changed, 50 insertions, 47 deletions
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm index 46bd9d8f86..dcd55f5f8b 100644 --- a/libavcodec/x86/vp3dsp.asm +++ b/libavcodec/x86/vp3dsp.asm @@ -102,8 +102,8 @@ SECTION .text mov [r0+r3 -1], r2w %endmacro -INIT_MMX -cglobal vp3_v_loop_filter_mmx2, 3, 4 +INIT_MMX mmx2 +cglobal vp3_v_loop_filter, 3, 4 %if ARCH_X86_64 movsxd r1, r1d %endif @@ -120,7 +120,7 @@ cglobal vp3_v_loop_filter_mmx2, 3, 4 movq [r0 ], m3 RET -cglobal vp3_h_loop_filter_mmx2, 3, 4 +cglobal vp3_h_loop_filter, 3, 4 %if ARCH_X86_64 movsxd r1, r1d %endif @@ -354,38 +354,6 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4 movq I(2), m2 %endmacro -%macro VP3_IDCT_mmx 1 - ; eax = quantized input - ; ebx = dequantizer matrix - ; ecx = IDCT constants - ; M(I) = ecx + MaskOffset(0) + I * 8 - ; C(I) = ecx + CosineOffset(32) + (I-1) * 8 - ; edx = output - ; r0..r7 = mm0..mm7 -%define OC_8 [pw_8] -%define C(x) [vp3_idct_data+16*(x-1)] - - ; at this point, function has completed dequantization + dezigzag + - ; partial transposition; now do the idct itself -%define I(x) [%1+16* x ] -%define J(x) [%1+16*(x-4)+8] - RowIDCT - Transpose - -%define I(x) [%1+16* x +64] -%define J(x) [%1+16*(x-4)+72] - RowIDCT - Transpose - -%define I(x) [%1+16*x] -%define J(x) [%1+16*x] - ColumnIDCT - -%define I(x) [%1+16*x+8] -%define J(x) [%1+16*x+8] - ColumnIDCT -%endmacro - %macro VP3_1D_IDCT_SSE2 0 movdqa m2, I(3) ; xmm2 = i3 movdqa m6, C(3) ; xmm6 = c3 @@ -501,7 +469,8 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4 movdqa O(7), m%8 %endmacro -%macro VP3_IDCT_sse2 1 +%macro VP3_IDCT 1 +%if mmsize == 16 %define I(x) [%1+16*x] %define O(x) [%1+16*x] %define C(x) [vp3_idct_data+16*(x-1)] @@ -519,11 +488,42 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4 %define ADD(x) paddsw x, [pw_8] VP3_1D_IDCT_SSE2 PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7 +%else ; mmsize == 8 + ; eax = quantized input + ; ebx = dequantizer matrix + ; ecx = IDCT constants + ; M(I) = ecx + MaskOffset(0) + I * 8 + ; C(I) = ecx + CosineOffset(32) + (I-1) * 8 + ; edx = output + ; r0..r7 = mm0..mm7 +%define OC_8 [pw_8] +%define C(x) [vp3_idct_data+16*(x-1)] + + ; at this point, function has completed dequantization + dezigzag + + ; partial transposition; now do the idct itself +%define I(x) [%1+16* x ] +%define J(x) [%1+16*(x-4)+8] + RowIDCT + Transpose + +%define I(x) [%1+16* x +64] +%define J(x) [%1+16*(x-4)+72] + RowIDCT + Transpose + +%define I(x) [%1+16*x] +%define J(x) [%1+16*x] + ColumnIDCT + +%define I(x) [%1+16*x+8] +%define J(x) [%1+16*x+8] + ColumnIDCT +%endif ; mmsize == 16/8 %endmacro -%macro vp3_idct_funcs 1 -cglobal vp3_idct_put_%1, 3, 4, 9 - VP3_IDCT_%1 r2 +%macro vp3_idct_funcs 0 +cglobal vp3_idct_put, 3, 4, 9 + VP3_IDCT r2 movsxdifnidn r1, r1d mova m4, [pb_80] @@ -565,8 +565,8 @@ cglobal vp3_idct_put_%1, 3, 4, 9 %endrep RET -cglobal vp3_idct_add_%1, 3, 4, 9 - VP3_IDCT_%1 r2 +cglobal vp3_idct_add, 3, 4, 9 + VP3_IDCT r2 mov r3, 4 pxor m4, m4 @@ -607,10 +607,13 @@ cglobal vp3_idct_add_%1, 3, 4, 9 RET %endmacro -INIT_MMX -vp3_idct_funcs mmx -INIT_XMM -vp3_idct_funcs sse2 +%if ARCH_X86_32 +INIT_MMX mmx +vp3_idct_funcs +%endif + +INIT_XMM sse2 +vp3_idct_funcs %macro DC_ADD 0 movq m2, [r0 ] @@ -631,8 +634,8 @@ vp3_idct_funcs sse2 movq [r0+r3 ], m5 %endmacro -INIT_MMX -cglobal vp3_idct_dc_add_mmx2, 3, 4 +INIT_MMX mmx2 +cglobal vp3_idct_dc_add, 3, 4 %if ARCH_X86_64 movsxd r1, r1d %endif |