aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/vp3dsp.asm
diff options
context:
space:
mode:
authorMichael Niedermayer <michaelni@gmx.at>2012-07-29 01:56:31 +0200
committerMichael Niedermayer <michaelni@gmx.at>2012-07-29 02:16:26 +0200
commit706bd8ea19a6f723795547885714033ac68a4d74 (patch)
tree39f7933c9432b35801c81903fa7dc352de92fe95 /libavcodec/x86/vp3dsp.asm
parent0f8f9248471bbee5649c8efdc52d02c1cf93bba1 (diff)
parentc83f44dba11930744e167856b48fbc24a8ff0e63 (diff)
downloadffmpeg-706bd8ea19a6f723795547885714033ac68a4d74.tar.gz
Merge remote-tracking branch 'qatar/master'
* qatar/master: (35 commits) h264_idct_10bit: port x86 assembly to cpuflags. x86inc: clip num_args to 7 on x86-32. x86inc: sync to latest version from x264. fft: rename "z" to "zc" to prevent name collision. wv: return meaningful error codes. wv: return AVERROR_EOF on EOF, not EIO. mp3dec: forward errors for av_get_packet(). mp3dec: remove a pointless local variable. mp3dec: remove commented out cruft. lavfi: bump minor to mark stabilizing the ABI. FATE: add tests for yadif. FATE: add a test for delogo video filter. FATE: add a test for amix audio filter. audiogen: allow specifying random seed as a commandline parameter. vc1dec: Override invalid macroblock quantizer vc1: avoid reading beyond the last line in vc1_draw_sprites() vc1dec: check that coded slice positions and interlacing match. vc1dec: Do not ignore ff_vc1_parse_frame_header_adv return value configure: Move parts that should not be user-selectable to CONFIG_EXTRA lavf: remove commented out cruft in avformat_find_stream_info() ... Conflicts: Makefile configure libavcodec/vc1dec.c libavcodec/x86/h264_deblock.asm libavcodec/x86/h264_deblock_10bit.asm libavcodec/x86/h264dsp_mmx.c libavfilter/version.h libavformat/mp3dec.c libavformat/utils.c libavformat/wv.c libavutil/x86/x86inc.asm Merged-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/vp3dsp.asm')
-rw-r--r--libavcodec/x86/vp3dsp.asm97
1 files changed, 50 insertions, 47 deletions
diff --git a/libavcodec/x86/vp3dsp.asm b/libavcodec/x86/vp3dsp.asm
index 46bd9d8f86..dcd55f5f8b 100644
--- a/libavcodec/x86/vp3dsp.asm
+++ b/libavcodec/x86/vp3dsp.asm
@@ -102,8 +102,8 @@ SECTION .text
mov [r0+r3 -1], r2w
%endmacro
-INIT_MMX
-cglobal vp3_v_loop_filter_mmx2, 3, 4
+INIT_MMX mmx2
+cglobal vp3_v_loop_filter, 3, 4
%if ARCH_X86_64
movsxd r1, r1d
%endif
@@ -120,7 +120,7 @@ cglobal vp3_v_loop_filter_mmx2, 3, 4
movq [r0 ], m3
RET
-cglobal vp3_h_loop_filter_mmx2, 3, 4
+cglobal vp3_h_loop_filter, 3, 4
%if ARCH_X86_64
movsxd r1, r1d
%endif
@@ -354,38 +354,6 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4
movq I(2), m2
%endmacro
-%macro VP3_IDCT_mmx 1
- ; eax = quantized input
- ; ebx = dequantizer matrix
- ; ecx = IDCT constants
- ; M(I) = ecx + MaskOffset(0) + I * 8
- ; C(I) = ecx + CosineOffset(32) + (I-1) * 8
- ; edx = output
- ; r0..r7 = mm0..mm7
-%define OC_8 [pw_8]
-%define C(x) [vp3_idct_data+16*(x-1)]
-
- ; at this point, function has completed dequantization + dezigzag +
- ; partial transposition; now do the idct itself
-%define I(x) [%1+16* x ]
-%define J(x) [%1+16*(x-4)+8]
- RowIDCT
- Transpose
-
-%define I(x) [%1+16* x +64]
-%define J(x) [%1+16*(x-4)+72]
- RowIDCT
- Transpose
-
-%define I(x) [%1+16*x]
-%define J(x) [%1+16*x]
- ColumnIDCT
-
-%define I(x) [%1+16*x+8]
-%define J(x) [%1+16*x+8]
- ColumnIDCT
-%endmacro
-
%macro VP3_1D_IDCT_SSE2 0
movdqa m2, I(3) ; xmm2 = i3
movdqa m6, C(3) ; xmm6 = c3
@@ -501,7 +469,8 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4
movdqa O(7), m%8
%endmacro
-%macro VP3_IDCT_sse2 1
+%macro VP3_IDCT 1
+%if mmsize == 16
%define I(x) [%1+16*x]
%define O(x) [%1+16*x]
%define C(x) [vp3_idct_data+16*(x-1)]
@@ -519,11 +488,42 @@ cglobal vp3_h_loop_filter_mmx2, 3, 4
%define ADD(x) paddsw x, [pw_8]
VP3_1D_IDCT_SSE2
PUT_BLOCK 0, 1, 2, 3, 4, 5, 6, 7
+%else ; mmsize == 8
+ ; eax = quantized input
+ ; ebx = dequantizer matrix
+ ; ecx = IDCT constants
+ ; M(I) = ecx + MaskOffset(0) + I * 8
+ ; C(I) = ecx + CosineOffset(32) + (I-1) * 8
+ ; edx = output
+ ; r0..r7 = mm0..mm7
+%define OC_8 [pw_8]
+%define C(x) [vp3_idct_data+16*(x-1)]
+
+ ; at this point, function has completed dequantization + dezigzag +
+ ; partial transposition; now do the idct itself
+%define I(x) [%1+16* x ]
+%define J(x) [%1+16*(x-4)+8]
+ RowIDCT
+ Transpose
+
+%define I(x) [%1+16* x +64]
+%define J(x) [%1+16*(x-4)+72]
+ RowIDCT
+ Transpose
+
+%define I(x) [%1+16*x]
+%define J(x) [%1+16*x]
+ ColumnIDCT
+
+%define I(x) [%1+16*x+8]
+%define J(x) [%1+16*x+8]
+ ColumnIDCT
+%endif ; mmsize == 16/8
%endmacro
-%macro vp3_idct_funcs 1
-cglobal vp3_idct_put_%1, 3, 4, 9
- VP3_IDCT_%1 r2
+%macro vp3_idct_funcs 0
+cglobal vp3_idct_put, 3, 4, 9
+ VP3_IDCT r2
movsxdifnidn r1, r1d
mova m4, [pb_80]
@@ -565,8 +565,8 @@ cglobal vp3_idct_put_%1, 3, 4, 9
%endrep
RET
-cglobal vp3_idct_add_%1, 3, 4, 9
- VP3_IDCT_%1 r2
+cglobal vp3_idct_add, 3, 4, 9
+ VP3_IDCT r2
mov r3, 4
pxor m4, m4
@@ -607,10 +607,13 @@ cglobal vp3_idct_add_%1, 3, 4, 9
RET
%endmacro
-INIT_MMX
-vp3_idct_funcs mmx
-INIT_XMM
-vp3_idct_funcs sse2
+%if ARCH_X86_32
+INIT_MMX mmx
+vp3_idct_funcs
+%endif
+
+INIT_XMM sse2
+vp3_idct_funcs
%macro DC_ADD 0
movq m2, [r0 ]
@@ -631,8 +634,8 @@ vp3_idct_funcs sse2
movq [r0+r3 ], m5
%endmacro
-INIT_MMX
-cglobal vp3_idct_dc_add_mmx2, 3, 4
+INIT_MMX mmx2
+cglobal vp3_idct_dc_add, 3, 4
%if ARCH_X86_64
movsxd r1, r1d
%endif