diff options
author | Jason Garrett-Glaser <darkshikari@gmail.com> | 2010-06-29 01:41:59 +0000 |
---|---|---|
committer | Jason Garrett-Glaser <darkshikari@gmail.com> | 2010-06-29 01:41:59 +0000 |
commit | 004cda8e79d839209e636fd97bec1090d53936b5 (patch) | |
tree | cfa8430fddd8f66dccbd35f1566a43fac8d4e9b4 | |
parent | 37355fe823170537d97d9858877a928c645984f4 (diff) | |
download | ffmpeg-004cda8e79d839209e636fd97bec1090d53936b5.tar.gz |
Add mmxext version of VP8 DC Hadamard transform
Originally committed as revision 23878 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/x86/vp8dsp-init.c | 2 | ||||
-rw-r--r-- | libavcodec/x86/vp8dsp.asm | 46 |
2 files changed, 48 insertions, 0 deletions
```diff
diff --git a/libavcodec/x86/vp8dsp-init.c b/libavcodec/x86/vp8dsp-init.c
index 7bb97ae3f8..9eec10a0a7 100644
--- a/libavcodec/x86/vp8dsp-init.c
+++ b/libavcodec/x86/vp8dsp-init.c
@@ -195,6 +195,7 @@ HVBILIN(ssse3, 8, 16, 16)
 
 extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
 extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride);
+extern void ff_vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16]);
 #endif
 
 #define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
@@ -237,6 +238,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
     /* note that 4-tap width=16 functions are missing because w=16
      * is only used for luma, and luma is always a copy or sixtap. */
     if (mm_flags & FF_MM_MMX2) {
+        c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_mmxext;
         VP8_LUMA_MC_FUNC(0, 16, mmxext);
         VP8_MC_FUNC(1, 8, mmxext);
         VP8_MC_FUNC(1, 4, mmxext);
diff --git a/libavcodec/x86/vp8dsp.asm b/libavcodec/x86/vp8dsp.asm
index 7c366edc06..118d07196e 100644
--- a/libavcodec/x86/vp8dsp.asm
+++ b/libavcodec/x86/vp8dsp.asm
@@ -21,6 +21,7 @@
 ;******************************************************************************
 
 %include "x86inc.asm"
+%include "x86util.asm"
 
 SECTION_RODATA
 
@@ -141,6 +142,7 @@ filter_h6_shuf1: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
 filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9
 filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11
 
+cextern pw_3
 cextern pw_4
 cextern pw_64
 
@@ -920,3 +922,47 @@ cglobal vp8_idct_dc_add_sse4, 3, 3, 6
     pextrd [r1], xmm2, 2
     pextrd [r1+r2], xmm2, 3
     RET
+
+;-----------------------------------------------------------------------------
+; void vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16])
+;-----------------------------------------------------------------------------
+
+%macro SCATTER_WHT 1
+    pextrw r1d, m0, %1
+    pextrw r2d, m1, %1
+    mov [r0+2*16*0], r1w
+    mov [r0+2*16*1], r2w
+    pextrw r1d, m2, %1
+    pextrw r2d, m3, %1
+    mov [r0+2*16*2], r1w
+    mov [r0+2*16*3], r2w
+%endmacro
+
+%macro HADAMARD4_1D 4
+    SUMSUB_BADC m%2, m%1, m%4, m%3
+    SUMSUB_BADC m%4, m%2, m%3, m%1
+    SWAP %1, %4, %3
+%endmacro
+
+INIT_MMX
+cglobal vp8_luma_dc_wht_mmxext, 2,3
+    movq m0, [r1]
+    movq m1, [r1+8]
+    movq m2, [r1+16]
+    movq m3, [r1+24]
+    HADAMARD4_1D 0, 1, 2, 3
+    TRANSPOSE4x4W 0, 1, 2, 3, 4
+    paddw m0, [pw_3]
+    HADAMARD4_1D 0, 1, 2, 3
+    psraw m0, 3
+    psraw m1, 3
+    psraw m2, 3
+    psraw m3, 3
+    SCATTER_WHT 0
+    add r0, 2*16*4
+    SCATTER_WHT 1
+    add r0, 2*16*4
+    SCATTER_WHT 2
+    add r0, 2*16*4
+    SCATTER_WHT 3
+    RET
```