diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2014-01-20 20:04:48 -0500 |
---|---|---|
committer | Ronald S. Bultje <rsbultje@gmail.com> | 2014-01-24 19:25:25 -0500 |
commit | 97474d527f9a17340c29018548502c178ed7d966 (patch) | |
tree | bbca2f7c33c96e4e794fe35a8b8297b3e3e5ace4 | |
parent | d43efa68bd53eecc9cea6e1081b6fb163e45665c (diff) | |
download | ffmpeg-97474d527f9a17340c29018548502c178ed7d966.tar.gz |
vp9/x86: iwht4x4 (lossless) mmx.
-rw-r--r-- | libavcodec/x86/vp9dsp_init.c | 5 | ||||
-rw-r--r-- | libavcodec/x86/vp9itxfm.asm | 41 |
2 files changed, 46 insertions, 0 deletions
```diff
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 612da8d2b7..47d4153614 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -173,6 +173,7 @@ itxfm_funcs(16, ssse3);
 itxfm_funcs(16, avx);
 itxfm_func(idct, idct, 32, ssse3);
 itxfm_func(idct, idct, 32, avx);
+itxfm_func(iwht, iwht, 4, mmx);
 
 #undef itxfm_func
 #undef itxfm_funcs
@@ -223,6 +224,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
     if (EXTERNAL_MMX(cpu_flags)) {
         init_fpel(4, 0, 4, put, mmx);
         init_fpel(3, 0, 8, put, mmx);
+        dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
+        dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
+        dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
+        dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
     }
 
     if (EXTERNAL_MMXEXT(cpu_flags)) {
diff --git a/libavcodec/x86/vp9itxfm.asm b/libavcodec/x86/vp9itxfm.asm
index 8fd838dc9b..b142b8f778 100644
--- a/libavcodec/x86/vp9itxfm.asm
+++ b/libavcodec/x86/vp9itxfm.asm
@@ -152,6 +152,47 @@ SECTION .text
 %endmacro
 
 ;-------------------------------------------------------------------------------------------
+; void vp9_iwht_iwht_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
+;-------------------------------------------------------------------------------------------
+
+%macro VP9_IWHT4_1D 0
+    SWAP 1, 2, 3
+    paddw m0, m2
+    psubw m3, m1
+    psubw m4, m0, m3
+    psraw m4, 1
+    psubw m5, m4, m1
+    SWAP 5, 1
+    psubw m4, m2
+    SWAP 4, 2
+    psubw m0, m1
+    paddw m3, m2
+    SWAP 3, 2, 1
+%endmacro
+
+INIT_MMX mmx
+cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob
+    mova m0, [blockq+0*8]
+    mova m1, [blockq+1*8]
+    mova m2, [blockq+2*8]
+    mova m3, [blockq+3*8]
+    psraw m0, 2
+    psraw m1, 2
+    psraw m2, 2
+    psraw m3, 2
+
+    VP9_IWHT4_1D
+    TRANSPOSE4x4W 0, 1, 2, 3, 4
+    VP9_IWHT4_1D
+
+    pxor m4, m4
+    VP9_STORE_2X 0, 1, 5, 6, 4
+    lea dstq, [dstq+strideq*2]
+    VP9_STORE_2X 2, 3, 5, 6, 4
+    ZERO_BLOCK blockq, 8, 4, m4
+    RET
+
+;-------------------------------------------------------------------------------------------
 ; void vp9_idct_idct_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
 ;-------------------------------------------------------------------------------------------
 
```