diff options
author | Ronald S. Bultje <rsbultje@gmail.com> | 2014-12-15 00:25:10 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-12-15 00:38:05 +0100 |
commit | fd77fbb3900665ae8d0d1d8bbf9345822efc3d30 (patch) | |
tree | cbe0968c2ba32b2441f43de2064b6028adfea08c /libavcodec/x86/vp9dsp_init.c | |
parent | 4efdb29c75ba57cb119c04f303fd74e2fd0f0368 (diff) | |
download | ffmpeg-fd77fbb3900665ae8d0d1d8bbf9345822efc3d30.tar.gz |
vp9/x86: 32bit and sse2 support for vp9 inverse transform assembly
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/vp9dsp_init.c')
-rw-r--r-- | libavcodec/x86/vp9dsp_init.c | 75 |
1 files changed, 47 insertions, 28 deletions
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c index 2790024194..06e77a83f8 100644 --- a/libavcodec/x86/vp9dsp_init.c +++ b/libavcodec/x86/vp9dsp_init.c @@ -186,11 +186,18 @@ itxfm_func(iadst, idct, size, opt); \ itxfm_func(idct, iadst, size, opt); \ itxfm_func(iadst, iadst, size, opt) +itxfm_func(idct, idct, 4, mmxext); +itxfm_func(idct, iadst, 4, sse2); +itxfm_func(iadst, idct, 4, sse2); +itxfm_func(iadst, iadst, 4, sse2); itxfm_funcs(4, ssse3); +itxfm_funcs(8, sse2); itxfm_funcs(8, ssse3); itxfm_funcs(8, avx); +itxfm_funcs(16, sse2); itxfm_funcs(16, ssse3); itxfm_funcs(16, avx); +itxfm_func(idct, idct, 32, sse2); itxfm_func(idct, idct, 32, ssse3); itxfm_func(idct, idct, 32, avx); itxfm_func(iwht, iwht, 4, mmx); @@ -352,6 +359,7 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp) if (EXTERNAL_MMXEXT(cpu_flags)) { init_fpel(4, 1, 4, avg, mmxext); init_fpel(3, 1, 8, avg, mmxext); + dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext; } if (EXTERNAL_SSE(cpu_flags)) { @@ -365,6 +373,21 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp) init_fpel(1, 1, 32, avg, sse2); init_fpel(0, 1, 64, avg, sse2); init_lpf(sse2); + dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_sse2; + dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_sse2; + dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_sse2; + dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_sse2; + dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_sse2; + dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_sse2; + dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_sse2; + dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_sse2; + dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_sse2; + dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_sse2; + dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_sse2; + dsp->itxfm_add[TX_32X32][ADST_ADST] = + dsp->itxfm_add[TX_32X32][ADST_DCT] = + dsp->itxfm_add[TX_32X32][DCT_ADST] = + dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_sse2; dsp->intra_pred[TX_16X16][VERT_PRED] = ff_vp9_ipred_v_16x16_sse2; dsp->intra_pred[TX_32X32][VERT_PRED] = ff_vp9_ipred_v_32x32_sse2; } @@ -376,20 +399,18 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp) dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_ssse3; dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_ssse3; dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3; - if (ARCH_X86_64) { - dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3; - dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_ssse3; - dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_ssse3; - dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3; - dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3; - dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_ssse3; - dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_ssse3; - dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3; - dsp->itxfm_add[TX_32X32][ADST_ADST] = - dsp->itxfm_add[TX_32X32][ADST_DCT] = - dsp->itxfm_add[TX_32X32][DCT_ADST] = - dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3; - } + dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3; + dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_ssse3; + dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_ssse3; + dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3; + dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3; + dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_ssse3; + dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_ssse3; + dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3; + dsp->itxfm_add[TX_32X32][ADST_ADST] = + dsp->itxfm_add[TX_32X32][ADST_DCT] = + dsp->itxfm_add[TX_32X32][DCT_ADST] = + dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3; init_lpf(ssse3); init_dc_ipred(TX_4X4, 4, ssse3); init_dc_ipred(TX_8X8, 8, ssse3); @@ -398,20 +419,18 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp) } if (EXTERNAL_AVX(cpu_flags)) { - if (ARCH_X86_64) { - dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx; - dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_avx; - dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_avx; - dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx; - dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx; - dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx; - dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx; - dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx; - dsp->itxfm_add[TX_32X32][ADST_ADST] = - dsp->itxfm_add[TX_32X32][ADST_DCT] = - dsp->itxfm_add[TX_32X32][DCT_ADST] = - dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx; - } + dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx; + dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_avx; + dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_avx; + dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx; + dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx; + dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx; + dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx; + dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx; + dsp->itxfm_add[TX_32X32][ADST_ADST] = + dsp->itxfm_add[TX_32X32][ADST_DCT] = + dsp->itxfm_add[TX_32X32][DCT_ADST] = + dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx; init_fpel(1, 0, 32, put, avx); init_fpel(0, 0, 64, put, avx); init_lpf(avx); |