aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/vp9dsp_init.c
diff options
context:
space:
mode:
authorRonald S. Bultje <rsbultje@gmail.com>2014-12-15 00:25:10 +0100
committerMichael Niedermayer <michaelni@gmx.at>2014-12-15 00:38:05 +0100
commitfd77fbb3900665ae8d0d1d8bbf9345822efc3d30 (patch)
treecbe0968c2ba32b2441f43de2064b6028adfea08c /libavcodec/x86/vp9dsp_init.c
parent4efdb29c75ba57cb119c04f303fd74e2fd0f0368 (diff)
downloadffmpeg-fd77fbb3900665ae8d0d1d8bbf9345822efc3d30.tar.gz
vp9/x86: 32bit and sse2 support for vp9 inverse transform assembly
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/vp9dsp_init.c')
-rw-r--r--libavcodec/x86/vp9dsp_init.c75
1 files changed, 47 insertions, 28 deletions
diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c
index 2790024194..06e77a83f8 100644
--- a/libavcodec/x86/vp9dsp_init.c
+++ b/libavcodec/x86/vp9dsp_init.c
@@ -186,11 +186,18 @@ itxfm_func(iadst, idct, size, opt); \
itxfm_func(idct, iadst, size, opt); \
itxfm_func(iadst, iadst, size, opt)
+itxfm_func(idct, idct, 4, mmxext);
+itxfm_func(idct, iadst, 4, sse2);
+itxfm_func(iadst, idct, 4, sse2);
+itxfm_func(iadst, iadst, 4, sse2);
itxfm_funcs(4, ssse3);
+itxfm_funcs(8, sse2);
itxfm_funcs(8, ssse3);
itxfm_funcs(8, avx);
+itxfm_funcs(16, sse2);
itxfm_funcs(16, ssse3);
itxfm_funcs(16, avx);
+itxfm_func(idct, idct, 32, sse2);
itxfm_func(idct, idct, 32, ssse3);
itxfm_func(idct, idct, 32, avx);
itxfm_func(iwht, iwht, 4, mmx);
@@ -352,6 +359,7 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
if (EXTERNAL_MMXEXT(cpu_flags)) {
init_fpel(4, 1, 4, avg, mmxext);
init_fpel(3, 1, 8, avg, mmxext);
+ dsp->itxfm_add[TX_4X4][DCT_DCT] = ff_vp9_idct_idct_4x4_add_mmxext;
}
if (EXTERNAL_SSE(cpu_flags)) {
@@ -365,6 +373,21 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
init_fpel(1, 1, 32, avg, sse2);
init_fpel(0, 1, 64, avg, sse2);
init_lpf(sse2);
+ dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_sse2;
+ dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_sse2;
+ dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_sse2;
+ dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_sse2;
+ dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_sse2;
+ dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_sse2;
+ dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_sse2;
+ dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_sse2;
+ dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_sse2;
+ dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_sse2;
+ dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_sse2;
+ dsp->itxfm_add[TX_32X32][ADST_ADST] =
+ dsp->itxfm_add[TX_32X32][ADST_DCT] =
+ dsp->itxfm_add[TX_32X32][DCT_ADST] =
+ dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_sse2;
dsp->intra_pred[TX_16X16][VERT_PRED] = ff_vp9_ipred_v_16x16_sse2;
dsp->intra_pred[TX_32X32][VERT_PRED] = ff_vp9_ipred_v_32x32_sse2;
}
@@ -376,20 +399,18 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
dsp->itxfm_add[TX_4X4][ADST_DCT] = ff_vp9_idct_iadst_4x4_add_ssse3;
dsp->itxfm_add[TX_4X4][DCT_ADST] = ff_vp9_iadst_idct_4x4_add_ssse3;
dsp->itxfm_add[TX_4X4][ADST_ADST] = ff_vp9_iadst_iadst_4x4_add_ssse3;
- if (ARCH_X86_64) {
- dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
- dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_ssse3;
- dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_ssse3;
- dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3;
- dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3;
- dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_ssse3;
- dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_ssse3;
- dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3;
- dsp->itxfm_add[TX_32X32][ADST_ADST] =
- dsp->itxfm_add[TX_32X32][ADST_DCT] =
- dsp->itxfm_add[TX_32X32][DCT_ADST] =
- dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
- }
+ dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_ssse3;
+ dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_ssse3;
+ dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_ssse3;
+ dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_ssse3;
+ dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_ssse3;
+ dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_ssse3;
+ dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_ssse3;
+ dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_ssse3;
+ dsp->itxfm_add[TX_32X32][ADST_ADST] =
+ dsp->itxfm_add[TX_32X32][ADST_DCT] =
+ dsp->itxfm_add[TX_32X32][DCT_ADST] =
+ dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_ssse3;
init_lpf(ssse3);
init_dc_ipred(TX_4X4, 4, ssse3);
init_dc_ipred(TX_8X8, 8, ssse3);
@@ -398,20 +419,18 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
}
if (EXTERNAL_AVX(cpu_flags)) {
- if (ARCH_X86_64) {
- dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx;
- dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_avx;
- dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_avx;
- dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx;
- dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx;
- dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx;
- dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx;
- dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx;
- dsp->itxfm_add[TX_32X32][ADST_ADST] =
- dsp->itxfm_add[TX_32X32][ADST_DCT] =
- dsp->itxfm_add[TX_32X32][DCT_ADST] =
- dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
- }
+ dsp->itxfm_add[TX_8X8][DCT_DCT] = ff_vp9_idct_idct_8x8_add_avx;
+ dsp->itxfm_add[TX_8X8][ADST_DCT] = ff_vp9_idct_iadst_8x8_add_avx;
+ dsp->itxfm_add[TX_8X8][DCT_ADST] = ff_vp9_iadst_idct_8x8_add_avx;
+ dsp->itxfm_add[TX_8X8][ADST_ADST] = ff_vp9_iadst_iadst_8x8_add_avx;
+ dsp->itxfm_add[TX_16X16][DCT_DCT] = ff_vp9_idct_idct_16x16_add_avx;
+ dsp->itxfm_add[TX_16X16][ADST_DCT] = ff_vp9_idct_iadst_16x16_add_avx;
+ dsp->itxfm_add[TX_16X16][DCT_ADST] = ff_vp9_iadst_idct_16x16_add_avx;
+ dsp->itxfm_add[TX_16X16][ADST_ADST] = ff_vp9_iadst_iadst_16x16_add_avx;
+ dsp->itxfm_add[TX_32X32][ADST_ADST] =
+ dsp->itxfm_add[TX_32X32][ADST_DCT] =
+ dsp->itxfm_add[TX_32X32][DCT_ADST] =
+ dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx;
init_fpel(1, 0, 32, put, avx);
init_fpel(0, 0, 64, put, avx);
init_lpf(avx);