diff options
author | James Almer <jamrial@gmail.com> | 2014-07-29 04:30:13 -0300 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-07-29 14:04:59 +0200 |
commit | 73c4f63ba5a36f3998159dcd5a4a2ec7500eb557 (patch) | |
tree | 1ebfde14616d3079d26314e310a0f73fecead5ce /libavcodec/x86/hevcdsp_init.c | |
parent | 88ba821f23cb9f16bf6cc92688fa0c3788a6010e (diff) | |
download | ffmpeg-73c4f63ba5a36f3998159dcd5a4a2ec7500eb557.tar.gz |
x86/hevc_deblock: add ff_hevc_[hv]_loop_filter_luma_{8, 10, 12}_avx
~5% faster than SSSE3
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/hevcdsp_init.c')
-rw-r--r-- | libavcodec/x86/hevcdsp_init.c | 30 |
1 file changed, 30 insertions, 0 deletions
```diff
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index 3e8704aec8..828c081a2e 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -45,12 +45,18 @@ void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, pt
 LFC_FUNCS(uint8_t, 8, sse2)
 LFC_FUNCS(uint8_t, 10, sse2)
 LFC_FUNCS(uint8_t, 12, sse2)
+LFC_FUNCS(uint8_t, 8, avx)
+LFC_FUNCS(uint8_t, 10, avx)
+LFC_FUNCS(uint8_t, 12, avx)
 LFL_FUNCS(uint8_t, 8, sse2)
 LFL_FUNCS(uint8_t, 10, sse2)
 LFL_FUNCS(uint8_t, 12, sse2)
 LFL_FUNCS(uint8_t, 8, ssse3)
 LFL_FUNCS(uint8_t, 10, ssse3)
 LFL_FUNCS(uint8_t, 12, ssse3)
+LFL_FUNCS(uint8_t, 8, avx)
+LFL_FUNCS(uint8_t, 10, avx)
+LFL_FUNCS(uint8_t, 12, avx)
 
 #define IDCT_FUNCS(W, opt) \
 void ff_hevc_idct##W##_dc_8_##opt(int16_t *coeffs); \
@@ -492,6 +498,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 8, sse4);
             QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
         }
+        if (EXTERNAL_AVX(cpu_flags)) {
+            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_avx;
+            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_avx;
+            if (ARCH_X86_64) {
+                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_avx;
+                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_avx;
+            }
+        }
         if (EXTERNAL_AVX2(cpu_flags)) {
             c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;
             c->idct_dc[3] = ff_hevc_idct32x32_dc_8_avx2;
@@ -528,6 +542,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
             QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
         }
+        if (EXTERNAL_AVX(cpu_flags)) {
+            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_avx;
+            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_avx;
+            if (ARCH_X86_64) {
+                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_avx;
+                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_avx;
+            }
+        }
         if (EXTERNAL_AVX2(cpu_flags)) {
             c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2;
             c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2;
@@ -565,6 +587,14 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
             QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4);
             QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4);
         }
+        if (EXTERNAL_AVX(cpu_flags)) {
+            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_avx;
+            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_avx;
+            if (ARCH_X86_64) {
+                c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_avx;
+                c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_avx;
+            }
+        }
         if (EXTERNAL_AVX2(cpu_flags)) {
             c->idct_dc[2] = ff_hevc_idct16x16_dc_12_avx2;
             c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2;
```