aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/hevcdsp_init.c
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com> 2014-07-26 04:47:14 -0300
committer: Michael Niedermayer <michaelni@gmx.at> 2014-07-26 18:00:11 +0200
commit: 1ace9573dce509e2b25165199c3b658667860ecf (patch)
tree: 9a50b2c470620bfe22f1541b6815f15bc1661b75 /libavcodec/x86/hevcdsp_init.c
parent: 23480da0aa70b045b7b8dea7da8fedde0bcd7062 (diff)
download: ffmpeg-1ace9573dce509e2b25165199c3b658667860ecf.tar.gz
x86/hevc_idct: replace old and unused idct functions
Only 8-bit and 10-bit idct_dc() functions are included (adding others should be trivial).

Benchmarks on an Intel Core i5-4200U:

idct8x8_dc
         SSE2  MMXEXT  C
cycles   22    26      57

idct16x16_dc
         AVX2  SSE2  C
cycles   27    32    249

idct32x32_dc
         AVX2  SSE2  C
cycles   62    126   1375

Signed-off-by: James Almer <jamrial@gmail.com>
Reviewed-by: Mickaël Raulet <mraulet@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec/x86/hevcdsp_init.c')
-rw-r--r-- libavcodec/x86/hevcdsp_init.c 95
1 files changed, 25 insertions, 70 deletions
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index 6fb94aaf0b..fb3357bef6 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -54,59 +54,17 @@ LFL_FUNCS(uint8_t, 8, ssse3)
LFL_FUNCS(uint8_t, 10, ssse3)
LFL_FUNCS(uint8_t, 12, ssse3)
-#if HAVE_SSE2_EXTERNAL
-void ff_hevc_idct32_dc_add_8_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-{
- ff_hevc_idct16_dc_add_8_sse2(dst, coeffs, stride);
- ff_hevc_idct16_dc_add_8_sse2(dst+16, coeffs, stride);
- ff_hevc_idct16_dc_add_8_sse2(dst+16*stride, coeffs, stride);
- ff_hevc_idct16_dc_add_8_sse2(dst+16*stride+16, coeffs, stride);
-}
-
-void ff_hevc_idct16_dc_add_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-{
- ff_hevc_idct8_dc_add_10_sse2(dst, coeffs, stride);
- ff_hevc_idct8_dc_add_10_sse2(dst+16, coeffs, stride);
- ff_hevc_idct8_dc_add_10_sse2(dst+8*stride, coeffs, stride);
- ff_hevc_idct8_dc_add_10_sse2(dst+8*stride+16, coeffs, stride);
-}
-
-void ff_hevc_idct32_dc_add_10_sse2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-{
- ff_hevc_idct16_dc_add_10_sse2(dst, coeffs, stride);
- ff_hevc_idct16_dc_add_10_sse2(dst+32, coeffs, stride);
- ff_hevc_idct16_dc_add_10_sse2(dst+16*stride, coeffs, stride);
- ff_hevc_idct16_dc_add_10_sse2(dst+16*stride+32, coeffs, stride);
-}
-#endif //HAVE_SSE2_EXTERNAL
-#if HAVE_AVX_EXTERNAL
-void ff_hevc_idct16_dc_add_10_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-{
- ff_hevc_idct8_dc_add_10_avx(dst, coeffs, stride);
- ff_hevc_idct8_dc_add_10_avx(dst+16, coeffs, stride);
- ff_hevc_idct8_dc_add_10_avx(dst+8*stride, coeffs, stride);
- ff_hevc_idct8_dc_add_10_avx(dst+8*stride+16, coeffs, stride);
-}
-
-void ff_hevc_idct32_dc_add_10_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-{
- ff_hevc_idct16_dc_add_10_avx(dst, coeffs, stride);
- ff_hevc_idct16_dc_add_10_avx(dst+32, coeffs, stride);
- ff_hevc_idct16_dc_add_10_avx(dst+16*stride, coeffs, stride);
- ff_hevc_idct16_dc_add_10_avx(dst+16*stride+32, coeffs, stride);
-}
-#endif //HAVE_AVX_EXTERNAL
-
-#if HAVE_AVX2_EXTERNAL
-
-void ff_hevc_idct32_dc_add_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
-{
- ff_hevc_idct16_dc_add_10_avx2(dst, coeffs, stride);
- ff_hevc_idct16_dc_add_10_avx2(dst+32, coeffs, stride);
- ff_hevc_idct16_dc_add_10_avx2(dst+16*stride, coeffs, stride);
- ff_hevc_idct16_dc_add_10_avx2(dst+16*stride+32, coeffs, stride);
-}
-#endif //HAVE_AVX2_EXTERNAL
+#define IDCT_FUNCS(W, opt) \
+void ff_hevc_idct##W##_dc_8_##opt(int16_t *coeffs); \
+void ff_hevc_idct##W##_dc_10_##opt(int16_t *coeffs)
+
+IDCT_FUNCS(4x4, mmxext);
+IDCT_FUNCS(8x8, mmxext);
+IDCT_FUNCS(8x8, sse2);
+IDCT_FUNCS(16x16, sse2);
+IDCT_FUNCS(32x32, sse2);
+IDCT_FUNCS(16x16, avx2);
+IDCT_FUNCS(32x32, avx2);
#define mc_rep_func(name, bitd, step, W, opt) \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, ptrdiff_t dststride, \
@@ -504,8 +462,8 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
if (bit_depth == 8) {
if (EXTERNAL_MMXEXT(mm_flags)) {
- c->transform_dc_add[0] = ff_hevc_idct4_dc_add_8_mmxext;
- c->transform_dc_add[1] = ff_hevc_idct8_dc_add_8_mmxext;
+ c->idct_dc[0] = ff_hevc_idct4x4_dc_8_mmxext;
+ c->idct_dc[1] = ff_hevc_idct8x8_dc_8_mmxext;
}
if (EXTERNAL_SSE2(mm_flags)) {
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
@@ -515,8 +473,9 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_sse2;
}
- c->transform_dc_add[2] = ff_hevc_idct16_dc_add_8_sse2;
- c->transform_dc_add[3] = ff_hevc_idct32_dc_add_8_sse2;
+ c->idct_dc[1] = ff_hevc_idct8x8_dc_8_sse2;
+ c->idct_dc[2] = ff_hevc_idct16x16_dc_8_sse2;
+ c->idct_dc[3] = ff_hevc_idct32x32_dc_8_sse2;
}
if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
@@ -535,12 +494,13 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 8, sse4);
}
if (EXTERNAL_AVX2(mm_flags)) {
- c->transform_dc_add[3] = ff_hevc_idct32_dc_add_8_avx2;
+ c->idct_dc[2] = ff_hevc_idct16x16_dc_8_avx2;
+ c->idct_dc[3] = ff_hevc_idct32x32_dc_8_avx2;
}
} else if (bit_depth == 10) {
if (EXTERNAL_MMXEXT(mm_flags)) {
- c->transform_dc_add[0] = ff_hevc_idct4_dc_add_10_mmxext;
-
+ c->idct_dc[0] = ff_hevc_idct4x4_dc_10_mmxext;
+ c->idct_dc[1] = ff_hevc_idct8x8_dc_10_mmxext;
}
if (EXTERNAL_SSE2(mm_flags)) {
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
@@ -550,9 +510,9 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
}
- c->transform_dc_add[1] = ff_hevc_idct8_dc_add_10_sse2;
- c->transform_dc_add[2] = ff_hevc_idct16_dc_add_10_sse2;
- c->transform_dc_add[3] = ff_hevc_idct32_dc_add_10_sse2;
+ c->idct_dc[1] = ff_hevc_idct8x8_dc_10_sse2;
+ c->idct_dc[2] = ff_hevc_idct16x16_dc_10_sse2;
+ c->idct_dc[3] = ff_hevc_idct32x32_dc_10_sse2;
}
if (EXTERNAL_SSSE3(mm_flags) && ARCH_X86_64) {
c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
@@ -569,14 +529,9 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 10, sse4);
QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 10, sse4);
}
- if (EXTERNAL_AVX(mm_flags)) {
- c->transform_dc_add[1] = ff_hevc_idct8_dc_add_10_avx;
- c->transform_dc_add[2] = ff_hevc_idct16_dc_add_10_avx;
- c->transform_dc_add[3] = ff_hevc_idct32_dc_add_10_avx;
- }
if (EXTERNAL_AVX2(mm_flags)) {
- c->transform_dc_add[2] = ff_hevc_idct16_dc_add_10_avx2;
- c->transform_dc_add[3] = ff_hevc_idct32_dc_add_10_avx2;
+ c->idct_dc[2] = ff_hevc_idct16x16_dc_10_avx2;
+ c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2;
}
} else if (bit_depth == 12) {