diff options
author | Jun Zhao <[email protected]> | 2018-07-10 00:41:00 +0800 |
---|---|---|
committer | Jun Zhao <[email protected]> | 2018-07-31 19:17:51 +0800 |
commit | d36b8394f4fa95403afd194ca9b7edbd8f470076 (patch) | |
tree | eb11d875c5433aedd6344114e5a39c6dfd542673 /libavutil/x86/pixelutils_init.c | |
parent | b8bf7408dc189668efa3e45b418349ce3a7c816e (diff) |
avutil/pixelutils: sad_32x32 sse2/avx2 optimizations.
Add ff_pixelutils_sad_32x32_sse2, ff_pixelutils_sad_{a,u}_32x32_sse2,
ff_pixelutils_sad_32x32_avx2 and ff_pixelutils_sad_{a,u}_32x32_avx2.
Profiling with perf record/report (instructions:u event) gives the following for the avx2 sad_32x32:
72.05% pixelutils pixelutils [.] block_sad_32x32_c
18.50% pixelutils pixelutils [.] block_sad_16x16_c
4.78% pixelutils pixelutils [.] block_sad_8x8_c
2.69% pixelutils pixelutils [.] block_sad_4x4_c
0.89% pixelutils pixelutils [.] block_sad_2x2_c
0.16% pixelutils pixelutils [.] ff_pixelutils_sad_32x32_avx2
0.16% pixelutils pixelutils [.] ff_pixelutils_sad_u_32x32_avx2
0.12% pixelutils pixelutils [.] ff_pixelutils_sad_a_32x32_avx2
The instructions:u profile for the sse2 sad_32x32 looks like:
71.86% pixelutils pixelutils [.] block_sad_32x32_c
18.42% pixelutils pixelutils [.] block_sad_16x16_c
4.81% pixelutils pixelutils [.] block_sad_8x8_c
2.68% pixelutils pixelutils [.] block_sad_4x4_c
0.88% pixelutils pixelutils [.] block_sad_2x2_c
0.29% pixelutils pixelutils [.] ff_pixelutils_sad_32x32_sse2
0.26% pixelutils pixelutils [.] ff_pixelutils_sad_u_32x32_sse2
0.23% pixelutils pixelutils [.] ff_pixelutils_sad_a_32x32_sse2
Signed-off-by: Jun Zhao <[email protected]>
Diffstat (limited to 'libavutil/x86/pixelutils_init.c')
-rw-r--r-- | libavutil/x86/pixelutils_init.c | 30 |
1 files changed, 30 insertions, 0 deletions
diff --git a/libavutil/x86/pixelutils_init.c b/libavutil/x86/pixelutils_init.c index c24a533aea..dd05421b50 100644 --- a/libavutil/x86/pixelutils_init.c +++ b/libavutil/x86/pixelutils_init.c @@ -35,6 +35,20 @@ int ff_pixelutils_sad_a_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, int ff_pixelutils_sad_u_16x16_sse2(const uint8_t *src1, ptrdiff_t stride1, const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_a_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_u_32x32_sse2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + +int ff_pixelutils_sad_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_a_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); +int ff_pixelutils_sad_u_32x32_avx2(const uint8_t *src1, ptrdiff_t stride1, + const uint8_t *src2, ptrdiff_t stride2); + void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned) { int cpu_flags = av_get_cpu_flags(); @@ -61,4 +75,20 @@ void ff_pixelutils_sad_init_x86(av_pixelutils_sad_fn *sad, int aligned) case 2: sad[3] = ff_pixelutils_sad_a_16x16_sse2; break; // src1 aligned, src2 aligned } } + + if (EXTERNAL_SSE2(cpu_flags)) { + switch (aligned) { + case 0: sad[4] = ff_pixelutils_sad_32x32_sse2; break; // src1 unaligned, src2 unaligned + case 1: sad[4] = ff_pixelutils_sad_u_32x32_sse2; break; // src1 aligned, src2 unaligned + case 2: sad[4] = ff_pixelutils_sad_a_32x32_sse2; break; // src1 aligned, src2 aligned + } + } + + if (EXTERNAL_AVX2(cpu_flags)) { + switch (aligned) { + case 0: sad[4] = ff_pixelutils_sad_32x32_avx2; break; // src1 unaligned, src2 unaligned + case 1: sad[4] = ff_pixelutils_sad_u_32x32_avx2; break; // src1 aligned, src2 unaligned + case 2: sad[4] = 
ff_pixelutils_sad_a_32x32_avx2; break; // src1 aligned, src2 aligned + } + } } |