diff options
author | James Almer <jamrial@gmail.com> | 2015-02-04 20:23:19 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2015-02-05 15:02:33 -0300 |
commit | 15574c505b81d6e92370096eaca6764c13b5fbfc (patch) | |
tree | 224fce351260fb2a9fd6e1a656d6245998b26caf /libavcodec/x86/hevcdsp_init.c | |
parent | 042c1159fcf8431725c718a504b1fb40364bf434 (diff) | |
download | ffmpeg-15574c505b81d6e92370096eaca6764c13b5fbfc.tar.gz |
x86/hevcdsp: add ff_hevc_sao_edge_filter_{10,12}_{sse2,avx2}
Original x86 intrinsics code by Pierre-Edouard Lepere.
Yasm port, refactoring and optimizations by James Almer.
Benchmarks of BQTerrace_1920x1080_60_qp22.bin on an Intel Core i5-4200U:
Width 32
342694 decicycles in sao_edge_filter_10, 16384 runs, 0 skips
29476 decicycles in ff_hevc_sao_edge_filter_32_10_sse2, 16384 runs, 0 skips
13996 decicycles in ff_hevc_sao_edge_filter_32_10_avx2, 16381 runs, 3 skips
Width 64
581163 decicycles in sao_edge_filter_10, 8192 runs, 0 skips
59774 decicycles in ff_hevc_sao_edge_filter_64_10_sse2, 8192 runs, 0 skips
28383 decicycles in ff_hevc_sao_edge_filter_64_10_avx2, 8191 runs, 1 skips
Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86/hevcdsp_init.c')
-rw-r--r-- | libavcodec/x86/hevcdsp_init.c | 12 |
1 files changed, 12 insertions, 0 deletions
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c index ddc30cf675..f082f4d6dc 100644 --- a/libavcodec/x86/hevcdsp_init.c +++ b/libavcodec/x86/hevcdsp_init.c @@ -522,6 +522,10 @@ void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptr SAO_EDGE_FILTER_FUNCS(8, ssse3); SAO_EDGE_FILTER_FUNCS(8, avx2); +SAO_EDGE_FILTER_FUNCS(10, sse2); +SAO_EDGE_FILTER_FUNCS(10, avx2); +SAO_EDGE_FILTER_FUNCS(12, sse2); +SAO_EDGE_FILTER_FUNCS(12, avx2); #define SAO_EDGE_INIT(bitd, opt) do { \ c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt; \ @@ -636,6 +640,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2; SAO_BAND_INIT(10, sse2); + SAO_EDGE_INIT(10, sse2); } c->idct_dc[1] = ff_hevc_idct8x8_dc_10_sse2; @@ -677,6 +682,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2; if (ARCH_X86_64) { SAO_BAND_INIT(10, avx2); + c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_10_avx2; + c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_10_avx2; + c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_10_avx2; } c->transform_add[2] = ff_hevc_transform_add16_10_avx2; @@ -696,6 +704,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2; SAO_BAND_INIT(12, sse2); + SAO_EDGE_INIT(12, sse2); } c->idct_dc[1] = ff_hevc_idct8x8_dc_12_sse2; @@ -732,6 +741,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2; if (ARCH_X86_64) { SAO_BAND_INIT(12, avx2); + c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_12_avx2; + c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_12_avx2; + c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_12_avx2; } } } |