aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/x86/hevcdsp_init.c
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com> 2015-02-04 20:23:19 -0300
committer: James Almer <jamrial@gmail.com> 2015-02-05 15:02:33 -0300
commit: 15574c505b81d6e92370096eaca6764c13b5fbfc (patch)
tree: 224fce351260fb2a9fd6e1a656d6245998b26caf /libavcodec/x86/hevcdsp_init.c
parent: 042c1159fcf8431725c718a504b1fb40364bf434 (diff)
download: ffmpeg-15574c505b81d6e92370096eaca6764c13b5fbfc.tar.gz
x86/hevcdsp: add ff_hevc_sao_edge_filter_{10,12}_{sse2,avx2}
Original x86 intrinsics code by Pierre-Edouard Lepere.
Yasm port, refactoring and optimizations by James Almer.

Benchmarks of BQTerrace_1920x1080_60_qp22.bin with an Intel Core i5-4200U

Width 32
342694 decicycles in sao_edge_filter_10, 16384 runs, 0 skips
29476 decicycles in ff_hevc_sao_edge_filter_32_10_ssse3, 16384 runs, 0 skips
13996 decicycles in ff_hevc_sao_edge_filter_32_10_avx2, 16381 runs, 3 skips

Width 64
581163 decicycles in sao_edge_filter_10, 8192 runs, 0 skips
59774 decicycles in ff_hevc_sao_edge_filter_64_10_ssse3, 8192 runs, 0 skips
28383 decicycles in ff_hevc_sao_edge_filter_64_10_avx2, 8191 runs, 1 skips

Signed-off-by: James Almer <jamrial@gmail.com>
Diffstat (limited to 'libavcodec/x86/hevcdsp_init.c')
-rw-r--r-- libavcodec/x86/hevcdsp_init.c 12
1 files changed, 12 insertions, 0 deletions
diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c
index ddc30cf675..f082f4d6dc 100644
--- a/libavcodec/x86/hevcdsp_init.c
+++ b/libavcodec/x86/hevcdsp_init.c
@@ -522,6 +522,10 @@ void ff_hevc_sao_edge_filter_64_##bitd##_##opt(uint8_t *_dst, uint8_t *_src, ptr
SAO_EDGE_FILTER_FUNCS(8, ssse3);
SAO_EDGE_FILTER_FUNCS(8, avx2);
+SAO_EDGE_FILTER_FUNCS(10, sse2);
+SAO_EDGE_FILTER_FUNCS(10, avx2);
+SAO_EDGE_FILTER_FUNCS(12, sse2);
+SAO_EDGE_FILTER_FUNCS(12, avx2);
#define SAO_EDGE_INIT(bitd, opt) do { \
c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_##bitd##_##opt; \
@@ -636,6 +640,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_sse2;
SAO_BAND_INIT(10, sse2);
+ SAO_EDGE_INIT(10, sse2);
}
c->idct_dc[1] = ff_hevc_idct8x8_dc_10_sse2;
@@ -677,6 +682,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[3] = ff_hevc_idct32x32_dc_10_avx2;
if (ARCH_X86_64) {
SAO_BAND_INIT(10, avx2);
+ c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_10_avx2;
+ c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_10_avx2;
+ c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_10_avx2;
}
c->transform_add[2] = ff_hevc_transform_add16_10_avx2;
@@ -696,6 +704,7 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2;
SAO_BAND_INIT(12, sse2);
+ SAO_EDGE_INIT(12, sse2);
}
c->idct_dc[1] = ff_hevc_idct8x8_dc_12_sse2;
@@ -732,6 +741,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2;
if (ARCH_X86_64) {
SAO_BAND_INIT(12, avx2);
+ c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_32_12_avx2;
+ c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_48_12_avx2;
+ c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_64_12_avx2;
}
}
}