diff options
author | Meng Wang <wangmeng.kids@bytedance.com> | 2018-03-27 20:43:06 +0800 |
---|---|---|
committer | Michael Niedermayer <michael@niedermayer.cc> | 2018-04-09 03:45:15 +0200 |
commit | 3b2fd960481d90d0788e1958a2b1469ac55ba3c5 (patch) | |
tree | d29798b01d5572284303a1dc45fb430534c16bdb /libavcodec/arm/hevcdsp_init_neon.c | |
parent | 249aca8f98ff7fb09c12ea68e23c862c62203b95 (diff) | |
download | ffmpeg-3b2fd960481d90d0788e1958a2b1469ac55ba3c5.tar.gz |
avcodec/arm/hevcdsp_sao : add NEON optimization for sao
Signed-off-by: Meng Wang <wangmeng.kids@bytedance.com>
Reviewed-by: Shengbin Meng <shengbinmeng@gmail.com>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavcodec/arm/hevcdsp_init_neon.c')
-rw-r--r-- | libavcodec/arm/hevcdsp_init_neon.c | 59 |
1 files changed, 59 insertions, 0 deletions
```diff
diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c
index a4628d2a93..201a088dac 100644
--- a/libavcodec/arm/hevcdsp_init_neon.c
+++ b/libavcodec/arm/hevcdsp_init_neon.c
@@ -21,8 +21,16 @@
 #include "libavutil/attributes.h"
 #include "libavutil/arm/cpu.h"
 #include "libavcodec/hevcdsp.h"
+#include "libavcodec/avcodec.h"
 #include "hevcdsp_arm.h"
 
+void ff_hevc_sao_band_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src,
+                                            ptrdiff_t stride_dst, ptrdiff_t stride_src,
+                                            int16_t *sao_offset_val, int sao_left_class,
+                                            int width, int height);
+void ff_hevc_sao_edge_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
+                                            int eo, int width, int height);
+
 void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
 void ff_hevc_h_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
 void ff_hevc_v_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
@@ -142,6 +150,47 @@ QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h3v2_neon_8);
 QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h3v3_neon_8);
 #undef QPEL_FUNC_UW
 
+void ff_hevc_sao_band_filter_neon_8(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int width, int height, int16_t *offset_table);
+
+void ff_hevc_sao_band_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src,
+                                            ptrdiff_t stride_dst, ptrdiff_t stride_src,
+                                            int16_t *sao_offset_val, int sao_left_class,
+                                            int width, int height) {
+    uint8_t *dst = _dst;
+    uint8_t *src = _src;
+    int16_t offset_table[32] = {0};
+    int k;
+
+    for (k = 0; k < 4; k++) {
+        offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
+    }
+
+    ff_hevc_sao_band_filter_neon_8(dst, src, stride_dst, stride_src, width, height, offset_table);
+}
+
+void ff_hevc_sao_edge_filter_neon_8(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int width, int height,
+                                    int a_stride, int b_stride, int16_t *sao_offset_val, uint8_t *edge_idx);
+
+void ff_hevc_sao_edge_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
+                                            int eo, int width, int height) {
+    static uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
+    static const int8_t pos[4][2][2] = {
+        { { -1, 0 }, { 1, 0 } }, // horizontal
+        { { 0, -1 }, { 0, 1 } }, // vertical
+        { { -1, -1 }, { 1, 1 } }, // 45 degree
+        { { 1, -1 }, { -1, 1 } }, // 135 degree
+    };
+    uint8_t *dst = _dst;
+    uint8_t *src = _src;
+    int a_stride, b_stride;
+    ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE);
+
+    a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src;
+    b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src;
+
+    ff_hevc_sao_edge_filter_neon_8(dst, src, stride_dst, stride_src, width, height, a_stride, b_stride, sao_offset_val, edge_idx);
+}
+
 void ff_hevc_put_qpel_neon_wrapper(int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
                                    int height, intptr_t mx, intptr_t my, int width)
 {
@@ -168,6 +217,16 @@ av_cold void ff_hevc_dsp_init_neon(HEVCDSPContext *c, const int bit_depth)
         c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_neon;
         c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_neon;
         c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_neon;
+        c->sao_band_filter[0] = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_band_filter[1] = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_band_filter[2] = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_band_filter[3] = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_band_filter[4] = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_neon_8_wrapper;
+        c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_neon_8_wrapper;
+        c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_neon_8_wrapper;
+        c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_neon_8_wrapper;
+        c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_neon_8_wrapper;
         c->add_residual[0] = ff_hevc_add_residual_4x4_8_neon;
         c->add_residual[1] = ff_hevc_add_residual_8x8_8_neon;
         c->add_residual[2] = ff_hevc_add_residual_16x16_8_neon;
```