about | summary | refs | log | tree | commit | diff | stats
path: root/libavcodec/arm/hevcdsp_init_neon.c
diff options
context:
space:
mode:
author: Meng Wang <wangmeng.kids@bytedance.com> 2018-03-27 20:43:06 +0800
committer: Michael Niedermayer <michael@niedermayer.cc> 2018-04-09 03:45:15 +0200
commit: 3b2fd960481d90d0788e1958a2b1469ac55ba3c5 (patch)
tree: d29798b01d5572284303a1dc45fb430534c16bdb /libavcodec/arm/hevcdsp_init_neon.c
parent: 249aca8f98ff7fb09c12ea68e23c862c62203b95 (diff)
download: ffmpeg-3b2fd960481d90d0788e1958a2b1469ac55ba3c5.tar.gz
avcodec/arm/hevcdsp_sao : add NEON optimization for sao
Signed-off-by: Meng Wang <wangmeng.kids@bytedance.com> Reviewed-by: Shengbin Meng <shengbinmeng@gmail.com> Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavcodec/arm/hevcdsp_init_neon.c')
-rw-r--r-- libavcodec/arm/hevcdsp_init_neon.c 59
1 files changed, 59 insertions, 0 deletions
diff --git a/libavcodec/arm/hevcdsp_init_neon.c b/libavcodec/arm/hevcdsp_init_neon.c
index a4628d2a93..201a088dac 100644
--- a/libavcodec/arm/hevcdsp_init_neon.c
+++ b/libavcodec/arm/hevcdsp_init_neon.c
@@ -21,8 +21,16 @@
#include "libavutil/attributes.h"
#include "libavutil/arm/cpu.h"
#include "libavcodec/hevcdsp.h"
+#include "libavcodec/avcodec.h"
#include "hevcdsp_arm.h"
+void ff_hevc_sao_band_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src,
+ ptrdiff_t stride_dst, ptrdiff_t stride_src,
+ int16_t *sao_offset_val, int sao_left_class,
+ int width, int height);
+void ff_hevc_sao_edge_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
+ int eo, int width, int height);
+
void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
void ff_hevc_h_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
void ff_hevc_v_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
@@ -142,6 +150,47 @@ QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h3v2_neon_8);
QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h3v3_neon_8);
#undef QPEL_FUNC_UW
+void ff_hevc_sao_band_filter_neon_8(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int width, int height, int16_t *offset_table);
+/**
+ * C-side adapter for the 8-bit SAO band filter: builds a 32-entry offset
+ * table on the stack and passes it to ff_hevc_sao_band_filter_neon_8().
+ *
+ * For k = 0..3, table entry (sao_left_class + k) & 31 receives
+ * sao_offset_val[k + 1]; the remaining 28 entries stay 0.
+ * sao_offset_val[0] is not read by this function.
+ *
+ * _dst, _src, stride_dst, stride_src, width and height are forwarded
+ * unchanged to the NEON routine.
+ */
+void ff_hevc_sao_band_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src,
+ ptrdiff_t stride_dst, ptrdiff_t stride_src,
+ int16_t *sao_offset_val, int sao_left_class,
+ int width, int height) {
+ uint8_t *dst = _dst;
+ uint8_t *src = _src;
+ int16_t offset_table[32] = {0}; /* zero-filled so entries not written below contribute no offset */
+ int k;
+
+ for (k = 0; k < 4; k++) {
+ offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1]; /* "& 31" wraps the index back into the 32-entry table */
+ }
+
+ ff_hevc_sao_band_filter_neon_8(dst, src, stride_dst, stride_src, width, height, offset_table);
+}
+
+void ff_hevc_sao_edge_filter_neon_8(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int width, int height,
+ int a_stride, int b_stride, int16_t *sao_offset_val, uint8_t *edge_idx);
+/**
+ * C-side adapter for the 8-bit SAO edge filter: converts the edge class eo
+ * into two signed neighbour offsets and calls ff_hevc_sao_edge_filter_neon_8().
+ *
+ * pos[eo][0] and pos[eo][1] each hold an {x, y} step; each step is flattened
+ * to x + y * stride_src and passed on as a_stride / b_stride.
+ *
+ * The source stride is not a parameter of this wrapper: it is fixed to
+ * 2 * MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE, so _src presumably points
+ * into a buffer laid out with that row pitch (TODO confirm against the caller).
+ *
+ * eo indexes pos[] directly with no range check, so it must be in 0..3.
+ */
+void ff_hevc_sao_edge_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
+ int eo, int width, int height) {
+ static uint8_t edge_idx[] = { 1, 2, 0, 3, 4 }; /* lookup table handed to the NEON routine; not written in this function */
+ static const int8_t pos[4][2][2] = {
+ { { -1, 0 }, { 1, 0 } }, // horizontal
+ { { 0, -1 }, { 0, 1 } }, // vertical
+ { { -1, -1 }, { 1, 1 } }, // 45 degree
+ { { 1, -1 }, { -1, 1 } }, // 135 degree
+ };
+ uint8_t *dst = _dst;
+ uint8_t *src = _src;
+ int a_stride, b_stride;
+ ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE); /* hard-coded source row pitch */
+
+ a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src; /* offset from a sample to its first neighbour */
+ b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src; /* offset from a sample to its second neighbour */
+
+ ff_hevc_sao_edge_filter_neon_8(dst, src, stride_dst, stride_src, width, height, a_stride, b_stride, sao_offset_val, edge_idx);
+}
+
void ff_hevc_put_qpel_neon_wrapper(int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
int height, intptr_t mx, intptr_t my, int width) {
@@ -168,6 +217,16 @@ av_cold void ff_hevc_dsp_init_neon(HEVCDSPContext *c, const int bit_depth)
c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_neon;
c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_neon;
c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_neon;
+ c->sao_band_filter[0] = ff_hevc_sao_band_filter_neon_8_wrapper;
+ c->sao_band_filter[1] = ff_hevc_sao_band_filter_neon_8_wrapper;
+ c->sao_band_filter[2] = ff_hevc_sao_band_filter_neon_8_wrapper;
+ c->sao_band_filter[3] = ff_hevc_sao_band_filter_neon_8_wrapper;
+ c->sao_band_filter[4] = ff_hevc_sao_band_filter_neon_8_wrapper;
+ c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_neon_8_wrapper;
+ c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_neon_8_wrapper;
+ c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_neon_8_wrapper;
+ c->sao_edge_filter[3] = ff_hevc_sao_edge_filter_neon_8_wrapper;
+ c->sao_edge_filter[4] = ff_hevc_sao_edge_filter_neon_8_wrapper;
c->add_residual[0] = ff_hevc_add_residual_4x4_8_neon;
c->add_residual[1] = ff_hevc_add_residual_8x8_8_neon;
c->add_residual[2] = ff_hevc_add_residual_16x16_8_neon;