diff options
author | Janne Grunau <janne-libav@jannau.net> | 2018-08-13 20:43:19 +0200 |
---|---|---|
committer | Janne Grunau <janne-libav@jannau.net> | 2019-01-26 12:05:10 +0100 |
commit | 28a8b5413b64b831dfb8650208bccd8b78360484 (patch) | |
tree | 090b0141e8734a31fca45d432cf7c266d34ed852 /libavcodec/aarch64/h264dsp_init_aarch64.c | |
parent | 846c3d6aca5484904e60946c4fe8b8833bc07f92 (diff) | |
download | ffmpeg-28a8b5413b64b831dfb8650208bccd8b78360484.tar.gz |
h264/aarch64: add intra loop filter neon asm
Add my neon asm from x264 relicensed under the LGPL 2.1 or later. Ported
(x264 uses nv12 chroma) and optimized.
Cycle count for checkasm --bench on a Snapdragon 820e:
h264_h_loop_filter_luma_intra_8bpp_c: 60.0
h264_h_loop_filter_luma_intra_8bpp_neon: 54.2
h264_v_loop_filter_luma_intra_8bpp_c: 148.3
h264_v_loop_filter_luma_intra_8bpp_neon: 73.8
h264_h_loop_filter_chroma_intra_8bpp_c: 27.8
h264_h_loop_filter_chroma_intra_8bpp_neon: 21.4
h264_h_loop_filter_chroma_mbaff_intra_8bpp_c: 15.8
h264_h_loop_filter_chroma_mbaff_intra_8bpp_neon: 15.7
h264_v_loop_filter_chroma_intra_8bpp_c: 45.8
h264_v_loop_filter_chroma_intra_8bpp_neon: 17.3
Diffstat (limited to 'libavcodec/aarch64/h264dsp_init_aarch64.c')
-rw-r--r-- | libavcodec/aarch64/h264dsp_init_aarch64.c | 16 |
1 files changed, 16 insertions, 0 deletions
diff --git a/libavcodec/aarch64/h264dsp_init_aarch64.c b/libavcodec/aarch64/h264dsp_init_aarch64.c index b106f11134..07bda2ff07 100644 --- a/libavcodec/aarch64/h264dsp_init_aarch64.c +++ b/libavcodec/aarch64/h264dsp_init_aarch64.c @@ -29,10 +29,20 @@ void ff_h264_v_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); void ff_h264_h_loop_filter_luma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); +void ff_h264_v_loop_filter_luma_intra_neon(uint8_t *pix, int stride, int alpha, + int beta); +void ff_h264_h_loop_filter_luma_intra_neon(uint8_t *pix, int stride, int alpha, + int beta); void ff_h264_v_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); void ff_h264_h_loop_filter_chroma_neon(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0); +void ff_h264_v_loop_filter_chroma_intra_neon(uint8_t *pix, int stride, + int alpha, int beta); +void ff_h264_h_loop_filter_chroma_intra_neon(uint8_t *pix, int stride, + int alpha, int beta); +void ff_h264_h_loop_filter_chroma_mbaff_intra_neon(uint8_t *pix, int stride, + int alpha, int beta); void ff_weight_h264_pixels_16_neon(uint8_t *dst, int stride, int height, int log2_den, int weight, int offset); @@ -77,8 +87,14 @@ av_cold void ff_h264dsp_init_aarch64(H264DSPContext *c, const int bit_depth, if (have_neon(cpu_flags) && bit_depth == 8) { c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; c->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_neon; + c->h264_v_loop_filter_luma_intra= ff_h264_v_loop_filter_luma_intra_neon; + c->h264_h_loop_filter_luma_intra= ff_h264_h_loop_filter_luma_intra_neon; + c->h264_v_loop_filter_chroma = ff_h264_v_loop_filter_chroma_neon; c->h264_h_loop_filter_chroma = ff_h264_h_loop_filter_chroma_neon; + c->h264_v_loop_filter_chroma_intra = ff_h264_v_loop_filter_chroma_intra_neon; + c->h264_h_loop_filter_chroma_intra = ff_h264_h_loop_filter_chroma_intra_neon; + c->h264_h_loop_filter_chroma_mbaff_intra = ff_h264_h_loop_filter_chroma_mbaff_intra_neon; c->weight_h264_pixels_tab[0] = ff_weight_h264_pixels_16_neon; c->weight_h264_pixels_tab[1] = ff_weight_h264_pixels_8_neon; |