diff options
author | Logan Lyu <Logan.Lyu@myais.com.cn> | 2023-08-15 15:24:32 +0800 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2023-09-26 15:50:40 +0300 |
commit | 7ce5a2f640080430d707d9a373ec961694f7a1e8 (patch) | |
tree | 6747017ca4b345b91088b89c03b2fecb4805b103 /libavcodec/aarch64/hevcdsp_init_aarch64.c | |
parent | 56085e057ff2d2da180a5d56bdd1efd70d53e662 (diff) | |
download | ffmpeg-7ce5a2f640080430d707d9a373ec961694f7a1e8.tar.gz |
lavc/aarch64: new optimization for 8-bit hevc_epel_uni_v
checkasm bench:
put_hevc_epel_uni_hv64_8_i8mm: 6568.7
put_hevc_epel_uni_v4_8_c: 88.7
put_hevc_epel_uni_v4_8_neon: 32.7
put_hevc_epel_uni_v6_8_c: 185.4
put_hevc_epel_uni_v6_8_neon: 44.9
put_hevc_epel_uni_v8_8_c: 333.9
put_hevc_epel_uni_v8_8_neon: 44.4
put_hevc_epel_uni_v12_8_c: 728.7
put_hevc_epel_uni_v12_8_neon: 119.7
put_hevc_epel_uni_v16_8_c: 1224.2
put_hevc_epel_uni_v16_8_neon: 139.7
put_hevc_epel_uni_v24_8_c: 2531.2
put_hevc_epel_uni_v24_8_neon: 329.9
put_hevc_epel_uni_v32_8_c: 4739.9
put_hevc_epel_uni_v32_8_neon: 562.7
put_hevc_epel_uni_v48_8_c: 10618.7
put_hevc_epel_uni_v48_8_neon: 1256.2
put_hevc_epel_uni_v64_8_c: 19169.9
put_hevc_epel_uni_v64_8_neon: 2179.2
Co-Authored-By: J. Dekker <jdek@itanimul.li>
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/aarch64/hevcdsp_init_aarch64.c')
-rw-r--r-- | libavcodec/aarch64/hevcdsp_init_aarch64.c | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c index e125b0cfb2..f1e167c50b 100644 --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c @@ -161,6 +161,10 @@ NEON8_FNPROTO(pel_uni_w_pixels, (uint8_t *_dst, ptrdiff_t _dststride, int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width),); +NEON8_FNPROTO(epel_uni_v, (uint8_t *dst, ptrdiff_t dststride, + const uint8_t *src, ptrdiff_t srcstride, + int height, intptr_t mx, intptr_t my, int width),); + NEON8_FNPROTO(epel_uni_w_v, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, @@ -285,6 +289,7 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth) c->put_hevc_qpel_bi[9][0][1] = ff_hevc_put_hevc_qpel_bi_h16_8_neon; NEON8_FNASSIGN(c->put_hevc_epel_uni, 0, 0, pel_uni_pixels,); + NEON8_FNASSIGN(c->put_hevc_epel_uni, 1, 0, epel_uni_v,); NEON8_FNASSIGN(c->put_hevc_qpel_uni, 0, 0, pel_uni_pixels,); NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 0, 0, pel_uni_w_pixels,); NEON8_FNASSIGN(c->put_hevc_qpel_uni_w, 0, 0, pel_uni_w_pixels,); |