diff options
author | Logan Lyu <Logan.Lyu@myais.com.cn> | 2023-10-26 09:17:36 +0800 |
---|---|---|
committer | Martin Storsjö <martin@martin.st> | 2023-10-31 14:02:53 +0200 |
commit | 265450b89e920c57fa35feff6d6d9953aec8f30e (patch) | |
tree | 3761359e840ee2e0712a02367cc865fac22180bc /libavcodec/aarch64/hevcdsp_init_aarch64.c | |
parent | 22c729150648f9f4b0d1e6f769d01bb4fb022016 (diff) | |
download | ffmpeg-265450b89e920c57fa35feff6d6d9953aec8f30e.tar.gz |
lavc/aarch64: new optimization for 8-bit hevc_epel_hv
checkasm bench:
put_hevc_epel_hv4_8_c: 213.7
put_hevc_epel_hv4_8_i8mm: 59.4
put_hevc_epel_hv6_8_c: 350.9
put_hevc_epel_hv6_8_i8mm: 130.2
put_hevc_epel_hv8_8_c: 548.7
put_hevc_epel_hv8_8_i8mm: 136.9
put_hevc_epel_hv12_8_c: 1126.7
put_hevc_epel_hv12_8_i8mm: 302.2
put_hevc_epel_hv16_8_c: 1925.2
put_hevc_epel_hv16_8_i8mm: 459.9
put_hevc_epel_hv24_8_c: 4301.9
put_hevc_epel_hv24_8_i8mm: 1024.9
put_hevc_epel_hv32_8_c: 7509.2
put_hevc_epel_hv32_8_i8mm: 1680.4
put_hevc_epel_hv48_8_c: 16566.9
put_hevc_epel_hv48_8_i8mm: 3945.4
put_hevc_epel_hv64_8_c: 29134.2
put_hevc_epel_hv64_8_i8mm: 6567.7
Co-Authored-By: J. Dekker <jdek@itanimul.li>
Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/aarch64/hevcdsp_init_aarch64.c')
-rw-r--r-- | libavcodec/aarch64/hevcdsp_init_aarch64.c | 5 |
1 files changed, 5 insertions, 0 deletions
diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c index 42aa76ddde..e54d8d7b1e 100644 --- a/libavcodec/aarch64/hevcdsp_init_aarch64.c +++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c @@ -191,6 +191,10 @@ NEON8_FNPROTO(epel_h, (int16_t *dst, const uint8_t *_src, ptrdiff_t _srcstride, int height, intptr_t mx, intptr_t my, int width), _i8mm); +NEON8_FNPROTO(epel_hv, (int16_t *dst, + const uint8_t *src, ptrdiff_t srcstride, + int height, intptr_t mx, intptr_t my, int width), _i8mm); + NEON8_FNPROTO(epel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride, const uint8_t *_src, ptrdiff_t _srcstride, int height, int denom, int wx, int ox, @@ -322,6 +326,7 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth) if (have_i8mm(cpu_flags)) { NEON8_FNASSIGN(c->put_hevc_epel, 0, 1, epel_h, _i8mm); + NEON8_FNASSIGN(c->put_hevc_epel, 1, 1, epel_hv, _i8mm); NEON8_FNASSIGN(c->put_hevc_epel_uni, 1, 1, epel_uni_hv, _i8mm); NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 0, 1, epel_uni_w_h ,_i8mm); NEON8_FNASSIGN(c->put_hevc_qpel, 0, 1, qpel_h, _i8mm); |