aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/aarch64/hevcdsp_init_aarch64.c
diff options
context:
space:
mode:
authorLogan Lyu <Logan.Lyu@myais.com.cn>2023-10-26 09:17:36 +0800
committerMartin Storsjö <martin@martin.st>2023-10-31 14:02:53 +0200
commit265450b89e920c57fa35feff6d6d9953aec8f30e (patch)
tree3761359e840ee2e0712a02367cc865fac22180bc /libavcodec/aarch64/hevcdsp_init_aarch64.c
parent22c729150648f9f4b0d1e6f769d01bb4fb022016 (diff)
downloadffmpeg-265450b89e920c57fa35feff6d6d9953aec8f30e.tar.gz
lavc/aarch64: new optimization for 8-bit hevc_epel_hv
checkasm bench: put_hevc_epel_hv4_8_c: 213.7 put_hevc_epel_hv4_8_i8mm: 59.4 put_hevc_epel_hv6_8_c: 350.9 put_hevc_epel_hv6_8_i8mm: 130.2 put_hevc_epel_hv8_8_c: 548.7 put_hevc_epel_hv8_8_i8mm: 136.9 put_hevc_epel_hv12_8_c: 1126.7 put_hevc_epel_hv12_8_i8mm: 302.2 put_hevc_epel_hv16_8_c: 1925.2 put_hevc_epel_hv16_8_i8mm: 459.9 put_hevc_epel_hv24_8_c: 4301.9 put_hevc_epel_hv24_8_i8mm: 1024.9 put_hevc_epel_hv32_8_c: 7509.2 put_hevc_epel_hv32_8_i8mm: 1680.4 put_hevc_epel_hv48_8_c: 16566.9 put_hevc_epel_hv48_8_i8mm: 3945.4 put_hevc_epel_hv64_8_c: 29134.2 put_hevc_epel_hv64_8_i8mm: 6567.7 Co-Authored-By: J. Dekker <jdek@itanimul.li> Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/aarch64/hevcdsp_init_aarch64.c')
-rw-r--r--libavcodec/aarch64/hevcdsp_init_aarch64.c5
1 files changed, 5 insertions, 0 deletions
diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index 42aa76ddde..e54d8d7b1e 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -191,6 +191,10 @@ NEON8_FNPROTO(epel_h, (int16_t *dst,
const uint8_t *_src, ptrdiff_t _srcstride,
int height, intptr_t mx, intptr_t my, int width), _i8mm);
+NEON8_FNPROTO(epel_hv, (int16_t *dst,
+ const uint8_t *src, ptrdiff_t srcstride,
+ int height, intptr_t mx, intptr_t my, int width), _i8mm);
+
NEON8_FNPROTO(epel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
const uint8_t *_src, ptrdiff_t _srcstride,
int height, int denom, int wx, int ox,
@@ -322,6 +326,7 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
if (have_i8mm(cpu_flags)) {
NEON8_FNASSIGN(c->put_hevc_epel, 0, 1, epel_h, _i8mm);
+ NEON8_FNASSIGN(c->put_hevc_epel, 1, 1, epel_hv, _i8mm);
NEON8_FNASSIGN(c->put_hevc_epel_uni, 1, 1, epel_uni_hv, _i8mm);
NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 0, 1, epel_uni_w_h ,_i8mm);
NEON8_FNASSIGN(c->put_hevc_qpel, 0, 1, qpel_h, _i8mm);