aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/aarch64/hevcdsp_init_aarch64.c
diff options
context:
space:
mode:
author: Martin Storsjö <martin@martin.st> 2024-03-20 12:18:07 +0200
committer: Martin Storsjö <martin@martin.st> 2024-03-26 09:03:18 +0200
commit: ad01d06f919f25147f857efc4f143b5469e2f92f (patch)
tree: 956c81dd7a88c6f3b85745b832070a5276bdbca5 /libavcodec/aarch64/hevcdsp_init_aarch64.c
parent: de23b384fd7c098132ba1745342cb0d3ed1a7af6 (diff)
download: ffmpeg-ad01d06f919f25147f857efc4f143b5469e2f92f.tar.gz
aarch64: hevc: Implement a neon version of hevc_qpel_uni_w_h*_8

AWS Graviton 3:
put_hevc_qpel_uni_w_h4_8_c: 159.0
put_hevc_qpel_uni_w_h4_8_neon: 64.2
put_hevc_qpel_uni_w_h4_8_i8mm: 40.0
put_hevc_qpel_uni_w_h6_8_c: 344.7
put_hevc_qpel_uni_w_h6_8_neon: 114.5
put_hevc_qpel_uni_w_h6_8_i8mm: 82.0
put_hevc_qpel_uni_w_h8_8_c: 596.2
put_hevc_qpel_uni_w_h8_8_neon: 132.2
put_hevc_qpel_uni_w_h8_8_i8mm: 106.0
put_hevc_qpel_uni_w_h12_8_c: 1325.0
put_hevc_qpel_uni_w_h12_8_neon: 299.0
put_hevc_qpel_uni_w_h12_8_i8mm: 211.5
put_hevc_qpel_uni_w_h16_8_c: 2300.0
put_hevc_qpel_uni_w_h16_8_neon: 422.0
put_hevc_qpel_uni_w_h16_8_i8mm: 286.2
put_hevc_qpel_uni_w_h24_8_c: 5059.0
put_hevc_qpel_uni_w_h24_8_neon: 912.2
put_hevc_qpel_uni_w_h24_8_i8mm: 664.2
put_hevc_qpel_uni_w_h32_8_c: 9198.2
put_hevc_qpel_uni_w_h32_8_neon: 1638.2
put_hevc_qpel_uni_w_h32_8_i8mm: 1033.7
put_hevc_qpel_uni_w_h48_8_c: 20754.7
put_hevc_qpel_uni_w_h48_8_neon: 3633.7
put_hevc_qpel_uni_w_h48_8_i8mm: 2300.7
put_hevc_qpel_uni_w_h64_8_c: 36854.7
put_hevc_qpel_uni_w_h64_8_neon: 6435.7
put_hevc_qpel_uni_w_h64_8_i8mm: 4039.2

Signed-off-by: Martin Storsjö <martin@martin.st>
Diffstat (limited to 'libavcodec/aarch64/hevcdsp_init_aarch64.c')
-rw-r--r-- libavcodec/aarch64/hevcdsp_init_aarch64.c | 7
1 files changed, 7 insertions, 0 deletions
diff --git a/libavcodec/aarch64/hevcdsp_init_aarch64.c b/libavcodec/aarch64/hevcdsp_init_aarch64.c
index 6110a360d8..ea0d26c019 100644
--- a/libavcodec/aarch64/hevcdsp_init_aarch64.c
+++ b/libavcodec/aarch64/hevcdsp_init_aarch64.c
@@ -280,6 +280,11 @@ NEON8_FNPROTO(qpel_uni_hv, (uint8_t *dst, ptrdiff_t dststride,
NEON8_FNPROTO(qpel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
const uint8_t *_src, ptrdiff_t _srcstride,
int height, int denom, int wx, int ox,
+ intptr_t mx, intptr_t my, int width),);
+
+NEON8_FNPROTO(qpel_uni_w_h, (uint8_t *_dst, ptrdiff_t _dststride,
+ const uint8_t *_src, ptrdiff_t _srcstride,
+ int height, int denom, int wx, int ox,
intptr_t mx, intptr_t my, int width), _i8mm);
NEON8_FNPROTO(epel_uni_w_hv, (uint8_t *_dst, ptrdiff_t _dststride,
@@ -429,6 +434,8 @@ av_cold void ff_hevc_dsp_init_aarch64(HEVCDSPContext *c, const int bit_depth)
NEON8_FNASSIGN(c->put_hevc_epel_uni_w, 1, 1, epel_uni_w_hv,);
NEON8_FNASSIGN(c->put_hevc_epel_bi, 1, 1, epel_bi_hv,);
+ NEON8_FNASSIGN_SHARED_32(c->put_hevc_qpel_uni_w, 0, 1, qpel_uni_w_h,);
+
if (have_i8mm(cpu_flags)) {
NEON8_FNASSIGN(c->put_hevc_epel, 0, 1, epel_h, _i8mm);
NEON8_FNASSIGN(c->put_hevc_epel, 1, 1, epel_hv, _i8mm);