aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Rémi Denis-Courmont <remi@remlab.net> 2024-08-14 19:56:05 +0300
committer: Rémi Denis-Courmont <remi@remlab.net> 2024-08-19 22:41:13 +0300
commit: 1907dd7f2335ba106b1d92070a497e818d734efd (patch)
tree: cf18a7801b652213206cc077d68c3c9096251b8c
parent: 0fd37c00d728becba6c49aff9efaf7cae17d46d7 (diff)
download: ffmpeg-1907dd7f2335ba106b1d92070a497e818d734efd.tar.gz
lavc/mpegvideoencdsp: R-V V try_8x8basis
T-Head C908:
  try_8x8basis_c:       922.5
  try_8x8basis_rvv_i32: 135.3
SpacemiT X60:
  try_8x8basis_c:       926.1
  try_8x8basis_rvv_i32: 103.1
-rw-r--r-- libavcodec/riscv/mpegvideoencdsp_init.c 15
-rw-r--r-- libavcodec/riscv/mpegvideoencdsp_rvv.S 35
2 files changed, 46 insertions, 4 deletions
diff --git a/libavcodec/riscv/mpegvideoencdsp_init.c b/libavcodec/riscv/mpegvideoencdsp_init.c
index eb5c8a5aed..4c156c1cf2 100644
--- a/libavcodec/riscv/mpegvideoencdsp_init.c
+++ b/libavcodec/riscv/mpegvideoencdsp_init.c
@@ -23,6 +23,8 @@
#include "libavutil/cpu.h"
#include "libavcodec/mpegvideoencdsp.h"
+int ff_try_8x8basis_rvv(const int16_t rem[64], const int16_t weight[64],
+ const int16_t basis[64], int scale); /* all three arrays hold 64 entries */
int ff_pix_sum_rvv(const uint8_t *pix, int line_size);
int ff_pix_norm1_rvv(const uint8_t *pix, int line_size);
@@ -32,10 +34,15 @@ av_cold void ff_mpegvideoencdsp_init_riscv(MpegvideoEncDSPContext *c,
#if HAVE_RVV
int flags = av_get_cpu_flags();
- if (flags & AV_CPU_FLAG_RVV_I64) {
- if ((flags & AV_CPU_FLAG_RVB) && ff_rv_vlen_least(128))
- c->pix_sum = ff_pix_sum_rvv;
- c->pix_norm1 = ff_pix_norm1_rvv;
+ if (flags & AV_CPU_FLAG_RVV_I32) {
+ if (flags & AV_CPU_FLAG_RVB)
+ c->try_8x8basis = ff_try_8x8basis_rvv;
+
+ if (flags & AV_CPU_FLAG_RVV_I64) {
+ if ((flags & AV_CPU_FLAG_RVB) && ff_rv_vlen_least(128))
+ c->pix_sum = ff_pix_sum_rvv;
+ c->pix_norm1 = ff_pix_norm1_rvv;
+ }
}
#endif
}
diff --git a/libavcodec/riscv/mpegvideoencdsp_rvv.S b/libavcodec/riscv/mpegvideoencdsp_rvv.S
index 2f25b00eb2..9408de47c8 100644
--- a/libavcodec/riscv/mpegvideoencdsp_rvv.S
+++ b/libavcodec/riscv/mpegvideoencdsp_rvv.S
@@ -20,6 +20,41 @@
#include "libavutil/riscv/asm.S"
+.equ BASIS_SHIFT, 16 // presumably mirrors the C encoder's BASIS_SHIFT - TODO confirm
+.equ RECON_SHIFT, 6 // presumably mirrors the C encoder's RECON_SHIFT - TODO confirm
+
+func ff_try_8x8basis_rvv, zve32x, b // returns (sum over 64 entries of (weight * b)^2 >> 4) >> 2, b = (rem + round(basis * scale >> 10)) >> 6
+ li t1, 64 // t1 = elements left; per the C prototype a0 = rem, a1 = weight, a2 = basis, a3 = scale
+ csrwi vxrm, 0 // fixed-point rounding mode 0 (round-to-nearest-up), used by vnclip below
+ vsetvli t0, t1, e32, m8, ta, ma // t0 = vl = elements per pass; NOTE(review): vl is never recomputed, so it must divide 64
+ vmv.v.x v24, zero // clear the 32-bit per-lane accumulators (group v24-v31)
+ vmv.s.x v1, zero // v1[0] = 0, scalar seed for the final reduction
+1:
+ vsetvli zero, zero, e16, m4, ta, ma // 16-bit view, same vl (SEW/LMUL ratio unchanged)
+ vle16.v v4, (a2) // v4 = next vl basis values
+ sub t1, t1, t0 // remaining -= vl
+ vwmul.vx v16, v4, a3 // v16 (32-bit) = basis * scale
+ sh1add a2, t0, a2 // basis += vl elements (2 bytes each)
+ vle16.v v8, (a0) // v8 = next vl rem values
+ sh1add a0, t0, a0 // rem += vl elements
+ vnclip.wi v4, v16, BASIS_SHIFT - RECON_SHIFT // narrow to 16 bits: rounded >> 10 with signed saturation
+ vle16.v v12, (a1) // v12 = next vl weight values
+ sh1add a1, t0, a1 // weight += vl elements
+ vadd.vv v4, v8, v4 // v4 = rem + scaled basis (16-bit add)
+ vsra.vi v4, v4, RECON_SHIFT // b = v4 >> 6 (arithmetic)
+ vwmul.vv v16, v12, v4 // v16 (32-bit) = weight * b
+ vsetvli zero, zero, e32, m8, ta, ma // back to the 32-bit view, same vl
+ vmul.vv v16, v16, v16 // square the product (low 32 bits kept)
+ vsra.vi v16, v16, 4 // >> 4 (arithmetic)
+ vadd.vv v24, v24, v16 // accumulate per lane
+ bnez t1, 1b // loop until all 64 elements are consumed
+
+ vredsum.vs v1, v24, v1 // v1[0] = v1[0] + sum of the vl accumulator lanes
+ vmv.x.s a0, v1 // a0 = v1[0]
+ srai a0, a0, 2 // NOTE(review): arithmetic shift; confirm the C reference does not use an unsigned (logical) >> 2
+ ret // NOTE(review): no "lpad 0" at entry, unlike ff_pix_sum_rvv below - confirm whether a landing pad is needed
+endfunc
+
func ff_pix_sum_rvv, zve64x, b
lpad 0
vsetivli t0, 16, e16, m1, ta, ma