diff options
author | Rémi Denis-Courmont <remi@remlab.net> | 2024-08-14 20:16:30 +0300 |
---|---|---|
committer | Rémi Denis-Courmont <remi@remlab.net> | 2024-08-19 22:41:13 +0300 |
commit | d8fb44c0aa2f7bc566f937330067be6f2ab83c67 (patch) | |
tree | 6df2ca1ea11c78a6847218b6e841c7b27cf4ddab | |
parent | 1907dd7f2335ba106b1d92070a497e818d734efd (diff) | |
download | ffmpeg-d8fb44c0aa2f7bc566f937330067be6f2ab83c67.tar.gz |
lavc/mpegvideoencdsp: R-V V add_8x8basis
T-Head C908:
add_8x8basis_c: 440.6
add_8x8basis_rvv_i32: 70.3
SpacemiT X60:
add_8x8basis_c: 436.3
add_8x8basis_rvv_i32: 40.5
-rw-r--r-- | libavcodec/riscv/mpegvideoencdsp_init.c | 5 | ||||
-rw-r--r-- | libavcodec/riscv/mpegvideoencdsp_rvv.S | 19 |
2 files changed, 23 insertions, 1 deletions
diff --git a/libavcodec/riscv/mpegvideoencdsp_init.c b/libavcodec/riscv/mpegvideoencdsp_init.c
index 4c156c1cf2..1ac808af16 100644
--- a/libavcodec/riscv/mpegvideoencdsp_init.c
+++ b/libavcodec/riscv/mpegvideoencdsp_init.c
@@ -25,6 +25,7 @@
 
 int ff_try_8x8basis_rvv(const int16_t rem[64], const int16_t weight[64],
                         const int16_t basis[16], int scale);
+void ff_add_8x8basis_rvv(int16_t rem[64], const int16_t basis[16], int scale);
 int ff_pix_sum_rvv(const uint8_t *pix, int line_size);
 int ff_pix_norm1_rvv(const uint8_t *pix, int line_size);
 
@@ -35,8 +36,10 @@ av_cold void ff_mpegvideoencdsp_init_riscv(MpegvideoEncDSPContext *c,
     int flags = av_get_cpu_flags();
 
     if (flags & AV_CPU_FLAG_RVV_I32) {
-        if (flags & AV_CPU_FLAG_RVB)
+        if (flags & AV_CPU_FLAG_RVB) {
             c->try_8x8basis = ff_try_8x8basis_rvv;
+            c->add_8x8basis = ff_add_8x8basis_rvv;
+        }
 
         if (flags & AV_CPU_FLAG_RVV_I64) {
             if ((flags & AV_CPU_FLAG_RVB) && ff_rv_vlen_least(128))
diff --git a/libavcodec/riscv/mpegvideoencdsp_rvv.S b/libavcodec/riscv/mpegvideoencdsp_rvv.S
index 9408de47c8..7c50526934 100644
--- a/libavcodec/riscv/mpegvideoencdsp_rvv.S
+++ b/libavcodec/riscv/mpegvideoencdsp_rvv.S
@@ -55,6 +55,25 @@ func ff_try_8x8basis_rvv, zve32x, b
         ret
 endfunc
 
+func ff_add_8x8basis_rvv, zve32x, b
+        li      t1, 64
+        csrwi   vxrm, 0
+1:
+        vsetvli t0, t1, e16, m4, ta, ma
+        vle16.v v4, (a1)
+        sub     t1, t1, t0
+        vwmul.vx v16, v4, a2
+        sh1add  a1, t0, a1
+        vle16.v v8, (a0)
+        vnclip.wi v4, v16, BASIS_SHIFT - RECON_SHIFT
+        vadd.vv v4, v8, v4
+        vse16.v v4, (a0)
+        sh1add  a0, t0, a0
+        bnez    t1, 1b
+
+        ret
+endfunc
+
 func ff_pix_sum_rvv, zve64x, b
         lpad    0
         vsetivli t0, 16, e16, m1, ta, ma