diff options
author | Rémi Denis-Courmont <remi@remlab.net> | 2023-10-28 15:29:32 +0300 |
---|---|---|
committer | Rémi Denis-Courmont <remi@remlab.net> | 2023-10-31 21:33:25 +0200 |
commit | 7e1cdc69fbe5cc82203b6a772e14f6e5f88b4b7a (patch) | |
tree | f8e8a429f9f467cf292721af2e33710e5c77595c | |
parent | 4aea0da2300603f0d24cb711fb5196777c00a935 (diff) | |
download | ffmpeg-7e1cdc69fbe5cc82203b6a772e14f6e5f88b4b7a.tar.gz |
lavc/utvideodsp: R-V V restore_rgb_planes10
restore_rgb_planes10_c: 185852.2
restore_rgb_planes10_rvv_i32: 90130.5
-rw-r--r-- | libavcodec/riscv/utvideodsp_init.c | 9 | ||||
-rw-r--r-- | libavcodec/riscv/utvideodsp_rvv.S | 35 |
2 files changed, 43 insertions, 1 deletion
```diff
diff --git a/libavcodec/riscv/utvideodsp_init.c b/libavcodec/riscv/utvideodsp_init.c
index dfaa16692a..f5038c4736 100644
--- a/libavcodec/riscv/utvideodsp_init.c
+++ b/libavcodec/riscv/utvideodsp_init.c
@@ -26,13 +26,20 @@
 void ff_restore_rgb_planes_rvv(uint8_t *r, uint8_t *g, uint8_t *b,
                                ptrdiff_t linesize_r, ptrdiff_t linesize_g,
                                ptrdiff_t linesize_b, int width, int height);
+void ff_restore_rgb_planes10_rvv(uint16_t *r, uint16_t *g, uint16_t *b,
+                                 ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+                                 ptrdiff_t linesize_b, int width, int height);
 
 av_cold void ff_utvideodsp_init_riscv(UTVideoDSPContext *c)
 {
 #if HAVE_RVV
     int flags = av_get_cpu_flags();
 
-    if (flags & AV_CPU_FLAG_RVV_I32)
+    if (flags & AV_CPU_FLAG_RVV_I32) {
         c->restore_rgb_planes = ff_restore_rgb_planes_rvv;
+
+        if (flags & AV_CPU_FLAG_RVB_ADDR)
+            c->restore_rgb_planes10 = ff_restore_rgb_planes10_rvv;
+    }
 #endif
 }
diff --git a/libavcodec/riscv/utvideodsp_rvv.S b/libavcodec/riscv/utvideodsp_rvv.S
index 673e3442ce..fa70d0eb34 100644
--- a/libavcodec/riscv/utvideodsp_rvv.S
+++ b/libavcodec/riscv/utvideodsp_rvv.S
@@ -51,3 +51,38 @@ func ff_restore_rgb_planes_rvv, zve32x
 
         ret
 endfunc
+
+func ff_restore_rgb_planes10_rvv, zve32x
+        li      t1, -0x200
+        li      t2, 0x3FF
+        sub     a3, a3, a6
+        sub     a4, a4, a6
+        sub     a5, a5, a6
+1:
+        mv      t6, a6
+        addi    a7, a7, -1
+2:
+        vsetvli t0, t6, e16, m8, ta, ma
+        vle16.v v16, (a1)
+        sub     t6, t6, t0
+        vle16.v v8, (a0)
+        vadd.vx v16, v16, t1
+        sh1add  a1, t0, a1
+        vle16.v v24, (a2)
+        vadd.vv v8, v8, v16
+        vadd.vv v24, v24, v16
+        vand.vx v8, v8, t2
+        vand.vx v24, v24, t2
+        vse16.v v8, (a0)
+        sh1add  a0, t0, a0
+        vse16.v v24, (a2)
+        sh1add  a2, t0, a2
+        bnez    t6, 2b
+
+        sh1add  a0, a3, a0
+        sh1add  a1, a4, a1
+        sh1add  a2, a5, a2
+        bnez    a7, 1b
+
+        ret
+endfunc
```