aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Rémi Denis-Courmont <remi@remlab.net> 2023-10-28 15:29:32 +0300
committer: Rémi Denis-Courmont <remi@remlab.net> 2023-10-31 21:33:25 +0200
commit: 7e1cdc69fbe5cc82203b6a772e14f6e5f88b4b7a (patch)
tree: f8e8a429f9f467cf292721af2e33710e5c77595c
parent: 4aea0da2300603f0d24cb711fb5196777c00a935 (diff)
download: ffmpeg-7e1cdc69fbe5cc82203b6a772e14f6e5f88b4b7a.tar.gz
lavc/utvideodsp: R-V V restore_rgb_planes10
restore_rgb_planes10_c: 185852.2
restore_rgb_planes10_rvv_i32: 90130.5
-rw-r--r-- libavcodec/riscv/utvideodsp_init.c | 9
-rw-r--r-- libavcodec/riscv/utvideodsp_rvv.S | 35
2 files changed, 43 insertions, 1 deletions
diff --git a/libavcodec/riscv/utvideodsp_init.c b/libavcodec/riscv/utvideodsp_init.c
index dfaa16692a..f5038c4736 100644
--- a/libavcodec/riscv/utvideodsp_init.c
+++ b/libavcodec/riscv/utvideodsp_init.c
@@ -26,13 +26,20 @@
void ff_restore_rgb_planes_rvv(uint8_t *r, uint8_t *g, uint8_t *b,
ptrdiff_t linesize_r, ptrdiff_t linesize_g,
ptrdiff_t linesize_b, int width, int height);
+void ff_restore_rgb_planes10_rvv(uint16_t *r, uint16_t *g, uint16_t *b,
+ ptrdiff_t linesize_r, ptrdiff_t linesize_g,
+ ptrdiff_t linesize_b, int width, int height); /* 16-bit-sample variant; defined in utvideodsp_rvv.S */
av_cold void ff_utvideodsp_init_riscv(UTVideoDSPContext *c)
{
#if HAVE_RVV
int flags = av_get_cpu_flags();
- if (flags & AV_CPU_FLAG_RVV_I32)
+ if (flags & AV_CPU_FLAG_RVV_I32) { /* both RVV routines sit behind the vector flag */
c->restore_rgb_planes = ff_restore_rgb_planes_rvv;
+
+ if (flags & AV_CPU_FLAG_RVB_ADDR) /* extra requirement: the 10-bit routine uses sh1add (presumably what this flag covers; confirm in cpu.h) */
+ c->restore_rgb_planes10 = ff_restore_rgb_planes10_rvv; /* installed only when both flags are set */
+ }
#endif
}
diff --git a/libavcodec/riscv/utvideodsp_rvv.S b/libavcodec/riscv/utvideodsp_rvv.S
index 673e3442ce..fa70d0eb34 100644
--- a/libavcodec/riscv/utvideodsp_rvv.S
+++ b/libavcodec/riscv/utvideodsp_rvv.S
@@ -51,3 +51,38 @@ func ff_restore_rgb_planes_rvv, zve32x
ret
endfunc
+
+func ff_restore_rgb_planes10_rvv, zve32x // in place: r = (r + g - 0x200) & 0x3FF, b = (b + g - 0x200) & 0x3FF; args per the C prototype: a0=r a1=g a2=b a3..a5=line sizes a6=width a7=height
+ li t1, -0x200 // t1: bias added to every g sample
+ li t2, 0x3FF // t2: mask applied to every result
+ sub a3, a3, a6 // a3 = linesize_r - width: samples to skip at the end of each r row
+ sub a4, a4, a6 // a4 = linesize_g - width
+ sub a5, a5, a6 // a5 = linesize_b - width
+1: // row loop
+ mv t6, a6 // t6 = samples left in the current row
+ addi a7, a7, -1 // one row fewer to go; tested only at the loop bottom, so height 0 is not handled
+2: // chunk loop within one row
+ vsetvli t0, t6, e16, m8, ta, ma // t0 = samples handled this pass (16-bit elements, LMUL=8)
+ vle16.v v16, (a1) // v16 = g
+ sub t6, t6, t0 // account for the samples taken this pass
+ vle16.v v8, (a0) // v8 = r
+ vadd.vx v16, v16, t1 // v16 = g - 0x200
+ sh1add a1, t0, a1 // a1 += 2 * t0 bytes, i.e. t0 16-bit samples
+ vle16.v v24, (a2) // v24 = b
+ vadd.vv v8, v8, v16 // v8 = r + g - 0x200
+ vadd.vv v24, v24, v16 // v24 = b + g - 0x200
+ vand.vx v8, v8, t2 // mask r result with 0x3FF
+ vand.vx v24, v24, t2 // mask b result with 0x3FF
+ vse16.v v8, (a0) // store r over its input
+ sh1add a0, t0, a0 // a0 += 2 * t0 bytes
+ vse16.v v24, (a2) // store b over its input
+ sh1add a2, t0, a2 // a2 += 2 * t0 bytes
+ bnez t6, 2b // more samples left in this row
+
+ sh1add a0, a3, a0 // skip the rest of the r line: a0 += 2 * (linesize_r - width), so line sizes count 16-bit samples
+ sh1add a1, a4, a1 // same for g
+ sh1add a2, a5, a2 // same for b
+ bnez a7, 1b // next row while any remain
+
+ ret
+endfunc