diff options
author | Rémi Denis-Courmont <remi@remlab.net> | 2022-09-28 18:30:00 +0300 |
---|---|---|
committer | Lynne <dev@lynne.ee> | 2022-09-30 07:24:09 +0200 |
commit | 9181835a249405ec492d26bba58d3881eded95bf (patch) | |
tree | 699aadd24df14068c6e8b02ea8d292afde061257 /libswscale/riscv | |
parent | 66a03f405316a0e1a4a60cacd1d32ec540604a01 (diff) | |
download | ffmpeg-9181835a249405ec492d26bba58d3881eded95bf.tar.gz |
sws/rgb2rgb: RISC-V V interleaveBytes
Diffstat (limited to 'libswscale/riscv')
-rw-r--r-- | libswscale/riscv/rgb2rgb.c | 4 | ||||
-rw-r--r-- | libswscale/riscv/rgb2rgb_rvv.S | 26 |
2 files changed, 30 insertions, 0 deletions
diff --git a/libswscale/riscv/rgb2rgb.c b/libswscale/riscv/rgb2rgb.c index 5654154494..32c1546827 100644 --- a/libswscale/riscv/rgb2rgb.c +++ b/libswscale/riscv/rgb2rgb.c @@ -30,6 +30,9 @@ void ff_shuffle_bytes_2103_rvv(const uint8_t *src, uint8_t *dst, int src_len); void ff_shuffle_bytes_1230_rvv(const uint8_t *src, uint8_t *dst, int src_len); void ff_shuffle_bytes_3012_rvv(const uint8_t *src, uint8_t *dst, int src_len); void ff_shuffle_bytes_3210_rvv(const uint8_t *src, uint8_t *dst, int src_len); +void ff_interleave_bytes_rvv(const uint8_t *src1, const uint8_t *src2, + uint8_t *dst, int width, int height, int s1stride, + int s2stride, int dstride); av_cold void rgb2rgb_init_riscv(void) { @@ -42,6 +45,7 @@ av_cold void rgb2rgb_init_riscv(void) shuffle_bytes_1230 = ff_shuffle_bytes_1230_rvv; shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv; shuffle_bytes_3210 = ff_shuffle_bytes_3210_rvv; + interleaveBytes = ff_interleave_bytes_rvv; } #endif } diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S index 3eb11262c0..7f8c2efd80 100644 --- a/libswscale/riscv/rgb2rgb_rvv.S +++ b/libswscale/riscv/rgb2rgb_rvv.S @@ -76,3 +76,29 @@ func ff_shuffle_bytes_3210_rvv, zve32x addi a0, a0, 3 j 1b endfunc + +func ff_interleave_bytes_rvv, zve32x +1: + mv t0, a0 + mv t1, a1 + mv t2, a2 + mv t3, a3 + addi a4, a4, -1 +2: + vsetvli t4, t3, e8, ta, ma + sub t3, t3, t4 + vle8.v v8, (t0) + add t0, t4, t0 + vle8.v v9, (t1) + add t1, t4, t1 + vsseg2e8.v v8, (t2) + sh1add t2, t4, t2 + bnez t4, 2b + + add a0, a0, a5 + add a1, a1, a6 + add a2, a2, a7 + bnez a4, 1b + + ret +endfunc |