aboutsummaryrefslogtreecommitdiffstats
path: root/libswscale
diff options
context:
space:
mode:
authorRémi Denis-Courmont <remi@remlab.net>2022-09-28 18:30:01 +0300
committerLynne <dev@lynne.ee>2022-09-30 07:25:44 +0200
commita1bfb5290e58afa3d38e4d3f986302a2668fbfbe (patch)
treefaa554915d1c7185c2443ac1c435827a0699fc69 /libswscale
parent9181835a249405ec492d26bba58d3881eded95bf (diff)
downloadffmpeg-a1bfb5290e58afa3d38e4d3f986302a2668fbfbe.tar.gz
sws/rgb2rgb: RISC-V 64-bit V packed YUYV/UYVY to planar 4:2:2
This is currently 64-bit only because the stack spilling code would not assemble on RV32I (and it would corrupt s0 and s1 on RV128I, in theory). This could be added later in the unlikely event that someone wants it.
Diffstat (limited to 'libswscale')
-rw-r--r--libswscale/riscv/rgb2rgb.c10
-rw-r--r--libswscale/riscv/rgb2rgb_rvv.S53
2 files changed, 63 insertions, 0 deletions
diff --git a/libswscale/riscv/rgb2rgb.c b/libswscale/riscv/rgb2rgb.c
index 32c1546827..37a2cd5ea1 100644
--- a/libswscale/riscv/rgb2rgb.c
+++ b/libswscale/riscv/rgb2rgb.c
@@ -33,6 +33,12 @@ void ff_shuffle_bytes_3210_rvv(const uint8_t *src, uint8_t *dst, int src_len);
void ff_interleave_bytes_rvv(const uint8_t *src1, const uint8_t *src2,
uint8_t *dst, int width, int height, int s1stride,
int s2stride, int dstride);
+void ff_uyvytoyuv422_rvv(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+ const uint8_t *src, int width, int height,
+ int ystride, int uvstride, int src_stride);
+void ff_yuyvtoyuv422_rvv(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
+ const uint8_t *src, int width, int height,
+ int ystride, int uvstride, int src_stride);
av_cold void rgb2rgb_init_riscv(void)
{
@@ -46,6 +52,10 @@ av_cold void rgb2rgb_init_riscv(void)
shuffle_bytes_3012 = ff_shuffle_bytes_3012_rvv;
shuffle_bytes_3210 = ff_shuffle_bytes_3210_rvv;
interleaveBytes = ff_interleave_bytes_rvv;
+#if (__riscv_xlen == 64)
+ uyvytoyuv422 = ff_uyvytoyuv422_rvv;
+ yuyvtoyuv422 = ff_yuyvtoyuv422_rvv;
+#endif
}
#endif
}
diff --git a/libswscale/riscv/rgb2rgb_rvv.S b/libswscale/riscv/rgb2rgb_rvv.S
index 7f8c2efd80..5626d906eb 100644
--- a/libswscale/riscv/rgb2rgb_rvv.S
+++ b/libswscale/riscv/rgb2rgb_rvv.S
@@ -102,3 +102,56 @@ func ff_interleave_bytes_rvv, zve32x
ret
endfunc
+
+#if (__riscv_xlen == 64)
+.macro yuy2_to_i422p v_y0, v_y1, v_u, v_v // args: vector regs (among v8..v11) holding even luma, odd luma, U, V
+ addi sp, sp, -16 // reserve 16 bytes of stack for spilling s0/s1
+ sd s0, (sp) // save s0 (restored before ret)
+ sd s1, 8(sp) // save s1 (restored before ret)
+ addi a4, a4, 1 // +1 so the shift below rounds up; NOTE(review): odd widths then emit one extra luma byte per row
+ lw s0, 16(sp) // s0 = word at the caller's sp; added to a3 per row (presumably the stack-passed src_stride)
+ srai a4, a4, 1 // pixel width -> chroma width
+ li s1, 2 // s1 = byte stride of 2 used by the luma stores
+1: // row loop
+ mv t4, a4 // t4 = chroma samples left in this row
+ mv t3, a3 // t3 = packed source cursor
+ mv t0, a0 // t0 = cursor for \v_y0 bytes (luma row start)
+ addi t6, a0, 1 // t6 = cursor for \v_y1 bytes (luma row start + 1)
+ mv t1, a1 // t1 = cursor for \v_u output
+ mv t2, a2 // t2 = cursor for \v_v output
+ addi a5, a5, -1 // one row consumed; NOTE(review): a5 == 0 on entry would wrap instead of exiting
+2: // inner loop over one row, t5 elements per pass
+ vsetvli t5, t4, e8, m1, ta, ma // t5 = elements this pass (8-bit elements, LMUL=1)
+ sub t4, t4, t5 // t4 -= elements handled
+ vlseg4e8.v v8, (t3) // de-interleave 4-byte groups: byte k of each group -> v(8+k)
+ sh2add t3, t5, t3 // t3 += 4 * t5 (sh2add is a Zba instruction)
+ vsse8.v \v_y0, (t0), s1 // store \v_y0 to every second byte starting at t0
+ sh1add t0, t5, t0 // t0 += 2 * t5
+ vsse8.v \v_y1, (t6), s1 // store \v_y1 to every second byte starting at t6
+ sh1add t6, t5, t6 // t6 += 2 * t5
+ vse8.v \v_u, (t1) // contiguous store of \v_u
+ add t1, t5, t1 // t1 += t5
+ vse8.v \v_v, (t2) // contiguous store of \v_v
+ add t2, t5, t2 // t2 += t5
+ bnez t4, 2b // loop until the row is fully consumed
+
+ add a3, a3, s0 // advance source row pointer by s0
+ add a0, a0, a6 // advance luma row pointer by a6
+ add a1, a1, a7 // advance first chroma row pointer by a7
+ add a2, a2, a7 // advance second chroma row pointer by a7
+ bnez a5, 1b // loop until all rows are done
+
+ ld s1, 8(sp) // restore s1
+ ld s0, (sp) // restore s0
+ addi sp, sp, 16 // release the spill area
+ ret
+.endm
+
+func ff_uyvytoyuv422_rvv, zve32x // body is the yuy2_to_i422p macro; args are v_y0, v_y1, v_u, v_v
+ yuy2_to_i422p v9, v11, v8, v10 // luma from v9/v11, U from v8, V from v10
+endfunc
+
+func ff_yuyvtoyuv422_rvv, zve32x // body is the yuy2_to_i422p macro; args are v_y0, v_y1, v_u, v_v
+ yuy2_to_i422p v8, v10, v9, v11 // luma from v8/v10, U from v9, V from v11
+endfunc
+#endif