diff options
author | Rémi Denis-Courmont <remi@remlab.net> | 2024-07-13 16:01:14 +0300 |
---|---|---|
committer | Rémi Denis-Courmont <remi@remlab.net> | 2024-07-16 17:25:40 +0300 |
commit | 3002310b707071ec7aa671ab7428feed5ee9740c (patch) | |
tree | a3ff3a7b3eaa4fbef1b452129e84b998bbb14d1c | |
parent | 7744c08240808c8517a9c088b465c15235a34c86 (diff) | |
download | ffmpeg-3002310b707071ec7aa671ab7428feed5ee9740c.tar.gz |
lavc/h264dsp: R-V V high-depth add_pixels8
T-Head C908 (cycles):
h264_add_pixels8_9bpp_c: 270.5
h264_add_pixels8_9bpp_rvv_i32: 164.2
h264_add_pixels8_10bpp_c: 270.5
h264_add_pixels8_10bpp_rvv_i32: 164.2
h264_add_pixels8_12bpp_c: 270.5
h264_add_pixels8_12bpp_rvv_i32: 164.2
h264_add_pixels8_14bpp_c: 270.5
h264_add_pixels8_14bpp_rvv_i32: 164.2
-rw-r--r-- | libavcodec/riscv/h264addpx_rvv.S | 22 | ||||
-rw-r--r-- | libavcodec/riscv/h264dsp_init.c | 2 |
2 files changed, 24 insertions, 0 deletions
diff --git a/libavcodec/riscv/h264addpx_rvv.S b/libavcodec/riscv/h264addpx_rvv.S
index fd36bd4896..3c0700d1d9 100644
--- a/libavcodec/riscv/h264addpx_rvv.S
+++ b/libavcodec/riscv/h264addpx_rvv.S
@@ -87,3 +87,25 @@ func ff_h264_add_pixels8_8_rvv, zve64x
         vsse64.v v8, (a0), a2
         ret
 endfunc
+
+func ff_h264_add_pixels8_16_rvv, zve32x
+        li t0, 8
+        vsetivli zero, 8, e16, m1, ta, ma
+1:
+        vle32.v v16, (a1)
+        addi t0, t0, -1
+        vle16.v v8, (a0)
+        .equ offset, 0
+        .rept 256 / __riscv_xlen
+        sx zero, offset(a1)
+        .equ offset, offset + (__riscv_xlen / 8)
+        .endr
+        vncvt.x.x.w v24, v16
+        addi a1, a1, 8 * 4
+        vadd.vv v8, v8, v24
+        vse16.v v8, (a0)
+        add a0, a0, a2
+        bnez t0, 1b
+
+        ret
+endfunc
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 2787485647..4fc695f158 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -63,6 +63,7 @@ void ff_h264_idct8_add_14_rvv(uint8_t *dst, int16_t *block, int stride);
 
 void ff_h264_add_pixels8_8_rvv(uint8_t *dst, int16_t *block, int stride);
 void ff_h264_add_pixels4_8_rvv(uint8_t *dst, int16_t *block, int stride);
+void ff_h264_add_pixels8_16_rvv(uint8_t *dst, int16_t *block, int stride);
 void ff_h264_add_pixels4_16_rvv(uint8_t *dst, int16_t *block, int stride);
 
 extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
@@ -126,6 +127,7 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
             dsp->h264_idct8_add = ff_h264_idct8_add_14_rvv;
         }
         if (bit_depth > 8 && zvl128b) {
+            dsp->h264_add_pixels8_clear = ff_h264_add_pixels8_16_rvv;
             if (flags & AV_CPU_FLAG_RVV_I64)
                 dsp->h264_add_pixels4_clear = ff_h264_add_pixels4_16_rvv;
         }