diff options
author | Rémi Denis-Courmont <remi@remlab.net> | 2024-05-12 20:02:03 +0300 |
---|---|---|
committer | Rémi Denis-Courmont <remi@remlab.net> | 2024-05-17 18:08:04 +0300 |
commit | 88d973a5d658dc61dfd32e87b062724def46addc (patch) | |
tree | f8e6b06ad340fe2a57b4abe841e2ab09490aba80 | |
parent | 784672b833513fa8bb8d8fa0cbc82cb8391aa581 (diff) | |
download | ffmpeg-88d973a5d658dc61dfd32e87b062724def46addc.tar.gz |
lavc/flacdsp: R-V V flac_wasted33
T-Head C908:
flac_wasted_33_c: 786.2
flac_wasted_33_rvv_i64: 486.5
-rw-r--r-- | libavcodec/riscv/flacdsp_init.c | 4 | ||||
-rw-r--r-- | libavcodec/riscv/flacdsp_rvv.S | 32 |
2 files changed, 36 insertions, 0 deletions
```diff
diff --git a/libavcodec/riscv/flacdsp_init.c b/libavcodec/riscv/flacdsp_init.c
index 454787470b..4f1652dbe7 100644
--- a/libavcodec/riscv/flacdsp_init.c
+++ b/libavcodec/riscv/flacdsp_init.c
@@ -32,6 +32,7 @@ void ff_flac_lpc32_rvv(int32_t *decoded, const int coeffs[32],
 void ff_flac_lpc32_rvv_simple(int32_t *decoded, const int coeffs[32],
                               int pred_order, int qlevel, int len);
 void ff_flac_wasted32_rvv(int32_t *, int shift, int len);
+void ff_flac_wasted33_rvv(int64_t *, const int32_t *, int shift, int len);
 void ff_flac_decorrelate_indep2_16_rvv(uint8_t **out, int32_t **in,
                                        int channels, int len, int shift);
 void ff_flac_decorrelate_indep4_16_rvv(uint8_t **out, int32_t **in,
@@ -84,6 +85,9 @@ av_cold void ff_flacdsp_init_riscv(FLACDSPContext *c, enum AVSampleFormat fmt,
 
         c->wasted32 = ff_flac_wasted32_rvv;
 
+        if (flags & AV_CPU_FLAG_RVV_I64)
+            c->wasted33 = ff_flac_wasted33_rvv;
+
 # if (__riscv_xlen >= 64)
         switch (fmt) {
         case AV_SAMPLE_FMT_S16:
diff --git a/libavcodec/riscv/flacdsp_rvv.S b/libavcodec/riscv/flacdsp_rvv.S
index d7009cdec2..6287faa260 100644
--- a/libavcodec/riscv/flacdsp_rvv.S
+++ b/libavcodec/riscv/flacdsp_rvv.S
@@ -115,6 +115,38 @@ func ff_flac_wasted32_rvv, zve32x
         ret
 endfunc
 
+func ff_flac_wasted33_rvv, zve64x
+        srli    t0, a2, 5
+        li      t1, 1
+        bnez    t0, 2f
+        sll     a2, t1, a2
+1:
+        vsetvli t0, a3, e32, m4, ta, ma
+        vle32.v v8, (a1)
+        sub     a3, a3, t0
+        vwmulsu.vx v16, v8, a2
+        sh2add  a1, t0, a1
+        vse64.v v16, (a0)
+        sh3add  a0, t0, a0
+        bnez    a3, 1b
+
+        ret
+
+2:      // Pessimistic case: wasted >= 32
+        vsetvli t0, a3, e32, m4, ta, ma
+        vle32.v v8, (a1)
+        sub     a3, a3, t0
+        vwcvtu.x.x.v v16, v8
+        sh2add  a1, t0, a1
+        vsetvli zero, zero, e64, m8, ta, ma
+        vsll.vx v16, v16, a2
+        vse64.v v16, (a0)
+        sh3add  a0, t0, a0
+        bnez    a3, 2b
+
+        ret
+endfunc
+
 #if (__riscv_xlen == 64)
 func ff_flac_decorrelate_indep2_16_rvv, zve32x
         ld      a0, (a0)
```