aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Rémi Denis-Courmont <remi@remlab.net> 2024-06-30 11:24:43 +0300
committer: Rémi Denis-Courmont <remi@remlab.net> 2024-07-04 19:57:42 +0300
commit: e2af5904f0fa86dbb2b7755ab579f54d14523e8e (patch)
tree: 98271578611e91a6676bf6939dfac642647581c7
parent: 5a6e333fc7ac514255bcd6b424924d92ef558bf0 (diff)
download: ffmpeg-e2af5904f0fa86dbb2b7755ab579f54d14523e8e.tar.gz
lavc/h264dsp: R-V V 8-bit MBAFF loop filter
Performance is (unfortunately) the same as with non-MBAFF, since the hardware under test does not short-circuit vector tail calculations. (IMO, a generic solution or work-around should be agreed on, rather than bespoke approaches all over the place.)
-rw-r--r-- libavcodec/riscv/h264dsp_init.c 4
-rw-r--r-- libavcodec/riscv/h264dsp_rvv.S 14
2 files changed, 18 insertions, 0 deletions
diff --git a/libavcodec/riscv/h264dsp_init.c b/libavcodec/riscv/h264dsp_init.c
index 0d4d541992..ab412a9924 100644
--- a/libavcodec/riscv/h264dsp_init.c
+++ b/libavcodec/riscv/h264dsp_init.c
@@ -31,6 +31,8 @@ void ff_h264_v_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride,
int alpha, int beta, int8_t *tc0);
void ff_h264_h_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride,
int alpha, int beta, int8_t *tc0);
+void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, ptrdiff_t stride,
+ int alpha, int beta, int8_t *tc0);
extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
@@ -48,6 +50,8 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
if (bit_depth == 8 && ff_rv_vlen_least(128)) {
dsp->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_8_rvv;
dsp->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_8_rvv;
+ dsp->h264_h_loop_filter_luma_mbaff =
+ ff_h264_h_loop_filter_luma_mbaff_8_rvv;
}
dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
}
diff --git a/libavcodec/riscv/h264dsp_rvv.S b/libavcodec/riscv/h264dsp_rvv.S
index 77bf40db1f..96a8a0a8a3 100644
--- a/libavcodec/riscv/h264dsp_rvv.S
+++ b/libavcodec/riscv/h264dsp_rvv.S
@@ -138,3 +138,17 @@ func ff_h264_h_loop_filter_luma_8_rvv, zve32x
vssseg6e8.v v8, (a0), a1
ret
endfunc
+
+func ff_h264_h_loop_filter_luma_mbaff_8_rvv, zve32x # 8-bit MBAFF luma loop filter entry; per the C prototype: a0 = pix, a1 = stride, a2 = alpha, a3 = beta, a4 = tc0
+ vsetivli zero, 4, e16, mf2, ta, ma # VL = 4 elements of 16 bits
+ vle8.v v4, (a4) # load 4 bytes from tc0
+ li t0, 0x0101 # multiplier that copies a byte into both halves of a 16-bit element
+ vzext.vf2 v6, v4 # zero-extend each tc0 byte to 16 bits
+ addi a0, a0, -3 # move the base back 3 bytes so each 6-byte segment spans pix-3 .. pix+2
+ vmul.vx v6, v6, t0 # tc_orig: each tc0 byte now occupies 2 adjacent bytes of v6
+ vsetivli zero, 8, e8, m1, ta, ma # VL = 8 elements of 8 bits: v6 is now seen as 8 bytes, one per loaded row
+ vlsseg6e8.v v8, (a0), a1 # strided segment load: 6 bytes from each of 8 rows (a1 apart) into v8..v13
+ jal t0, ff_h264_loop_filter_luma_8_rvv # call the shared helper (defined outside this hunk) with t0 as link register; presumably filters v8..v13 in place using v6, a2, a3 - confirm against the helper
+ vssseg6e8.v v8, (a0), a1 # strided segment store of v8..v13 back to the same 8 rows
+ ret
+endfunc