diff options
author | sunyuechi <sunyuechi@iscas.ac.cn> | 2024-02-06 23:18:51 +0800 |
---|---|---|
committer | Rémi Denis-Courmont <remi@remlab.net> | 2024-02-25 11:05:25 +0200 |
commit | 925b55a5e889ed585f331dd09b7f7eb9f8299634 (patch) | |
tree | 49facb010d365432eb3747038d7208a165aa5395 /libavcodec/riscv | |
parent | 2e4e424ac237e34fe106dca4c224a46eabefb4c9 (diff) | |
download | ffmpeg-925b55a5e889ed585f331dd09b7f7eb9f8299634.tar.gz |
lavc/me_cmp: R-V V vsse vsad
C908:
vsad_0_c: 936.0
vsad_0_rvv_i32: 236.2
vsad_1_c: 424.0
vsad_1_rvv_i32: 190.2
vsse_0_c: 877.0
vsse_0_rvv_i32: 204.2
vsse_1_c: 439.0
vsse_1_rvv_i32: 140.2
Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
Diffstat (limited to 'libavcodec/riscv')
-rw-r--r-- | libavcodec/riscv/me_cmp_init.c | 10 | ||||
-rw-r--r-- | libavcodec/riscv/me_cmp_rvv.S | 98 |
2 files changed, 108 insertions, 0 deletions
```diff
diff --git a/libavcodec/riscv/me_cmp_init.c b/libavcodec/riscv/me_cmp_init.c
index 85ecc22cbc..a6ef5addd0 100644
--- a/libavcodec/riscv/me_cmp_init.c
+++ b/libavcodec/riscv/me_cmp_init.c
@@ -46,6 +46,11 @@ int ff_sse8_rvv(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
 int ff_sse4_rvv(MpegEncContext *v, const uint8_t *pix1, const uint8_t *pix2,
                              ptrdiff_t stride, int h);
 
+int ff_vsse16_rvv(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h);
+int ff_vsse8_rvv(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h);
+int ff_vsad16_rvv(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h);
+int ff_vsad8_rvv(MpegEncContext *c, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride, int h);
+
 av_cold void ff_me_cmp_init_riscv(MECmpContext *c, AVCodecContext *avctx)
 {
 #if HAVE_RVV
@@ -64,6 +69,11 @@ av_cold void ff_me_cmp_init_riscv(MECmpContext *c, AVCodecContext *avctx)
         c->sse[0] = ff_sse16_rvv;
         c->sse[1] = ff_sse8_rvv;
         c->sse[2] = ff_sse4_rvv;
+
+        c->vsse[0] = ff_vsse16_rvv;
+        c->vsse[1] = ff_vsse8_rvv;
+        c->vsad[0] = ff_vsad16_rvv;
+        c->vsad[1] = ff_vsad8_rvv;
     }
 #endif
 }
diff --git a/libavcodec/riscv/me_cmp_rvv.S b/libavcodec/riscv/me_cmp_rvv.S
index 11848f3f21..25b15c74ce 100644
--- a/libavcodec/riscv/me_cmp_rvv.S
+++ b/libavcodec/riscv/me_cmp_rvv.S
@@ -231,3 +231,101 @@ func ff_sse4_rvv, zve32x
         vmv.x.s a0, v0
         ret
 endfunc
+
+.macro vabsaddu dst src tmp
+        vneg.v \tmp, \src
+        vmax.vv \tmp, \src, \tmp
+        vwaddu.wv \dst, \dst, \tmp
+.endm
+
+.macro vsad_vsse16 type
+        vsetivli t0, 16, e32, m4, ta, ma
+        addi a4, a4, -1
+        add t1, a1, a3
+        add t2, a2, a3
+        vmv.v.x v24, zero
+        vmv.s.x v0, zero
+1:
+        vsetvli zero, zero, e8, m1, tu, ma
+        vle8.v v4, (a1)
+        vle8.v v8, (t1)
+        vle8.v v12, (a2)
+        vle8.v v16, (t2)
+        addi a4, a4, -1
+        vwsubu.vv v28, v4, v12
+        vwsubu.wv v12, v28, v8
+        vwaddu.wv v28, v12, v16
+        vsetvli zero, zero, e16, m2, tu, ma
+
+.ifc \type,abs
+        vabsaddu v24, v28, v12
+.endif
+.ifc \type,square
+        vwmacc.vv v24, v28, v28
+.endif
+
+        add a1, a1, a3
+        add a2, a2, a3
+        add t1, t1, a3
+        add t2, t2, a3
+        bnez a4, 1b
+
+        vsetvli zero, zero, e32, m4, tu, ma
+        vredsum.vs v0, v24, v0
+        vmv.x.s a0, v0
+        ret
+.endm
+
+.macro vsad_vsse8 type
+        vsetivli t0, 8, e32, m2, ta, ma
+        addi a4, a4, -1
+        add t1, a1, a3
+        add t2, a2, a3
+        vmv.v.x v24, zero
+        vmv.s.x v0, zero
+1:
+        vsetvli zero, zero, e8, mf2, tu, ma
+        vle8.v v4, (a1)
+        vle8.v v8, (t1)
+        vle8.v v12, (a2)
+        vle8.v v16, (t2)
+        addi a4, a4, -1
+        vwsubu.vv v28, v4, v12
+        vwsubu.wv v12, v28, v8
+        vwaddu.wv v28, v12, v16
+        vsetvli zero, zero, e16, m1, tu, ma
+
+.ifc \type,abs
+        vabsaddu v24, v28, v12
+.endif
+.ifc \type,square
+        vwmacc.vv v24, v28, v28
+.endif
+
+        add a1, a1, a3
+        add a2, a2, a3
+        add t1, t1, a3
+        add t2, t2, a3
+        bnez a4, 1b
+
+        vsetvli zero, zero, e32, m2, tu, ma
+        vredsum.vs v0, v24, v0
+        vmv.x.s a0, v0
+        ret
+.endm
+
+func ff_vsse16_rvv, zve32x
+        vsad_vsse16 square
+endfunc
+
+func ff_vsse8_rvv, zve32x
+        vsad_vsse8 square
+endfunc
+
+func ff_vsad16_rvv, zve32x
+        vsad_vsse16 abs
+endfunc
+
+func ff_vsad8_rvv, zve32x
+        vsad_vsse8 abs
+endfunc
```