summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Rémi Denis-Courmont <[email protected]> 2024-06-10 20:29:56 +0300
committer Rémi Denis-Courmont <[email protected]> 2024-06-11 17:15:09 +0300
commit b6f37ffba71fa26b6176eb964cadcb442a115a54 (patch)
tree fdc3c0f4bf7e4bd6d0dc2835d81c3610a66769a5
parent 6c05069e680f6b9055ac14bc19c663914b554fdb (diff)
lavc/vc1dsp: match C block layout in inv_trans_4x8_rvv
Although checkasm does not verify this, the decoder requires that the transform updates the input block exactly like the C code does. This fixes vc1-ism, vc1_ilaced_twomv, vc1_sa00040, vc1_sa10091, vc1_sa10143, vc1_sa20021, vc1test_smm0005 and wmv3-drm-dec tests.
-rw-r--r-- libavcodec/riscv/vc1dsp_rvv.S 21
1 files changed, 15 insertions, 6 deletions
diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S
index c4517d54f5..860b0cc5b1 100644
--- a/libavcodec/riscv/vc1dsp_rvv.S
+++ b/libavcodec/riscv/vc1dsp_rvv.S
@@ -303,15 +303,24 @@ func ff_vc1_inv_trans_4x8_rvv, zve32x
vlsseg4e16.v v0, (a2), a3
li t1, 3
jal t0, ff_vc1_inv_trans_4_rvv
+ vssseg4e16.v v0, (a2), a3
+ vsetivli zero, 4, e16, mf2, ta, ma
addi t1, a2, 1 * 8 * 2
- vse16.v v0, (a2)
+ vle16.v v0, (a2)
addi t2, a2, 2 * 8 * 2
- vse16.v v1, (t1)
+ vle16.v v1, (t1)
addi t3, a2, 3 * 8 * 2
- vse16.v v2, (t2)
- vse16.v v3, (t3)
- vsetivli zero, 4, e16, mf2, ta, ma
- vlseg8e16.v v0, (a2)
+ vle16.v v2, (t2)
+ addi t4, a2, 4 * 8 * 2
+ vle16.v v3, (t3)
+ addi t5, a2, 5 * 8 * 2
+ vle16.v v4, (t4)
+ addi t6, a2, 6 * 8 * 2
+ vle16.v v5, (t5)
+ addi t1, a2, 7 * 8 * 2
+ vle16.v v6, (t6)
+ vle16.v v7, (t1)
+
jal t0, ff_vc1_inv_trans_8_rvv
vadd.vi v4, v4, 1
add t0, a1, a0