lavc/vc1dsp: fix R-V V vector lengths

The 8x4 and 4x4 DC inverse transforms use a needlessly large vector length
multiplier (LMUL): mf2 where mf4 suffices (unless/until we care about
embedded 64-bit-vector hardware). This is merely suboptimal.

The 8x4 case also requests an incorrect vector length (8 elements instead
of 4), which leads to incorrect behaviour on future/hypothetical hardware
with 256-bit or larger vectors.

Pointed-out-by: Martin Storsjö <martin@martin.st>
This commit is contained in:
Rémi Denis-Courmont
2023-12-16 10:46:52 +02:00
parent eddac2aed4
commit 419145c11b

View File

@@ -68,7 +68,7 @@ endfunc
func ff_vc1_inv_trans_8x4_dc_rvv, zve64x func ff_vc1_inv_trans_8x4_dc_rvv, zve64x
lh t2, (a2) lh t2, (a2)
vsetivli zero, 8, e8, mf2, ta, ma vsetivli zero, 4, e8, mf4, ta, ma
vlse64.v v0, (a0), a1 vlse64.v v0, (a0), a1
sh1add t2, t2, t2 sh1add t2, t2, t2
addi t2, t2, 1 addi t2, t2, 1
@@ -84,14 +84,14 @@ func ff_vc1_inv_trans_8x4_dc_rvv, zve64x
vmax.vx v4, v4, zero vmax.vx v4, v4, zero
vsetvli zero, zero, e8, m2, ta, ma vsetvli zero, zero, e8, m2, ta, ma
vnclipu.wi v0, v4, 0 vnclipu.wi v0, v4, 0
vsetivli zero, 8, e8, mf2, ta, ma vsetivli zero, 4, e8, mf4, ta, ma
vsse64.v v0, (a0), a1 vsse64.v v0, (a0), a1
ret ret
endfunc endfunc
func ff_vc1_inv_trans_4x4_dc_rvv, zve32x func ff_vc1_inv_trans_4x4_dc_rvv, zve32x
lh t2, (a2) lh t2, (a2)
vsetivli zero, 4, e8, mf2, ta, ma vsetivli zero, 4, e8, mf4, ta, ma
vlse32.v v0, (a0), a1 vlse32.v v0, (a0), a1
slli t1, t2, 4 slli t1, t2, 4
add t2, t2, t1 add t2, t2, t1
@@ -107,7 +107,7 @@ func ff_vc1_inv_trans_4x4_dc_rvv, zve32x
vmax.vx v2, v2, zero vmax.vx v2, v2, zero
vsetvli zero, zero, e8, m1, ta, ma vsetvli zero, zero, e8, m1, ta, ma
vnclipu.wi v0, v2, 0 vnclipu.wi v0, v2, 0
vsetivli zero, 4, e8, mf2, ta, ma vsetivli zero, 4, e8, mf4, ta, ma
vsse32.v v0, (a0), a1 vsse32.v v0, (a0), a1
ret ret
endfunc endfunc