lavc/idctdsp: improve R-V V put_pixels_clamped

This commit is contained in:
Rémi Denis-Courmont
2023-10-27 22:08:10 +03:00
parent d48810f3a5
commit ae72412aa8

View File

@@ -20,24 +20,17 @@
#include "libavutil/riscv/asm.S" #include "libavutil/riscv/asm.S"
/*
 * NOTE(review): this text is a side-by-side diff rendering, not assemblable
 * source. Going by the hunk header counts (-20,24 +20,17), the left column of
 * each line is the pre-commit code and the right column is the post-commit
 * code. A line that carries a single instruction belongs to one side only.
 *
 * ff_put_pixels_clamped_rvv: reads signed 16-bit values from (a0), clamps
 * each one to the range 0..255, and writes the resulting bytes as 8 rows of
 * 8 bytes starting at (a1), consecutive rows being a2 bytes apart.
 * Presumably a0 is the 8x8 coefficient block, a1 the destination pixels and
 * a2 the line stride - the C prototype is not visible here, confirm there.
 *
 * Old revision: de-interleaves the block into 8 registers with a segment
 * load, narrows each register separately, then re-interleaves with a strided
 * segment store.
 * New revision: loads all 64 values as one register group, narrows the group
 * with a single instruction, then stores each 8-byte row as one 64-bit
 * element of a strided store.
 */
/* The second macro argument changes from zve32x to zve64x. Presumably it
 * names the vector extension enabled for the function body (macro is defined
 * in the included asm.S, not visible here). The new code uses a 64-bit
 * element store, which needs 64-bit element support. */
func ff_put_pixels_clamped_rvv, zve32x func ff_put_pixels_clamped_rvv, zve64x
/* Old: vl = 8, 16-bit elements, one register per group.
 * New: t0 = 64, the number of values in an 8x8 block. */
vsetivli zero, 8, e16, m1, ta, ma li t0, 8 * 8
/* Old: segment load with 8 fields per segment, so field k of every segment
 * lands in register v24+k.
 * New: request vl = t0 with 16-bit elements and an 8-register group.
 * NOTE(review): vl only reaches 64 here when VLEN is at least 128 bits.
 * Nothing in the visible code checks that - confirm the caller does. */
vlseg8e16.v v24, (a0) vsetvli zero, t0, e16, m8, ta, ma
/* New only: unit-stride load of the 16-bit values into the group at v24. */
vle16.v v24, (a0)
/* RVV only has signed-signed and unsigned-unsigned clipping. /* RVV only has signed-signed and unsigned-unsigned clipping.
* We need two steps for signed-to-unsigned clipping. */ * We need two steps for signed-to-unsigned clipping. */
/* Old only: switch to an 8-register group with vl set to the maximum (the
 * value is also written to t0) so that the next instruction covers v24..v31
 * in one go. */
vsetvli t0, zero, e16, m8, ta, ma
/* Both: step one of the clamp. Signed maximum against the zero register
 * turns every negative value into 0. */
vmax.vx v24, v24, zero vmax.vx v24, v24, zero
/* New only: keep vl, switch to 8-bit elements in a 4-register group, which
 * is the destination shape for narrowing the 16-bit group at v24. */
vsetvli zero, zero, e8, m4, ta, ma
/* Old only: vl = 8 with 8-bit elements, so each narrowing below handles one
 * source register. */
vsetivli zero, 8, e8, mf2, ta, ma
/* Both: step two of the clamp. Unsigned saturating narrow from 16 to 8 bits
 * with a shift of 0, which caps values at 255. The old code repeats this for
 * each of v24..v31 on the following lines, the new code does the whole group
 * with this one instruction. */
vnclipu.wi v16, v24, 0 vnclipu.wi v16, v24, 0
/* New column: vl = 8 for the store on the next line. */
vnclipu.wi v17, v25, 0 vsetivli zero, 8, e8, mf2, ta, ma
/* New column: strided store of 8 elements of 64 bits each, i.e. one 8-byte
 * row per element, with consecutive elements a2 bytes apart. */
vnclipu.wi v18, v26, 0 vsse64.v v16, (a1), a2
vnclipu.wi v19, v27, 0
vnclipu.wi v20, v28, 0
vnclipu.wi v21, v29, 0
vnclipu.wi v22, v30, 0
vnclipu.wi v23, v31, 0
/* Old only: strided segment store. Each segment takes one byte from each of
 * v16..v23 and consecutive segments are a2 bytes apart. */
vssseg8e8.v v16, (a1), a2
ret ret
endfunc endfunc