diff --git a/libavcodec/riscv/vc1dsp_init.c b/libavcodec/riscv/vc1dsp_init.c index f105a3a3c6..e4838fb347 100644 --- a/libavcodec/riscv/vc1dsp_init.c +++ b/libavcodec/riscv/vc1dsp_init.c @@ -56,10 +56,10 @@ av_cold void ff_vc1dsp_init_riscv(VC1DSPContext *dsp) dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_rvv; dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_rvv; dsp->avg_vc1_mspel_pixels_tab[0][0] = ff_avg_pixels16x16_rvv; + dsp->avg_vc1_mspel_pixels_tab[1][0] = ff_avg_pixels8x8_rvv; if (flags & AV_CPU_FLAG_RVV_I64) { dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_rvv; dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_rvv; - dsp->avg_vc1_mspel_pixels_tab[1][0] = ff_avg_pixels8x8_rvv; } } dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv; diff --git a/libavcodec/riscv/vc1dsp_rvv.S b/libavcodec/riscv/vc1dsp_rvv.S index 1166f35cad..8b3a830a4a 100644 --- a/libavcodec/riscv/vc1dsp_rvv.S +++ b/libavcodec/riscv/vc1dsp_rvv.S @@ -132,31 +132,25 @@ endfunc .endm func ff_avg_pixels16x16_rvv, zve32x - csrwi vxrm, 0 - vsetivli zero, 16, e8, m1, ta, ma - mspel_op_all l a1 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 - mspel_op_all l a0 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 - vsetvli t0, zero, e8, m8, ta, ma - sub a0, a0, a2 - vaaddu.vv v0, v0, v16 - neg a2, a2 - vaaddu.vv v8, v8, v24 - vsetivli zero, 16, e8, m1, ta, ma - mspel_op_all s a0 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 - - ret + li t0, 16 + vsetivli zero, 16, e8, m1, ta, ma + j 1f endfunc -func ff_avg_pixels8x8_rvv, zve64x - csrwi vxrm, 0 - li t0, 64 - vsetivli zero, 8, e8, mf2, ta, ma - vlse64.v v16, (a1), a2 - vlse64.v v8, (a0), a2 - vsetvli zero, t0, e8, m4, ta, ma - vaaddu.vv v16, v16, v8 - vsetivli zero, 8, e8, mf2, ta, ma - vsse64.v v16, (a0), a2 +func ff_avg_pixels8x8_rvv, zve32x + li t0, 8 + vsetivli zero, 8, e8, mf2, ta, ma +1: + csrwi vxrm, 0 +2: + vle8.v v16, (a1) + addi t0, t0, -1 + vle8.v v8, (a0) + add a1, a1, a2 + vaaddu.vv v16, v16, v8 + vse8.v v16, (a0) + add a0, a0, a2 + bnez t0, 2b ret endfunc