lavc/ac3dsp: R-V V sum_square_butterfly_int32
ac3_sum_square_bufferfly_int32_c: 61.0 ac3_sum_square_bufferfly_int32_rvv_i64: 14.7
This commit is contained in:
parent
95568c4e31
commit
6459966beb
@ -28,6 +28,8 @@
|
||||
|
||||
void ff_extract_exponents_rvb(uint8_t *exp, int32_t *coef, int nb_coefs);
|
||||
void ff_float_to_fixed24_rvv(int32_t *dst, const float *src, size_t len);
|
||||
/*
 * RISC-V vector implementation installed as
 * AC3DSPContext.sum_square_butterfly_int32 when AV_CPU_FLAG_RVV_I64 is set
 * (64-bit targets only).
 * Arguments, in order: output array receiving four 64-bit sums, first
 * 32-bit input array, second 32-bit input array, number of elements.
 */
void ff_sum_square_butterfly_int32_rvv(int64_t *, const int32_t *,
|
||||
const int32_t *, int);
|
||||
|
||||
av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
|
||||
{
|
||||
@ -39,6 +41,10 @@ av_cold void ff_ac3dsp_init_riscv(AC3DSPContext *c)
|
||||
c->extract_exponents = ff_extract_exponents_rvb;
|
||||
if (flags & AV_CPU_FLAG_RVV_F32)
|
||||
c->float_to_fixed24 = ff_float_to_fixed24_rvv;
|
||||
# if __riscv_xlen >= 64
|
||||
if (flags & AV_CPU_FLAG_RVV_I64)
|
||||
c->sum_square_butterfly_int32 = ff_sum_square_butterfly_int32_rvv;
|
||||
# endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
@ -37,3 +37,44 @@ func ff_float_to_fixed24_rvv, zve32f
|
||||
|
||||
ret
|
||||
endfunc
|
||||
|
||||
#if __riscv_xlen >= 64
|
||||
/*
 * Sum-of-squares "butterfly" over two arrays of 32-bit integers.
 *
 * a0: output, four consecutive 64-bit sums
 * a1: first input array (32-bit elements)
 * a2: second input array (32-bit elements)
 * a3: number of elements
 *
 * Results stored at a0:
 *   +0  sum of a1[i] * a1[i]
 *   +8  sum of a2[i] * a2[i]
 *   +16 sum of (a1[i] + a2[i])^2
 *   +24 sum of (a1[i] - a2[i])^2
 * The sum and difference are formed at 32-bit element width and only the
 * products are widened to 64 bits.
 *
 * Four 64-bit accumulator groups are kept in v0-v3, v4-v7, v8-v11 and
 * v12-v15 and are only reduced to scalars after the loop.
 */
func ff_sum_square_butterfly_int32_rvv, zve64x
|
||||
vsetvli t0, zero, e64, m8, ta, ma // VLMAX, 64-bit elements, groups of 8 registers
|
||||
vmv.v.x v0, zero // clear v0-v7 (first two accumulators)
|
||||
vmv.v.x v8, zero // clear v8-v15 (last two accumulators)
|
||||
1:
|
||||
vsetvli t0, a3, e32, m2, tu, ma // t0 = elements this pass; tail-undisturbed keeps accumulator lanes beyond vl intact
|
||||
vle32.v v16, (a1) // v16 = next chunk of first input
|
||||
sub a3, a3, t0 // a3 = elements still to process
|
||||
vle32.v v20, (a2) // v20 = next chunk of second input
|
||||
sh2add a1, t0, a1 // a1 += 4 * t0 (advance first input pointer)
|
||||
vadd.vv v24, v16, v20 // v24 = first + second (32-bit)
|
||||
sh2add a2, t0, a2 // a2 += 4 * t0 (advance second input pointer)
|
||||
vsub.vv v28, v16, v20 // v28 = first - second (32-bit)
|
||||
vwmacc.vv v0, v16, v16 // widening signed multiply-accumulate: v0 += v16 * v16 (64-bit)
|
||||
vwmacc.vv v4, v20, v20 // v4 += v20 * v20
|
||||
vwmacc.vv v8, v24, v24 // v8 += v24 * v24
|
||||
vwmacc.vv v12, v28, v28 // v12 += v28 * v28
|
||||
bnez a3, 1b // loop until every element has been consumed
|
||||
|
||||
vsetvli t0, zero, e64, m4, ta, ma // VLMAX at 64 bits, groups of 4: cover each whole accumulator
|
||||
vmv.s.x v16, zero // v16[0] = 0, seed for the first reduction
|
||||
vmv.s.x v17, zero // v17[0] = 0, seed for the second reduction
|
||||
vredsum.vs v16, v0, v16 // v16[0] = sum of all elements of v0-v3
|
||||
vmv.s.x v18, zero // v18[0] = 0, seed for the third reduction
|
||||
vredsum.vs v17, v4, v17 // v17[0] = sum of all elements of v4-v7
|
||||
vmv.s.x v19, zero // v19[0] = 0, seed for the fourth reduction
|
||||
vredsum.vs v18, v8, v18 // v18[0] = sum of all elements of v8-v11
|
||||
vmv.x.s t0, v16 // t0 = first total
|
||||
vredsum.vs v19, v12, v19 // v19[0] = sum of all elements of v12-v15
|
||||
vmv.x.s t1, v17 // t1 = second total
|
||||
sd t0, (a0) // store first total at a0 + 0
|
||||
vmv.x.s t2, v18 // t2 = third total
|
||||
sd t1, 8(a0) // store second total at a0 + 8
|
||||
vmv.x.s t3, v19 // t3 = fourth total
|
||||
sd t2, 16(a0) // store third total at a0 + 16
|
||||
sd t3, 24(a0) // store fourth total at a0 + 24
|
||||
ret
|
||||
endfunc
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user