lavc/h264dsp: R-V V 8-bit MBAFF loop filter
Performance is (unfortunately) the same as with non-MBAFF, since the hardware under test does not short-circuit vector tail calculations. (IMO, a generic solution or work-around should be agreed on, rather than bespoke approaches all over the place.)
This commit is contained in:
@@ -31,6 +31,8 @@ void ff_h264_v_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride,
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_h_loop_filter_luma_8_rvv(uint8_t *pix, ptrdiff_t stride,
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
void ff_h264_h_loop_filter_luma_mbaff_8_rvv(uint8_t *pix, ptrdiff_t stride,
|
||||
int alpha, int beta, int8_t *tc0);
|
||||
|
||||
extern int ff_startcode_find_candidate_rvb(const uint8_t *, int);
|
||||
extern int ff_startcode_find_candidate_rvv(const uint8_t *, int);
|
||||
@@ -48,6 +50,8 @@ av_cold void ff_h264dsp_init_riscv(H264DSPContext *dsp, const int bit_depth,
|
||||
if (bit_depth == 8 && ff_rv_vlen_least(128)) {
|
||||
dsp->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_8_rvv;
|
||||
dsp->h264_h_loop_filter_luma = ff_h264_h_loop_filter_luma_8_rvv;
|
||||
dsp->h264_h_loop_filter_luma_mbaff =
|
||||
ff_h264_h_loop_filter_luma_mbaff_8_rvv;
|
||||
}
|
||||
dsp->startcode_find_candidate = ff_startcode_find_candidate_rvv;
|
||||
}
|
||||
|
@@ -138,3 +138,17 @@ func ff_h264_h_loop_filter_luma_8_rvv, zve32x
|
||||
vssseg6e8.v v8, (a0), a1
|
||||
ret
|
||||
endfunc
|
||||
|
||||
/*
 * 8-bit H.264 horizontal luma loop filter, MBAFF variant (RISC-V Vector).
 *
 * Per the C prototype: a0 = pix, a1 = stride, a2 = alpha, a3 = beta,
 * a4 = tc0. Processes 8 rows of 6 bytes each, starting at pix - 3.
 * Each of the 4 tc0 bytes is duplicated so that it covers 2 consecutive
 * rows. The filtering itself is delegated to
 * ff_h264_loop_filter_luma_8_rvv, which is defined outside this view;
 * NOTE(review): it presumably consumes the tc values from v6, the pixel
 * columns from v8-v13 and alpha/beta from a2/a3 -- confirm against the
 * helper.
 */
func ff_h264_h_loop_filter_luma_mbaff_8_rvv, zve32x
|
||||
vsetivli zero, 4, e16, mf2, ta, ma /* 4 elements, 16 bits each */
|
||||
vle8.v v4, (a4) /* load the 4 tc0 bytes */
|
||||
li t0, 0x0101 /* multiplier replicating a byte into both halves of a 16-bit lane */
|
||||
vzext.vf2 v6, v4 /* zero-extend each tc0 byte to 16 bits */
|
||||
addi a0, a0, -3 /* step back 3 bytes: 6-byte rows start at pix - 3 */
|
||||
vmul.vx v6, v6, t0 # tc_orig
|
||||
vsetivli zero, 8, e8, m1, ta, ma /* switch to 8 elements of 8 bits (one per row) */
|
||||
vlsseg6e8.v v8, (a0), a1 /* strided 6-field segment load: byte column k of all 8 rows -> v(8+k) */
|
||||
jal t0, ff_h264_loop_filter_luma_8_rvv /* call helper with t0 as link register, leaving ra intact */
|
||||
vssseg6e8.v v8, (a0), a1 /* store v8-v13 back to the same 8 rows */
|
||||
ret
|
||||
endfunc
|
||||
|
Reference in New Issue
Block a user