avcodec/hevc: Add qpel_uni_w_v|h4/6/8/12/16/24/32/48/64 asm opt
tests/checkasm/checkasm:          C        LSX      LASX
put_hevc_qpel_uni_w_h4_8_c:       6.5      1.7      1.2
put_hevc_qpel_uni_w_h6_8_c:       14.5     4.5      3.7
put_hevc_qpel_uni_w_h8_8_c:       24.5     5.7      4.5
put_hevc_qpel_uni_w_h12_8_c:      54.7     17.5     12.0
put_hevc_qpel_uni_w_h16_8_c:      96.5     22.7     13.2
put_hevc_qpel_uni_w_h24_8_c:      216.0    51.2     33.2
put_hevc_qpel_uni_w_h32_8_c:      385.7    87.0     53.2
put_hevc_qpel_uni_w_h48_8_c:      860.5    192.0    113.2
put_hevc_qpel_uni_w_h64_8_c:      1531.0   334.2    200.0
put_hevc_qpel_uni_w_v4_8_c:       8.0      1.7
put_hevc_qpel_uni_w_v6_8_c:       17.2     4.5
put_hevc_qpel_uni_w_v8_8_c:       29.5     6.0      5.2
put_hevc_qpel_uni_w_v12_8_c:      65.2     16.0     11.7
put_hevc_qpel_uni_w_v16_8_c:      116.5    20.5     14.0
put_hevc_qpel_uni_w_v24_8_c:      259.2    48.5     37.2
put_hevc_qpel_uni_w_v32_8_c:      459.5    80.5     56.0
put_hevc_qpel_uni_w_v48_8_c:      1028.5   180.2    126.5
put_hevc_qpel_uni_w_v64_8_c:      1831.2   319.2    224.2

Speedup of decoding H265 4K 30FPS 30Mbps on 3A6000 with 8 threads is 4fps (48fps --> 52fps).

Change-Id: I1178848541d90083869225ba98a02e6aa8bb8c5a
Reviewed-by: yinshiyou-hf@loongson.cn
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
parent
a28eea2a27
commit
6c6bf18ce8
File diff suppressed because it is too large
Load Diff
@@ -188,6 +188,26 @@ void ff_hevc_dsp_init_loongarch(HEVCDSPContext *c, const int bit_depth)
|
||||
c->put_hevc_qpel_uni_w[8][1][1] = ff_hevc_put_hevc_uni_w_qpel_hv48_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[9][1][1] = ff_hevc_put_hevc_uni_w_qpel_hv64_8_lsx;
|
||||
|
||||
c->put_hevc_qpel_uni_w[1][1][0] = ff_hevc_put_hevc_qpel_uni_w_v4_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[2][1][0] = ff_hevc_put_hevc_qpel_uni_w_v6_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[3][1][0] = ff_hevc_put_hevc_qpel_uni_w_v8_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[4][1][0] = ff_hevc_put_hevc_qpel_uni_w_v12_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[5][1][0] = ff_hevc_put_hevc_qpel_uni_w_v16_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[6][1][0] = ff_hevc_put_hevc_qpel_uni_w_v24_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[7][1][0] = ff_hevc_put_hevc_qpel_uni_w_v32_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[8][1][0] = ff_hevc_put_hevc_qpel_uni_w_v48_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[9][1][0] = ff_hevc_put_hevc_qpel_uni_w_v64_8_lsx;
|
||||
|
||||
c->put_hevc_qpel_uni_w[1][0][1] = ff_hevc_put_hevc_qpel_uni_w_h4_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[2][0][1] = ff_hevc_put_hevc_qpel_uni_w_h6_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[3][0][1] = ff_hevc_put_hevc_qpel_uni_w_h8_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[4][0][1] = ff_hevc_put_hevc_qpel_uni_w_h12_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[5][0][1] = ff_hevc_put_hevc_qpel_uni_w_h16_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[6][0][1] = ff_hevc_put_hevc_qpel_uni_w_h24_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[7][0][1] = ff_hevc_put_hevc_qpel_uni_w_h32_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[8][0][1] = ff_hevc_put_hevc_qpel_uni_w_h48_8_lsx;
|
||||
c->put_hevc_qpel_uni_w[9][0][1] = ff_hevc_put_hevc_qpel_uni_w_h64_8_lsx;
|
||||
|
||||
c->sao_edge_filter[0] = ff_hevc_sao_edge_filter_8_lsx;
|
||||
c->sao_edge_filter[1] = ff_hevc_sao_edge_filter_8_lsx;
|
||||
c->sao_edge_filter[2] = ff_hevc_sao_edge_filter_8_lsx;
|
||||
@@ -237,6 +257,24 @@ void ff_hevc_dsp_init_loongarch(HEVCDSPContext *c, const int bit_depth)
|
||||
c->put_hevc_epel_uni_w[7][0][0] = ff_hevc_put_hevc_pel_uni_w_pixels32_8_lasx;
|
||||
c->put_hevc_epel_uni_w[8][0][0] = ff_hevc_put_hevc_pel_uni_w_pixels48_8_lasx;
|
||||
c->put_hevc_epel_uni_w[9][0][0] = ff_hevc_put_hevc_pel_uni_w_pixels64_8_lasx;
|
||||
|
||||
c->put_hevc_qpel_uni_w[3][1][0] = ff_hevc_put_hevc_qpel_uni_w_v8_8_lasx;
|
||||
c->put_hevc_qpel_uni_w[4][1][0] = ff_hevc_put_hevc_qpel_uni_w_v12_8_lasx;
|
||||
c->put_hevc_qpel_uni_w[5][1][0] = ff_hevc_put_hevc_qpel_uni_w_v16_8_lasx;
|
||||
c->put_hevc_qpel_uni_w[6][1][0] = ff_hevc_put_hevc_qpel_uni_w_v24_8_lasx;
|
||||
c->put_hevc_qpel_uni_w[7][1][0] = ff_hevc_put_hevc_qpel_uni_w_v32_8_lasx;
|
||||
c->put_hevc_qpel_uni_w[8][1][0] = ff_hevc_put_hevc_qpel_uni_w_v48_8_lasx;
|
||||
c->put_hevc_qpel_uni_w[9][1][0] = ff_hevc_put_hevc_qpel_uni_w_v64_8_lasx;
|
||||
|
||||
c->put_hevc_qpel_uni_w[1][0][1] = ff_hevc_put_hevc_qpel_uni_w_h4_8_lasx;
|
||||
c->put_hevc_qpel_uni_w[2][0][1] = ff_hevc_put_hevc_qpel_uni_w_h6_8_lasx;
|
||||
c->put_hevc_qpel_uni_w[3][0][1] = ff_hevc_put_hevc_qpel_uni_w_h8_8_lasx;
|
||||
c->put_hevc_qpel_uni_w[4][0][1] = ff_hevc_put_hevc_qpel_uni_w_h12_8_lasx;
|
||||
c->put_hevc_qpel_uni_w[5][0][1] = ff_hevc_put_hevc_qpel_uni_w_h16_8_lasx;
|
||||
c->put_hevc_qpel_uni_w[6][0][1] = ff_hevc_put_hevc_qpel_uni_w_h24_8_lasx;
|
||||
c->put_hevc_qpel_uni_w[7][0][1] = ff_hevc_put_hevc_qpel_uni_w_h32_8_lasx;
|
||||
c->put_hevc_qpel_uni_w[8][0][1] = ff_hevc_put_hevc_qpel_uni_w_h48_8_lasx;
|
||||
c->put_hevc_qpel_uni_w[9][0][1] = ff_hevc_put_hevc_qpel_uni_w_h64_8_lasx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -48,6 +48,24 @@ PEL_UNI_W(pel, pixels, 32);
|
||||
PEL_UNI_W(pel, pixels, 48);
|
||||
PEL_UNI_W(pel, pixels, 64);
|
||||
|
||||
PEL_UNI_W(qpel, v, 8);
|
||||
PEL_UNI_W(qpel, v, 12);
|
||||
PEL_UNI_W(qpel, v, 16);
|
||||
PEL_UNI_W(qpel, v, 24);
|
||||
PEL_UNI_W(qpel, v, 32);
|
||||
PEL_UNI_W(qpel, v, 48);
|
||||
PEL_UNI_W(qpel, v, 64);
|
||||
|
||||
PEL_UNI_W(qpel, h, 4);
|
||||
PEL_UNI_W(qpel, h, 6);
|
||||
PEL_UNI_W(qpel, h, 8);
|
||||
PEL_UNI_W(qpel, h, 12);
|
||||
PEL_UNI_W(qpel, h, 16);
|
||||
PEL_UNI_W(qpel, h, 24);
|
||||
PEL_UNI_W(qpel, h, 32);
|
||||
PEL_UNI_W(qpel, h, 48);
|
||||
PEL_UNI_W(qpel, h, 64);
|
||||
|
||||
#undef PEL_UNI_W
|
||||
|
||||
#endif // #ifndef AVCODEC_LOONGARCH_HEVCDSP_LASX_H
|
||||
|
@@ -257,6 +257,26 @@ PEL_UNI_W(pel, pixels, 32);
|
||||
PEL_UNI_W(pel, pixels, 48);
|
||||
PEL_UNI_W(pel, pixels, 64);
|
||||
|
||||
PEL_UNI_W(qpel, v, 4);
|
||||
PEL_UNI_W(qpel, v, 6);
|
||||
PEL_UNI_W(qpel, v, 8);
|
||||
PEL_UNI_W(qpel, v, 12);
|
||||
PEL_UNI_W(qpel, v, 16);
|
||||
PEL_UNI_W(qpel, v, 24);
|
||||
PEL_UNI_W(qpel, v, 32);
|
||||
PEL_UNI_W(qpel, v, 48);
|
||||
PEL_UNI_W(qpel, v, 64);
|
||||
|
||||
PEL_UNI_W(qpel, h, 4);
|
||||
PEL_UNI_W(qpel, h, 6);
|
||||
PEL_UNI_W(qpel, h, 8);
|
||||
PEL_UNI_W(qpel, h, 12);
|
||||
PEL_UNI_W(qpel, h, 16);
|
||||
PEL_UNI_W(qpel, h, 24);
|
||||
PEL_UNI_W(qpel, h, 32);
|
||||
PEL_UNI_W(qpel, h, 48);
|
||||
PEL_UNI_W(qpel, h, 64);
|
||||
|
||||
#undef PEL_UNI_W
|
||||
|
||||
#endif // #ifndef AVCODEC_LOONGARCH_HEVCDSP_LSX_H
|
||||
|
Loading…
x
Reference in New Issue
Block a user