sws/input: R-V V 32-bit RGB to Y
T-Head C908:
abgr_to_y_8_c: 2.5
abgr_to_y_8_rvv_i32: 2.2
abgr_to_y_128_c: 37.0
abgr_to_y_128_rvv_i32: 8.5
abgr_to_y_1080_c: 327.0
abgr_to_y_1080_rvv_i32: 69.5
abgr_to_y_1920_c: 552.0
abgr_to_y_1920_rvv_i32: 122.2
bgra_to_y_8_c: 2.5
bgra_to_y_8_rvv_i32: 2.2
bgra_to_y_128_c: 37.2
bgra_to_y_128_rvv_i32: 8.5
bgra_to_y_1080_c: 310.2
bgra_to_y_1080_rvv_i32: 69.5
bgra_to_y_1920_c: 568.2
bgra_to_y_1920_rvv_i32: 122.5

SpacemiT X60:
abgr_to_y_8_c: 2.5
abgr_to_y_8_rvv_i32: 2.0
abgr_to_y_128_c: 33.0
abgr_to_y_128_rvv_i32: 3.7
abgr_to_y_1080_c: 276.0
abgr_to_y_1080_rvv_i32: 31.5
abgr_to_y_1920_c: 493.7
abgr_to_y_1920_rvv_i32: 55.5
bgra_to_y_8_c: 2.2
bgra_to_y_8_rvv_i32: 2.0
bgra_to_y_128_c: 33.0
bgra_to_y_128_rvv_i32: 3.7
bgra_to_y_1080_c: 276.0
bgra_to_y_1080_rvv_i32: 31.5
bgra_to_y_1920_c: 490.7
bgra_to_y_1920_rvv_i32: 55.5
This commit is contained in:
parent
8b62fb231a
commit
f5555cb106
@ -149,3 +149,48 @@ func ff_rgb24ToUV_half_rvv, zve32x
|
||||
|
||||
ret
|
||||
endfunc
|
||||
|
||||
/*
 * rgba_input chr0, chr1, high
 *
 * Expands to two luma (Y) converters for packed 32-bit pixels,
 * ff_<chr0>ToY_rvv and ff_<chr1>ToY_rvv, which share a single loop body.
 * The <chr1> entry point only loads the first and last coefficients
 * swapped (BY in place of RY and vice versa) and then jumps to local
 * label 1 inside the <chr0> function.
 *
 * high selects which three bytes of every 32-bit source word are used:
 *   high = 0: bits 7:0, 15:8 and 23:16 (bits 31:24 are ignored)
 *   high = 1: bits 31:24, 23:16 and 15:8 (bits 7:0 are ignored)
 * The ignored byte is presumably the alpha channel, going by the format
 * names passed to the macro.
 *
 * Register use, following the C prototype
 *   void fn(uint8_t *dst, const uint8_t *src, const uint8_t *,
 *           const uint8_t *, int w, uint32_t *coeffs, void *):
 *   a0     dst, written as one 16-bit value per pixel
 *   a1     src, read as one 32-bit word per pixel
 *   a4     number of pixels still to convert
 *   a5     coeffs; the 32-bit words at byte offsets 0, 4, 8 are RY, GY, BY
 *   t0     number of pixels handled by the current iteration (vl)
 *   t1-t3  coefficients for the first, middle and last colour byte
 *   t4     bias added before the final shift
 *   t5     byte mask 0xff
 *   v0-v31 all clobbered (four LMUL=8 register groups)
 *
 * Per pixel:
 *   Y = (first * t1 + middle * GY + last * t3 + bias) >> (15 - 6)
 */
.macro  rgba_input chr0, chr1, high
func ff_\chr1\()ToY_rvv, zve32x
        lw      t1, 8(a5) # BY
        lw      t3, 0(a5) # RY
        j       1f        # continue in the shared body below
endfunc

func ff_\chr0\()ToY_rvv, zve32x
        lw      t1, 0(a5) # RY
        lw      t3, 8(a5) # BY
1:
        lw      t2, 4(a5) # GY
        /*
         * Bias for the final ">> (15 - 6)": 1 << 8 is half of 1 << 9, so
         * the shift rounds to nearest; 32 << 14 contributes 1024 to the
         * shifted result.
         */
        li      t4, (32 << (15 - 1)) + (1 << (15 - 7))
        li      t5, 0xff
2:
        vsetvli t0, a4, e32, m8, ta, ma # t0 = pixels in this pass
        vle32.v v0, (a1)                # one 32-bit word per pixel
        sub     a4, a4, t0
        .if \high
        vsrl.vi v8, v0, 24              # first colour byte = bits 31:24
        .else
        vand.vx v8, v0, t5              # first colour byte = bits 7:0
        .endif
        sh2add  a1, t0, a1              # src += 4 * t0 bytes
        vsrl.vi v16, v0, 8 * (1 + \high)
        vmul.vx v24, v8, t1             # acc  = first * t1
        vand.vx v16, v16, t5            # middle colour byte
        vsrl.vi v8, v0, 8 * (2 - \high)
        vmacc.vx v24, t2, v16           # acc += middle * GY
        vand.vx v8, v8, t5              # last colour byte
        vadd.vx v24, v24, t4            # acc += bias
        vmacc.vx v24, t3, v8            # acc += last * t3
        /* Same vl, half the element width: narrow 32-bit sums to 16 bits. */
        vsetvli zero, zero, e16, m4, ta, ma
        vnsra.wi v0, v24, 15 - 6
        vse16.v v0, (a0)
        sh1add  a0, t0, a0              # dst += 2 * t0 bytes
        bnez    a4, 2b

        ret
endfunc
.endm

/* Colour bytes in bits 23:0 of each word; bits 31:24 ignored. */
rgba_input rgba32, bgra32, 0
/* Colour bytes in bits 31:8 of each word; bits 7:0 ignored. */
rgba_input abgr32, argb32, 1
|
||||
|
@ -21,20 +21,22 @@
|
||||
#include "libavutil/riscv/cpu.h"
|
||||
#include "libswscale/swscale_internal.h"
|
||||
|
||||
/*
 * Declare the three RVV entry points for one packed RGB input format:
 *   ff_<name>ToY_rvv       - luma conversion
 *   ff_<name>ToUV_rvv      - chroma conversion
 *   ff_<name>ToUV_half_rvv - chroma conversion, horizontally subsampled
 *                            variant (selected when chrSrcHSubSample is set)
 *
 * These are declarations only; the functions themselves are implemented
 * outside this file. The expansions for bgr24 and rgb24 replace the
 * hand-written prototypes that used to be spelled out one by one with the
 * exact same signatures.
 *
 * The macro deliberately has no trailing semicolon so that each use reads
 * like an ordinary declaration statement.
 */
#define RVV_INPUT(name) \
void ff_##name##ToY_rvv(uint8_t *dst, const uint8_t *src, const uint8_t *, \
                        const uint8_t *, int w, uint32_t *coeffs, void *); \
void ff_##name##ToUV_rvv(uint8_t *, uint8_t *, const uint8_t *, \
                         const uint8_t *, const uint8_t *, int w, \
                         uint32_t *coeffs, void *); \
void ff_##name##ToUV_half_rvv(uint8_t *, uint8_t *, const uint8_t *, \
                              const uint8_t *, const uint8_t *, int w, \
                              uint32_t *coeffs, void *)

RVV_INPUT(abgr32);
RVV_INPUT(argb32);
RVV_INPUT(bgr24);
RVV_INPUT(bgra32);
RVV_INPUT(rgb24);
RVV_INPUT(rgba32);
|
||||
|
||||
av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
|
||||
{
|
||||
@ -43,6 +45,14 @@ av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
|
||||
|
||||
if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
|
||||
switch (c->srcFormat) {
|
||||
case AV_PIX_FMT_ABGR:
|
||||
c->lumToYV12 = ff_abgr32ToY_rvv;
|
||||
break;
|
||||
|
||||
case AV_PIX_FMT_ARGB:
|
||||
c->lumToYV12 = ff_argb32ToY_rvv;
|
||||
break;
|
||||
|
||||
case AV_PIX_FMT_BGR24:
|
||||
c->lumToYV12 = ff_bgr24ToY_rvv;
|
||||
if (c->chrSrcHSubSample)
|
||||
@ -51,6 +61,10 @@ av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
|
||||
c->chrToYV12 = ff_bgr24ToUV_rvv;
|
||||
break;
|
||||
|
||||
case AV_PIX_FMT_BGRA:
|
||||
c->lumToYV12 = ff_bgra32ToY_rvv;
|
||||
break;
|
||||
|
||||
case AV_PIX_FMT_RGB24:
|
||||
c->lumToYV12 = ff_rgb24ToY_rvv;
|
||||
if (c->chrSrcHSubSample)
|
||||
@ -58,6 +72,10 @@ av_cold void ff_sws_init_swscale_riscv(SwsContext *c)
|
||||
else
|
||||
c->chrToYV12 = ff_rgb24ToUV_rvv;
|
||||
break;
|
||||
|
||||
case AV_PIX_FMT_RGBA:
|
||||
c->lumToYV12 = ff_rgba32ToY_rvv;
|
||||
break;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
Loading…
x
Reference in New Issue
Block a user