swscale: NEON optimized unscaled rgba to nv12 conversion
Signed-off-by: Yu Xiaolei <dreifachstein@gmail.com> Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
ffbcb1c6f0
commit
1c67ad9d93
4
libswscale/arm/Makefile
Normal file
4
libswscale/arm/Makefile
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
# C glue that installs the ARM unscaled converters.
OBJS      += arm/swscale_unscaled.o

# NEON rgb -> yuv kernels (32-bit and 16-bit intermediate precision).
NEON-OBJS += arm/rgb2yuv_neon_32.o                                      \
             arm/rgb2yuv_neon_16.o
|
80
libswscale/arm/rgb2yuv_neon_16.S
Normal file
80
libswscale/arm/rgb2yuv_neon_16.S
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "rgb2yuv_neon_common.S"
|
||||||
|
|
||||||
|
/* R/G/B averaged over 2x2 pixels, eight 16-bit lanes each (chroma input) */
alias_qw        r16x8,    q7
alias_qw        g16x8,    q8
alias_qw        b16x8,    q9

/* one colour channel widened from 8 to 16 bits: low / high eight pixels */
alias           n16x16_l, q11
alias           n16x16_h, q12

/* 16-bit luma accumulators: low / high eight pixels */
alias           y16x16_l, q13
alias           y16x16_h, q14

/* final 8-bit luma for sixteen pixels */
alias_qw        y8x16,    q15
|
||||||
|
|
||||||
|
/*
 * init src
 *
 * Set up the constant registers used by the 16-bit kernel.
 *
 * \src points at nine 32-bit coefficients.  The two vld3 loads de-interleave
 * them with a stride of three, so q13/q14/q15 receive table elements
 * {0,3,6} / {1,4,7} / {2,5,8}.  Each vector is then narrowed to 16 bits with
 * a rounding right shift by 7 and lands in CO_R / CO_G / CO_B, whose lanes
 * 0..2 are addressed as the Y/U/V coefficients of that channel.
 *
 * \src is advanced by the first load (writeback) and left there.
 */
.macro init src
    /* bias constants do not depend on the table, set them up first */
    vmov.u8         BIAS_Y, #16
    vmov.u8         BIAS_U, #128

    /* elements 0..5, then element 6..8 into lane 0 of the high halves */
    vld3.i32        {q13_l, q14_l, q15_l}, [\src]!
    vld3.i32        {q13_h[0], q14_h[0], q15_h[0]}, [\src]

    vrshrn.i32      CO_R, q13, #7
    vrshrn.i32      CO_G, q14, #7
    vrshrn.i32      CO_B, q15, #7
.endm
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * compute_y_16x1_step action, s8x16, coeff
 *
 * Widen one 8-bit colour channel (sixteen pixels in \s8x16) to 16 bits and
 * apply \action (vmul to start, vmla to accumulate) with the scalar \coeff
 * into the 16-bit luma accumulators.
 *
 * clobber: n16x16_l, n16x16_h
 */
.macro compute_y_16x1_step action, s8x16, coeff
    /* low eight pixels */
    vmovl.u8        n16x16_l, \s8x16\()_l
    \action         y16x16_l, n16x16_l, \coeff

    /* high eight pixels */
    vmovl.u8        n16x16_h, \s8x16\()_h
    \action         y16x16_h, n16x16_h, \coeff
.endm
|
||||||
|
|
||||||
|
/*
 * compute_y_16x1
 *
 * in:  r8x16, g8x16, b8x16
 * out: y8x16
 *
 * Luma for sixteen pixels with 16-bit intermediates:
 * multiply-accumulate the three channels, narrow with a rounding right
 * shift by 8, then add BIAS_Y.
 */
.macro compute_y_16x1
    /* the first step must be a plain multiply, it initialises the sums */
    compute_y_16x1_step vmul, r8x16, CO_RY
    compute_y_16x1_step vmla, g8x16, CO_GY
    compute_y_16x1_step vmla, b8x16, CO_BY

    vrshrn.i16      y8x16_l, y16x16_l, #8
    vrshrn.i16      y8x16_h, y16x16_h, #8

    vadd.u8         y8x16, y8x16, BIAS_Y
.endm
|
||||||
|
|
||||||
|
/* 16-bit chroma accumulator; shares q15 with y8x16 */
alias           c16x8,  q15
/* packed 8-bit chroma output, two 8-lane halves */
alias_qw        c8x8x2, q10
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * compute_chroma_8x1 c, C
 *
 * in:  r16x8, g16x8, b16x8
 * out: \c\()8x8   (\c is u or v, \C the matching upper-case letter)
 *
 * One chroma plane for eight downsampled pixels with 16-bit intermediates:
 * multiply-accumulate with the CO_?\C coefficients, narrow with a rounding
 * right shift by 8, then add BIAS_\C.
 *
 * clobber: c16x8
 */
.macro compute_chroma_8x1 c, C
    vmul            c16x8, r16x8, CO_R\C
    vmla            c16x8, g16x8, CO_G\C
    vmla            c16x8, b16x8, CO_B\C

    vrshrn.i16      \c\()8x8, c16x8, #8
    vadd.u8         \c\()8x8, \c\()8x8, BIAS_\C
.endm
|
||||||
|
|
||||||
|
/* instantiate rgbx_to_nv12_neon_16 from the shared 4:2:0 semi-planar loop */
loop_420sp rgbx, nv12, init, kernel_420_16x2, 16
|
119
libswscale/arm/rgb2yuv_neon_32.S
Normal file
119
libswscale/arm/rgb2yuv_neon_32.S
Normal file
@ -0,0 +1,119 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "rgb2yuv_neon_common.S"
|
||||||
|
|
||||||
|
/* R/G/B averaged over 2x2 pixels, eight 16-bit lanes each (chroma input) */
alias_qw        r16x8,     q7
alias_qw        g16x8,     q8
alias_qw        b16x8,     q9

/* odd-numbered pixels of one channel, zero-extended to 16 bits */
alias           n16x16_o,  q11
alias           n16x16_ol, q11_l
alias           n16x16_oh, q11_h

/* 32-bit luma accumulators: even/odd pixels, low/high half */
alias           y32x16_el, q12
alias           y32x16_eh, q13
alias           y32x16_ol, q14
alias           y32x16_oh, q15

/* 16-bit luma after narrowing; these overlay the 32-bit accumulators */
alias           y16x16_e,  q12
alias           y16x16_el, q12_l
alias           y16x16_eh, q12_h
alias           y16x16_o,  q13
alias           y16x16_ol, q13_l
alias           y16x16_oh, q13_h

/* final 8-bit luma for sixteen pixels, built in place in y16x16_e */
alias           y8x16,     y16x16_e
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * init src
 *
 * Set up the constant registers used by the 32-bit kernel.
 *
 * \src points at nine 32-bit coefficients.  The two vld3 loads de-interleave
 * them with a stride of three, so q13/q14/q15 receive table elements
 * {0,3,6} / {1,4,7} / {2,5,8}.  Each vector is narrowed to 16 bits without
 * shifting (full coefficient precision is kept) into CO_R / CO_G / CO_B.
 *
 * \src is advanced by the first load (writeback) and left there.
 */
.macro init src
    /* bias constants do not depend on the table, set them up first */
    vmov.u8         BIAS_Y, #16
    vmov.u8         BIAS_U, #128

    /* load s32x3x3 */
    vld3.i32        {q13_l, q14_l, q15_l}, [\src]!
    vld3.i32        {q13_h[0], q14_h[0], q15_h[0]}, [\src]

    /* narrow to s16x3x3 */
    vmovn.i32       CO_R, q13
    vmovn.i32       CO_G, q14
    vmovn.i32       CO_B, q15
.endm
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * compute_y_16x1_step action, s8x16, coeff
 *
 * Split the sixteen 8-bit samples in \s8x16 into even and odd pixels, each
 * zero-extended to 16 bits (byte transpose against a zero vector), then
 * apply the widening \action (vmull to start, vmlal to accumulate) with the
 * scalar \coeff into the four 32-bit luma accumulators.
 *
 * clobber: \s8x16 (left holding the even pixels), n16x16_o
 */
.macro compute_y_16x1_step action, s8x16, coeff
    vmov.u8         n16x16_o, #0
    vtrn.u8         \s8x16, n16x16_o

    /* low half: even then odd pixels */
    \action         y32x16_el, \s8x16\()_l, \coeff
    \action         y32x16_ol, n16x16_ol, \coeff

    /* high half: even then odd pixels */
    \action         y32x16_eh, \s8x16\()_h, \coeff
    \action         y32x16_oh, n16x16_oh, \coeff
.endm
|
||||||
|
|
||||||
|
/*
 * compute_y_16x1
 *
 * in:      r8x16, g8x16, b8x16
 * out:     y8x16
 * clobber: q11-q15, r8x16, g8x16, b8x16
 *
 * Luma for sixteen pixels with 32-bit intermediates: widening
 * multiply-accumulate of the three channels, rounding right shift by 15
 * while narrowing to 16 bits, re-interleave even/odd pixels, add BIAS_Y.
 */
.macro compute_y_16x1
    /* the first step must be vmull, it initialises the sums */
    compute_y_16x1_step vmull, r8x16, CO_RY
    compute_y_16x1_step vmlal, g8x16, CO_GY
    compute_y_16x1_step vmlal, b8x16, CO_BY

    /*
     * The 16-bit destinations overlay q12/q13, which are also the even
     * 32-bit sources, so the four narrowing steps must run in this order.
     */
    vrshrn.i32      y16x16_el, y32x16_el, #15
    vrshrn.i32      y16x16_eh, y32x16_eh, #15
    vrshrn.i32      y16x16_ol, y32x16_ol, #15
    vrshrn.i32      y16x16_oh, y32x16_oh, #15

    /* merge even and odd pixels back into pixel order */
    vtrn.8          y16x16_e, y16x16_o
    vadd.u8         y8x16, y8x16, BIAS_Y
.endm
|
||||||
|
|
||||||
|
/* 32-bit chroma accumulators: low / high four downsampled pixels */
alias           c32x8_l, q14
alias           c32x8_h, q15

/* chroma narrowed to 16 bits */
alias_qw        c16x8,   q13
/* packed 8-bit chroma output, two 8-lane halves */
alias_qw        c8x8x2,  q10
|
||||||
|
|
||||||
|
/*
 * compute_chroma_8x1_step action, s16x8, coeff
 *
 * Apply the widening \action (vmull / vmlal) with the scalar \coeff to both
 * halves of the 16-bit channel \s16x8, targeting the 32-bit chroma sums.
 */
.macro compute_chroma_8x1_step action, s16x8, coeff
    \action         c32x8_l, \s16x8\()_l, \coeff
    \action         c32x8_h, \s16x8\()_h, \coeff
.endm
|
||||||
|
|
||||||
|
/*
 * compute_chroma_8x1 c, C
 *
 * in:      r16x8, g16x8, b16x8
 * out:     \c\()8x8   (\c is u or v, \C the matching upper-case letter)
 * clobber: q13-q15    (c16x8 and the two 32-bit accumulators)
 *
 * One chroma plane for eight downsampled pixels with 32-bit intermediates:
 * widening multiply-accumulate, rounding right shift by 15 while narrowing
 * to 16 bits, narrow again to 8 bits, add BIAS_\C.
 */
.macro compute_chroma_8x1 c, C
    compute_chroma_8x1_step vmull, r16x8, CO_R\C
    compute_chroma_8x1_step vmlal, g16x8, CO_G\C
    compute_chroma_8x1_step vmlal, b16x8, CO_B\C

    vrshrn.i32      c16x8_l, c32x8_l, #15
    vrshrn.i32      c16x8_h, c32x8_h, #15

    vmovn.i16       \c\()8x8, c16x8
    vadd.u8         \c\()8x8, \c\()8x8, BIAS_\C
.endm
|
||||||
|
|
||||||
|
|
||||||
|
/* instantiate rgbx_to_nv12_neon_32 from the shared 4:2:0 semi-planar loop */
loop_420sp rgbx, nv12, init, kernel_420_16x2, 32
|
291
libswscale/arm/rgb2yuv_neon_common.S
Normal file
291
libswscale/arm/rgb2yuv_neon_common.S
Normal file
@ -0,0 +1,291 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "libavutil/arm/asm.S"
|
||||||
|
|
||||||
|
/*
 * alias name, tgt, set=1
 *
 * Define \name as a register alias of \tgt (set != 0, the default) or
 * remove a previously defined alias \name (set == 0).
 */
.macro alias name, tgt, set=1
.if \set == 0
    .unreq          \name
.else
    \name .req      \tgt
.endif
.endm
|
||||||
|
|
||||||
|
/*
 * alias_dw_all qw, dw_l, dw_h
 *
 * Recursively define q<N>_l and q<N>_h as aliases of the two d registers
 * d<2N> and d<2N+1>, for N = \qw .. 15.  Alternate macro mode is needed
 * only for the %(expr) evaluation of the recursion arguments, so it is
 * switched on for exactly this definition and its single invocation.
 */
.altmacro

.macro alias_dw_all qw, dw_l, dw_h
    alias           q\qw\()_l, d\dw_l
    alias           q\qw\()_h, d\dw_h
.if \qw < 15
    alias_dw_all    %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
.endif
.endm

alias_dw_all 0, 0, 1

.noaltmacro
|
||||||
|
|
||||||
|
/*
 * alias_qw name, qw, set=1
 *
 * Alias (or, with set == 0, un-alias) a q register together with its two
 * halves: \name -> \qw, \name\()_l -> \qw\()_l, \name\()_h -> \qw\()_h.
 */
.macro alias_qw name, qw, set=1
    alias           \name\()_l, \qw\()_l, \set
    alias           \name\()_h, \qw\()_h, \set
    alias           \name\(),   \qw,      \set
.endm
|
||||||
|
|
||||||
|
/*
 * prologue / epilogue
 *
 * Save and restore r4-r12 plus the return address (10 words) and q4-q7
 * (4 * 16 bytes).  The epilogue returns by popping the saved lr into pc.
 */
.macro prologue
    push            {r4-r12, lr}
    vpush           {q4-q7}
.endm

.macro epilogue
    vpop            {q4-q7}
    pop             {r4-r12, pc}
.endm

/*
 * load_arg reg, ix
 *
 * Load stack-passed argument number \ix (counted from 0, the first one on
 * the stack being 4) into \reg.  The fixed part of the offset is the frame
 * pushed by the prologue, so this is only valid while sp is unchanged
 * since the prologue.
 */
.macro load_arg reg, ix
    ldr             \reg, [sp, #(10 * 4 + 4 * 16 + 4 * (\ix - 4))]
.endm
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Register allocation for the functions generated by loop_420sp:
 *
 * ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma,
 *               int width, int height,
 *               int y_stride, int c_stride, int src_stride,
 *               int32_t coeff_table[9]);
 *
 * alias_loop_420sp set=1 defines the names below, set=0 removes them.
 * Several names deliberately share a register because their live ranges
 * do not overlap inside loop_420sp.
 */
.macro alias_loop_420sp set=1
    /* r0-r3: the register-passed arguments */
    alias           src,         r0,       \set
    alias           src0,        src,      \set
    alias           y,           r1,       \set
    alias           y0,          y,        \set
    alias           chroma,      r2,       \set
    alias           width,       r3,       \set
    alias           header,      width,    \set

    /* r4-r7: arguments fetched from the stack */
    alias           height,      r4,       \set
    alias           y_stride,    r5,       \set
    alias           c_stride,    r6,       \set
    alias           c_padding,   c_stride, \set
    alias           src_stride,  r7,       \set

    /* r8-r12: loop state */
    alias           y0_end,      r8,       \set
    alias           src_padding, r9,       \set
    alias           y_padding,   r10,      \set
    alias           src1,        r11,      \set
    alias           y1,          r12,      \set

    /* r12 again: only needed until \init has consumed the table */
    alias           coeff_table, r12,      \set
.endm
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * loop_420sp s_fmt, d_fmt, init, kernel, precision
 *
 * Emit the exported function \s_fmt\()_to_\d_fmt\()_neon_\precision with
 * the register/stack layout described at alias_loop_420sp.
 *
 *   \init    macro taking the coefficient table pointer; sets up constants
 *   \kernel  macro converting a 16 pixel wide, 2 row high block; its last
 *            (optional) argument is the number of pixels to advance by
 *
 * Rows are processed in pairs (height is decremented by 2 per iteration).
 * If width is not a multiple of 16, each row pair starts with one kernel
 * call that converts 16 pixels but only advances the pointers by
 * width & 15; the following full blocks then overlap that first block.
 */
.macro loop_420sp s_fmt, d_fmt, init, kernel, precision

function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
    prologue

    alias_loop_420sp

    load_arg        height, 4
    load_arg        y_stride, 5
    load_arg        c_stride, 6
    load_arg        src_stride, 7
    load_arg        coeff_table, 8

    \init           coeff_table

    /* bytes to skip at the end of each row; must be computed before width
     * is overwritten by header (same register) */
    sub             y_padding, y_stride, width
    sub             c_padding, c_stride, width
    sub             src_padding, src_stride, width, LSL #2

    add             y0_end, y0, width
    and             header, width, #15

    /* second row of the pair; coeff_table (same register as y1) is dead */
    add             y1, y0, y_stride
    add             src1, src0, src_stride

0:
    cmp             header, #0
    beq             1f

    /* partial leading block: convert 16 pixels, advance by header */
    \kernel         \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header

1:
    \kernel         \s_fmt, \d_fmt, src0, src1, y0, y1, chroma

    /* y0 and y0_end are addresses: compare unsigned, so that a row which
     * crosses the 0x80000000 boundary is not cut short */
    cmp             y0, y0_end
    blo             1b
2:
    /* y1/src1 sit at the end of the odd row: step over the padding to the
     * next row pair */
    add             y0, y1, y_padding
    add             y0_end, y1, y_stride
    add             chroma, chroma, c_padding
    add             src0, src1, src_padding

    add             y1, y0, y_stride
    add             src1, src0, src_stride

    subs            height, height, #2

    bgt             0b

    epilogue

    alias_loop_420sp 0

endfunc
.endm
|
||||||
|
|
||||||
|
/*
 * downsample
 *
 * in:  r8x16, g8x16, b8x16   (first row of a pair)
 * out: r16x8, g16x8, b16x8   (sums of horizontally adjacent pixel pairs)
 */
.macro downsample
    vpaddl.u8       r16x8, r8x16
    vpaddl.u8       g16x8, g8x16
    vpaddl.u8       b16x8, b8x16
.endm
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * downsample_ars2: accumulate and right shift by 2
 *
 * in:     r8x16, g8x16, b8x16   (second row of a pair)
 * in/out: r16x8, g16x8, b16x8   (pair sums of the first row on entry,
 *                                rounded 2x2 averages on exit)
 */
.macro downsample_ars2
    /* red */
    vpadal.u8       r16x8, r8x16
    vrshr.u16       r16x8, r16x8, #2

    /* green */
    vpadal.u8       g16x8, g8x16
    vrshr.u16       g16x8, g16x8, #2

    /* blue */
    vpadal.u8       b16x8, b8x16
    vrshr.u16       b16x8, b16x8, #2
.endm
|
||||||
|
|
||||||
|
/*
 * store_y8_16x1 dst, count
 *
 * Store the sixteen luma bytes in y8x16 at \dst.
 *   count == 0: advance \dst past the stored data (writeback)
 *   otherwise:  all sixteen bytes are still stored, but \dst advances by
 *               \count only
 */
.macro store_y8_16x1 dst, count
.if \count == 0
    vstmia          \dst!, {y8x16}
.else
    vstmia          \dst, {y8x16}
    add             \dst, \dst, \count
.endif
.endm
|
||||||
|
|
||||||
|
/*
 * store_chroma_nv12_8x1 dst, count
 *
 * Store eight interleaved chroma pairs at \dst, U first.
 *   count == 0: advance \dst past the stored data (writeback)
 *   otherwise:  post-increment \dst by \count instead
 */
.macro store_chroma_nv12_8x1 dst, count
.if \count == 0
    vst2.i8         {u8x8, v8x8}, [\dst]!
.else
    vst2.i8         {u8x8, v8x8}, [\dst], \count
.endif
.endm

/*
 * store_chroma_nv21_8x1 dst, count
 *
 * Same as store_chroma_nv12_8x1, but with V stored first.
 */
.macro store_chroma_nv21_8x1 dst, count
.if \count == 0
    vst2.i8         {v8x8, u8x8}, [\dst]!
.else
    vst2.i8         {v8x8, u8x8}, [\dst], \count
.endif
.endm
|
||||||
|
|
||||||
|
/*
 * load_8888_16x1 a, b, c, d, src, count
 *
 * Load sixteen packed 4-byte pixels from \src and de-interleave them into
 * the four byte planes \a\()8x16 .. \d\()8x16 (low halves from the first
 * eight pixels, high halves from the next eight).
 *   count == 0: \src ends up 64 bytes further on
 *   otherwise:  all sixteen pixels are still read, but \src ends up only
 *               4 * \count bytes further on
 */
.macro load_8888_16x1 a, b, c, d, src, count
.if \count == 0
    vld4.8          {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
    vld4.8          {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]!
.else
    vld4.8          {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
    vld4.8          {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]
    /* undo the writeback of the first load, then step by count pixels */
    sub             \src, \src, #32
    add             \src, \src, \count, LSL #2
.endif
.endm
|
||||||
|
|
||||||
|
/* load sixteen pixels stored in memory byte order R, G, B, X */
.macro load_rgbx_16x1 src, count
    load_8888_16x1  r, g, b, x, \src, \count
.endm

/* load sixteen pixels stored in memory byte order B, G, R, X */
.macro load_bgrx_16x1 src, count
    load_8888_16x1  b, g, r, x, \src, \count
.endm
|
||||||
|
|
||||||
|
/* name (or, with set == 0, un-name) the source planes for R, G, B, X input */
.macro alias_src_rgbx set
    alias_src_8888  r, g, b, x, \set
.endm

/* name (or, with set == 0, un-name) the source planes for B, G, R, X input */
.macro alias_src_bgrx set
    alias_src_8888  b, g, r, x, \set
.endm
|
||||||
|
|
||||||
|
/* NV12: U occupies the low half of c8x8x2, V the high half */
.macro alias_dst_nv12 set
    alias           u8x8, c8x8x2_l, \set
    alias           v8x8, c8x8x2_h, \set
.endm

/* NV21: V occupies the low half of c8x8x2, U the high half */
.macro alias_dst_nv21 set
    alias           v8x8, c8x8x2_l, \set
    alias           u8x8, c8x8x2_h, \set
.endm
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * common aliases
 *
 * CO_R / CO_G / CO_B hold the 16-bit coefficients applied to one input
 * channel; lanes 0, 1, 2 are the Y, U and V coefficient respectively.
 */

alias           CO_R, d0
CO_RY .dn       d0.s16[0]
CO_RU .dn       d0.s16[1]
CO_RV .dn       d0.s16[2]

alias           CO_G, d1
CO_GY .dn       d1.s16[0]
CO_GU .dn       d1.s16[1]
CO_GV .dn       d1.s16[2]

alias           CO_B, d2
CO_BY .dn       d2.s16[0]
CO_BU .dn       d2.s16[1]
CO_BV .dn       d2.s16[2]

/* offsets added to the 8-bit results; U and V share one register */
alias           BIAS_U, d3
alias           BIAS_V, BIAS_U

alias           BIAS_Y, q2
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * alias_src_8888 a, b, c, d, set
 *
 * q3-q6 hold the four de-interleaved byte planes of sixteen source pixels.
 * The planes are named \a\()8x16 .. \d\()8x16 in memory byte order, e.g.
 * r, g, b, x gives R8G8B8X8 x16.
 */
.macro alias_src_8888 a, b, c, d, set
    alias_qw        \a\()8x16, q3, \set
    alias_qw        \b\()8x16, q4, \set
    alias_qw        \c\()8x16, q5, \set
    alias_qw        \d\()8x16, q6, \set
.endm
|
||||||
|
|
||||||
|
/*
 * kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count=0
 *
 * Convert a block of 16 x 2 packed pixels to 4:2:0 semi-planar output:
 * sixteen luma bytes for each of the two rows and eight chroma pairs.
 *
 *   \rgb0, \rgb1  source pointers of the upper / lower row
 *   \y0, \y1      luma pointers of the upper / lower row
 *   \chroma       interleaved chroma pointer
 *   \count        forwarded to the load/store macros; 0 (default) advances
 *                 every pointer by the full block
 *
 * Relies on compute_y_16x1 and compute_chroma_8x1 supplied by the file
 * that includes this one.
 */
.macro kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count=0
    alias_dst_\yuv_fmt
    alias_src_\rgb_fmt

    /* upper row: start the chroma sums, then luma */
    load_\rgb_fmt\()_16x1 \rgb0, \count
    downsample
    compute_y_16x1
    store_y8_16x1   \y0, \count

    /* lower row: finish the 2x2 averages, then luma */
    load_\rgb_fmt\()_16x1 \rgb1, \count
    downsample_ars2
    compute_y_16x1
    store_y8_16x1   \y1, \count

    /* chroma from the averaged pixels */
    compute_chroma_8x1 u, U
    compute_chroma_8x1 v, V
    store_chroma_\yuv_fmt\()_8x1 \chroma, \count

    alias_dst_\yuv_fmt 0
    alias_src_\rgb_fmt 0
.endm
|
79
libswscale/arm/swscale_unscaled.c
Normal file
79
libswscale/arm/swscale_unscaled.c
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
|
||||||
|
*
|
||||||
|
* This file is part of FFmpeg.
|
||||||
|
*
|
||||||
|
* FFmpeg is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU Lesser General Public
|
||||||
|
* License as published by the Free Software Foundation; either
|
||||||
|
* version 2.1 of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* FFmpeg is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
* Lesser General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU Lesser General Public
|
||||||
|
* License along with FFmpeg; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "config.h"
|
||||||
|
#include "libswscale/swscale.h"
|
||||||
|
#include "libswscale/swscale_internal.h"
|
||||||
|
#include "libavutil/arm/cpu.h"
|
||||||
|
|
||||||
|
/*
 * NEON kernels implemented in rgb2yuv_neon_32.S / rgb2yuv_neon_16.S.
 *
 * Both convert packed 4-byte pixels at src into a luma plane (y) and an
 * interleaved chroma plane (chroma).  Strides are in bytes; coeff_tbl holds
 * the nine RGB -> YUV coefficients.  The _32 variant uses 32-bit
 * intermediates, the _16 variant 16-bit ones.
 */
void rgbx_to_nv12_neon_32(const uint8_t *src, uint8_t *y, uint8_t *chroma,
                          int width, int height,
                          int y_stride, int c_stride, int src_stride,
                          int32_t coeff_tbl[9]);

void rgbx_to_nv12_neon_16(const uint8_t *src, uint8_t *y, uint8_t *chroma,
                          int width, int height,
                          int y_stride, int c_stride, int src_stride,
                          int32_t coeff_tbl[9]);
|
||||||
|
|
||||||
|
/**
 * Conversion callback: RGBA -> NV12 through the NEON kernel with 32-bit
 * intermediates (installed when SWS_ACCURATE_RND is requested).
 *
 * @param context    conversion context; srcW and input_rgb2yuv_table are read
 * @param src        source plane pointers, only src[0] is used
 * @param srcStride  source strides in bytes, only srcStride[0] is used
 * @param srcSliceY  first row of the slice
 * @param srcSliceH  number of rows in the slice
 * @param dst        destination planes: dst[0] luma, dst[1] interleaved chroma
 * @param dstStride  destination strides in bytes
 * @return the number of rows converted (srcSliceH), as expected from a
 *         conversion callback; the previous code returned 0
 */
static int rgbx_to_nv12_neon_32_wrapper(SwsContext *context, const uint8_t *src[],
                                        int srcStride[], int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
{
    /* NOTE(review): src[0] is offset by srcSliceY rows here; confirm whether
     * the caller passes the picture start or the slice start in src[]. */
    /* NOTE(review): the kernel consumes rows in pairs, so an odd srcSliceH
     * would touch one row past the slice - confirm callers never do that. */
    rgbx_to_nv12_neon_32(src[0] + srcSliceY * srcStride[0],
                         dst[0] + srcSliceY * dstStride[0],
                         dst[1] + (srcSliceY / 2) * dstStride[1],
                         context->srcW, srcSliceH,
                         dstStride[0], dstStride[1], srcStride[0],
                         context->input_rgb2yuv_table);

    return srcSliceH;
}
|
||||||
|
|
||||||
|
/**
 * Conversion callback: RGBA -> NV12 through the NEON kernel with 16-bit
 * intermediates (installed when SWS_ACCURATE_RND is not requested).
 *
 * @param context    conversion context; srcW and input_rgb2yuv_table are read
 * @param src        source plane pointers, only src[0] is used
 * @param srcStride  source strides in bytes, only srcStride[0] is used
 * @param srcSliceY  first row of the slice
 * @param srcSliceH  number of rows in the slice
 * @param dst        destination planes: dst[0] luma, dst[1] interleaved chroma
 * @param dstStride  destination strides in bytes
 * @return the number of rows converted (srcSliceH), as expected from a
 *         conversion callback; the previous code returned 0
 */
static int rgbx_to_nv12_neon_16_wrapper(SwsContext *context, const uint8_t *src[],
                                        int srcStride[], int srcSliceY, int srcSliceH,
                                        uint8_t *dst[], int dstStride[])
{
    /* NOTE(review): src[0] is offset by srcSliceY rows here; confirm whether
     * the caller passes the picture start or the slice start in src[]. */
    /* NOTE(review): the kernel consumes rows in pairs, so an odd srcSliceH
     * would touch one row past the slice - confirm callers never do that. */
    rgbx_to_nv12_neon_16(src[0] + srcSliceY * srcStride[0],
                         dst[0] + srcSliceY * dstStride[0],
                         dst[1] + (srcSliceY / 2) * dstStride[1],
                         context->srcW, srcSliceH,
                         dstStride[0], dstStride[1], srcStride[0],
                         context->input_rgb2yuv_table);

    return srcSliceH;
}
|
||||||
|
|
||||||
|
/**
 * Install the NEON RGBA -> NV12 converter in @p c when it is applicable.
 *
 * The fast path is taken only for RGBA input, NV12 output, a width of at
 * least 16 pixels and even width and height: the kernels convert rows in
 * pairs and emit one chroma pair per two pixels, so an odd height would
 * run one row past the picture and an odd width would misalign the chroma
 * plane.  In every other case c->swscale is left untouched.
 *
 * SWS_ACCURATE_RND selects the 32-bit precision kernel, otherwise the
 * 16-bit one is used.
 */
static void get_unscaled_swscale_neon(SwsContext *c) {
    int accurate_rnd = c->flags & SWS_ACCURATE_RND;
    int even_dims    = !((c->srcW | c->srcH) & 1);

    if (c->srcFormat == AV_PIX_FMT_RGBA
            && c->dstFormat == AV_PIX_FMT_NV12
            && (c->srcW >= 16)
            && even_dims) {
        c->swscale = accurate_rnd ? rgbx_to_nv12_neon_32_wrapper
                                  : rgbx_to_nv12_neon_16_wrapper;
    }
}
|
||||||
|
|
||||||
|
/**
 * ARM entry point for choosing an unscaled converter: hands @p c to the NEON
 * selection logic when the running CPU reports NEON support, and does
 * nothing otherwise.
 */
void ff_get_unscaled_swscale_arm(SwsContext *c)
{
    if (!have_neon(av_get_cpu_flags()))
        return;

    get_unscaled_swscale_neon(c);
}
|
@ -835,6 +835,7 @@ extern const AVClass sws_context_class;
|
|||||||
void ff_get_unscaled_swscale(SwsContext *c);
|
void ff_get_unscaled_swscale(SwsContext *c);
|
||||||
void ff_get_unscaled_swscale_bfin(SwsContext *c);
|
void ff_get_unscaled_swscale_bfin(SwsContext *c);
|
||||||
void ff_get_unscaled_swscale_ppc(SwsContext *c);
|
void ff_get_unscaled_swscale_ppc(SwsContext *c);
|
||||||
|
void ff_get_unscaled_swscale_arm(SwsContext *c);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return function pointer to fastest main scaler path function depending
|
* Return function pointer to fastest main scaler path function depending
|
||||||
|
@ -1384,6 +1384,9 @@ void ff_get_unscaled_swscale(SwsContext *c)
|
|||||||
ff_get_unscaled_swscale_bfin(c);
|
ff_get_unscaled_swscale_bfin(c);
|
||||||
if (ARCH_PPC)
|
if (ARCH_PPC)
|
||||||
ff_get_unscaled_swscale_ppc(c);
|
ff_get_unscaled_swscale_ppc(c);
|
||||||
|
if (ARCH_ARM)
|
||||||
|
ff_get_unscaled_swscale_arm(c);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Convert the palette to the same packed 32-bit format as the palette */
|
/* Convert the palette to the same packed 32-bit format as the palette */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user