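2x faster h264_idct_add8_10: replace the per-block loop with unrolled ADD16_OP_INTRA calls that handle the chroma blocks in pairs.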
Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
This commit is contained in:
committed by
Ronald S. Bultje
parent
cc9947ffbe
commit
994c3550ff
@@ -249,16 +249,17 @@ IDCT8_DC_ADD avx
|
|||||||
jmp .skipadd%2
|
jmp .skipadd%2
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
%assign last_block 16
|
||||||
%macro ADD16_OP_INTRA 3
|
%macro ADD16_OP_INTRA 3
|
||||||
cmp word [r4+%3], 0
|
cmp word [r4+%3], 0
|
||||||
jnz .ac%2
|
jnz .ac%2
|
||||||
mov r6d, dword [r2+ 0]
|
mov r5d, dword [r2+ 0]
|
||||||
or r6d, dword [r2+64]
|
or r5d, dword [r2+64]
|
||||||
jz .skipblock%2
|
jz .skipblock%2
|
||||||
mov r5d, dword [r1+(%2+0)*4]
|
mov r5d, dword [r1+(%2+0)*4]
|
||||||
call idct_dc_add_%1
|
call idct_dc_add_%1
|
||||||
.skipblock%2:
|
.skipblock%2:
|
||||||
%if %2<15
|
%if %2<last_block-2
|
||||||
add r2, 128
|
add r2, 128
|
||||||
%endif
|
%endif
|
||||||
.skipadd%2:
|
.skipadd%2:
|
||||||
@@ -302,47 +303,33 @@ INIT_AVX
|
|||||||
IDCT_ADD16INTRA_10 avx
|
IDCT_ADD16INTRA_10 avx
|
||||||
%endif
|
%endif
|
||||||
|
|
||||||
|
%assign last_block 24
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
|
; h264_idct_add8(pixel **dst, const int *block_offset, dctcoef *block, int stride, const uint8_t nnzc[6*8])
|
||||||
;-----------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------
|
||||||
%macro IDCT_ADD8 1
|
%macro IDCT_ADD8 1
|
||||||
cglobal h264_idct_add8_10_%1,5,7
|
cglobal h264_idct_add8_10_%1,5,7
|
||||||
mov r5, 16
|
|
||||||
add r2, 1024
|
|
||||||
%ifdef PIC
|
|
||||||
lea r11, [scan8_mem]
|
|
||||||
%endif
|
|
||||||
%ifdef ARCH_X86_64
|
%ifdef ARCH_X86_64
|
||||||
mov r10, r0
|
mov r10, r0
|
||||||
%endif
|
%endif
|
||||||
.nextblock:
|
add r2, 1024
|
||||||
movzx r6, byte [scan8+r5]
|
mov r0, [r0]
|
||||||
movzx r6, byte [r4+r6]
|
ADD16_OP_INTRA %1, 16, 1+1*8
|
||||||
or r6d, dword [r2]
|
ADD16_OP_INTRA %1, 18, 1+2*8
|
||||||
test r6, r6
|
|
||||||
jz .skipblock
|
|
||||||
%ifdef ARCH_X86_64
|
%ifdef ARCH_X86_64
|
||||||
mov r0d, dword [r1+r5*4]
|
mov r0, [r10+gprsize]
|
||||||
add r0, [r10]
|
|
||||||
%else
|
%else
|
||||||
mov r0, r0m
|
mov r0, r0m
|
||||||
mov r0, [r0]
|
mov r0, [r0+gprsize]
|
||||||
add r0, dword [r1+r5*4]
|
|
||||||
%endif
|
%endif
|
||||||
IDCT4_ADD_10 r0, r2, r3
|
ADD16_OP_INTRA %1, 20, 1+4*8
|
||||||
.skipblock:
|
ADD16_OP_INTRA %1, 22, 1+5*8
|
||||||
inc r5
|
|
||||||
add r2, 64
|
|
||||||
test r5, 3
|
|
||||||
jnz .nextblock
|
|
||||||
%ifdef ARCH_X86_64
|
|
||||||
add r10, gprsize
|
|
||||||
%else
|
|
||||||
add r0mp, gprsize
|
|
||||||
%endif
|
|
||||||
test r5, 4
|
|
||||||
jnz .nextblock
|
|
||||||
REP_RET
|
REP_RET
|
||||||
|
AC %1, 16
|
||||||
|
AC %1, 18
|
||||||
|
AC %1, 20
|
||||||
|
AC %1, 22
|
||||||
|
|
||||||
%endmacro ; IDCT_ADD8
|
%endmacro ; IDCT_ADD8
|
||||||
|
|
||||||
INIT_XMM
|
INIT_XMM
|
||||||
|
Reference in New Issue
Block a user