x86/aacpsdsp: precompute constant factors
Inspired by the optimization done to the C version by Rémi Denis-Courmont. Signed-off-by: James Almer <jamrial@gmail.com>
This commit is contained in:
parent
08edacc248
commit
2bcf86d53d
@ -400,29 +400,32 @@ HYBRID_SYNTHESIS_DEINT
|
|||||||
; const float (*filter)[8][2],
|
; const float (*filter)[8][2],
|
||||||
; ptrdiff_t stride, int n);
|
; ptrdiff_t stride, int n);
|
||||||
;*******************************************************************
|
;*******************************************************************
|
||||||
%macro PS_HYBRID_ANALYSIS_LOOP 3
|
%macro PS_HYBRID_ANALYSIS_IN 1
|
||||||
movu %1, [inq+mmsize*%3]
|
movu m0, [inq+mmsize*%1]
|
||||||
movu m1, [inq+mmsize*(5-%3)+8]
|
movu m1, [inq+mmsize*(5-%1)+8]
|
||||||
%if cpuflag(sse3)
|
mova m3, m0
|
||||||
pshufd %2, %1, q2301
|
|
||||||
pshufd m4, m1, q0123
|
|
||||||
pshufd m1, m1, q1032
|
|
||||||
pshufd m2, [filterq+nq+mmsize*%3], q2301
|
|
||||||
addsubps %2, m4
|
|
||||||
addsubps %1, m1
|
|
||||||
%else
|
|
||||||
mova m2, [filterq+nq+mmsize*%3]
|
|
||||||
mova %2, %1
|
|
||||||
mova m4, m1
|
mova m4, m1
|
||||||
shufps %2, %2, q2301
|
shufps m3, m3, q2301
|
||||||
shufps m4, m4, q0123
|
shufps m4, m4, q0123
|
||||||
shufps m1, m1, q1032
|
shufps m1, m1, q1032
|
||||||
shufps m2, m2, q2301
|
%if cpuflag(sse3)
|
||||||
|
addsubps m3, m4
|
||||||
|
addsubps m0, m1
|
||||||
|
%else
|
||||||
xorps m4, m7
|
xorps m4, m7
|
||||||
xorps m1, m7
|
xorps m1, m7
|
||||||
subps %2, m4
|
subps m3, m4
|
||||||
subps %1, m1
|
subps m0, m1
|
||||||
%endif
|
%endif
|
||||||
|
mova [rsp+mmsize*%1*2], m3
|
||||||
|
mova [rsp+mmsize+mmsize*%1*2], m0
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
%macro PS_HYBRID_ANALYSIS_LOOP 3
|
||||||
|
mova m2, [filterq+nq+mmsize*%3]
|
||||||
|
shufps m2, m2, q2301
|
||||||
|
mova %2, [rsp+mmsize*%3*2]
|
||||||
|
mova %1, [rsp+mmsize+mmsize*%3*2]
|
||||||
mulps %2, m2
|
mulps %2, m2
|
||||||
mulps %1, m2
|
mulps %1, m2
|
||||||
%if %3
|
%if %3
|
||||||
@ -432,7 +435,7 @@ HYBRID_SYNTHESIS_DEINT
|
|||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
%macro PS_HYBRID_ANALYSIS 0
|
%macro PS_HYBRID_ANALYSIS 0
|
||||||
cglobal ps_hybrid_analysis, 5, 5, 8, out, in, filter, stride, n
|
cglobal ps_hybrid_analysis, 5, 5, 8, 24 * 4, out, in, filter, stride, n
|
||||||
%if cpuflag(sse3)
|
%if cpuflag(sse3)
|
||||||
%define MOVH movsd
|
%define MOVH movsd
|
||||||
%else
|
%else
|
||||||
@ -443,6 +446,9 @@ cglobal ps_hybrid_analysis, 5, 5, 8, out, in, filter, stride, n
|
|||||||
add filterq, nq
|
add filterq, nq
|
||||||
neg nq
|
neg nq
|
||||||
mova m7, [ps_p1m1p1m1]
|
mova m7, [ps_p1m1p1m1]
|
||||||
|
PS_HYBRID_ANALYSIS_IN 0
|
||||||
|
PS_HYBRID_ANALYSIS_IN 1
|
||||||
|
PS_HYBRID_ANALYSIS_IN 2
|
||||||
|
|
||||||
align 16
|
align 16
|
||||||
.loop:
|
.loop:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user