x86/dsputil: use HADDD where applicable
Signed-off-by: James Almer <jamrial@gmail.com>
Reviewed-by: "Ronald S. Bultje" <rsbultje@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
committed by
Michael Niedermayer
parent
76ed71a72b
commit
197fe392db
@@ -50,14 +50,7 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order
|
|||||||
paddd m2, m1
|
paddd m2, m1
|
||||||
add orderq, mmsize*2
|
add orderq, mmsize*2
|
||||||
jl .loop
|
jl .loop
|
||||||
%if mmsize == 16
|
HADDD m2, m0
|
||||||
movhlps m0, m2
|
|
||||||
paddd m2, m0
|
|
||||||
pshuflw m0, m2, 0x4e
|
|
||||||
%else
|
|
||||||
pshufw m0, m2, 0x4e
|
|
||||||
%endif
|
|
||||||
paddd m2, m0
|
|
||||||
movd eax, m2
|
movd eax, m2
|
||||||
%if mmsize == 8
|
%if mmsize == 8
|
||||||
emms
|
emms
|
||||||
@@ -99,14 +92,7 @@ cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul
|
|||||||
mova [v1q + orderq + mmsize], m3
|
mova [v1q + orderq + mmsize], m3
|
||||||
add orderq, mmsize*2
|
add orderq, mmsize*2
|
||||||
jl .loop
|
jl .loop
|
||||||
%if mmsize == 16
|
HADDD m6, m0
|
||||||
movhlps m0, m6
|
|
||||||
paddd m6, m0
|
|
||||||
pshuflw m0, m6, 0x4e
|
|
||||||
%else
|
|
||||||
pshufw m0, m6, 0x4e
|
|
||||||
%endif
|
|
||||||
paddd m6, m0
|
|
||||||
movd eax, m6
|
movd eax, m6
|
||||||
RET
|
RET
|
||||||
%endmacro
|
%endmacro
|
||||||
@@ -200,10 +186,7 @@ SCALARPRODUCT_LOOP 4
|
|||||||
SCALARPRODUCT_LOOP 2
|
SCALARPRODUCT_LOOP 2
|
||||||
SCALARPRODUCT_LOOP 0
|
SCALARPRODUCT_LOOP 0
|
||||||
.end:
|
.end:
|
||||||
movhlps m0, m6
|
HADDD m6, m0
|
||||||
paddd m6, m0
|
|
||||||
pshuflw m0, m6, 0x4e
|
|
||||||
paddd m6, m0
|
|
||||||
movd eax, m6
|
movd eax, m6
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user