x86: Move XOP emulation to x86util
We need the emulation to support the cases where the first argument is the same as the fourth. To achieve this, a fifth argument working as a temporary may be needed. Emulation that doesn't obey the original instruction semantics can't be in x86inc.
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
committed by
Michael Niedermayer
parent
6c6e4dd139
commit
3f3d748cab
@@ -44,21 +44,21 @@ ALIGN 16
|
|||||||
test jq, jq
|
test jq, jq
|
||||||
jz .end_order
|
jz .end_order
|
||||||
.loop_order:
|
.loop_order:
|
||||||
pmacsdql m2, m0, m1, m2
|
PMACSDQL m2, m0, m1, m2, m0
|
||||||
movd m0, [decodedq+jq*4]
|
movd m0, [decodedq+jq*4]
|
||||||
pmacsdql m3, m1, m0, m3
|
PMACSDQL m3, m1, m0, m3, m1
|
||||||
movd m1, [coeffsq+jq*4]
|
movd m1, [coeffsq+jq*4]
|
||||||
inc jq
|
inc jq
|
||||||
jl .loop_order
|
jl .loop_order
|
||||||
.end_order:
|
.end_order:
|
||||||
pmacsdql m2, m0, m1, m2
|
PMACSDQL m2, m0, m1, m2, m0
|
||||||
psrlq m2, m4
|
psrlq m2, m4
|
||||||
movd m0, [decodedq]
|
movd m0, [decodedq]
|
||||||
paddd m0, m2
|
paddd m0, m2
|
||||||
movd [decodedq], m0
|
movd [decodedq], m0
|
||||||
sub lend, 2
|
sub lend, 2
|
||||||
jl .ret
|
jl .ret
|
||||||
pmacsdql m3, m1, m0, m3
|
PMACSDQL m3, m1, m0, m3, m1
|
||||||
psrlq m3, m4
|
psrlq m3, m4
|
||||||
movd m1, [decodedq+4]
|
movd m1, [decodedq+4]
|
||||||
paddd m1, m3
|
paddd m1, m3
|
||||||
|
@@ -1407,25 +1407,6 @@ AVX_INSTR pfmul, 1, 0, 1
|
|||||||
%undef i
|
%undef i
|
||||||
%undef j
|
%undef j
|
||||||
|
|
||||||
%macro FMA_INSTR 3
|
|
||||||
%macro %1 4-7 %1, %2, %3
|
|
||||||
%if cpuflag(xop)
|
|
||||||
v%5 %1, %2, %3, %4
|
|
||||||
%elifidn %1, %4
|
|
||||||
%6 %2, %3
|
|
||||||
%7 %1, %2
|
|
||||||
%else
|
|
||||||
%6 %1, %2, %3
|
|
||||||
%7 %1, %4
|
|
||||||
%endif
|
|
||||||
%endmacro
|
|
||||||
%endmacro
|
|
||||||
|
|
||||||
FMA_INSTR pmacsdd, pmulld, paddd
|
|
||||||
FMA_INSTR pmacsww, pmullw, paddw
|
|
||||||
FMA_INSTR pmacsdql, pmuldq, paddq
|
|
||||||
FMA_INSTR pmadcswd, pmaddwd, paddd
|
|
||||||
|
|
||||||
; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
|
; tzcnt is equivalent to "rep bsf" and is backwards-compatible with bsf.
|
||||||
; This lets us use tzcnt without bumping the yasm version requirement yet.
|
; This lets us use tzcnt without bumping the yasm version requirement yet.
|
||||||
%define tzcnt rep bsf
|
%define tzcnt rep bsf
|
||||||
|
@@ -666,6 +666,25 @@
|
|||||||
%endif
|
%endif
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
%macro PMA_EMU 4 ; generator: %1 = name of the wrapper macro to define, %2 = XOP instruction (emitted with a "v" prefix), %3 = multiply instruction, %4 = add instruction
|
||||||
|
%macro %1 5-8 %2, %3, %4 ; wrapper: %1 = dst, %2/%3 = multiplicands, %4 = addend, %5 = temporary; %6-%8 default to the generator's %2-%4
|
||||||
|
%if cpuflag(xop)
|
||||||
|
v%6 %1, %2, %3, %4 ; XOP available: emit the four-operand instruction directly; the temporary %5 is not used
|
||||||
|
%elifidn %1, %4
|
||||||
|
%7 %5, %2, %3 ; dst is the same operand as the addend: multiply into the temporary so the addend is not overwritten
|
||||||
|
%8 %1, %4, %5 ; dst = addend + product held in the temporary
|
||||||
|
%else
|
||||||
|
%7 %1, %2, %3 ; dst differs from the addend: the product can be written straight into dst
|
||||||
|
%8 %1, %4 ; dst += addend
|
||||||
|
%endif
|
||||||
|
%endmacro
|
||||||
|
%endmacro
|
||||||
|
|
||||||
|
PMA_EMU PMACSWW, pmacsww, pmullw, paddw ; non-XOP fallback: pmullw + paddw
|
||||||
|
PMA_EMU PMACSDD, pmacsdd, pmulld, paddd ; sse4 emulation (non-XOP fallback: pmulld + paddd)
|
||||||
|
PMA_EMU PMACSDQL, pmacsdql, pmuldq, paddq ; sse4 emulation (non-XOP fallback: pmuldq + paddq)
|
||||||
|
PMA_EMU PMADCSWD, pmadcswd, pmaddwd, paddd ; non-XOP fallback: pmaddwd + paddd
|
||||||
|
|
||||||
; Wrapper for non-FMA version of fmaddps
|
; Wrapper for non-FMA version of fmaddps
|
||||||
%macro FMULADD_PS 5
|
%macro FMULADD_PS 5
|
||||||
%if cpuflag(fma3) || cpuflag(fma4)
|
%if cpuflag(fma3) || cpuflag(fma4)
|
||||||
|
Reference in New Issue
Block a user