x86: consolidate PMINSD/PMAXSD/CLIPD into cpuflag-dispatching macros

  What the hunks below do:
    * libavutil/x86/x86util.asm
        - PMINSD_MMX / PMAXSD_MMX become PMINSD / PMAXSD and pick an
          implementation with cpuflag():
            PMINSD: sse4 -> pminsd; sse2 -> cvtdq2ps/minps/cvtps2dq;
                    otherwise the integer compare/xor/and sequence (uses tmp).
            PMAXSD: sse4 -> pmaxsd; otherwise the integer compare/mask
                    sequence (uses tmp).
        - CLIPD_MMX / CLIPD_SSE2 / CLIPD_SSE41 are merged into one CLIPD
          with the same three-way selection.
        - ABSD2_MMX is renamed to ABSD2.
        - One "%elif cpuflag(mmx)" fallback branch becomes a plain "%else".
    * libavcodec/x86/audiodsp.asm: the per-instantiation
      "%define CLIPD CLIPD_xxx" lines are dropped.
    * libavfilter/x86/yadif-16.asm: the file-local PMINSD / PMAXSD are
      removed.
    * libswscale/x86/scale.asm: the open-coded mmx/sse2/sse4 selection is
      replaced by "PMINSD m0, m2, m4".

  NOTE(review): on SSE2 without SSE4 the shared PMINSD (and CLIPD) read the
  src/min/max operands as packed *floats*. The code replaced in scale.asm
  did the same for sse2/ssse3, but the PMINSD removed from yadif-16.asm was
  integer-only on every non-SSE4 target. Confirm how yadif-16.asm calls
  PMINSD under sse2/ssse3 before merging that hunk.

  NOTE(review): no caller of ABSD2_MMX is updated in this patch; verify that
  none remain elsewhere in the tree.

  NOTE: column alignment inside context lines is normalised; apply with
  whitespace-insensitive matching (git apply --ignore-whitespace, patch -l).

diff --git a/libavcodec/x86/audiodsp.asm b/libavcodec/x86/audiodsp.asm
index 3973808ca5..de395e5fa8 100644
--- a/libavcodec/x86/audiodsp.asm
+++ b/libavcodec/x86/audiodsp.asm
@@ -62,7 +62,7 @@ SCALARPRODUCT
 ; %1 = number of xmm registers used
 ; %2 = number of inline load/process/store loops per asm loop
 ; %3 = process 4*mmsize (%3=0) or 8*mmsize (%3=1) bytes per loop
-; %4 = CLIPD function takes min/max as float instead of int (CLIPD_SSE2)
+; %4 = CLIPD function takes min/max as float instead of int (SSE2 version)
 ; %5 = suffix
 %macro VECTOR_CLIP_INT32 4-5
 cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
@@ -118,14 +118,11 @@ cglobal vector_clip_int32%5, 5,5,%1, dst, src, min, max, len
 %endmacro
 
 INIT_MMX mmx
-%define CLIPD CLIPD_MMX
 VECTOR_CLIP_INT32 0, 1, 0, 0
 INIT_XMM sse2
 VECTOR_CLIP_INT32 6, 1, 0, 0, _int
-%define CLIPD CLIPD_SSE2
 VECTOR_CLIP_INT32 6, 2, 0, 1
 INIT_XMM sse4
-%define CLIPD CLIPD_SSE41
 %ifdef m8
 VECTOR_CLIP_INT32 11, 1, 1, 0
 %else
diff --git a/libavfilter/x86/yadif-16.asm b/libavfilter/x86/yadif-16.asm
index 79d127dfaa..9053b378a5 100644
--- a/libavfilter/x86/yadif-16.asm
+++ b/libavfilter/x86/yadif-16.asm
@@ -54,30 +54,6 @@ SECTION .text
 %endif
 %endmacro
 
-%macro PMINSD 3
-%if cpuflag(sse4)
-    pminsd    %1, %2
-%else
-    mova      %3, %2
-    pcmpgtd   %3, %1
-    pand      %1, %3
-    pandn     %3, %2
-    por       %1, %3
-%endif
-%endmacro
-
-%macro PMAXSD 3
-%if cpuflag(sse4)
-    pmaxsd    %1, %2
-%else
-    mova      %3, %1
-    pcmpgtd   %3, %2
-    pand      %1, %3
-    pandn     %3, %2
-    por       %1, %3
-%endif
-%endmacro
-
 %macro PMAXUW 2
 %if cpuflag(sse4)
     pmaxuw    %1, %2
diff --git a/libavutil/x86/x86util.asm b/libavutil/x86/x86util.asm
index e1220dfc1a..21419125d5 100644
--- a/libavutil/x86/x86util.asm
+++ b/libavutil/x86/x86util.asm
@@ -357,7 +357,7 @@
 %endif
 %endmacro
 
-%macro ABSB 2 ; source mmreg, temp mmreg (unused for ssse3)
+%macro ABSB 2 ; source mmreg, temp mmreg (unused for SSSE3)
 %if cpuflag(ssse3)
     pabsb     %1, %1
 %else
@@ -381,7 +381,7 @@
 %endif
 %endmacro
 
-%macro ABSD2_MMX 4
+%macro ABSD2 4
     pxor      %3, %3
     pxor      %4, %4
     pcmpgtd   %3, %1
@@ -475,7 +475,7 @@
 %else
     palignr   %1, %2, %3
 %endif
-%elif cpuflag(mmx) ; [dst,] src1, src2, imm, tmp
+%else ; [dst,] src1, src2, imm, tmp
 %define %%dst %1
 %if %0==5
 %ifnidn %1, %2
@@ -799,37 +799,47 @@
     pminsw    %1, %3
 %endmacro
 
-%macro PMINSD_MMX 3 ; dst, src, tmp
+%macro PMINSD 3 ; dst, src, tmp/unused
+%if cpuflag(sse4)
+    pminsd    %1, %2
+%elif cpuflag(sse2)
+    cvtdq2ps  %1, %1
+    minps     %1, %2
+    cvtps2dq  %1, %1
+%else
     mova      %3, %2
     pcmpgtd   %3, %1
     pxor      %1, %2
     pand      %1, %3
     pxor      %1, %2
+%endif
 %endmacro
 
-%macro PMAXSD_MMX 3 ; dst, src, tmp
+%macro PMAXSD 3 ; dst, src, tmp/unused
+%if cpuflag(sse4)
+    pmaxsd    %1, %2
+%else
     mova      %3, %1
     pcmpgtd   %3, %2
     pand      %1, %3
     pandn     %3, %2
     por       %1, %3
+%endif
 %endmacro
 
-%macro CLIPD_MMX 3-4 ; src/dst, min, max, tmp
-    PMINSD_MMX %1, %3, %4
-    PMAXSD_MMX %1, %2, %4
-%endmacro
-
-%macro CLIPD_SSE2 3-4 ; src/dst, min (float), max (float), unused
+%macro CLIPD 3-4
+%if cpuflag(sse4) ; src/dst, min, max, unused
+    pminsd    %1, %3
+    pmaxsd    %1, %2
+%elif cpuflag(sse2) ; src/dst, min (float), max (float), unused
     cvtdq2ps  %1, %1
     minps     %1, %3
     maxps     %1, %2
     cvtps2dq  %1, %1
-%endmacro
-
-%macro CLIPD_SSE41 3-4 ; src/dst, min, max, unused
-    pminsd    %1, %3
-    pmaxsd    %1, %2
+%else ; src/dst, min, max, tmp
+    PMINSD    %1, %3, %4
+    PMAXSD    %1, %2, %4
+%endif
 %endmacro
 
 %macro VBROADCASTSS 2 ; dst xmm/ymm, src m32/xmm
diff --git a/libswscale/x86/scale.asm b/libswscale/x86/scale.asm
index f9781703a9..83cabff722 100644
--- a/libswscale/x86/scale.asm
+++ b/libswscale/x86/scale.asm
@@ -364,15 +364,7 @@ cglobal hscale%1to%2_%4, %5, 10, %6, pos0, dst, w, srcmem, filter, fltpos, fltsi
     movd [dstq+wq*2], m0
 %endif ; %3 ==/!= X
 %else ; %2 == 19
-%if mmsize == 8
-    PMINSD_MMX m0, m2, m4
-%elif cpuflag(sse4)
-    pminsd    m0, m2
-%else ; sse2/ssse3
-    cvtdq2ps  m0, m0
-    minps     m0, m2
-    cvtps2dq  m0, m0
-%endif ; mmx/sse2/ssse3/sse4
+    PMINSD    m0, m2, m4
 %ifnidn %3, X
     mova [dstq+wq*(4>>wshr)], m0
 %else ; %3 == X