diff --git a/libavutil/x86/pixelutils.asm b/libavutil/x86/pixelutils.asm index 15213d92d8..7522f24a42 100644 --- a/libavutil/x86/pixelutils.asm +++ b/libavutil/x86/pixelutils.asm @@ -109,18 +109,24 @@ cglobal pixelutils_sad_16x16, 4,4,0, src1, stride1, src2, stride2 ;------------------------------------------------------------------------------- INIT_XMM sse2 cglobal pixelutils_sad_16x16, 4,4,5, src1, stride1, src2, stride2 - pxor m4, m4 -%rep 8 - movu m0, [src1q] - movu m1, [src1q + stride1q] + movu m4, [src1q] movu m2, [src2q] + movu m1, [src1q + stride1q] + movu m3, [src2q + stride2q] + psadbw m4, m2 + psadbw m1, m3 + paddw m4, m1 +%rep 7 + lea src1q, [src1q + 2*stride1q] + lea src2q, [src2q + 2*stride2q] + movu m0, [src1q] + movu m2, [src2q] + movu m1, [src1q + stride1q] movu m3, [src2q + stride2q] psadbw m0, m2 psadbw m1, m3 paddw m4, m0 paddw m4, m1 - lea src1q, [src1q + 2*stride1q] - lea src2q, [src2q + 2*stride2q] %endrep movhlps m0, m4 paddw m4, m0