diff --git a/configure b/configure index e584be7ff6..b61359c7a7 100755 --- a/configure +++ b/configure @@ -3347,6 +3347,7 @@ esac # determine libc flavour +# uclibc defines __GLIBC__, so it needs to be checked before glibc. if check_cpp_condition features.h "defined __UCLIBC__"; then libc_type=uclibc add_cppflags -D_POSIX_C_SOURCE=200112 -D_XOPEN_SOURCE=600 diff --git a/libavcodec/x86/dsputil.asm b/libavcodec/x86/dsputil.asm index 6a76655a8b..587d5ee968 100644 --- a/libavcodec/x86/dsputil.asm +++ b/libavcodec/x86/dsputil.asm @@ -648,46 +648,3 @@ BSWAP32_BUF INIT_XMM ssse3 BSWAP32_BUF - -INIT_XMM sse2 -; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h) -cglobal put_pixels16, 4,5,4 - movsxdifnidn r2, r2d - lea r4, [r2*3] -.loop: - movu m0, [r1] - movu m1, [r1+r2] - movu m2, [r1+r2*2] - movu m3, [r1+r4] - lea r1, [r1+r2*4] - mova [r0], m0 - mova [r0+r2], m1 - mova [r0+r2*2], m2 - mova [r0+r4], m3 - sub r3d, 4 - lea r0, [r0+r2*4] - jnz .loop - REP_RET - -; void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h) -cglobal avg_pixels16, 4,5,4 - movsxdifnidn r2, r2d - lea r4, [r2*3] -.loop: - movu m0, [r1] - movu m1, [r1+r2] - movu m2, [r1+r2*2] - movu m3, [r1+r4] - lea r1, [r1+r2*4] - pavgb m0, [r0] - pavgb m1, [r0+r2] - pavgb m2, [r0+r2*2] - pavgb m3, [r0+r4] - mova [r0], m0 - mova [r0+r2], m1 - mova [r0+r2*2], m2 - mova [r0+r4], m3 - sub r3d, 4 - lea r0, [r0+r2*4] - jnz .loop - REP_RET diff --git a/libavcodec/x86/hpeldsp.asm b/libavcodec/x86/hpeldsp.asm index 72bc111baa..0f6f9e9ad2 100644 --- a/libavcodec/x86/hpeldsp.asm +++ b/libavcodec/x86/hpeldsp.asm @@ -469,3 +469,46 @@ INIT_MMX mmxext AVG_PIXELS8_XY2 INIT_MMX 3dnow AVG_PIXELS8_XY2 + +INIT_XMM sse2 +; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h) +cglobal put_pixels16, 4,5,4 + movsxdifnidn r2, r2d + lea r4, [r2*3] +.loop: + movu m0, [r1] + movu m1, [r1+r2] + movu m2, [r1+r2*2] + movu m3, [r1+r4] + lea r1, [r1+r2*4] + mova [r0], m0 + mova [r0+r2], m1 + mova [r0+r2*2], m2 + mova [r0+r4], m3 + sub r3d, 4 + lea r0, [r0+r2*4] + jnz .loop + REP_RET + +; void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_size, int h) +cglobal avg_pixels16, 4,5,4 + movsxdifnidn r2, r2d + lea r4, [r2*3] +.loop: + movu m0, [r1] + movu m1, [r1+r2] + movu m2, [r1+r2*2] + movu m3, [r1+r4] + lea r1, [r1+r2*4] + pavgb m0, [r0] + pavgb m1, [r0+r2] + pavgb m2, [r0+r2*2] + pavgb m3, [r0+r4] + mova [r0], m0 + mova [r0+r2], m1 + mova [r0+r2*2], m2 + mova [r0+r4], m3 + sub r3d, 4 + lea r0, [r0+r2*4] + jnz .loop + REP_RET