diff --git a/libswscale/utils.c b/libswscale/utils.c index 20cc3f187a..96d6efbb8c 100644 --- a/libswscale/utils.c +++ b/libswscale/utils.c @@ -795,7 +795,6 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) srcW, srcH, dstW, dstH); return AVERROR(EINVAL); } - FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail); if (!dstFilter) dstFilter= &dummyFilter; if (!srcFilter) srcFilter= &dummyFilter; @@ -851,6 +850,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) } } + FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail); if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) { c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) { diff --git a/libswscale/x86/swscale_template.c b/libswscale/x86/swscale_template.c index 0e8c381c04..bb35693cf9 100644 --- a/libswscale/x86/swscale_template.c +++ b/libswscale/x86/swscale_template.c @@ -59,7 +59,7 @@ "psraw $3, %%mm3 \n\t"\ "psraw $3, %%mm4 \n\t"\ "packuswb %%mm4, %%mm3 \n\t"\ - MOVNTQ(%%mm3, (%1, %%REGa))\ + MOVNTQ(%%mm3, (%1, %3))\ "add $8, %3 \n\t"\ "cmp %2, %3 \n\t"\ "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\ @@ -81,8 +81,9 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, uint8_t *aDest, long dstW, long chrDstW) { if (uDest) { + x86_reg uv_off = c->uv_off; YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0) - YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest, chrDstW + c->uv_off, c->uv_off) + YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off) } if (CONFIG_SWSCALE_ALPHA && aDest) { YSCALEYUV2YV12X(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0) @@ -137,7 +138,7 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, "psraw $3, %%mm4 \n\t"\ "psraw $3, %%mm6 \n\t"\ "packuswb %%mm6, %%mm4 \n\t"\ - MOVNTQ(%%mm4, (%1, %%REGa))\ + MOVNTQ(%%mm4, (%1, %3))\ "add $8, %3 \n\t"\ "cmp %2, %3 \n\t"\ "lea " offset "(%0), %%"REG_d" \n\t"\ @@ -161,8 +162,9 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter, uint8_t *aDest, long dstW, long chrDstW) { if (uDest) { + x86_reg uv_off = c->uv_off; YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0) - YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest, chrDstW + c->uv_off, c->uv_off) + YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, vDest - uv_off, chrDstW + uv_off, uv_off) } if (CONFIG_SWSCALE_ALPHA && aDest) { YSCALEYUV2YV12X_ACCURATE(ALP_MMX_FILTER_OFFSET, aDest, dstW, 0) @@ -2223,7 +2225,6 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, { int32_t *filterPos = c->hLumFilterPos; int16_t *filter = c->hLumFilter; - int canMMX2BeUsed = c->canMMX2BeUsed; void *mmx2FilterCode= c->lumMmx2FilterCode; int i; #if defined(PIC) @@ -2296,7 +2297,6 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *d { int32_t *filterPos = c->hChrFilterPos; int16_t *filter = c->hChrFilter; - int canMMX2BeUsed = c->canMMX2BeUsed; void *mmx2FilterCode= c->chrMmx2FilterCode; int i; #if defined(PIC) @@ -2362,7 +2362,6 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int const int flags= c->flags; int16_t **lumPixBuf= c->lumPixBuf; int16_t **chrUPixBuf= c->chrUPixBuf; - int16_t **chrVPixBuf= c->chrVPixBuf; int16_t **alpPixBuf= c->alpPixBuf; const int vLumBufSize= c->vLumBufSize; const int vChrBufSize= c->vChrBufSize; @@ -2388,7 +2387,6 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int if (dstY < dstH - 2) { const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; - const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; int i; if (flags & SWS_ACCURATE_RND) { diff --git a/libswscale/x86/yuv2rgb_mmx.c b/libswscale/x86/yuv2rgb_mmx.c index 855a82506e..d46d5126da 100644 --- a/libswscale/x86/yuv2rgb_mmx.c +++ b/libswscale/x86/yuv2rgb_mmx.c @@ -82,15 +82,15 @@ SwsFunc ff_yuv2rgb_init_mmx(SwsContext *c) if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_MMX) { switch (c->dstFormat) { case PIX_FMT_RGB32: - if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) { -#if HAVE_7REGS + if (c->srcFormat == PIX_FMT_YUVA420P) { +#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA return yuva420_rgb32_MMX; #endif break; } else return yuv420_rgb32_MMX; case PIX_FMT_BGR32: - if (CONFIG_SWSCALE_ALPHA && c->srcFormat == PIX_FMT_YUVA420P) { -#if HAVE_7REGS + if (c->srcFormat == PIX_FMT_YUVA420P) { +#if HAVE_7REGS && CONFIG_SWSCALE_ALPHA return yuva420_bgr32_MMX; #endif break;