sws/x86: improve rounding for yuv2yuvX
This tries to compensate for the errors introduced by the rounding of pmulhw.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
parent
5ad43af9a6
commit
5e947aeb59
@ -226,10 +226,20 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
|
|||||||
:: "r"(dither)
|
:: "r"(dither)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
filterSize--;
|
||||||
__asm__ volatile(
|
__asm__ volatile(
|
||||||
"pxor %%xmm0, %%xmm0\n\t"
|
"pxor %%xmm0, %%xmm0\n\t"
|
||||||
"punpcklbw %%xmm0, %%xmm3\n\t"
|
"punpcklbw %%xmm0, %%xmm3\n\t"
|
||||||
"psraw $4, %%xmm3\n\t"
|
"movd %0, %%xmm1\n\t"
|
||||||
|
"punpcklwd %%xmm1, %%xmm1\n\t"
|
||||||
|
"punpckldq %%xmm1, %%xmm1\n\t"
|
||||||
|
"punpcklqdq %%xmm1, %%xmm1\n\t"
|
||||||
|
"psllw $3, %%xmm1\n\t"
|
||||||
|
"paddw %%xmm1, %%xmm3\n\t"
|
||||||
|
"psraw $4, %%xmm3\n\t"
|
||||||
|
::"m"(filterSize)
|
||||||
|
);
|
||||||
|
__asm__ volatile(
|
||||||
"movdqa %%xmm3, %%xmm4\n\t"
|
"movdqa %%xmm3, %%xmm4\n\t"
|
||||||
"movdqa %%xmm3, %%xmm7\n\t"
|
"movdqa %%xmm3, %%xmm7\n\t"
|
||||||
"movl %3, %%ecx\n\t"
|
"movl %3, %%ecx\n\t"
|
||||||
|
@ -71,9 +71,20 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
|
|||||||
const uint8_t *dither, int offset)
|
const uint8_t *dither, int offset)
|
||||||
{
|
{
|
||||||
dither_8to16(dither, offset);
|
dither_8to16(dither, offset);
|
||||||
__asm__ volatile(\
|
filterSize--;
|
||||||
|
__asm__ volatile(
|
||||||
|
"movd %0, %%mm1\n\t"
|
||||||
|
"punpcklwd %%mm1, %%mm1\n\t"
|
||||||
|
"punpckldq %%mm1, %%mm1\n\t"
|
||||||
|
"psllw $3, %%mm1\n\t"
|
||||||
|
"paddw %%mm1, %%mm3\n\t"
|
||||||
|
"paddw %%mm1, %%mm4\n\t"
|
||||||
"psraw $4, %%mm3\n\t"
|
"psraw $4, %%mm3\n\t"
|
||||||
"psraw $4, %%mm4\n\t"
|
"psraw $4, %%mm4\n\t"
|
||||||
|
::"m"(filterSize)
|
||||||
|
);
|
||||||
|
|
||||||
|
__asm__ volatile(\
|
||||||
"movq %%mm3, %%mm6\n\t"
|
"movq %%mm3, %%mm6\n\t"
|
||||||
"movq %%mm4, %%mm7\n\t"
|
"movq %%mm4, %%mm7\n\t"
|
||||||
"movl %3, %%ecx\n\t"
|
"movl %3, %%ecx\n\t"
|
||||||
|
Loading…
x
Reference in New Issue
Block a user