diff --git a/libavfilter/vf_interlace.c b/libavfilter/vf_interlace.c index d72cb43401..1a9278082a 100644 --- a/libavfilter/vf_interlace.c +++ b/libavfilter/vf_interlace.c @@ -83,14 +83,23 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t linesize, const uint8_t *srcp_below = srcp + pref; const uint8_t *srcp_above2 = srcp + mref * 2; const uint8_t *srcp_below2 = srcp + pref * 2; - int i; + int i, srcp_x, srcp_ab; for (i = 0; i < linesize; i++) { // this calculation is an integer representation of // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2' // '4 +' is for rounding. - dstp[i] = av_clip_uint8((4 + (srcp[i] << 2) - + ((srcp[i] + srcp_above[i] + srcp_below[i]) << 1) - - srcp_above2[i] - srcp_below2[i]) >> 3); + srcp_x = srcp[i] << 1; + srcp_ab = srcp_above[i] + srcp_below[i]; + dstp[i] = av_clip_uint8((4 + ((srcp[i] + srcp_x + srcp_ab) << 1) + - srcp_above2[i] - srcp_below2[i]) >> 3); + // Prevent over-sharpening: + // dst must not exceed src when the average of above and below + // is less than src. And the other way around. + if (srcp_ab > srcp_x) { + if (dstp[i] < srcp[i]) + dstp[i] = srcp[i]; + } else if (dstp[i] > srcp[i]) + dstp[i] = srcp[i]; } } diff --git a/libavfilter/vf_tinterlace.c b/libavfilter/vf_tinterlace.c index 65997076ad..81d2d773e0 100644 --- a/libavfilter/vf_tinterlace.c +++ b/libavfilter/vf_tinterlace.c @@ -110,14 +110,23 @@ static void lowpass_line_complex_c(uint8_t *dstp, ptrdiff_t width, const uint8_t const uint8_t *srcp_below = srcp + pref; const uint8_t *srcp_above2 = srcp + mref * 2; const uint8_t *srcp_below2 = srcp + pref * 2; - int i; + int i, srcp_x, srcp_ab; for (i = 0; i < width; i++) { // this calculation is an integer representation of // '0.75 * current + 0.25 * above + 0.25 * below - 0.125 * above2 - 0.125 * below2' // '4 +' is for rounding. - dstp[i] = av_clip_uint8((4 + (srcp[i] << 2) - + ((srcp[i] + srcp_above[i] + srcp_below[i]) << 1) - - srcp_above2[i] - srcp_below2[i]) >> 3); + srcp_x = srcp[i] << 1; + srcp_ab = srcp_above[i] + srcp_below[i]; + dstp[i] = av_clip_uint8((4 + ((srcp[i] + srcp_x + srcp_ab) << 1) + - srcp_above2[i] - srcp_below2[i]) >> 3); + // Prevent over-sharpening: + // dst must not exceed src when the average of above and below + // is less than src. And the other way around. + if (srcp_ab > srcp_x) { + if (dstp[i] < srcp[i]) + dstp[i] = srcp[i]; + } else if (dstp[i] > srcp[i]) + dstp[i] = srcp[i]; } } diff --git a/libavfilter/x86/vf_interlace.asm b/libavfilter/x86/vf_interlace.asm index c601fd7bf4..d0fffd293b 100644 --- a/libavfilter/x86/vf_interlace.asm +++ b/libavfilter/x86/vf_interlace.asm @@ -63,41 +63,46 @@ REP_RET %endmacro %macro LOWPASS_LINE_COMPLEX 0 -cglobal lowpass_line_complex, 5, 5, 7, dst, h, src, mref, pref - pxor m6, m6 +cglobal lowpass_line_complex, 5, 5, 8, dst, h, src, mref, pref + pxor m7, m7 .loop: mova m0, [srcq+mrefq] mova m2, [srcq+prefq] mova m1, m0 mova m3, m2 - punpcklbw m0, m6 - punpcklbw m2, m6 - punpckhbw m1, m6 - punpckhbw m3, m6 + punpcklbw m0, m7 + punpcklbw m2, m7 + punpckhbw m1, m7 + punpckhbw m3, m7 paddw m0, m2 paddw m1, m3 + mova m6, m0 + mova m5, m1 + mova m2, [srcq] + mova m3, m2 + punpcklbw m2, m7 + punpckhbw m3, m7 + paddw m0, m2 + paddw m1, m3 + psllw m2, 1 + psllw m3, 1 + paddw m0, m2 + paddw m1, m3 + psllw m0, 1 + psllw m1, 1 + pcmpgtw m6, m2 + pcmpgtw m5, m3 + packsswb m6, m5 mova m2, [srcq+mrefq*2] mova m4, [srcq+prefq*2] mova m3, m2 mova m5, m4 - punpcklbw m2, m6 - punpcklbw m4, m6 - punpckhbw m3, m6 - punpckhbw m5, m6 + punpcklbw m2, m7 + punpcklbw m4, m7 + punpckhbw m3, m7 + punpckhbw m5, m7 paddw m2, m4 paddw m3, m5 - mova m4, [srcq] - mova m5, m4 - punpcklbw m4, m6 - punpckhbw m5, m6 - paddw m0, m4 - paddw m1, m5 - psllw m0, 1 - psllw m1, 1 - psllw m4, 2 - psllw m5, 2 - paddw m0, m4 - paddw m1, m5 paddw m0, [pw_4] paddw m1, [pw_4] psubusw m0, m2 @@ -105,6 +110,12 @@ cglobal lowpass_line_complex, 5, 5, 7, dst, h, src, mref, pref psrlw m0, 3 psrlw m1, 3 packuswb m0, m1 + mova m1, m0 + pmaxub m0, [srcq] + pminub m1, [srcq] + pand m0, m6 + pandn m6, m1 + por m0, m6 mova [dstq], m0 add dstq, mmsize diff --git a/tests/ref/fate/filter-interlace-complex b/tests/ref/fate/filter-interlace-complex index 3b7812581f..e8db46a934 100644 --- a/tests/ref/fate/filter-interlace-complex +++ b/tests/ref/fate/filter-interlace-complex @@ -3,28 +3,28 @@ #codec_id 0: rawvideo #dimensions 0: 352x288 #sar 0: 0/1 -0, 0, 0, 1, 152064, 0x91290ae6 -0, 1, 1, 1, 152064, 0x24f34baf -0, 2, 2, 1, 152064, 0x799fc436 -0, 3, 3, 1, 152064, 0xfe42c0a9 -0, 4, 4, 1, 152064, 0xb496f879 -0, 5, 5, 1, 152064, 0xc43b36c9 -0, 6, 6, 1, 152064, 0xb65abbf4 -0, 7, 7, 1, 152064, 0xd1806312 -0, 8, 8, 1, 152064, 0x0faf56c1 -0, 9, 9, 1, 152064, 0x4de73b75 -0, 10, 10, 1, 152064, 0xf90f24ce -0, 11, 11, 1, 152064, 0xc1efd6e0 -0, 12, 12, 1, 152064, 0xeb8e5894 -0, 13, 13, 1, 152064, 0xe8aacabc -0, 14, 14, 1, 152064, 0x8bd2163c -0, 15, 15, 1, 152064, 0xbfc72ac2 -0, 16, 16, 1, 152064, 0x1e8f6f56 -0, 17, 17, 1, 152064, 0xe3d19450 -0, 18, 18, 1, 152064, 0x3872af32 -0, 19, 19, 1, 152064, 0xf23be72a -0, 20, 20, 1, 152064, 0x024f8f2b -0, 21, 21, 1, 152064, 0xb49301ea -0, 22, 22, 1, 152064, 0x84f5d056 -0, 23, 23, 1, 152064, 0xd2c09ca5 -0, 24, 24, 1, 152064, 0xe9b5ddfd +0, 0, 0, 1, 152064, 0x778ab0c1 +0, 1, 1, 1, 152064, 0xdc30f7c3 +0, 2, 2, 1, 152064, 0xcb637467 +0, 3, 3, 1, 152064, 0xcbf778ce +0, 4, 4, 1, 152064, 0x573d9f6d +0, 5, 5, 1, 152064, 0xd794df2c +0, 6, 6, 1, 152064, 0x3e885448 +0, 7, 7, 1, 152064, 0xccec1794 +0, 8, 8, 1, 152064, 0x6f32f51a +0, 9, 9, 1, 152064, 0x0657f5ac +0, 10, 10, 1, 152064, 0xfa82d600 +0, 11, 11, 1, 152064, 0x15ff7f32 +0, 12, 12, 1, 152064, 0x1cac0342 +0, 13, 13, 1, 152064, 0x6afb7c49 +0, 14, 14, 1, 152064, 0x6c47d554 +0, 15, 15, 1, 152064, 0xe0fbd132 +0, 16, 16, 1, 152064, 0x4f891e8d +0, 17, 17, 1, 152064, 0x88554045 +0, 18, 18, 1, 152064, 0x0c8e6192 +0, 19, 19, 1, 152064, 0xf73c91c3 +0, 20, 20, 1, 152064, 0x49ac328d +0, 21, 21, 1, 152064, 0xf18ebd82 +0, 22, 22, 1, 152064, 0x3359760d +0, 23, 23, 1, 152064, 0x5c85601a +0, 24, 24, 1, 152064, 0x28c1657b