From f859678f3f274bbfa8228ddf13706f55d66481cb Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Fri, 1 Feb 2013 11:04:50 -0800 Subject: [PATCH 1/3] vp56: Remove clear_blocks call, and clear alpha plane U/V DC only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The non-alpha and alpha-Y planes are cleared in the idct_put/add() calls. For the alpha U/V planes, we only care about the DC for entropy context prediction purposes, the rest of the data is unused. Signed-off-by: Martin Storsjö --- libavcodec/vp56.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/libavcodec/vp56.c b/libavcodec/vp56.c index dec7869c1b..64f33c6dbe 100644 --- a/libavcodec/vp56.c +++ b/libavcodec/vp56.c @@ -394,8 +394,6 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha) mb_type = vp56_decode_mv(s, row, col); ref_frame = vp56_reference_frame[mb_type]; - s->dsp.clear_blocks(*s->block_coeff); - s->parse_coeff(s); vp56_add_predictors_dc(s, ref_frame); @@ -448,6 +446,11 @@ static void vp56_decode_mb(VP56Context *s, int row, int col, int is_alpha) } break; } + + if (is_alpha) { + s->block_coeff[4][0] = 0; + s->block_coeff[5][0] = 0; + } } static int vp56_size_changed(AVCodecContext *avctx) From 0c0828ecc565a617ed50cd5f682a9dd635a9fbc1 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Tue, 29 Jan 2013 15:55:19 -0800 Subject: [PATCH 2/3] x86: Use simple nop codes for <= sse (rather than <= mmx) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "CentaurHauls family 6 model 9 stepping 8" family of CPUs (flags: fpu vme de pse tsc msr cx8 sep mtrr pge mov pat mmx fxsr sse up rng rng_en ace ace_en) SIGILLs on long nop codes. Signed-off-by: Martin Storsjö --- libavutil/x86/x86inc.asm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm index a18c6f466e..bc8e4cb81e 100644 --- a/libavutil/x86/x86inc.asm +++ b/libavutil/x86/x86inc.asm @@ -752,7 +752,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits %elifidn %1, sse3 %define movu lddqu %endif - %if notcpuflag(mmx2) + %if notcpuflag(sse2) CPUNOP basicnop %endif %else From e5ffffe48d20642acc079166f0fa7d93a6a9f594 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 11 Feb 2013 13:52:38 -0800 Subject: [PATCH 3/3] h264chroma: Remove duplicate 9/10 bit functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These functions do the same thing in 16 bit space and don't need any depth specific clipping. Signed-off-by: Martin Storsjö --- libavcodec/h264chroma.c | 18 ++++-------------- libavcodec/x86/h264chroma_init.c | 6 +++--- 2 files changed, 7 insertions(+), 17 deletions(-) diff --git a/libavcodec/h264chroma.c b/libavcodec/h264chroma.c index 2bd2df3b56..463d6c454f 100644 --- a/libavcodec/h264chroma.c +++ b/libavcodec/h264chroma.c @@ -23,11 +23,7 @@ #include "h264chroma_template.c" #undef BIT_DEPTH -#define BIT_DEPTH 9 -#include "h264chroma_template.c" -#undef BIT_DEPTH - -#define BIT_DEPTH 10 +#define BIT_DEPTH 16 #include "h264chroma_template.c" #undef BIT_DEPTH @@ -41,16 +37,10 @@ void ff_h264chroma_init(H264ChromaContext *c, int bit_depth) { - switch (bit_depth) { - case 10: - SET_CHROMA(10); - break; - case 9: - SET_CHROMA(9); - break; - default: + if (bit_depth > 8 && bit_depth <= 16) { + SET_CHROMA(16); + } else { SET_CHROMA(8); - break; } if (ARCH_ARM) diff --git a/libavcodec/x86/h264chroma_init.c b/libavcodec/x86/h264chroma_init.c index b48565804c..bdbbafdaa5 100644 --- a/libavcodec/x86/h264chroma_init.c +++ b/libavcodec/x86/h264chroma_init.c @@ -89,14 +89,14 @@ void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth) c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_mmxext; } - if (EXTERNAL_MMXEXT(mm_flags) && bit_depth == 10) { + if (EXTERNAL_MMXEXT(mm_flags) && bit_depth > 8 && bit_depth <= 10) { c->put_h264_chroma_pixels_tab[2] = ff_put_h264_chroma_mc2_10_mmxext; c->avg_h264_chroma_pixels_tab[2] = ff_avg_h264_chroma_mc2_10_mmxext; c->put_h264_chroma_pixels_tab[1] = ff_put_h264_chroma_mc4_10_mmxext; c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_10_mmxext; } - if (EXTERNAL_SSE2(mm_flags) && bit_depth == 10) { + if (EXTERNAL_SSE2(mm_flags) && bit_depth > 8 && bit_depth <= 10) { c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_sse2; c->avg_h264_chroma_pixels_tab[0] = ff_avg_h264_chroma_mc8_10_sse2; } @@ -108,7 +108,7 @@ void ff_h264chroma_init_x86(H264ChromaContext *c, int bit_depth) c->avg_h264_chroma_pixels_tab[1] = ff_avg_h264_chroma_mc4_ssse3; } - if (EXTERNAL_AVX(mm_flags) && bit_depth == 10) { + if (EXTERNAL_AVX(mm_flags) && bit_depth > 8 && bit_depth <= 10) { // AVX implies !cache64. // TODO: Port cache(32|64) detection from x264. c->put_h264_chroma_pixels_tab[0] = ff_put_h264_chroma_mc8_10_avx;