From bfb3b2b7a6ec0e35ba400941183862137b91bf32 Mon Sep 17 00:00:00 2001 From: James Almer Date: Sat, 26 Jul 2014 15:08:51 -0300 Subject: [PATCH] x86/hevc_idct: add 12bit idct_dc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: James Almer Reviewed-by: Mickaël Raulet Signed-off-by: Michael Niedermayer --- libavcodec/x86/hevc_idct.asm | 16 ++++++++++++++++ libavcodec/x86/hevcdsp_init.c | 15 ++++++++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/libavcodec/x86/hevc_idct.asm b/libavcodec/x86/hevc_idct.asm index 7a44f0830c..2ad6e973ac 100644 --- a/libavcodec/x86/hevc_idct.asm +++ b/libavcodec/x86/hevc_idct.asm @@ -104,3 +104,19 @@ INIT_YMM avx2 IDCT_DC 16, 2, 10 IDCT_DC 32, 8, 10 %endif ;HAVE_AVX2_EXTERNAL + +; 12-bit +INIT_MMX mmxext +IDCT_DC_NL 4, 12 +IDCT_DC 8, 2, 12 + +INIT_XMM sse2 +IDCT_DC_NL 8, 12 +IDCT_DC 16, 4, 12 +IDCT_DC 32, 16, 12 + +%if HAVE_AVX2_EXTERNAL +INIT_YMM avx2 +IDCT_DC 16, 2, 12 +IDCT_DC 32, 8, 12 +%endif ;HAVE_AVX2_EXTERNAL diff --git a/libavcodec/x86/hevcdsp_init.c b/libavcodec/x86/hevcdsp_init.c index 3357593245..3e8704aec8 100644 --- a/libavcodec/x86/hevcdsp_init.c +++ b/libavcodec/x86/hevcdsp_init.c @@ -54,7 +54,8 @@ LFL_FUNCS(uint8_t, 12, ssse3) #define IDCT_FUNCS(W, opt) \ void ff_hevc_idct##W##_dc_8_##opt(int16_t *coeffs); \ -void ff_hevc_idct##W##_dc_10_##opt(int16_t *coeffs) +void ff_hevc_idct##W##_dc_10_##opt(int16_t *coeffs); \ +void ff_hevc_idct##W##_dc_12_##opt(int16_t *coeffs) IDCT_FUNCS(4x4, mmxext); IDCT_FUNCS(8x8, mmxext); @@ -533,6 +534,10 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) } } else if (bit_depth == 12) { + if (EXTERNAL_MMXEXT(cpu_flags)) { + c->idct_dc[0] = ff_hevc_idct4x4_dc_12_mmxext; + c->idct_dc[1] = ff_hevc_idct8x8_dc_12_mmxext; + } if (EXTERNAL_SSE2(cpu_flags)) { c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_12_sse2; c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_12_sse2; @@ -540,6 
+545,10 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_sse2; c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_12_sse2; } + + c->idct_dc[1] = ff_hevc_idct8x8_dc_12_sse2; + c->idct_dc[2] = ff_hevc_idct16x16_dc_12_sse2; + c->idct_dc[3] = ff_hevc_idct32x32_dc_12_sse2; } if (EXTERNAL_SSSE3(cpu_flags) && ARCH_X86_64) { c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_12_ssse3; @@ -556,5 +565,9 @@ void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth) QPEL_LINKS(c->put_hevc_qpel, 1, 0, qpel_v, 12, sse4); QPEL_LINKS(c->put_hevc_qpel, 1, 1, qpel_hv, 12, sse4); } + if (EXTERNAL_AVX2(cpu_flags)) { + c->idct_dc[2] = ff_hevc_idct16x16_dc_12_avx2; + c->idct_dc[3] = ff_hevc_idct32x32_dc_12_avx2; + } } }