diff --git a/libavcodec/tests/x86/dct.c b/libavcodec/tests/x86/dct.c index 207a2bcb36..ef0662ae37 100644 --- a/libavcodec/tests/x86/dct.c +++ b/libavcodec/tests/x86/dct.c @@ -65,9 +65,6 @@ static const struct algo fdct_tab_arch[] = { }; static const struct algo idct_tab_arch[] = { -#if HAVE_MMX_EXTERNAL - { "SIMPLE-MMX", ff_simple_idct_mmx, FF_IDCT_PERM_SIMPLE, AV_CPU_FLAG_MMX }, -#endif #if CONFIG_MPEG4_DECODER && HAVE_X86ASM #if HAVE_SSE2_EXTERNAL { "XVID-SSE2", ff_xvid_idct_sse2, FF_IDCT_PERM_SSE2, AV_CPU_FLAG_SSE2, 1 }, diff --git a/libavcodec/x86/idctdsp.asm b/libavcodec/x86/idctdsp.asm index 089425a9ab..1cfdb5419d 100644 --- a/libavcodec/x86/idctdsp.asm +++ b/libavcodec/x86/idctdsp.asm @@ -37,47 +37,24 @@ SECTION .text %macro PUT_SIGNED_PIXELS_CLAMPED_HALF 1 mova m1, [blockq+mmsize*0+%1] mova m2, [blockq+mmsize*2+%1] -%if mmsize == 8 - mova m3, [blockq+mmsize*4+%1] - mova m4, [blockq+mmsize*6+%1] -%endif packsswb m1, [blockq+mmsize*1+%1] packsswb m2, [blockq+mmsize*3+%1] -%if mmsize == 8 - packsswb m3, [blockq+mmsize*5+%1] - packsswb m4, [blockq+mmsize*7+%1] -%endif paddb m1, m0 paddb m2, m0 -%if mmsize == 8 - paddb m3, m0 - paddb m4, m0 - movq [pixelsq+lsizeq*0], m1 - movq [pixelsq+lsizeq*1], m2 - movq [pixelsq+lsizeq*2], m3 - movq [pixelsq+lsize3q ], m4 -%else movq [pixelsq+lsizeq*0], m1 movhps [pixelsq+lsizeq*1], m1 movq [pixelsq+lsizeq*2], m2 movhps [pixelsq+lsize3q ], m2 -%endif %endmacro -%macro PUT_SIGNED_PIXELS_CLAMPED 1 -cglobal put_signed_pixels_clamped, 3, 4, %1, block, pixels, lsize, lsize3 +INIT_XMM sse2 +cglobal put_signed_pixels_clamped, 3, 4, 3, block, pixels, lsize, lsize3 mova m0, [pb_80] lea lsize3q, [lsizeq*3] PUT_SIGNED_PIXELS_CLAMPED_HALF 0 lea pixelsq, [pixelsq+lsizeq*4] PUT_SIGNED_PIXELS_CLAMPED_HALF 64 RET -%endmacro - -INIT_MMX mmx -PUT_SIGNED_PIXELS_CLAMPED 0 -INIT_XMM sse2 -PUT_SIGNED_PIXELS_CLAMPED 3 ;-------------------------------------------------------------------------- ; void ff_put_pixels_clamped(const int16_t *block, uint8_t *pixels, @@ -87,40 +64,21 @@ PUT_SIGNED_PIXELS_CLAMPED 3 %macro PUT_PIXELS_CLAMPED_HALF 1 mova m0, [blockq+mmsize*0+%1] mova m1, [blockq+mmsize*2+%1] -%if mmsize == 8 - mova m2, [blockq+mmsize*4+%1] - mova m3, [blockq+mmsize*6+%1] -%endif packuswb m0, [blockq+mmsize*1+%1] packuswb m1, [blockq+mmsize*3+%1] -%if mmsize == 8 - packuswb m2, [blockq+mmsize*5+%1] - packuswb m3, [blockq+mmsize*7+%1] - movq [pixelsq], m0 - movq [lsizeq+pixelsq], m1 - movq [2*lsizeq+pixelsq], m2 - movq [lsize3q+pixelsq], m3 -%else movq [pixelsq], m0 movhps [lsizeq+pixelsq], m0 movq [2*lsizeq+pixelsq], m1 movhps [lsize3q+pixelsq], m1 -%endif %endmacro -%macro PUT_PIXELS_CLAMPED 0 +INIT_XMM sse2 cglobal put_pixels_clamped, 3, 4, 2, block, pixels, lsize, lsize3 lea lsize3q, [lsizeq*3] PUT_PIXELS_CLAMPED_HALF 0 lea pixelsq, [pixelsq+lsizeq*4] PUT_PIXELS_CLAMPED_HALF 64 RET -%endmacro - -INIT_MMX mmx -PUT_PIXELS_CLAMPED -INIT_XMM sse2 -PUT_PIXELS_CLAMPED ;-------------------------------------------------------------------------- ; void ff_add_pixels_clamped(const int16_t *block, uint8_t *pixels, @@ -130,41 +88,18 @@ PUT_PIXELS_CLAMPED %macro ADD_PIXELS_CLAMPED 1 mova m0, [blockq+mmsize*0+%1] mova m1, [blockq+mmsize*1+%1] -%if mmsize == 8 - mova m5, [blockq+mmsize*2+%1] - mova m6, [blockq+mmsize*3+%1] -%endif movq m2, [pixelsq] movq m3, [pixelsq+lsizeq] -%if mmsize == 8 - mova m7, m2 - punpcklbw m2, m4 - punpckhbw m7, m4 - paddsw m0, m2 - paddsw m1, m7 - mova m7, m3 - punpcklbw m3, m4 - punpckhbw m7, m4 - paddsw m5, m3 - paddsw m6, m7 -%else punpcklbw m2, m4 punpcklbw m3, m4 paddsw m0, m2 paddsw m1, m3 -%endif packuswb m0, m1 -%if mmsize == 8 - packuswb m5, m6 - movq [pixelsq], m0 - movq [pixelsq+lsizeq], m5 -%else movq [pixelsq], m0 movhps [pixelsq+lsizeq], m0 -%endif %endmacro -%macro ADD_PIXELS_CLAMPED 0 +INIT_XMM sse2 cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize pxor m4, m4 ADD_PIXELS_CLAMPED 0 @@ -175,9 +110,3 @@ cglobal add_pixels_clamped, 3, 3, 5, block, pixels, lsize lea pixelsq, [pixelsq+lsizeq*2] ADD_PIXELS_CLAMPED 96 RET -%endmacro - -INIT_MMX mmx -ADD_PIXELS_CLAMPED -INIT_XMM sse2 -ADD_PIXELS_CLAMPED diff --git a/libavcodec/x86/idctdsp.h b/libavcodec/x86/idctdsp.h index 0d0bdb5f57..738e4e36e4 100644 --- a/libavcodec/x86/idctdsp.h +++ b/libavcodec/x86/idctdsp.h @@ -22,16 +22,10 @@ #include #include -void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, - ptrdiff_t line_size); void ff_add_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size); -void ff_put_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, - ptrdiff_t line_size); void ff_put_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size); -void ff_put_signed_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, - ptrdiff_t line_size); void ff_put_signed_pixels_clamped_sse2(const int16_t *block, uint8_t *pixels, ptrdiff_t line_size); diff --git a/libavcodec/x86/idctdsp_init.c b/libavcodec/x86/idctdsp_init.c index 9103b92ce7..f28a1ad744 100644 --- a/libavcodec/x86/idctdsp_init.c +++ b/libavcodec/x86/idctdsp_init.c @@ -63,28 +63,24 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, { int cpu_flags = av_get_cpu_flags(); +#if ARCH_X86_32 if (EXTERNAL_MMX(cpu_flags)) { - c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_mmx; - c->put_pixels_clamped = ff_put_pixels_clamped_mmx; - c->add_pixels_clamped = ff_add_pixels_clamped_mmx; - if (!high_bit_depth && avctx->lowres == 0 && (avctx->idct_algo == FF_IDCT_AUTO || avctx->idct_algo == FF_IDCT_SIMPLEAUTO || avctx->idct_algo == FF_IDCT_SIMPLEMMX)) { - c->idct_put = ff_simple_idct_put_mmx; - c->idct_add = ff_simple_idct_add_mmx; c->idct = ff_simple_idct_mmx; - c->perm_type = FF_IDCT_PERM_SIMPLE; } } +#endif if (EXTERNAL_SSE2(cpu_flags)) { c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_sse2; c->put_pixels_clamped = ff_put_pixels_clamped_sse2; c->add_pixels_clamped = ff_add_pixels_clamped_sse2; +#if ARCH_X86_32 if (!high_bit_depth && avctx->lowres == 0 && (avctx->idct_algo == FF_IDCT_AUTO || @@ -94,6 +90,7 @@ av_cold void ff_idctdsp_init_x86(IDCTDSPContext *c, AVCodecContext *avctx, c->idct_add = ff_simple_idct_add_sse2; c->perm_type = FF_IDCT_PERM_SIMPLE; } +#endif if (ARCH_X86_64 && !high_bit_depth && diff --git a/libavcodec/x86/simple_idct.asm b/libavcodec/x86/simple_idct.asm index 6fedbb5784..dcf0da6df1 100644 --- a/libavcodec/x86/simple_idct.asm +++ b/libavcodec/x86/simple_idct.asm @@ -25,6 +25,7 @@ %include "libavutil/x86/x86util.asm" +%if ARCH_X86_32 SECTION_RODATA cextern pb_80 @@ -846,26 +847,6 @@ cglobal simple_idct, 1, 2, 8, 128, block, t0 IDCT RET -cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0 - IDCT - lea lsize3q, [lsizeq*3] - PUT_PIXELS_CLAMPED_HALF 0 - lea pixelsq, [pixelsq+lsizeq*4] - PUT_PIXELS_CLAMPED_HALF 64 -RET - -cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0 - IDCT - pxor m4, m4 - ADD_PIXELS_CLAMPED 0 - lea pixelsq, [pixelsq+lsizeq*2] - ADD_PIXELS_CLAMPED 32 - lea pixelsq, [pixelsq+lsizeq*2] - ADD_PIXELS_CLAMPED 64 - lea pixelsq, [pixelsq+lsizeq*2] - ADD_PIXELS_CLAMPED 96 -RET - INIT_XMM sse2 cglobal simple_idct_put, 3, 5, 8, 128, pixels, lsize, block, lsize3, t0 @@ -887,3 +868,4 @@ cglobal simple_idct_add, 3, 4, 8, 128, pixels, lsize, block, t0 lea pixelsq, [pixelsq+lsizeq*2] ADD_PIXELS_CLAMPED 96 RET +%endif