vp8: fix PPC assembly to work if src_stride != dst_stride
Signed-off-by: Anton Khirnov <anton@khirnov.net> Signed-off-by: Janne Grunau <janne-libav@jannau.net>
This commit is contained in:
committed by
Janne Grunau
parent
b37effdc52
commit
2f6eec65ac
@@ -241,15 +241,15 @@ void put_vp8_epel ## WIDTH ## _v ## TAPS ## _altivec(uint8_t *dst, ptrdiff_t dst
|
|||||||
}
|
}
|
||||||
|
|
||||||
#define EPEL_HV(WIDTH, HTAPS, VTAPS) \
|
#define EPEL_HV(WIDTH, HTAPS, VTAPS) \
|
||||||
static void put_vp8_epel ## WIDTH ## _h ## HTAPS ## v ## VTAPS ## _altivec(uint8_t *dst, ptrdiff_t stride, uint8_t *src, ptrdiff_t s, int h, int mx, int my) \
|
static void put_vp8_epel ## WIDTH ## _h ## HTAPS ## v ## VTAPS ## _altivec(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
|
||||||
{ \
|
{ \
|
||||||
DECLARE_ALIGNED(16, uint8_t, tmp)[(2*WIDTH+5)*16]; \
|
DECLARE_ALIGNED(16, uint8_t, tmp)[(2*WIDTH+5)*16]; \
|
||||||
if (VTAPS == 6) { \
|
if (VTAPS == 6) { \
|
||||||
put_vp8_epel ## WIDTH ## _h ## HTAPS ## _altivec(tmp, 16, src-2*stride, stride, h+5, mx, my); \
|
put_vp8_epel ## WIDTH ## _h ## HTAPS ## _altivec(tmp, 16, src-2*sstride, sstride, h+5, mx, my); \
|
||||||
put_vp8_epel ## WIDTH ## _v ## VTAPS ## _altivec(dst, stride, tmp+2*16, 16, h, mx, my); \
|
put_vp8_epel ## WIDTH ## _v ## VTAPS ## _altivec(dst, dstride, tmp+2*16, 16, h, mx, my); \
|
||||||
} else { \
|
} else { \
|
||||||
put_vp8_epel ## WIDTH ## _h ## HTAPS ## _altivec(tmp, 16, src-stride, stride, h+4, mx, my); \
|
put_vp8_epel ## WIDTH ## _h ## HTAPS ## _altivec(tmp, 16, src-sstride, sstride, h+4, mx, my); \
|
||||||
put_vp8_epel ## WIDTH ## _v ## VTAPS ## _altivec(dst, stride, tmp+16, 16, h, mx, my); \
|
put_vp8_epel ## WIDTH ## _v ## VTAPS ## _altivec(dst, dstride, tmp+16, 16, h, mx, my); \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -269,9 +269,44 @@ EPEL_HV(4, 4,6)
|
|||||||
EPEL_HV(4, 6,4)
|
EPEL_HV(4, 6,4)
|
||||||
EPEL_HV(4, 4,4)
|
EPEL_HV(4, 4,4)
|
||||||
|
|
||||||
static void put_vp8_pixels16_altivec(uint8_t *dst, ptrdiff_t stride, uint8_t *src, ptrdiff_t s, int h, int mx, int my)
|
static void put_vp8_pixels16_altivec(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my)
|
||||||
{
|
{
|
||||||
ff_put_pixels16_altivec(dst, src, stride, h);
|
register vector unsigned char pixelsv1, pixelsv2;
|
||||||
|
register vector unsigned char pixelsv1B, pixelsv2B;
|
||||||
|
register vector unsigned char pixelsv1C, pixelsv2C;
|
||||||
|
register vector unsigned char pixelsv1D, pixelsv2D;
|
||||||
|
|
||||||
|
register vector unsigned char perm = vec_lvsl(0, src);
|
||||||
|
int i;
|
||||||
|
register ptrdiff_t dstride2 = dstride << 1, sstride2 = sstride << 1;
|
||||||
|
register ptrdiff_t dstride3 = dstride2 + dstride, sstride3 = sstride + sstride2;
|
||||||
|
register ptrdiff_t dstride4 = dstride << 2, sstride4 = sstride << 2;
|
||||||
|
|
||||||
|
// hand-unrolling the loop by 4 gains about 15%
|
||||||
|
// mininum execution time goes from 74 to 60 cycles
|
||||||
|
// it's faster than -funroll-loops, but using
|
||||||
|
// -funroll-loops w/ this is bad - 74 cycles again.
|
||||||
|
// all this is on a 7450, tuning for the 7450
|
||||||
|
for (i = 0; i < h; i += 4) {
|
||||||
|
pixelsv1 = vec_ld( 0, src);
|
||||||
|
pixelsv2 = vec_ld(15, src);
|
||||||
|
pixelsv1B = vec_ld(sstride, src);
|
||||||
|
pixelsv2B = vec_ld(15 + sstride, src);
|
||||||
|
pixelsv1C = vec_ld(sstride2, src);
|
||||||
|
pixelsv2C = vec_ld(15 + sstride2, src);
|
||||||
|
pixelsv1D = vec_ld(sstride3, src);
|
||||||
|
pixelsv2D = vec_ld(15 + sstride3, src);
|
||||||
|
vec_st(vec_perm(pixelsv1, pixelsv2, perm),
|
||||||
|
0, (unsigned char*)dst);
|
||||||
|
vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
|
||||||
|
dstride, (unsigned char*)dst);
|
||||||
|
vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
|
||||||
|
dstride2, (unsigned char*)dst);
|
||||||
|
vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
|
||||||
|
dstride3, (unsigned char*)dst);
|
||||||
|
src += sstride4;
|
||||||
|
dst += dstride4;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* HAVE_ALTIVEC */
|
#endif /* HAVE_ALTIVEC */
|
||||||
|
Reference in New Issue
Block a user