Communicate proper aliasing to gcc (needed for 4.1).
Originally committed as revision 4384 to svn://svn.ffmpeg.org/ffmpeg/trunk
This commit is contained in:
parent
1a2f549126
commit
2fdf9cb2fb
@ -56,13 +56,33 @@ static inline uint64_t WORD_VEC(uint64_t x)
|
|||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ldq(p) (*(const uint64_t *) (p))
|
|
||||||
#define ldl(p) (*(const int32_t *) (p))
|
|
||||||
#define stl(l, p) do { *(uint32_t *) (p) = (l); } while (0)
|
|
||||||
#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
|
|
||||||
#define sextw(x) ((int16_t) (x))
|
#define sextw(x) ((int16_t) (x))
|
||||||
|
|
||||||
#ifdef __GNUC__
|
#ifdef __GNUC__
|
||||||
|
#define ldq(p) \
|
||||||
|
(((union { \
|
||||||
|
uint64_t __l; \
|
||||||
|
__typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)]; \
|
||||||
|
} *) (p))->__l)
|
||||||
|
#define ldl(p) \
|
||||||
|
(((union { \
|
||||||
|
int32_t __l; \
|
||||||
|
__typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)]; \
|
||||||
|
} *) (p))->__l)
|
||||||
|
#define stq(l, p) \
|
||||||
|
do { \
|
||||||
|
(((union { \
|
||||||
|
uint64_t __l; \
|
||||||
|
__typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)]; \
|
||||||
|
} *) (p))->__l) = l; \
|
||||||
|
} while (0)
|
||||||
|
#define stl(l, p) \
|
||||||
|
do { \
|
||||||
|
(((union { \
|
||||||
|
int32_t __l; \
|
||||||
|
__typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)]; \
|
||||||
|
} *) (p))->__l) = l; \
|
||||||
|
} while (0)
|
||||||
struct unaligned_long { uint64_t l; } __attribute__((packed));
|
struct unaligned_long { uint64_t l; } __attribute__((packed));
|
||||||
#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
|
#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
|
||||||
#define uldq(a) (((const struct unaligned_long *) (a))->l)
|
#define uldq(a) (((const struct unaligned_long *) (a))->l)
|
||||||
@ -132,6 +152,10 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
|
|||||||
#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */
|
#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */
|
||||||
|
|
||||||
#include <c_asm.h>
|
#include <c_asm.h>
|
||||||
|
#define ldq(p) (*(const uint64_t *) (p))
|
||||||
|
#define ldl(p) (*(const int32_t *) (p))
|
||||||
|
#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
|
||||||
|
#define stl(l, p) do { *(int32_t *) (p) = (l); } while (0)
|
||||||
#define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a)
|
#define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a)
|
||||||
#define uldq(a) (*(const __unaligned uint64_t *) (a))
|
#define uldq(a) (*(const __unaligned uint64_t *) (a))
|
||||||
#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b)
|
#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b)
|
||||||
|
@ -235,25 +235,22 @@ static inline void idct_col2(DCTELEM *col)
|
|||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
uint64_t l, r;
|
uint64_t l, r;
|
||||||
uint64_t *lcol = (uint64_t *) col;
|
|
||||||
|
|
||||||
for (i = 0; i < 8; ++i) {
|
for (i = 0; i < 8; ++i) {
|
||||||
int_fast32_t a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4;
|
int_fast32_t a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
|
||||||
|
|
||||||
a0 *= W4;
|
a0 *= W4;
|
||||||
col[0] = a0 >> COL_SHIFT;
|
col[i] = a0 >> COL_SHIFT;
|
||||||
++col;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
l = lcol[0];
|
l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
|
||||||
r = lcol[1];
|
stq(l, col + 2 * 4); stq(r, col + 3 * 4);
|
||||||
lcol[ 2] = l; lcol[ 3] = r;
|
stq(l, col + 4 * 4); stq(r, col + 5 * 4);
|
||||||
lcol[ 4] = l; lcol[ 5] = r;
|
stq(l, col + 6 * 4); stq(r, col + 7 * 4);
|
||||||
lcol[ 6] = l; lcol[ 7] = r;
|
stq(l, col + 8 * 4); stq(r, col + 9 * 4);
|
||||||
lcol[ 8] = l; lcol[ 9] = r;
|
stq(l, col + 10 * 4); stq(r, col + 11 * 4);
|
||||||
lcol[10] = l; lcol[11] = r;
|
stq(l, col + 12 * 4); stq(r, col + 13 * 4);
|
||||||
lcol[12] = l; lcol[13] = r;
|
stq(l, col + 14 * 4); stq(r, col + 15 * 4);
|
||||||
lcol[14] = l; lcol[15] = r;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void simple_idct_axp(DCTELEM *block)
|
void simple_idct_axp(DCTELEM *block)
|
||||||
@ -275,22 +272,20 @@ void simple_idct_axp(DCTELEM *block)
|
|||||||
if (rowsZero) {
|
if (rowsZero) {
|
||||||
idct_col2(block);
|
idct_col2(block);
|
||||||
} else if (rowsConstant) {
|
} else if (rowsConstant) {
|
||||||
uint64_t *lblock = (uint64_t *) block;
|
|
||||||
|
|
||||||
idct_col(block);
|
idct_col(block);
|
||||||
for (i = 0; i < 8; i += 2) {
|
for (i = 0; i < 8; i += 2) {
|
||||||
uint64_t v = (uint16_t) block[i * 8];
|
uint64_t v = (uint16_t) block[0];
|
||||||
uint64_t w = (uint16_t) block[i * 8 + 8];
|
uint64_t w = (uint16_t) block[8];
|
||||||
|
|
||||||
v |= v << 16;
|
v |= v << 16;
|
||||||
w |= w << 16;
|
w |= w << 16;
|
||||||
v |= v << 32;
|
v |= v << 32;
|
||||||
w |= w << 32;
|
w |= w << 32;
|
||||||
lblock[0] = v;
|
stq(v, block + 0 * 4);
|
||||||
lblock[1] = v;
|
stq(v, block + 1 * 4);
|
||||||
lblock[2] = w;
|
stq(w, block + 2 * 4);
|
||||||
lblock[3] = w;
|
stq(w, block + 3 * 4);
|
||||||
lblock += 4;
|
block += 4 * 4;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (i = 0; i < 8; i++)
|
for (i = 0; i < 8; i++)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user