dsputil: create 16/32-bit dctcoef versions of some functions
High bit depth H.264 needs 32-bit transform coefficients, whereas dnxhd does not. This creates a conflict with the templated functions operating on DCTELEM data.

This patch adds a dct_bits field to DSPContext, which the caller sets before calling dsputil_init() to choose the coefficient element size (16 or 32 bits), and adds the required 16-bit and 32-bit variants of the affected functions.

Signed-off-by: Mans Rullgard <mans@mansr.com>
This commit is contained in:
parent
0a72533e98
commit
5cc2600964
@ -3159,13 +3159,13 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
|||||||
c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
|
c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth)
|
||||||
|
|
||||||
|
|
||||||
#define BIT_DEPTH_FUNCS(depth)\
|
#define BIT_DEPTH_FUNCS(depth, dct)\
|
||||||
c->draw_edges = FUNCC(draw_edges , depth);\
|
c->draw_edges = FUNCC(draw_edges , depth);\
|
||||||
c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
|
c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\
|
||||||
c->clear_block = FUNCC(clear_block , depth);\
|
c->clear_block = FUNCC(clear_block ## dct , depth);\
|
||||||
c->clear_blocks = FUNCC(clear_blocks , depth);\
|
c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\
|
||||||
c->add_pixels8 = FUNCC(add_pixels8 , depth);\
|
c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\
|
||||||
c->add_pixels4 = FUNCC(add_pixels4 , depth);\
|
c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\
|
||||||
c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
|
c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\
|
||||||
c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
|
c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\
|
||||||
\
|
\
|
||||||
@ -3199,15 +3199,23 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
|
|||||||
|
|
||||||
switch (avctx->bits_per_raw_sample) {
|
switch (avctx->bits_per_raw_sample) {
|
||||||
case 9:
|
case 9:
|
||||||
BIT_DEPTH_FUNCS(9);
|
if (c->dct_bits == 32) {
|
||||||
|
BIT_DEPTH_FUNCS(9, _32);
|
||||||
|
} else {
|
||||||
|
BIT_DEPTH_FUNCS(9, _16);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case 10:
|
case 10:
|
||||||
BIT_DEPTH_FUNCS(10);
|
if (c->dct_bits == 32) {
|
||||||
|
BIT_DEPTH_FUNCS(10, _32);
|
||||||
|
} else {
|
||||||
|
BIT_DEPTH_FUNCS(10, _16);
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
|
av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample);
|
||||||
case 8:
|
case 8:
|
||||||
BIT_DEPTH_FUNCS(8);
|
BIT_DEPTH_FUNCS(8, _16);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -219,6 +219,11 @@ void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int lin
|
|||||||
* DSPContext.
|
* DSPContext.
|
||||||
*/
|
*/
|
||||||
typedef struct DSPContext {
|
typedef struct DSPContext {
|
||||||
|
/**
|
||||||
|
* Size of DCT coefficients.
|
||||||
|
*/
|
||||||
|
int dct_bits;
|
||||||
|
|
||||||
/* pixel ops : interface with DCT */
|
/* pixel ops : interface with DCT */
|
||||||
void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
|
void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size);
|
||||||
void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
|
void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride);
|
||||||
|
@ -192,43 +192,66 @@ void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, i
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void FUNCC(add_pixels8)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
|
#define DCTELEM_FUNCS(dctcoef, suffix) \
|
||||||
{
|
static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels, \
|
||||||
int i;
|
DCTELEM *_block, \
|
||||||
pixel *restrict pixels = (pixel *restrict)_pixels;
|
int line_size) \
|
||||||
dctcoef *block = (dctcoef*)_block;
|
{ \
|
||||||
line_size /= sizeof(pixel);
|
int i; \
|
||||||
|
pixel *restrict pixels = (pixel *restrict)_pixels; \
|
||||||
for(i=0;i<8;i++) {
|
dctcoef *block = (dctcoef*)_block; \
|
||||||
pixels[0] += block[0];
|
line_size /= sizeof(pixel); \
|
||||||
pixels[1] += block[1];
|
\
|
||||||
pixels[2] += block[2];
|
for(i=0;i<8;i++) { \
|
||||||
pixels[3] += block[3];
|
pixels[0] += block[0]; \
|
||||||
pixels[4] += block[4];
|
pixels[1] += block[1]; \
|
||||||
pixels[5] += block[5];
|
pixels[2] += block[2]; \
|
||||||
pixels[6] += block[6];
|
pixels[3] += block[3]; \
|
||||||
pixels[7] += block[7];
|
pixels[4] += block[4]; \
|
||||||
pixels += line_size;
|
pixels[5] += block[5]; \
|
||||||
block += 8;
|
pixels[6] += block[6]; \
|
||||||
}
|
pixels[7] += block[7]; \
|
||||||
|
pixels += line_size; \
|
||||||
|
block += 8; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels, \
|
||||||
|
DCTELEM *_block, \
|
||||||
|
int line_size) \
|
||||||
|
{ \
|
||||||
|
int i; \
|
||||||
|
pixel *restrict pixels = (pixel *restrict)_pixels; \
|
||||||
|
dctcoef *block = (dctcoef*)_block; \
|
||||||
|
line_size /= sizeof(pixel); \
|
||||||
|
\
|
||||||
|
for(i=0;i<4;i++) { \
|
||||||
|
pixels[0] += block[0]; \
|
||||||
|
pixels[1] += block[1]; \
|
||||||
|
pixels[2] += block[2]; \
|
||||||
|
pixels[3] += block[3]; \
|
||||||
|
pixels += line_size; \
|
||||||
|
block += 4; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
static void FUNCC(clear_block ## suffix)(DCTELEM *block) \
|
||||||
|
{ \
|
||||||
|
memset(block, 0, sizeof(dctcoef)*64); \
|
||||||
|
} \
|
||||||
|
\
|
||||||
|
/** \
|
||||||
|
* memset(blocks, 0, sizeof(dctcoef)*6*64) \
|
||||||
|
*/ \
|
||||||
|
static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks) \
|
||||||
|
{ \
|
||||||
|
memset(blocks, 0, sizeof(dctcoef)*6*64); \
|
||||||
}
|
}
|
||||||
|
|
||||||
static void FUNCC(add_pixels4)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size)
|
DCTELEM_FUNCS(DCTELEM, _16)
|
||||||
{
|
#if BIT_DEPTH > 8
|
||||||
int i;
|
DCTELEM_FUNCS(dctcoef, _32)
|
||||||
pixel *restrict pixels = (pixel *restrict)_pixels;
|
#endif
|
||||||
dctcoef *block = (dctcoef*)_block;
|
|
||||||
line_size /= sizeof(pixel);
|
|
||||||
|
|
||||||
for(i=0;i<4;i++) {
|
|
||||||
pixels[0] += block[0];
|
|
||||||
pixels[1] += block[1];
|
|
||||||
pixels[2] += block[2];
|
|
||||||
pixels[3] += block[3];
|
|
||||||
pixels += line_size;
|
|
||||||
block += 4;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#define PIXOP2(OPNAME, OP) \
|
#define PIXOP2(OPNAME, OP) \
|
||||||
static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
|
||||||
@ -1231,16 +1254,3 @@ void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
|
|||||||
void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
|
void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) {
|
||||||
FUNCC(avg_pixels16)(dst, src, stride, 16);
|
FUNCC(avg_pixels16)(dst, src, stride, 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void FUNCC(clear_block)(DCTELEM *block)
|
|
||||||
{
|
|
||||||
memset(block, 0, sizeof(dctcoef)*64);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* memset(blocks, 0, sizeof(DCTELEM)*6*64)
|
|
||||||
*/
|
|
||||||
static void FUNCC(clear_blocks)(DCTELEM *blocks)
|
|
||||||
{
|
|
||||||
memset(blocks, 0, sizeof(dctcoef)*6*64);
|
|
||||||
}
|
|
||||||
|
@ -3702,6 +3702,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
|
|||||||
|
|
||||||
ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
|
ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma);
|
||||||
ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
|
ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma);
|
||||||
|
s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16;
|
||||||
dsputil_init(&s->dsp, s->avctx);
|
dsputil_init(&s->dsp, s->avctx);
|
||||||
} else {
|
} else {
|
||||||
av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
|
av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user