diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index 64eee3236b..f07e52352b 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -40,6 +40,12 @@ #include "aacenc.h" #include "aactab.h" +/** Frequency in Hz for lower limit of noise substitution **/ +#define NOISE_LOW_LIMIT 4000 + +/** Total number of usable codebooks **/ +#define CB_TOT 13 + /** bits needed to code codebook run value for long windows */ static const uint8_t run_value_bits_long[64] = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, @@ -57,6 +63,10 @@ static const uint8_t * const run_value_bits[2] = { run_value_bits_long, run_value_bits_short }; +/** Map to convert values from BandCodingPath index to a codebook index **/ +static const uint8_t aac_cb_out_map[CB_TOT] = {0,1,2,3,4,5,6,7,8,9,10,11,13}; +/** Inverse map to convert from codebooks to BandCodingPath indices **/ +static const uint8_t aac_cb_in_map[CB_TOT+1] = {0,1,2,3,4,5,6,7,8,9,10,11,0,12}; /** * Quantize one coefficient. @@ -108,7 +118,7 @@ static av_always_inline float quantize_and_encode_band_cost_template( const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, int *bits, int BT_ZERO, int BT_UNSIGNED, - int BT_PAIR, int BT_ESC) + int BT_PAIR, int BT_ESC, int BT_NOISE) { const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512; const float Q = ff_aac_pow2sf_tab [q_idx]; @@ -119,8 +129,6 @@ static av_always_inline float quantize_and_encode_band_cost_template( float cost = 0; const int dim = BT_PAIR ? 2 : 4; int resbits = 0; - const int range = aac_cb_range[cb]; - const int maxval = aac_cb_maxval[cb]; int off; if (BT_ZERO) { @@ -130,15 +138,22 @@ static av_always_inline float quantize_and_encode_band_cost_template( *bits = 0; return cost * lambda; } + if (BT_NOISE) { + for (i = 0; i < size; i++) + cost += in[i]*in[i]; + if (bits) + *bits = 0; + return cost * lambda; + } if (!scaled) { abs_pow34_v(s->scoefs, in, size); scaled = s->scoefs; } - quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, maxval); + quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, aac_cb_maxval[cb]); if (BT_UNSIGNED) { off = 0; } else { - off = maxval; + off = aac_cb_maxval[cb]; } for (i = 0; i < size; i += dim) { const float *vec; @@ -147,7 +162,7 @@ static av_always_inline float quantize_and_encode_band_cost_template( int curbits; float rd = 0.0f; for (j = 0; j < dim; j++) { - curidx *= range; + curidx *= aac_cb_range[cb]; curidx += quants[j] + off; } curbits = ff_aac_spectral_bits[cb-1][curidx]; @@ -207,8 +222,17 @@ static av_always_inline float quantize_and_encode_band_cost_template( return cost; } -#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC) \ -static float quantize_and_encode_band_cost_ ## NAME( \ +static float quantize_and_encode_band_cost_NONE(struct AACEncContext *s, PutBitContext *pb, + const float *in, const float *scaled, + int size, int scale_idx, int cb, + const float lambda, const float uplim, + int *bits) { + av_assert0(0); + return 0.0f; +} + +#define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE) \ +static float quantize_and_encode_band_cost_ ## NAME( \ struct AACEncContext *s, \ PutBitContext *pb, const float *in, \ const float *scaled, int size, int scale_idx, \ @@ -217,15 +241,16 @@ static float quantize_and_encode_band_cost_ ## NAME( return quantize_and_encode_band_cost_template( \ s, pb, in, scaled, size, scale_idx, \ BT_ESC ? ESC_BT : cb, lambda, uplim, bits, \ - BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC); \ + BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE); \ } -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0) -QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC, 0, 1, 1, 1) +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO, 1, 0, 0, 0, 0) +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0) +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0) +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0) +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0) +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC, 0, 1, 1, 1, 0) +QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1) static float (*const quantize_and_encode_band_cost_arr[])( struct AACEncContext *s, @@ -245,6 +270,8 @@ static float (*const quantize_and_encode_band_cost_arr[])( quantize_and_encode_band_cost_UPAIR, quantize_and_encode_band_cost_UPAIR, quantize_and_encode_band_cost_ESC, + quantize_and_encode_band_cost_NONE, /* CB 12 doesn't exist */ + quantize_and_encode_band_cost_NOISE, }; #define quantize_and_encode_band_cost( \ @@ -312,7 +339,7 @@ typedef struct BandCodingPath { static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda) { - BandCodingPath path[120][12]; + BandCodingPath path[120][CB_TOT]; int w, swb, cb, start, size; int i, j; const int max_sfb = sce->ics.max_sfb; @@ -325,7 +352,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce abs_pow34_v(s->scoefs, sce->coeffs, 1024); start = win*128; - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT; cb++) { path[0][cb].cost = 0.0f; path[0][cb].prev_idx = -1; path[0][cb].run = 0; @@ -333,7 +360,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce for (swb = 0; swb < max_sfb; swb++) { size = sce->ics.swb_sizes[swb]; if (sce->zeroes[win*16 + swb]) { - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT; cb++) { path[swb+1][cb].prev_idx = cb; path[swb+1][cb].cost = path[swb][cb].cost; path[swb+1][cb].run = path[swb][cb].run + 1; @@ -343,14 +370,14 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce int mincb = next_mincb; next_minrd = INFINITY; next_mincb = 0; - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT; cb++) { float cost_stay_here, cost_get_here; float rd = 0.0f; for (w = 0; w < group_len; w++) { FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb]; rd += quantize_band_cost(s, sce->coeffs + start + w*128, s->scoefs + start + w*128, size, - sce->sf_idx[(win+w)*16+swb], cb, + sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb], lambda / band->threshold, INFINITY, NULL); } cost_stay_here = path[swb][cb].cost + rd; @@ -379,7 +406,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce //convert resulting path from backward-linked list stack_len = 0; idx = 0; - for (cb = 1; cb < 12; cb++) + for (cb = 1; cb < CB_TOT; cb++) if (path[max_sfb][cb].cost < path[max_sfb][idx].cost) idx = cb; ppos = max_sfb; @@ -394,12 +421,13 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce //perform actual band info encoding start = 0; for (i = stack_len - 1; i >= 0; i--) { - put_bits(&s->pb, 4, stackcb[i]); + cb = aac_cb_out_map[stackcb[i]]; + put_bits(&s->pb, 4, cb); count = stackrun[i]; - memset(sce->zeroes + win*16 + start, !stackcb[i], count); + memset(sce->zeroes + win*16 + start, !cb, count); //XXX: memset when band_type is also uint8_t for (j = 0; j < count; j++) { - sce->band_type[win*16 + start] = stackcb[i]; + sce->band_type[win*16 + start] = cb; start++; } while (count >= run_esc) { @@ -413,7 +441,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, int win, int group_len, const float lambda) { - BandCodingPath path[120][12]; + BandCodingPath path[120][CB_TOT]; int w, swb, cb, start, size; int i, j; const int max_sfb = sce->ics.max_sfb; @@ -426,7 +454,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, abs_pow34_v(s->scoefs, sce->coeffs, 1024); start = win*128; - for (cb = 0; cb < 12; cb++) { + for (cb = 0; cb < CB_TOT; cb++) { path[0][cb].cost = run_bits+4; path[0][cb].prev_idx = -1; path[0][cb].run = 0; @@ -450,7 +478,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, } next_minbits = path[swb+1][0].cost; next_mincb = 0; - for (cb = 1; cb < 12; cb++) { + for (cb = 1; cb < CB_TOT; cb++) { path[swb+1][cb].cost = 61450; path[swb+1][cb].prev_idx = -1; path[swb+1][cb].run = 0; @@ -459,6 +487,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, float minbits = next_minbits; int mincb = next_mincb; int startcb = sce->band_type[win*16+swb]; + startcb = aac_cb_in_map[startcb]; next_minbits = INFINITY; next_mincb = 0; for (cb = 0; cb < startcb; cb++) { @@ -466,13 +495,20 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, path[swb+1][cb].prev_idx = -1; path[swb+1][cb].run = 0; } - for (cb = startcb; cb < 12; cb++) { + for (cb = startcb; cb < CB_TOT; cb++) { float cost_stay_here, cost_get_here; float bits = 0.0f; + if (cb == 12 && sce->band_type[win*16+swb] != NOISE_BT) { + path[swb+1][cb].cost = 61450; + path[swb+1][cb].prev_idx = -1; + path[swb+1][cb].run = 0; + continue; + } for (w = 0; w < group_len; w++) { bits += quantize_band_cost(s, sce->coeffs + start + w*128, s->scoefs + start + w*128, size, - sce->sf_idx[(win+w)*16+swb], cb, + sce->sf_idx[(win+w)*16+swb], + aac_cb_out_map[cb], 0, INFINITY, NULL); } cost_stay_here = path[swb][cb].cost + bits; @@ -501,7 +537,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, //convert resulting path from backward-linked list stack_len = 0; idx = 0; - for (cb = 1; cb < 12; cb++) + for (cb = 1; cb < CB_TOT; cb++) if (path[max_sfb][cb].cost < path[max_sfb][idx].cost) idx = cb; ppos = max_sfb; @@ -517,12 +553,13 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, //perform actual band info encoding start = 0; for (i = stack_len - 1; i >= 0; i--) { - put_bits(&s->pb, 4, stackcb[i]); + cb = aac_cb_out_map[stackcb[i]]; + put_bits(&s->pb, 4, cb); count = stackrun[i]; - memset(sce->zeroes + win*16 + start, !stackcb[i], count); + memset(sce->zeroes + win*16 + start, !cb, count); //XXX: memset when band_type is also uint8_t for (j = 0; j < count; j++) { - sce->band_type[win*16 + start] = stackcb[i]; + sce->band_type[win*16 + start] = cb; start++; } while (count >= run_esc) { @@ -711,9 +748,11 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, { int start = 0, i, w, w2, g; int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f); - float dists[128] = { 0 }, uplims[128]; + const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f; + float dists[128] = { 0 }, uplims[128] = { 0 }; float maxvals[128]; - int fflag, minscaler; + int noise_sf[128] = { 0 }; + int fflag, minscaler, minscaler_n; int its = 0; int allz = 0; float minthr = INFINITY; @@ -724,12 +763,14 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, //XXX: some heuristic to determine initial quantizers will reduce search time //determine zero bands and upper limits for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + start = 0; for (g = 0; g < sce->ics.num_swb; g++) { int nz = 0; - float uplim = 0.0f; + float uplim = 0.0f, energy = 0.0f; for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; uplim += band->threshold; + energy += band->energy; if (band->energy <= band->threshold || band->threshold == 0.0f) { sce->zeroes[(w+w2)*16+g] = 1; continue; @@ -737,10 +778,18 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, nz = 1; } uplims[w*16+g] = uplim *512; + if (s->options.pns && start*freq_mult > NOISE_LOW_LIMIT && energy < uplim * 1.2f) { + noise_sf[w*16+g] = av_clip(4+FFMIN(log2f(energy)*2,255), -100, 155); + sce->band_type[w*16+g] = NOISE_BT; + nz= 1; + } else { /** Band type will be determined by the twoloop algorithm */ + sce->band_type[w*16+g] = 0; + } sce->zeroes[w*16+g] = !nz; if (nz) minthr = FFMIN(minthr, uplim); allz |= nz; + start += sce->ics.swb_sizes[g]; } } for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { @@ -771,6 +820,7 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, do { int tbits, qstep; minscaler = sce->sf_idx[0]; + minscaler_n = sce->sf_idx[0]; //inner loop - quantize spectrum to fit into given number of bits qstep = its ? 1 : 32; do { @@ -785,7 +835,11 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, int cb; float dist = 0.0f; - if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) { + if (sce->band_type[w*16+g] == NOISE_BT) { + minscaler_n = FFMIN(minscaler_n, noise_sf[w*16+g]); + start += sce->ics.swb_sizes[g]; + continue; + } else if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) { start += sce->ics.swb_sizes[g]; continue; } @@ -828,9 +882,17 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, fflag = 0; minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF); + + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) + for (g = 0; g < sce->ics.num_swb; g++) + if (sce->band_type[w*16+g] == NOISE_BT) + sce->sf_idx[w*16+g] = av_clip(noise_sf[w*16+g], minscaler_n, minscaler_n + SCALE_MAX_DIFF); + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { for (g = 0; g < sce->ics.num_swb; g++) { int prevsc = sce->sf_idx[w*16+g]; + if (sce->band_type[w*16+g] == NOISE_BT) + continue; if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) { if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1)) sce->sf_idx[w*16+g]--; diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c index 7f6f4b923d..998a87517f 100644 --- a/libavcodec/aacenc.c +++ b/libavcodec/aacenc.c @@ -842,6 +842,9 @@ static const AVOption aacenc_options[] = { {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"}, {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"}, {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"}, + {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pns"}, + {"disable", "Disable PNS", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"}, + {"enable", "Enable PNS (Proof of concept)", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"}, {NULL} }; diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h index 0decb1dc2c..7c1f277fa1 100644 --- a/libavcodec/aacenc.h +++ b/libavcodec/aacenc.h @@ -42,6 +42,7 @@ typedef enum AACCoder { typedef struct AACEncOptions { int stereo_mode; int aac_coder; + int pns; } AACEncOptions; struct AACEncContext;