diff --git a/Changelog b/Changelog index 37d0cd0063..20110cec73 100644 --- a/Changelog +++ b/Changelog @@ -18,6 +18,7 @@ version : - ffplay dynamic volume control - displace filter - selectivecolor filter +- extensive native AAC encoder improvements version 2.8: diff --git a/libavcodec/aac.h b/libavcodec/aac.h index 17af49c766..37f98adb31 100644 --- a/libavcodec/aac.h +++ b/libavcodec/aac.h @@ -252,6 +252,7 @@ typedef struct SingleChannelElement { INTFLOAT sf[120]; ///< scalefactors int sf_idx[128]; ///< scalefactor indices (used by encoder) uint8_t zeroes[128]; ///< band is not coded (used by encoder) + uint8_t can_pns[128]; ///< band is allowed to PNS (informative) float is_ener[128]; ///< Intensity stereo pos (used by encoder) float pns_ener[128]; ///< Noise energy values (used by encoder) DECLARE_ALIGNED(32, INTFLOAT, pcoeffs)[1024]; ///< coefficients for IMDCT, pristine diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c index 10ea14b141..dafdc9fab8 100644 --- a/libavcodec/aaccoder.c +++ b/libavcodec/aaccoder.c @@ -33,7 +33,9 @@ #include "libavutil/libm.h" // brought forward to work around cygwin header breakage #include + #include "libavutil/mathematics.h" +#include "mathops.h" #include "avcodec.h" #include "put_bits.h" #include "aac.h" @@ -50,9 +52,6 @@ #include "libavcodec/aaccoder_twoloop.h" -/** Frequency in Hz for lower limit of noise substitution **/ -#define NOISE_LOW_LIMIT 4000 - /* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread * beyond which no PNS is used (since the SFBs contain tone rather than noise) */ #define NOISE_SPREAD_THRESHOLD 0.5073f @@ -124,7 +123,7 @@ static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce rd += quantize_band_cost(s, &sce->coeffs[start + w*128], &s->scoefs[start + w*128], size, sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb], - lambda / band->threshold, INFINITY, NULL, 0); + lambda / band->threshold, INFINITY, NULL, NULL, 0); } cost_stay_here = path[swb][cb].cost + 
rd; cost_get_here = minrd + rd + run_bits + 4; @@ -335,7 +334,7 @@ static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s, for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g], - q + q0, cb, lambda / band->threshold, INFINITY, NULL, 0); + q + q0, cb, lambda / band->threshold, INFINITY, NULL, NULL, 0); } minrd = FFMIN(minrd, dist); @@ -499,7 +498,7 @@ static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s, ESC_BT, lambda, INFINITY, - &b, + &b, NULL, 0); dist -= b; } @@ -588,12 +587,36 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne { FFPsyBand *band; int w, g, w2, i; + int wlen = 1024 / sce->ics.num_windows; + int bandwidth, cutoff; float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128]; float *NOR34 = &s->scoefs[3*128]; const float lambda = s->lambda; - const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f; + const float freq_mult = avctx->sample_rate*0.5f/wlen; const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda); - const float spread_threshold = NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f); + const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f)); + const float dist_bias = av_clipf(4.f * 120 / lambda, 0.25f, 4.0f); + const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f); + + int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate + / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels) + * (lambda / 120.f); + + /** Keep this in sync with twoloop's cutoff selection */ + float rate_bandwidth_multiplier = 1.5f; + int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE) + ? 
(refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024) + : (avctx->bit_rate / avctx->channels); + + frame_bit_rate *= 1.15f; + + if (avctx->cutoff > 0) { + bandwidth = avctx->cutoff; + } else { + bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate)); + } + + cutoff = bandwidth * 2 * wlen / avctx->sample_rate; memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type)); for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { @@ -602,32 +625,44 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne int noise_sfi; float dist1 = 0.0f, dist2 = 0.0f, noise_amp; float pns_energy = 0.0f, pns_tgt_energy, energy_ratio, dist_thresh; - float sfb_energy = 0.0f, threshold = 0.0f, spread = 0.0f; + float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f; + float min_energy = -1.0f, max_energy = 0.0f; const int start = wstart+sce->ics.swb_offset[g]; const float freq = (start-wstart)*freq_mult; const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f); - if (freq < NOISE_LOW_LIMIT || avctx->cutoff && freq >= avctx->cutoff) + if (freq < NOISE_LOW_LIMIT || (start-wstart) >= cutoff) continue; for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; sfb_energy += band->energy; - spread += band->spread; + spread = FFMIN(spread, band->spread); threshold += band->threshold; + if (!w2) { + min_energy = max_energy = band->energy; + } else { + min_energy = FFMIN(min_energy, band->energy); + max_energy = FFMAX(max_energy, band->energy); + } } /* Ramps down at ~8000Hz and loosens the dist threshold */ - dist_thresh = FFMIN(2.5f*NOISE_LOW_LIMIT/freq, 2.5f); + dist_thresh = av_clipf(2.5f*NOISE_LOW_LIMIT/freq, 0.5f, 2.5f) * dist_bias; - /* zero and energy close to threshold usually means hole avoidance, - * we do want to remain avoiding holes with PNS + /* PNS is acceptable when all of these are true: + * 1. high spread energy (noise-like band) + * 2. 
near-threshold energy (high PE means the random nature of PNS content will be noticed) + * 3. on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS) + * + * At this stage, point 2 is relaxed for zeroed bands near the noise threshold (hole avoidance is more important) */ if (((sce->zeroes[w*16+g] || !sce->band_alt[w*16+g]) && sfb_energy < threshold*sqrtf(1.5f/freq_boost)) || spread < spread_threshold || - (sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost)) { + (!sce->zeroes[w*16+g] && sce->band_alt[w*16+g] && sfb_energy > threshold*thr_mult*freq_boost) || + min_energy < pns_transient_energy_r * max_energy ) { sce->pns_ener[w*16+g] = sfb_energy; continue; } - pns_tgt_energy = sfb_energy*spread*spread/sce->ics.group_len[w]; + pns_tgt_energy = sfb_energy*FFMIN(1.0f, spread*spread); noise_sfi = av_clip(roundf(log2f(pns_tgt_energy)*2), -100, 155); /* Quantize */ noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO]; /* Dequantize */ for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { @@ -648,13 +683,18 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne sce->ics.swb_sizes[g], sce->sf_idx[(w+w2)*16+g], sce->band_alt[(w+w2)*16+g], - lambda/band->threshold, INFINITY, NULL, 0); - /* Estimate rd on average as 9 bits for CB and sf + spread energy * lambda/thr */ - dist2 += 9+band->energy/(band->spread*band->spread)*lambda/band->threshold; + lambda/band->threshold, INFINITY, NULL, NULL, 0); + /* Estimate rd on average as 5 bits for SF, 4 for the CB, plus spread energy * lambda/thr */ + dist2 += band->energy/(band->spread*band->spread)*lambda*dist_thresh/band->threshold; + } + if (g && sce->sf_idx[(w+w2)*16+g-1] == NOISE_BT) { + dist2 += 5; + } else { + dist2 += 9; } energy_ratio = pns_tgt_energy/pns_energy; /* Compensates for quantization error */ sce->pns_ener[w*16+g] = energy_ratio*pns_tgt_energy; - if (energy_ratio > 0.85f && energy_ratio < 1.25f && (sce->zeroes[w*16+g] || 
!sce->band_alt[w*16+g] || dist2*dist_thresh < dist1)) { + if (sce->zeroes[w*16+g] || !sce->band_alt[w*16+g] || (energy_ratio > 0.85f && energy_ratio < 1.25f && dist2 < dist1)) { sce->band_type[w*16+g] = NOISE_BT; sce->zeroes[w*16+g] = 0; } @@ -662,62 +702,203 @@ static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChanne } } +static void mark_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce) +{ + FFPsyBand *band; + int w, g, w2; + int wlen = 1024 / sce->ics.num_windows; + int bandwidth, cutoff; + const float lambda = s->lambda; + const float freq_mult = avctx->sample_rate*0.5f/wlen; + const float spread_threshold = FFMIN(0.75f, NOISE_SPREAD_THRESHOLD*FFMAX(0.5f, lambda/100.f)); + const float pns_transient_energy_r = FFMIN(0.7f, lambda / 140.f); + + int refbits = avctx->bit_rate * 1024.0 / avctx->sample_rate + / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels) + * (lambda / 120.f); + + /** Keep this in sync with twoloop's cutoff selection */ + float rate_bandwidth_multiplier = 1.5f; + int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE) + ? 
(refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024) + : (avctx->bit_rate / avctx->channels); + + frame_bit_rate *= 1.15f; + + if (avctx->cutoff > 0) { + bandwidth = avctx->cutoff; + } else { + bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate)); + } + + cutoff = bandwidth * 2 * wlen / avctx->sample_rate; + + memcpy(sce->band_alt, sce->band_type, sizeof(sce->band_type)); + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + for (g = 0; g < sce->ics.num_swb; g++) { + float sfb_energy = 0.0f, threshold = 0.0f, spread = 2.0f; + float min_energy = -1.0f, max_energy = 0.0f; + const int start = sce->ics.swb_offset[g]; + const float freq = start*freq_mult; + const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f); + if (freq < NOISE_LOW_LIMIT || start >= cutoff) { + sce->can_pns[w*16+g] = 0; + continue; + } + for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { + band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; + sfb_energy += band->energy; + spread = FFMIN(spread, band->spread); + threshold += band->threshold; + if (!w2) { + min_energy = max_energy = band->energy; + } else { + min_energy = FFMIN(min_energy, band->energy); + max_energy = FFMAX(max_energy, band->energy); + } + } + + /* PNS is acceptable when all of these are true: + * 1. high spread energy (noise-like band) + * 2. near-threshold energy (high PE means the random nature of PNS content will be noticed) + * 3. 
on short window groups, all windows have similar energy (variations in energy would be destroyed by PNS) + */ + sce->pns_ener[w*16+g] = sfb_energy; + if (sfb_energy < threshold*sqrtf(1.5f/freq_boost) || spread < spread_threshold || min_energy < pns_transient_energy_r * max_energy) { + sce->can_pns[w*16+g] = 0; + } else { + sce->can_pns[w*16+g] = 1; + } + } + } +} + static void search_for_ms(AACEncContext *s, ChannelElement *cpe) { - int start = 0, i, w, w2, g; + int start = 0, i, w, w2, g, sid_sf_boost; float M[128], S[128]; float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3; const float lambda = s->lambda; + const float mslambda = FFMIN(1.0f, lambda / 120.f); SingleChannelElement *sce0 = &cpe->ch[0]; SingleChannelElement *sce1 = &cpe->ch[1]; if (!cpe->common_window) return; for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) { + int min_sf_idx_mid = SCALE_MAX_POS; + int min_sf_idx_side = SCALE_MAX_POS; + for (g = 0; g < sce0->ics.num_swb; g++) { + if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT) + min_sf_idx_mid = FFMIN(min_sf_idx_mid, sce0->sf_idx[w*16+g]); + if (!sce1->zeroes[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT) + min_sf_idx_side = FFMIN(min_sf_idx_side, sce1->sf_idx[w*16+g]); + } + start = 0; for (g = 0; g < sce0->ics.num_swb; g++) { + float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f; + cpe->ms_mask[w*16+g] = 0; if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) { - float dist1 = 0.0f, dist2 = 0.0f; + float Mmax = 0.0f, Smax = 0.0f; + + /* Must compute mid/side SF and book for the whole window group */ for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { - FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; - FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; - float minthr = FFMIN(band0->threshold, band1->threshold); - float maxthr = FFMAX(band0->threshold, band1->threshold); for (i = 0; i < 
sce0->ics.swb_sizes[g]; i++) { M[i] = (sce0->coeffs[start+(w+w2)*128+i] + sce1->coeffs[start+(w+w2)*128+i]) * 0.5; S[i] = M[i] - sce1->coeffs[start+(w+w2)*128+i]; } - abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); - abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); - abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]); - abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]); - dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128], - L34, - sce0->ics.swb_sizes[g], - sce0->sf_idx[(w+w2)*16+g], - sce0->band_type[(w+w2)*16+g], - lambda / band0->threshold, INFINITY, NULL, 0); - dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128], - R34, - sce1->ics.swb_sizes[g], - sce1->sf_idx[(w+w2)*16+g], - sce1->band_type[(w+w2)*16+g], - lambda / band1->threshold, INFINITY, NULL, 0); - dist2 += quantize_band_cost(s, M, - M34, - sce0->ics.swb_sizes[g], - sce0->sf_idx[(w+w2)*16+g], - sce0->band_type[(w+w2)*16+g], - lambda / maxthr, INFINITY, NULL, 0); - dist2 += quantize_band_cost(s, S, - S34, - sce1->ics.swb_sizes[g], - sce1->sf_idx[(w+w2)*16+g], - sce1->band_type[(w+w2)*16+g], - lambda / minthr, INFINITY, NULL, 0); + abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]); + abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]); + for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) { + Mmax = FFMAX(Mmax, M34[i]); + Smax = FFMAX(Smax, S34[i]); + } + } + + for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) { + float dist1 = 0.0f, dist2 = 0.0f; + int B0 = 0, B1 = 0; + int minidx; + int mididx, sididx; + int midcb, sidcb; + + minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]); + mididx = av_clip(minidx, min_sf_idx_mid, min_sf_idx_mid + SCALE_MAX_DIFF); + sididx = av_clip(minidx - sid_sf_boost * 3, min_sf_idx_side, min_sf_idx_side + SCALE_MAX_DIFF); + midcb = find_min_book(Mmax, mididx); + sidcb = find_min_book(Smax, sididx); + + if ((mididx > minidx) || (sididx > minidx)) { + /* scalefactor range violation, bad stuff, will decrease quality 
unacceptably */ + continue; + } + + /* No CB can be zero */ + midcb = FFMAX(1,midcb); + sidcb = FFMAX(1,sidcb); + + for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) { + FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; + FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; + float minthr = FFMIN(band0->threshold, band1->threshold); + int b1,b2,b3,b4; + for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { + M[i] = (sce0->coeffs[start+(w+w2)*128+i] + + sce1->coeffs[start+(w+w2)*128+i]) * 0.5; + S[i] = M[i] + - sce1->coeffs[start+(w+w2)*128+i]; + } + + abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); + abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]); + abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]); + abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]); + dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128], + L34, + sce0->ics.swb_sizes[g], + sce0->sf_idx[(w+w2)*16+g], + sce0->band_type[(w+w2)*16+g], + lambda / band0->threshold, INFINITY, &b1, NULL, 0); + dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128], + R34, + sce1->ics.swb_sizes[g], + sce1->sf_idx[(w+w2)*16+g], + sce1->band_type[(w+w2)*16+g], + lambda / band1->threshold, INFINITY, &b2, NULL, 0); + dist2 += quantize_band_cost(s, M, + M34, + sce0->ics.swb_sizes[g], + mididx, /* cost mid with the candidate scalefactor/codebook of this boost step, i.e. what gets stored if M/S wins */ + midcb, + lambda / minthr, INFINITY, &b3, NULL, 0); + dist2 += quantize_band_cost(s, S, + S34, + sce1->ics.swb_sizes[g], + sididx, /* likewise for side, including the sid_sf_boost offset */ + sidcb, + mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0); + B0 += b1+b2; + B1 += b3+b4; + dist1 -= b1+b2; /* remove only this window's bit counts; B0/B1 are running totals over the group */ + dist2 -= b3+b4; + } + cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0; + if (cpe->ms_mask[w*16+g]) { + /* Setting the M/S mask is useful with I/S, but only the flag */ + if (!cpe->is_mask[w*16+g]) { + sce0->sf_idx[w*16+g] = mididx; + sce1->sf_idx[w*16+g] = sididx; + sce0->band_type[w*16+g] = midcb; + sce1->band_type[w*16+g] = 
sidcb; + } + break; + } else if (B1 > B0) { + /* More boost won't fix this */ + break; + } } - cpe->ms_mask[w*16+g] = dist2 < dist1; } start += sce0->ics.swb_sizes[g]; } @@ -736,6 +917,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { ff_aac_apply_tns, set_special_band_scalefactors, search_for_pns, + mark_pns, ff_aac_search_for_tns, search_for_ms, ff_aac_search_for_is, @@ -752,6 +934,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { ff_aac_apply_tns, set_special_band_scalefactors, search_for_pns, + mark_pns, ff_aac_search_for_tns, search_for_ms, ff_aac_search_for_is, @@ -768,6 +951,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { ff_aac_apply_tns, set_special_band_scalefactors, search_for_pns, + mark_pns, ff_aac_search_for_tns, search_for_ms, ff_aac_search_for_is, @@ -784,6 +968,7 @@ AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = { ff_aac_apply_tns, set_special_band_scalefactors, search_for_pns, + mark_pns, ff_aac_search_for_tns, search_for_ms, ff_aac_search_for_is, diff --git a/libavcodec/aaccoder_trellis.h b/libavcodec/aaccoder_trellis.h index 7d685ebe8c..6187692479 100644 --- a/libavcodec/aaccoder_trellis.h +++ b/libavcodec/aaccoder_trellis.h @@ -129,7 +129,7 @@ static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce, &s->scoefs[start + w*128], size, sce->sf_idx[win*16+swb], aac_cb_out_map[cb], - 0, INFINITY, NULL, 0); + 0, INFINITY, NULL, NULL, 0); } cost_stay_here = path[swb][cb].cost + bits; cost_get_here = minbits + bits + run_bits + 4; diff --git a/libavcodec/aaccoder_twoloop.h b/libavcodec/aaccoder_twoloop.h index 5ac09dc9cc..21a4aed6cb 100644 --- a/libavcodec/aaccoder_twoloop.h +++ b/libavcodec/aaccoder_twoloop.h @@ -22,7 +22,7 @@ /** * @file * AAC encoder twoloop coder - * @author Konstantin Shishkov + * @author Konstantin Shishkov, Claudio Freire */ /** @@ -34,6 +34,7 @@ * - abs_pow34_v * - find_max_val * - find_min_book + * - find_form_factor */ #ifndef AVCODEC_AACCODER_TWOLOOP_H @@ -41,6 +42,7 
@@ #include #include "libavutil/mathematics.h" +#include "mathops.h" #include "avcodec.h" #include "put_bits.h" #include "aac.h" @@ -49,6 +51,20 @@ #include "aacenctab.h" #include "aac_tablegen_decl.h" +/** Frequency in Hz for lower limit of noise substitution **/ +#define NOISE_LOW_LIMIT 4000 + +#define sclip(x) av_clip(x,60,218) + + +static av_always_inline int ff_pns_bits(const SingleChannelElement *sce, int w, int g) +{ + if (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) { + return 9; + } else { + return 5; + } +} /** * two-loop quantizers search taken from ISO 13818-7 Appendix C @@ -58,51 +74,219 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, SingleChannelElement *sce, const float lambda) { - int start = 0, i, w, w2, g; - int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f); - float dists[128] = { 0 }, uplims[128] = { 0 }; - float maxvals[128]; - int fflag, minscaler; - int its = 0; - int allz = 0; - float minthr = INFINITY; + int start = 0, i, w, w2, g, recomprd; + int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate + / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels) + * (lambda / 120.f); + int refbits = destbits; + int toomanybits, toofewbits; + char nzs[128]; + int maxsf[128]; + float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128]; + float maxvals[128], spread_thr_r[128]; + float min_spread_thr_r, max_spread_thr_r; - // for values above this the decoder might end up in an endless loop - // due to always having more bits than what can be encoded. + /** + * rdlambda controls the maximum tolerated distortion. Twoloop + * will keep iterating until it fails to lower it or it reaches + * ulimit * rdlambda. Keeping it low increases quality on difficult + * signals, but lower it too much, and bits will be taken from weak + * signals, creating "holes". A balance is necessary. 
+ * rdmax and rdmin specify the relative deviation from rdlambda + * allowed for tonality compensation + */ + float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f); + const float nzslope = 1.5f; + float rdmin = 0.03125f; + float rdmax = 1.0f; + + /** + * sfoffs controls an offset of optimum allocation that will be + * applied based on lambda. Keep it real and modest, the loop + * will take care of the rest, this just accelerates convergence + */ + float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10); + + int fflag, minscaler, maxscaler, nminscaler, minrdsf; + int its = 0; + int maxits = 30; + int allz = 0; + int tbits; + int cutoff = 1024; + int pns_start_pos; + + /** + * zeroscale controls a multiplier of the threshold, if band energy + * is below this, a zero is forced. Keep it lower than 1, unless + * low lambda is used, because energy < threshold doesn't mean there's + * no audible signal outright, it's just energy. Also make it rise + * slower than rdlambda, as rdscale has due compensation with + * noisy band deprioritization below, whereas zeroing logic is rather dumb + */ + float zeroscale; + if (lambda > 120.f) { + zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f); + } else { + zeroscale = 1.f; + } + + if (s->psy.bitres.alloc >= 0) { + /** + * Psy granted us extra bits to use, from the reservoir + * adjust for lambda except what psy already did + */ + destbits = s->psy.bitres.alloc + * (lambda / (avctx->global_quality ? avctx->global_quality : 120)); + } + + if (avctx->flags & CODEC_FLAG_QSCALE) { + /** + * Constant Q-scale doesn't compensate MS coding on its own + * No need to be overly precise, this only controls RD + * adjustment CB limits when going overboard + */ + if (s->options.stereo_mode && s->cur_type == TYPE_CPE) + destbits *= 2; + + /** + * When using a constant Q-scale, don't adjust bits, just use RD + * Don't let it go overboard, though... 
8x psy target is enough + */ + toomanybits = 5800; + toofewbits = destbits / 16; + + /** Don't offset scalers, just RD */ + sfoffs = sce->ics.num_windows - 1; + rdlambda = sqrtf(rdlambda); + + /** search further */ + maxits *= 2; + } else { + /** When using ABR, be strict */ + toomanybits = destbits + destbits/16; + toofewbits = destbits - destbits/4; + + sfoffs = 0; + rdlambda = sqrtf(rdlambda); + } + + /** and zero out above cutoff frequency */ + { + int wlen = 1024 / sce->ics.num_windows; + int bandwidth; + + /** + * Scale, psy gives us constant quality, this LP only scales + * bitrate by lambda, so we save bits on subjectively unimportant HF + * rather than increase quantization noise. Adjust nominal bitrate + * to effective bitrate according to encoding parameters, + * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate. + */ + float rate_bandwidth_multiplier = 1.5f; + int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE) + ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024) + : (avctx->bit_rate / avctx->channels); + + /** Compensate for extensions that increase efficiency */ + if (s->options.pns || s->options.intensity_stereo) + frame_bit_rate *= 1.15f; + + if (avctx->cutoff > 0) { + bandwidth = avctx->cutoff; + } else { + bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate)); + } + + cutoff = bandwidth * 2 * wlen / avctx->sample_rate; + pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate; + } + + /** + * for values above this the decoder might end up in an endless loop + * due to always having more bits than what can be encoded. 
+ */ destbits = FFMIN(destbits, 5800); - //XXX: some heuristic to determine initial quantizers will reduce search time - //determine zero bands and upper limits + toomanybits = FFMIN(toomanybits, 5800); + toofewbits = FFMIN(toofewbits, 5800); + /** + * XXX: some heuristic to determine initial quantizers will reduce search time + * determine zero bands and upper distortion limits + */ + min_spread_thr_r = -1; + max_spread_thr_r = -1; for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { - for (g = 0; g < sce->ics.num_swb; g++) { + for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) { int nz = 0; - float uplim = 0.0f, energy = 0.0f; + float uplim = 0.0f, energy = 0.0f, spread = 0.0f; for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; - uplim += band->threshold; - energy += band->energy; - if (band->energy <= band->threshold || band->threshold == 0.0f) { + if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) { sce->zeroes[(w+w2)*16+g] = 1; continue; } nz = 1; } - uplims[w*16+g] = uplim *512; + if (!nz) { + uplim = 0.0f; + } else { + nz = 0; + for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { + FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g]; + if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) + continue; + uplim += band->threshold; + energy += band->energy; + spread += band->spread; + nz++; + } + } + uplims[w*16+g] = uplim; + energies[w*16+g] = energy; + nzs[w*16+g] = nz; sce->zeroes[w*16+g] = !nz; - if (nz) - minthr = FFMIN(minthr, uplim); allz |= nz; + if (nz) { + spread_thr_r[w*16+g] = energy * nz / (uplim * spread); + if (min_spread_thr_r < 0) { + min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g]; + } else { + min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]); + max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]); + } + } } } + + /** 
Compute initial scalers */ + minscaler = 65535; for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { for (g = 0; g < sce->ics.num_swb; g++) { if (sce->zeroes[w*16+g]) { sce->sf_idx[w*16+g] = SCALE_ONE_POS; continue; } - sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59); + /** + * log2f-to-distortion ratio is, technically, 2 (1.5 dB = 4, but it's power vs level so it's 2). + * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion, + * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus + * more robust. + */ + sce->sf_idx[w*16+g] = av_clip( + SCALE_ONE_POS + + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g]) + + sfoffs, + 60, SCALE_MAX_POS); + minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]); } } + /** Clip */ + minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512); + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) + for (g = 0; g < sce->ics.num_swb; g++) + if (!sce->zeroes[w*16+g]) + sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1); + if (!allz) return; abs_pow34_v(s->scoefs, sce->coeffs, 1024); @@ -116,15 +300,66 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, } } + /** + * Scale uplims to match rate distortion to quality + * by applying noisy band deprioritization and tonal band prioritization. + * Maxval-energy ratio gives us an idea of how noisy/tonal the band is. + * If maxval^2 ~ energy, then that band is mostly noise, and we can relax + * rate distortion requirements. + */ + memcpy(euplims, uplims, sizeof(euplims)); + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + /** psy already prioritizes transients to some extent */ + float de_psy_factor = (sce->ics.num_windows > 1) ? 
8.0f / sce->ics.group_len[w] : 1.0f; + start = w*128; + for (g = 0; g < sce->ics.num_swb; g++) { + if (nzs[w*16+g] > 0) { /* nzs[] is filled per window group at w*16+g, so read it with the same index */ + float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f)); + float energy2uplim = find_form_factor( + sce->ics.group_len[w], sce->ics.swb_sizes[g], + uplims[w*16+g] / (nzs[w*16+g] * sce->ics.swb_sizes[g]), /* per-coefficient average: swb_sizes[] is indexed by band, not by window */ + sce->coeffs + start, + nzslope * cleanup_factor); + energy2uplim *= de_psy_factor; + if (!(avctx->flags & CODEC_FLAG_QSCALE)) { + /** In ABR, we need to prioritize less and let rate control do its thing */ + energy2uplim = sqrtf(energy2uplim); + } + energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim)); + uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax) + * sce->ics.group_len[w]; + + energy2uplim = find_form_factor( + sce->ics.group_len[w], sce->ics.swb_sizes[g], + uplims[w*16+g] / (nzs[w*16+g] * sce->ics.swb_sizes[g]), + sce->coeffs + start, + 2.0f); + energy2uplim *= de_psy_factor; + if (!(avctx->flags & CODEC_FLAG_QSCALE)) { + /** In ABR, we need to prioritize less and let rate control do its thing */ + energy2uplim = sqrtf(energy2uplim); + } + energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim)); + euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w], + 0.5f, 1.0f); + } + start += sce->ics.swb_sizes[g]; + } + } + + for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i) + maxsf[i] = SCALE_MAX_POS; + //perform two-loop search //outer loop - improve quality do { - int tbits, qstep; - minscaler = sce->sf_idx[0]; //inner loop - quantize spectrum to fit into given number of bits - qstep = its ? 1 : 32; + int overdist; + int qstep = its ? 
1 : 32; do { int prev = -1; + int changed = 0; + recomprd = 0; tbits = 0; for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { start = w*128; @@ -134,15 +369,20 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, int bits = 0; int cb; float dist = 0.0f; + float qenergy = 0.0f; if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) { start += sce->ics.swb_sizes[g]; + if (sce->can_pns[w*16+g]) { + /** PNS isn't free */ + tbits += ff_pns_bits(sce, w, g); + } continue; } - minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]); cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { int b; + float sqenergy; dist += quantize_band_cost(s, coefs + w2*128, scaled + w2*128, sce->ics.swb_sizes[g], @@ -150,54 +390,309 @@ static void search_for_quantizers_twoloop(AVCodecContext *avctx, cb, 1.0f, INFINITY, - &b, + &b, &sqenergy, 0); bits += b; + qenergy += sqenergy; } dists[w*16+g] = dist - bits; + qenergies[w*16+g] = qenergy; if (prev != -1) { - bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO]; + int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO; + av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF); + bits += ff_aac_scalefactor_bits[sfdiff]; } tbits += bits; start += sce->ics.swb_sizes[g]; prev = sce->sf_idx[w*16+g]; } } - if (tbits > destbits) { - for (i = 0; i < 128; i++) - if (sce->sf_idx[i] < 218 - qstep) - sce->sf_idx[i] += qstep; - } else { - for (i = 0; i < 128; i++) - if (sce->sf_idx[i] > 60 - qstep) - sce->sf_idx[i] -= qstep; + if (tbits > toomanybits) { + recomprd = 1; + for (i = 0; i < 128; i++) { + if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) { + int maxsf_i = (tbits > 5800) ? 
SCALE_MAX_POS : maxsf[i]; + int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep); + if (new_sf != sce->sf_idx[i]) { + sce->sf_idx[i] = new_sf; + changed = 1; + } + } + } + } else if (tbits < toofewbits) { + recomprd = 1; + for (i = 0; i < 128; i++) { + if (sce->sf_idx[i] > SCALE_ONE_POS) { + int new_sf = FFMAX(SCALE_ONE_POS, sce->sf_idx[i] - qstep); + if (new_sf != sce->sf_idx[i]) { + sce->sf_idx[i] = new_sf; + changed = 1; + } + } + } } qstep >>= 1; - if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217) + if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed) qstep = 1; } while (qstep); - fflag = 0; - minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF); + overdist = 1; + for (i = 0; i < 2 && (overdist || recomprd); ++i) { + if (recomprd) { + /** Must recompute distortion */ + int prev = -1; + tbits = 0; + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + start = w*128; + for (g = 0; g < sce->ics.num_swb; g++) { + const float *coefs = sce->coeffs + start; + const float *scaled = s->scoefs + start; + int bits = 0; + int cb; + float dist = 0.0f; + float qenergy = 0.0f; + if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) { + start += sce->ics.swb_sizes[g]; + if (sce->can_pns[w*16+g]) { + /** PNS isn't free */ + tbits += ff_pns_bits(sce, w, g); + } + continue; + } + cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); + for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { + int b; + float sqenergy; + dist += quantize_band_cost(s, coefs + w2*128, + scaled + w2*128, + sce->ics.swb_sizes[g], + sce->sf_idx[w*16+g], + cb, + 1.0f, + INFINITY, + &b, &sqenergy, + 0); + bits += b; + qenergy += sqenergy; + } + dists[w*16+g] = dist - bits; + qenergies[w*16+g] = qenergy; + if (prev != -1) { + int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO; + av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF); + bits += ff_aac_scalefactor_bits[sfdiff]; + } + tbits += bits; + start += sce->ics.swb_sizes[g]; + prev = 
sce->sf_idx[w*16+g]; + } + } + } + if (!i && s->options.pns && its > maxits/2) { + float maxoverdist = 0.0f; + overdist = recomprd = 0; + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + float ovrfactor = 2.f+(maxits-its)*16.f/maxits; + for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) { + if (!sce->zeroes[w*16+g] && dists[w*16+g] > uplims[w*16+g]*ovrfactor) { + float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]); + maxoverdist = FFMAX(maxoverdist, ovrdist); + overdist++; + } + } + } + if (overdist) { + /* We have overdistorted bands, trade for zeroes (that can be noise) + * Zero the bands in the lowest 1.25% spread-energy-threshold ranking + */ + float minspread = max_spread_thr_r; + float maxspread = min_spread_thr_r; + float zspread; + int zeroable = 0; + int zeroed = 0; + int maxzeroed; + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) { + if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) { + minspread = FFMIN(minspread, spread_thr_r[w*16+g]); + maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]); + zeroable++; + } + } + } + zspread = (maxspread-minspread) * 0.0125f + minspread; + zspread = FFMIN(maxoverdist, zspread); + maxzeroed = zeroable * its / (2 * maxits); + for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) { + if (sce->ics.swb_offset[g] < pns_start_pos) + continue; + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread) { + sce->zeroes[w*16+g] = 1; + sce->band_type[w*16+g] = 0; + zeroed++; + } + } + } + if (zeroed) + recomprd = 1; + } else { + overdist = 0; + } + } + } + + minscaler = SCALE_MAX_POS; + maxscaler = 0; for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + for (g = 0; g < sce->ics.num_swb; g++) { + if (!sce->zeroes[w*16+g]) { + 
minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]); + maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]); + } + } + } + + fflag = 0; + minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512); + minrdsf = FFMAX3(60, minscaler - 1, maxscaler - SCALE_MAX_DIFF - 1); + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + /** Start with big steps, end up fine-tuning */ + int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10; + int edepth = depth+2; + float uplmax = its / (maxits*0.25f) + 1.0f; + uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f; + start = w * 128; for (g = 0; g < sce->ics.num_swb; g++) { int prevsc = sce->sf_idx[w*16+g]; - if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) { - if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1)) - sce->sf_idx[w*16+g]--; - else //Try to make sure there is some energy in every band - sce->sf_idx[w*16+g]-=2; + int minrdsfboost = (sce->ics.num_windows > 1) ? 
av_clip(g-4, -2, 0) : av_clip(g-16, -4, 0); + if (!sce->zeroes[w*16+g]) { + const float *coefs = sce->coeffs + start; + const float *scaled = s->scoefs + start; + int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); + if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > minrdsf) { + /* Try to make sure there is some energy in every nonzero band + * NOTE: This algorithm must be forcibly imbalanced, pushing harder + * on holes or more distorted bands at first, otherwise there's + * no net gain (since the next iteration will offset all bands + * on the opposite direction to compensate for extra bits) + */ + for (i = 0; i < edepth; ++i) { + int cb, bits; + float dist, qenergy; + int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1); + cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); + dist = qenergy = 0.f; + bits = 0; + if (!cb) { + maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]); + } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) { + break; + } + for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { + int b; + float sqenergy; + dist += quantize_band_cost(s, coefs + w2*128, + scaled + w2*128, + sce->ics.swb_sizes[g], + sce->sf_idx[w*16+g]-1, + cb, + 1.0f, + INFINITY, + &b, &sqenergy, + 0); + bits += b; + qenergy += sqenergy; + } + sce->sf_idx[w*16+g]--; + dists[w*16+g] = dist - bits; + qenergies[w*16+g] = qenergy; + if (mb && (sce->sf_idx[w*16+g] < (minrdsf+minrdsfboost) || ( + (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g])) + && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g]) + ) )) { + break; + } + } + } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < maxscaler + && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g])) + && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g]) + ) { + /** Um... over target. Save bits for more important stuff. 
*/ + for (i = 0; i < depth; ++i) { + int cb, bits; + float dist, qenergy; + cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1); + if (cb > 0) { + dist = qenergy = 0.f; + bits = 0; + for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) { + int b; + float sqenergy; + dist += quantize_band_cost(s, coefs + w2*128, + scaled + w2*128, + sce->ics.swb_sizes[g], + sce->sf_idx[w*16+g]+1, + cb, + 1.0f, + INFINITY, + &b, &sqenergy, + 0); + bits += b; + qenergy += sqenergy; + } + dist -= bits; + if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) { + sce->sf_idx[w*16+g]++; + dists[w*16+g] = dist; + qenergies[w*16+g] = qenergy; + } else { + break; + } + } else { + maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]); + break; + } + } + } } - sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF); - sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219); + sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minrdsf, minscaler + SCALE_MAX_DIFF); + sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], SCALE_MAX_POS - SCALE_DIV_512); if (sce->sf_idx[w*16+g] != prevsc) fflag = 1; + nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]); sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); + start += sce->ics.swb_sizes[g]; + } + } + if (nminscaler < minscaler) { + /** Decreased some scalers below minscaler. Must re-clamp. 
*/ + minscaler = nminscaler; + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + for (g = 0; g < sce->ics.num_swb; g++) { + sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF); + sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); + } } } its++; - } while (fflag && its < 10); + } while (fflag && its < maxits); + + for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) { + /** Make sure proper codebooks are set */ + for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) { + if (!sce->zeroes[w*16+g]) { + sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]); + if (sce->band_type[w*16+g] <= 0) { + sce->zeroes[w*16+g] = 1; + sce->band_type[w*16+g] = 0; + } + } else { + sce->band_type[w*16+g] = 0; + } + } + } } #endif /* AVCODEC_AACCODER_TWOLOOP_H */ diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c index 1b95ebd755..3e21bfffa0 100644 --- a/libavcodec/aacenc.c +++ b/libavcodec/aacenc.c @@ -258,6 +258,8 @@ static void apply_intensity_stereo(ChannelElement *cpe) start += ics->swb_sizes[g]; continue; } + if (cpe->ms_mask[w*16 + g]) + p *= -1; for (i = 0; i < ics->swb_sizes[g]; i++) { float sum = (cpe->ch[0].coeffs[start+i] + p*cpe->ch[1].coeffs[start+i])*scale; cpe->ch[0].coeffs[start+i] = sum; @@ -279,7 +281,7 @@ static void apply_mid_side_stereo(ChannelElement *cpe) for (w2 = 0; w2 < ics->group_len[w]; w2++) { int start = (w+w2) * 128; for (g = 0; g < ics->num_swb; g++) { - if (!cpe->ms_mask[w*16 + g]) { + if (!cpe->ms_mask[w*16 + g] && !cpe->is_mask[w*16 + g]) { start += ics->swb_sizes[g]; continue; } @@ -490,6 +492,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, ChannelElement *cpe; SingleChannelElement *sce; int i, its, ch, w, chans, tag, start_ch, ret, frame_bits; + int target_bits, rate_bits, too_many_bits, too_few_bits; int ms_mode = 0, is_mode = 0, tns_mode = 0, pred_mode = 0; int 
chan_el_counter[4]; FFPsyWindowInfo windows[AAC_MAX_CHANNELS]; @@ -583,8 +586,6 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, return ret; frame_bits = its = 0; do { - int target_bits, too_many_bits, too_few_bits; - init_put_bits(&s->pb, avpkt->data, avpkt->size); if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT)) @@ -618,12 +619,15 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, s->psy.model->analyze(&s->psy, start_ch, coeffs, wi); if (s->psy.bitres.alloc > 0) { /* Lambda unused here on purpose, we need to take psy's unscaled allocation */ - target_bits += s->psy.bitres.alloc; + target_bits += s->psy.bitres.alloc + * (s->lambda / (avctx->global_quality ? avctx->global_quality : 120)); s->psy.bitres.alloc /= chans; } s->cur_type = tag; for (ch = 0; ch < chans; ch++) { s->cur_channel = start_ch + ch; + if (s->options.pns && s->coder->mark_pns) + s->coder->mark_pns(s, avctx, &cpe->ch[ch]); s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda); } if (chans > 1 @@ -680,8 +684,6 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, s->coder->search_for_ms(s, cpe); else if (cpe->common_window) memset(cpe->ms_mask, 1, sizeof(cpe->ms_mask)); - for (w = 0; w < 128; w++) - cpe->ms_mask[w] = cpe->is_mask[w] ? 
0 : cpe->ms_mask[w]; apply_mid_side_stereo(cpe); } adjust_frame_information(cpe, chans); @@ -708,23 +710,25 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, } /* rate control stuff - * target either the nominal bitrate, or what psy's bit reservoir says to target - * whichever is greatest + * allow between the nominal bitrate, and what psy's bit reservoir says to target + * but drift towards the nominal bitrate always */ - frame_bits = put_bits_count(&s->pb); - target_bits = FFMAX(target_bits, avctx->bit_rate * 1024 / avctx->sample_rate); - target_bits = FFMIN(target_bits, 6144 * s->channels - 3); + rate_bits = avctx->bit_rate * 1024 / avctx->sample_rate; + rate_bits = FFMIN(rate_bits, 6144 * s->channels - 3); + too_many_bits = FFMAX(target_bits, rate_bits); + too_many_bits = FFMIN(too_many_bits, 6144 * s->channels - 3); + too_few_bits = FFMIN(FFMAX(rate_bits - rate_bits/4, target_bits), too_many_bits); /* When using ABR, be strict (but only for increasing) */ - too_many_bits = target_bits + target_bits/2; - too_few_bits = target_bits - target_bits/8; + too_few_bits = too_few_bits - too_few_bits/8; + too_many_bits = too_many_bits + too_many_bits/2; if ( its == 0 /* for steady-state Q-scale tracking */ || (its < 5 && (frame_bits < too_few_bits || frame_bits > too_many_bits)) || frame_bits >= 6144 * s->channels - 3 ) { - float ratio = ((float)target_bits) / frame_bits; + float ratio = ((float)rate_bits) / frame_bits; if (frame_bits >= too_few_bits && frame_bits <= too_many_bits) { /* @@ -742,7 +746,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt, s->lambda = FFMIN(s->lambda * ratio, 65536.f); /* Keep iterating if we must reduce and lambda is in the sky */ - if (s->lambda < 300.f || ratio > 0.9f) { + if ((s->lambda < 300.f || ratio > 0.9f) && (s->lambda > 10.f || ratio < 1.1f)) { break; } else { if (is_mode || ms_mode || tns_mode || pred_mode) { @@ -764,6 +768,8 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket 
*avpkt, put_bits(&s->pb, 3, TYPE_END); flush_put_bits(&s->pb); avctx->frame_bits = put_bits_count(&s->pb); + s->lambda_sum += s->lambda; + s->lambda_count++; if (!frame) s->last_frame++; @@ -780,6 +786,8 @@ static av_cold int aac_encode_end(AVCodecContext *avctx) { AACEncContext *s = avctx->priv_data; + av_log(avctx, AV_LOG_INFO, "Qavg: %.3f\n", s->lambda_sum / s->lambda_count); + ff_mdct_end(&s->mdct1024); ff_mdct_end(&s->mdct128); ff_psy_end(&s->psy); diff --git a/libavcodec/aacenc.h b/libavcodec/aacenc.h index 7e7609b1a8..99f50edc9c 100644 --- a/libavcodec/aacenc.h +++ b/libavcodec/aacenc.h @@ -66,6 +66,7 @@ typedef struct AACCoefficientsEncoder { void (*apply_tns_filt)(struct AACEncContext *s, SingleChannelElement *sce); void (*set_special_band_scalefactors)(struct AACEncContext *s, SingleChannelElement *sce); void (*search_for_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce); + void (*mark_pns)(struct AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce); void (*search_for_tns)(struct AACEncContext *s, SingleChannelElement *sce); void (*search_for_ms)(struct AACEncContext *s, ChannelElement *cpe); void (*search_for_is)(struct AACEncContext *s, AVCodecContext *avctx, ChannelElement *cpe); @@ -100,6 +101,8 @@ typedef struct AACEncContext { int last_frame; int random_state; float lambda; + float lambda_sum; ///< sum(lambda), for Qavg reporting + int lambda_count; ///< count(lambda), for Qavg reporting enum RawDataBlockType cur_type; ///< channel group type cur_channel belongs to AudioFrameQueue afq; diff --git a/libavcodec/aacenc_is.c b/libavcodec/aacenc_is.c index e983b7548f..97be9b3412 100644 --- a/libavcodec/aacenc_is.c +++ b/libavcodec/aacenc_is.c @@ -45,6 +45,11 @@ struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe, float dist1 = 0.0f, dist2 = 0.0f; struct AACISError is_error = {0}; + if (ener01 <= 0 || ener0 <= 0) { + is_error.pass = 0; + return is_error; + } + for (w2 = 0; w2 < 
sce0->ics.group_len[w]; w2++) { FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g]; FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g]; @@ -63,15 +68,15 @@ struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe, sce0->ics.swb_sizes[g], sce0->sf_idx[(w+w2)*16+g], sce0->band_type[(w+w2)*16+g], - s->lambda / band0->threshold, INFINITY, NULL, 0); + s->lambda / band0->threshold, INFINITY, NULL, NULL, 0); dist1 += quantize_band_cost(s, &R[start + (w+w2)*128], R34, sce1->ics.swb_sizes[g], sce1->sf_idx[(w+w2)*16+g], sce1->band_type[(w+w2)*16+g], - s->lambda / band1->threshold, INFINITY, NULL, 0); + s->lambda / band1->threshold, INFINITY, NULL, NULL, 0); dist2 += quantize_band_cost(s, IS, I34, sce0->ics.swb_sizes[g], is_sf_idx, is_band_type, - s->lambda / minthr, INFINITY, NULL, 0); + s->lambda / minthr, INFINITY, NULL, NULL, 0); for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { dist_spec_err += (L34[i] - I34[i])*(L34[i] - I34[i]); dist_spec_err += (R34[i] - I34[i]*e01_34)*(R34[i] - I34[i]*e01_34); @@ -85,6 +90,7 @@ struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe, is_error.error = fabsf(dist1 - dist2); is_error.dist1 = dist1; is_error.dist2 = dist2; + is_error.ener01 = ener01; return is_error; } @@ -105,7 +111,7 @@ void ff_aac_search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElemen if (start*freq_mult > INT_STEREO_LOW_LIMIT*(s->lambda/170.0f) && cpe->ch[0].band_type[w*16+g] != NOISE_BT && !cpe->ch[0].zeroes[w*16+g] && cpe->ch[1].band_type[w*16+g] != NOISE_BT && !cpe->ch[1].zeroes[w*16+g]) { - float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f; + float ener0 = 0.0f, ener1 = 0.0f, ener01 = 0.0f, ener01p = 0.0f; struct AACISError ph_err1, ph_err2, *erf; if (sce0->band_type[w*16+g] == NOISE_BT || sce1->band_type[w*16+g] == NOISE_BT) { @@ -114,23 +120,25 @@ void ff_aac_search_for_is(AACEncContext *s, AVCodecContext *avctx, ChannelElemen } for (w2 = 0; w2 < 
sce0->ics.group_len[w]; w2++) { for (i = 0; i < sce0->ics.swb_sizes[g]; i++) { - float coef0 = fabsf(sce0->pcoeffs[start+(w+w2)*128+i]); - float coef1 = fabsf(sce1->pcoeffs[start+(w+w2)*128+i]); + float coef0 = fabsf(sce0->coeffs[start+(w+w2)*128+i]); + float coef1 = fabsf(sce1->coeffs[start+(w+w2)*128+i]); ener0 += coef0*coef0; ener1 += coef1*coef1; ener01 += (coef0 + coef1)*(coef0 + coef1); + ener01p += (coef0 - coef1)*(coef0 - coef1); } } ph_err1 = ff_aac_is_encoding_err(s, cpe, start, w, g, - ener0, ener1, ener01, 0, -1); + ener0, ener1, ener01p, 0, -1); ph_err2 = ff_aac_is_encoding_err(s, cpe, start, w, g, ener0, ener1, ener01, 0, +1); - erf = ph_err1.error < ph_err2.error ? &ph_err1 : &ph_err2; + erf = (ph_err1.pass && ph_err1.error < ph_err2.error) ? &ph_err1 : &ph_err2; if (erf->pass) { cpe->is_mask[w*16+g] = 1; - cpe->ch[0].is_ener[w*16+g] = sqrt(ener0/ener01); + cpe->ms_mask[w*16+g] = 0; + cpe->ch[0].is_ener[w*16+g] = sqrt(ener0 / erf->ener01); cpe->ch[1].is_ener[w*16+g] = ener0/ener1; - cpe->ch[1].band_type[w*16+g] = erf->phase ? INTENSITY_BT : INTENSITY_BT2; + cpe->ch[1].band_type[w*16+g] = (erf->phase > 0) ? 
INTENSITY_BT : INTENSITY_BT2; count++; } } diff --git a/libavcodec/aacenc_is.h b/libavcodec/aacenc_is.h index 31bbacac58..269fd1a9c9 100644 --- a/libavcodec/aacenc_is.h +++ b/libavcodec/aacenc_is.h @@ -39,6 +39,7 @@ struct AACISError { float error; /* fabs(dist1 - dist2) */ float dist1; /* From original coeffs */ float dist2; /* From IS'd coeffs */ + float ener01; }; struct AACISError ff_aac_is_encoding_err(AACEncContext *s, ChannelElement *cpe, diff --git a/libavcodec/aacenc_pred.c b/libavcodec/aacenc_pred.c index c0e5e6e3b6..7d141930e8 100644 --- a/libavcodec/aacenc_pred.c +++ b/libavcodec/aacenc_pred.c @@ -271,7 +271,7 @@ void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce) abs_pow34_v(O34, &sce->coeffs[start_coef], num_coeffs); dist1 = quantize_and_encode_band_cost(s, NULL, &sce->coeffs[start_coef], NULL, O34, num_coeffs, sce->sf_idx[sfb], - cb_n, s->lambda / band->threshold, INFINITY, &cost1, 0); + cb_n, s->lambda / band->threshold, INFINITY, &cost1, NULL, 0); cost_coeffs += cost1; /* Encoded coefficients - needed for #bits, band type and quant. 
error */ @@ -284,7 +284,7 @@ void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce) cb_p = cb_n; quantize_and_encode_band_cost(s, NULL, SENT, QERR, S34, num_coeffs, sce->sf_idx[sfb], cb_p, s->lambda / band->threshold, INFINITY, - &cost2, 0); + &cost2, NULL, 0); /* Reconstructed coefficients - needed for distortion measurements */ for (i = 0; i < num_coeffs; i++) @@ -296,7 +296,7 @@ void ff_aac_search_for_pred(AACEncContext *s, SingleChannelElement *sce) cb_p = cb_n; dist2 = quantize_and_encode_band_cost(s, NULL, &sce->prcoeffs[start_coef], NULL, P34, num_coeffs, sce->sf_idx[sfb], - cb_p, s->lambda / band->threshold, INFINITY, NULL, 0); + cb_p, s->lambda / band->threshold, INFINITY, NULL, NULL, 0); for (i = 0; i < num_coeffs; i++) dist_spec_err += (O34[i] - P34[i])*(O34[i] - P34[i]); dist_spec_err *= s->lambda / band->threshold; diff --git a/libavcodec/aacenc_quantization.h b/libavcodec/aacenc_quantization.h index 6776dc37f7..1c3df38e9f 100644 --- a/libavcodec/aacenc_quantization.h +++ b/libavcodec/aacenc_quantization.h @@ -43,7 +43,7 @@ static av_always_inline float quantize_and_encode_band_cost_template( PutBitContext *pb, const float *in, float *out, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits, int BT_ZERO, int BT_UNSIGNED, + int *bits, float *energy, int BT_ZERO, int BT_UNSIGNED, int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO, const float ROUNDING) { @@ -54,6 +54,7 @@ static av_always_inline float quantize_and_encode_band_cost_template( const float CLIPPED_ESCAPE = 165140.0f*IQ; int i, j; float cost = 0; + float qenergy = 0; const int dim = BT_PAIR ? 
2 : 4; int resbits = 0; int off; @@ -63,6 +64,8 @@ static av_always_inline float quantize_and_encode_band_cost_template( cost += in[i]*in[i]; if (bits) *bits = 0; + if (energy) + *energy = qenergy; if (out) { for (i = 0; i < size; i += dim) for (j = 0; j < dim; j++) @@ -113,11 +116,13 @@ static av_always_inline float quantize_and_encode_band_cost_template( out[i+j] = in[i+j] >= 0 ? quantized : -quantized; if (vec[j] != 0.0f) curbits++; + qenergy += quantized*quantized; rd += di*di; } } else { for (j = 0; j < dim; j++) { quantized = vec[j]*IQ; + qenergy += quantized*quantized; if (out) out[i+j] = quantized; rd += (in[i+j] - quantized)*(in[i+j] - quantized); @@ -149,6 +154,8 @@ static av_always_inline float quantize_and_encode_band_cost_template( if (bits) *bits = resbits; + if (energy) + *energy = qenergy; return cost; } @@ -156,7 +163,7 @@ static inline float quantize_and_encode_band_cost_NONE(struct AACEncContext *s, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits) { + int *bits, float *energy) { av_assert0(0); return 0.0f; } @@ -167,10 +174,10 @@ static float quantize_and_encode_band_cost_ ## NAME( PutBitContext *pb, const float *in, float *quant, \ const float *scaled, int size, int scale_idx, \ int cb, const float lambda, const float uplim, \ - int *bits) { \ + int *bits, float *energy) { \ return quantize_and_encode_band_cost_template( \ s, pb, in, quant, scaled, size, scale_idx, \ - BT_ESC ? ESC_BT : cb, lambda, uplim, bits, \ + BT_ESC ? 
ESC_BT : cb, lambda, uplim, bits, energy, \ BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO, \ ROUNDING); \ } @@ -190,7 +197,7 @@ static float (*const quantize_and_encode_band_cost_arr[])( PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits) = { + int *bits, float *energy) = { quantize_and_encode_band_cost_ZERO, quantize_and_encode_band_cost_SQUAD, quantize_and_encode_band_cost_SQUAD, @@ -214,7 +221,7 @@ static float (*const quantize_and_encode_band_cost_rtz_arr[])( PutBitContext *pb, const float *in, float *quant, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits) = { + int *bits, float *energy) = { quantize_and_encode_band_cost_ZERO, quantize_and_encode_band_cost_SQUAD, quantize_and_encode_band_cost_SQUAD, @@ -235,32 +242,32 @@ static float (*const quantize_and_encode_band_cost_rtz_arr[])( #define quantize_and_encode_band_cost( \ s, pb, in, quant, scaled, size, scale_idx, cb, \ - lambda, uplim, bits, rtz) \ + lambda, uplim, bits, energy, rtz) \ ((rtz) ? 
quantize_and_encode_band_cost_rtz_arr : quantize_and_encode_band_cost_arr)[cb]( \ s, pb, in, quant, scaled, size, scale_idx, cb, \ - lambda, uplim, bits) + lambda, uplim, bits, energy) static inline float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits, int rtz) + int *bits, float *energy, int rtz) { return quantize_and_encode_band_cost(s, NULL, in, NULL, scaled, size, scale_idx, - cb, lambda, uplim, bits, rtz); + cb, lambda, uplim, bits, energy, rtz); } static inline int quantize_band_cost_bits(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits, int rtz) + int *bits, float *energy, int rtz) { - int _bits; + int auxbits; quantize_and_encode_band_cost(s, NULL, in, NULL, scaled, size, scale_idx, - cb, 0.0f, uplim, &_bits, rtz); + cb, 0.0f, uplim, &auxbits, energy, rtz); if (bits) { - *bits = _bits; + *bits = auxbits; } - return _bits; + return auxbits; } static inline void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb, @@ -268,7 +275,7 @@ static inline void quantize_and_encode_band(struct AACEncContext *s, PutBitConte int cb, const float lambda, int rtz) { quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda, - INFINITY, NULL, rtz); + INFINITY, NULL, NULL, rtz); } #endif /* AVCODEC_AACENC_QUANTIZATION_H */ diff --git a/libavcodec/aacenc_utils.h b/libavcodec/aacenc_utils.h index dbc9554379..b2ce22186b 100644 --- a/libavcodec/aacenc_utils.h +++ b/libavcodec/aacenc_utils.h @@ -96,6 +96,54 @@ static inline int find_min_book(float maxval, int sf) return cb; } +static float find_form_factor(int group_len, int swb_size, float thresh, const float *scaled, float nzslope) { + const float iswb_size = 1.0f / swb_size; + const float iswb_sizem1 = 1.0f / (swb_size - 1); + const float ethresh = thresh; + float form = 0.0f, 
weight = 0.0f; + int w2, i; + for (w2 = 0; w2 < group_len; w2++) { + float e = 0.0f, e2 = 0.0f, var = 0.0f, maxval = 0.0f; + float nzl = 0; + for (i = 0; i < swb_size; i++) { + float s = fabsf(scaled[w2*128+i]); + maxval = FFMAX(maxval, s); + e += s; + e2 += s *= s; + /* We really don't want a hard non-zero-line count, since + * even below-threshold lines do add up towards band spectral power. + * So, fall steeply towards zero, but smoothly + */ + if (s >= ethresh) { + nzl += 1.0f; + } else { + nzl += powf(s / ethresh, nzslope); + } + } + if (e2 > thresh) { + float frm; + e *= iswb_size; + + /** compute variance */ + for (i = 0; i < swb_size; i++) { + float d = fabsf(scaled[w2*128+i]) - e; + var += d*d; + } + var = sqrtf(var * iswb_sizem1); + + e2 *= iswb_size; + frm = e / FFMIN(e+4*var,maxval); + form += e2 * sqrtf(frm) / FFMAX(0.5f,nzl); + weight += e2; + } + } + if (weight > 0) { + return form / weight; + } else { + return 1.0f; + } +} + /** Return the minimum scalefactor where the quantized coef does not clip. 
*/ static inline uint8_t coef2minsf(float coef) { @@ -125,6 +173,14 @@ static inline int quant_array_idx(const float val, const float *arr, const int n return index; } +/** + * approximates exp10f(-3.0f*(0.5f + 0.5f * cosf(FFMIN(b,15.5f) / 15.5f))) + */ +static av_always_inline float bval2bmax(float b) +{ + return 0.001f + 0.0035f * (b*b*b) / (15.5f*15.5f*15.5f); +} + /* * linear congruential pseudorandom number generator, copied from the decoder */ diff --git a/libavcodec/aacpsy.c b/libavcodec/aacpsy.c index af235c758c..34a3ea4296 100644 --- a/libavcodec/aacpsy.c +++ b/libavcodec/aacpsy.c @@ -158,6 +158,7 @@ typedef struct AacPsyContext{ } pe; AacPsyCoeffs psy_coef[2][64]; AacPsyChannel *ch; + float global_quality; ///< normalized global quality taken from avctx }AacPsyContext; /** @@ -300,7 +301,8 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) { float bark; int i, j, g, start; float prev, minscale, minath, minsnr, pe_min; - const int chan_bitrate = ctx->avctx->bit_rate / ctx->avctx->channels; + int chan_bitrate = ctx->avctx->bit_rate / ((ctx->avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : ctx->avctx->channels); + const int bandwidth = ctx->avctx->cutoff ? ctx->avctx->cutoff : AAC_CUTOFF(ctx->avctx); const float num_bark = calc_bark((float)bandwidth); @@ -308,9 +310,15 @@ static av_cold int psy_3gpp_init(FFPsyContext *ctx) { if (!ctx->model_priv_data) return AVERROR(ENOMEM); pctx = (AacPsyContext*) ctx->model_priv_data; + pctx->global_quality = (ctx->avctx->global_quality ? ctx->avctx->global_quality : 120) * 0.01f; + + if (ctx->avctx->flags & CODEC_FLAG_QSCALE) { + /* Use the target average bitrate to compute spread parameters */ + chan_bitrate = (int)(chan_bitrate / 120.0 * (ctx->avctx->global_quality ? 
ctx->avctx->global_quality : 120)); + } pctx->chan_bitrate = chan_bitrate; - pctx->frame_bits = chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate; + pctx->frame_bits = FFMIN(2560, chan_bitrate * AAC_BLOCK_SIZE_LONG / ctx->avctx->sample_rate); pctx->pe.min = 8.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f); pctx->pe.max = 12.0f * AAC_BLOCK_SIZE_LONG * bandwidth / (ctx->avctx->sample_rate * 2.0f); ctx->bitres.size = 6144 - pctx->frame_bits; @@ -398,7 +406,7 @@ static av_unused FFPsyWindowInfo psy_3gpp_window(FFPsyContext *ctx, int channel, int prev_type) { int i, j; - int br = ctx->avctx->bit_rate / ctx->avctx->channels; + int br = ((AacPsyContext*)ctx->model_priv_data)->chan_bitrate; int attack_ratio = br <= 16000 ? 18 : 10; AacPsyContext *pctx = (AacPsyContext*) ctx->model_priv_data; AacPsyChannel *pch = &pctx->ch[channel]; @@ -508,7 +516,12 @@ static int calc_bit_demand(AacPsyContext *ctx, float pe, int bits, int size, ctx->pe.max = FFMAX(pe, ctx->pe.max); ctx->pe.min = FFMIN(pe, ctx->pe.min); - return FFMIN(ctx->frame_bits * bit_factor, ctx->frame_bits + size - bits); + /* NOTE: allocate a minimum of 1/8th average frame bits, to avoid + * reservoir starvation from producing zero-bit frames + */ + return FFMIN( + ctx->frame_bits * bit_factor, + FFMAX(ctx->frame_bits + size - bits, ctx->frame_bits / 8)); } static float calc_pe_3gpp(AacPsyBand *band) @@ -678,8 +691,26 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, /* 5.6.1.3.2 "Calculation of the desired perceptual entropy" */ ctx->ch[channel].entropy = pe; + if (ctx->avctx->flags & CODEC_FLAG_QSCALE) { + /* (2.5 * 120) achieves almost transparent rate, and we want to give + * ample room downwards, so we make that equivalent to QSCALE=2.4 + */ + desired_pe = pe * (ctx->avctx->global_quality ? 
ctx->avctx->global_quality : 120) / (2 * 2.5f * 120.0f); + desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe)); + desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping + + /* PE slope smoothing */ + if (ctx->bitres.bits > 0) { + desired_bits = FFMIN(2560, PSY_3GPP_PE_TO_BITS(desired_pe)); + desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); // reflect clipping + } + + pctx->pe.max = FFMAX(pe, pctx->pe.max); + pctx->pe.min = FFMIN(pe, pctx->pe.min); + } else { desired_bits = calc_bit_demand(pctx, pe, ctx->bitres.bits, ctx->bitres.size, wi->num_windows == 8); desired_pe = PSY_3GPP_BITS_TO_PE(desired_bits); + /* NOTE: PE correction is kept simple. During initial testing it had very * little effect on the final bitrate. Probably a good idea to come * back and do more testing later. @@ -687,6 +718,7 @@ static void psy_3gpp_analyze_channel(FFPsyContext *ctx, int channel, if (ctx->bitres.bits > 0) desired_pe *= av_clipf(pctx->pe.previous / PSY_3GPP_BITS_TO_PE(ctx->bitres.bits), 0.85f, 1.15f); + } pctx->pe.previous = PSY_3GPP_BITS_TO_PE(desired_bits); ctx->bitres.alloc = desired_bits; diff --git a/libavcodec/mathops.h b/libavcodec/mathops.h index c065018f56..4988f1d3df 100644 --- a/libavcodec/mathops.h +++ b/libavcodec/mathops.h @@ -233,6 +233,11 @@ static inline av_const unsigned int ff_sqrt(unsigned int a) } #endif +static inline av_const float ff_sqrf(float a) +{ + return a*a; +} + static inline int8_t ff_u8_to_s8(uint8_t a) { union { diff --git a/libavcodec/mips/aaccoder_mips.c b/libavcodec/mips/aaccoder_mips.c index 18d3f88743..e85bf8c5ca 100644 --- a/libavcodec/mips/aaccoder_mips.c +++ b/libavcodec/mips/aaccoder_mips.c @@ -178,6 +178,7 @@ static int find_min_book(float maxval, int sf) { float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512]; float Q34 = sqrtf(Q * sqrtf(Q)); int qmaxval, cb; + qmaxval = maxval * Q34 + 0.4054f; if (qmaxval >= (FF_ARRAY_ELEMS(aac_maxval_cb))) cb = 11; else @@ -192,12 +193,13 @@ static void 
quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits, const float ROUNDING) + int *bits, float *energy, const float ROUNDING) { const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; int i; int qc1, qc2, qc3, qc4; + float qenergy = 0.0f; uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1]; @@ -262,26 +264,38 @@ static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s, put_bits(pb, p_bits[curidx], p_codes[curidx]); - if (out) { - vec = &p_vec[curidx*4]; - out[i+0] = vec[0] * IQ; - out[i+1] = vec[1] * IQ; - out[i+2] = vec[2] * IQ; - out[i+3] = vec[3] * IQ; + if (out || energy) { + float e1,e2,e3,e4; + vec = &p_vec[curidx*4]; + e1 = vec[0] * IQ; + e2 = vec[1] * IQ; + e3 = vec[2] * IQ; + e4 = vec[3] * IQ; + if (out) { + out[i+0] = e1; + out[i+1] = e2; + out[i+2] = e3; + out[i+3] = e4; + } + if (energy) + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4); } } + if (energy) + *energy = qenergy; } static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits, const float ROUNDING) + int *bits, float *energy, const float ROUNDING) { const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; int i; int qc1, qc2, qc3, qc4; + float qenergy = 0.0f; uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1]; @@ -365,26 +379,38 @@ static void 
quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s, v_bits = p_bits[curidx] + count; put_bits(pb, v_bits, v_codes); - if (out) { - vec = &p_vec[curidx*4]; - out[i+0] = copysignf(vec[0] * IQ, in[i+0]); - out[i+1] = copysignf(vec[1] * IQ, in[i+1]); - out[i+2] = copysignf(vec[2] * IQ, in[i+2]); - out[i+3] = copysignf(vec[3] * IQ, in[i+3]); + if (out || energy) { + float e1,e2,e3,e4; + vec = &p_vec[curidx*4]; + e1 = copysignf(vec[0] * IQ, in[i+0]); + e2 = copysignf(vec[1] * IQ, in[i+1]); + e3 = copysignf(vec[2] * IQ, in[i+2]); + e4 = copysignf(vec[3] * IQ, in[i+3]); + if (out) { + out[i+0] = e1; + out[i+1] = e2; + out[i+2] = e3; + out[i+3] = e4; + } + if (energy) + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4); } } + if (energy) + *energy = qenergy; } static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits, const float ROUNDING) + int *bits, float *energy, const float ROUNDING) { const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; int i; int qc1, qc2, qc3, qc4; + float qenergy = 0.0f; uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1]; uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1]; @@ -455,27 +481,39 @@ static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s, v_bits = p_bits[curidx] + p_bits[curidx2]; put_bits(pb, v_bits, v_codes); - if (out) { - vec1 = &p_vec[curidx*2 ]; - vec2 = &p_vec[curidx2*2]; - out[i+0] = vec1[0] * IQ; - out[i+1] = vec1[1] * IQ; - out[i+2] = vec2[0] * IQ; - out[i+3] = vec2[1] * IQ; + if (out || energy) { + float e1,e2,e3,e4; + vec1 = &p_vec[curidx*2 ]; + vec2 = &p_vec[curidx2*2]; + e1 = vec1[0] * IQ; + e2 = vec1[1] * IQ; + e3 = vec2[0] * IQ; + e4 = vec2[1] * IQ; + if (out) { + out[i+0] 
= e1; + out[i+1] = e2; + out[i+2] = e3; + out[i+3] = e4; + } + if (energy) + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4); } } + if (energy) + *energy = qenergy; } static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits, const float ROUNDING) + int *bits, float *energy, const float ROUNDING) { const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; int i; int qc1, qc2, qc3, qc4; + float qenergy = 0.0f; uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1]; uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1]; @@ -561,27 +599,39 @@ static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s, v_bits = p_bits[curidx2] + count2; put_bits(pb, v_bits, v_codes); - if (out) { - vec1 = &p_vec[curidx1*2]; - vec2 = &p_vec[curidx2*2]; - out[i+0] = copysignf(vec1[0] * IQ, in[i+0]); - out[i+1] = copysignf(vec1[1] * IQ, in[i+1]); - out[i+2] = copysignf(vec2[0] * IQ, in[i+2]); - out[i+3] = copysignf(vec2[1] * IQ, in[i+3]); + if (out || energy) { + float e1,e2,e3,e4; + vec1 = &p_vec[curidx1*2]; + vec2 = &p_vec[curidx2*2]; + e1 = copysignf(vec1[0] * IQ, in[i+0]); + e2 = copysignf(vec1[1] * IQ, in[i+1]); + e3 = copysignf(vec2[0] * IQ, in[i+2]); + e4 = copysignf(vec2[1] * IQ, in[i+3]); + if (out) { + out[i+0] = e1; + out[i+1] = e2; + out[i+2] = e3; + out[i+3] = e4; + } + if (energy) + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4); } } + if (energy) + *energy = qenergy; } static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits, const float ROUNDING) + int *bits, float *energy, const 
float ROUNDING) { const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; int i; int qc1, qc2, qc3, qc4; + float qenergy = 0.0f; uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1]; uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1]; @@ -666,27 +716,39 @@ static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s, v_bits = p_bits[curidx2] + count2; put_bits(pb, v_bits, v_codes); - if (out) { - vec1 = &p_vec[curidx1*2]; - vec2 = &p_vec[curidx2*2]; - out[i+0] = copysignf(vec1[0] * IQ, in[i+0]); - out[i+1] = copysignf(vec1[1] * IQ, in[i+1]); - out[i+2] = copysignf(vec2[0] * IQ, in[i+2]); - out[i+3] = copysignf(vec2[1] * IQ, in[i+3]); + if (out || energy) { + float e1,e2,e3,e4; + vec1 = &p_vec[curidx1*2]; + vec2 = &p_vec[curidx2*2]; + e1 = copysignf(vec1[0] * IQ, in[i+0]); + e2 = copysignf(vec1[1] * IQ, in[i+1]); + e3 = copysignf(vec2[0] * IQ, in[i+2]); + e4 = copysignf(vec2[1] * IQ, in[i+3]); + if (out) { + out[i+0] = e1; + out[i+1] = e2; + out[i+2] = e3; + out[i+3] = e4; + } + if (energy) + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4); } } + if (energy) + *energy = qenergy; } static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits, const float ROUNDING) + int *bits, float *energy, const float ROUNDING) { const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; int i; int qc1, qc2, qc3, qc4; + float qenergy = 0.0f; uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1]; uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1]; @@ -772,13 +834,22 @@ static void 
quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s, v_bits = p_bits[curidx2] + count2; put_bits(pb, v_bits, v_codes); - if (out) { - vec1 = &p_vectors[curidx*2 ]; - vec2 = &p_vectors[curidx2*2]; - out[i+0] = copysignf(vec1[0] * IQ, in[i+0]); - out[i+1] = copysignf(vec1[1] * IQ, in[i+1]); - out[i+2] = copysignf(vec2[0] * IQ, in[i+2]); - out[i+3] = copysignf(vec2[1] * IQ, in[i+3]); + if (out || energy) { + float e1,e2,e3,e4; + vec1 = &p_vectors[curidx*2 ]; + vec2 = &p_vectors[curidx2*2]; + e1 = copysignf(vec1[0] * IQ, in[i+0]); + e2 = copysignf(vec1[1] * IQ, in[i+1]); + e3 = copysignf(vec2[0] * IQ, in[i+2]); + e4 = copysignf(vec2[1] * IQ, in[i+3]); + if (out) { + out[i+0] = e1; + out[i+1] = e2; + out[i+2] = e3; + out[i+3] = e4; + } + if (energy) + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4); } } } else { @@ -892,23 +963,34 @@ static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s, put_bits(pb, len * 2 - 3, v_codes); } - if (out) { - vec1 = &p_vectors[curidx*2]; - vec2 = &p_vectors[curidx2*2]; - out[i+0] = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]); - out[i+1] = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]); - out[i+2] = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]); - out[i+3] = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]); + if (out || energy) { + float e1, e2, e3, e4; + vec1 = &p_vectors[curidx*2]; + vec2 = &p_vectors[curidx2*2]; + e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]); + e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]); + e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]); + e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]); + if (out) { + out[i+0] = e1; + out[i+1] = e2; + out[i+2] = e3; + out[i+3] = e4; + } + if (energy) + qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4); } } } + if (energy) + *energy = qenergy; } static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits, const float ROUNDING) 
{ + int *bits, float *energy, const float ROUNDING) { av_assert0(0); } @@ -916,7 +998,7 @@ static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits, const float ROUNDING) { + int *bits, float *energy, const float ROUNDING) { int i; if (bits) *bits = 0; @@ -928,13 +1010,15 @@ static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s, out[i+3] = 0.0f; } } + if (energy) + *energy = 0.0f; } static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits, const float ROUNDING) = { + int *bits, float *energy, const float ROUNDING) = { quantize_and_encode_band_cost_ZERO_mips, quantize_and_encode_band_cost_SQUAD_mips, quantize_and_encode_band_cost_SQUAD_mips, @@ -955,17 +1039,17 @@ static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s #define quantize_and_encode_band_cost( \ s, pb, in, out, scaled, size, scale_idx, cb, \ - lambda, uplim, bits, ROUNDING) \ + lambda, uplim, bits, energy, ROUNDING) \ quantize_and_encode_band_cost_arr[cb]( \ s, pb, in, out, scaled, size, scale_idx, cb, \ - lambda, uplim, bits, ROUNDING) + lambda, uplim, bits, energy, ROUNDING) static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, float *out, int size, int scale_idx, int cb, const float lambda, int rtz) { quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda, - INFINITY, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD); + INFINITY, NULL, NULL, (rtz) ? 
ROUND_TO_ZERO : ROUND_STANDARD); } /** @@ -1445,7 +1529,7 @@ static float (*const get_band_numbits_arr[])(struct AACEncContext *s, static float quantize_band_cost_bits(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits, int rtz) + int *bits, float *energy, int rtz) { return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits); } @@ -1458,7 +1542,7 @@ static float get_band_cost_ZERO_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits) + int *bits, float *energy) { int i; float cost = 0; @@ -1471,6 +1555,8 @@ static float get_band_cost_ZERO_mips(struct AACEncContext *s, } if (bits) *bits = 0; + if (energy) + *energy = 0.0f; return cost * lambda; } @@ -1478,7 +1564,7 @@ static float get_band_cost_NONE_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits) + int *bits, float *energy) { av_assert0(0); return 0; @@ -1488,12 +1574,13 @@ static float get_band_cost_SQUAD_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits) + int *bits, float *energy) { const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; int i; float cost = 0; + float qenergy = 0.0f; int qc1, qc2, qc3, qc4; int curbits = 0; @@ -1560,6 +1647,9 @@ static float get_band_cost_SQUAD_mips(struct AACEncContext *s, curbits += p_bits[curidx]; vec = &p_codes[curidx*4]; + qenergy += vec[0]*vec[0] + vec[1]*vec[1] + + vec[2]*vec[2] + vec[3]*vec[3]; + __asm__ volatile ( ".set push \n\t" ".set noreorder \n\t" @@ 
-1594,6 +1684,8 @@ static float get_band_cost_SQUAD_mips(struct AACEncContext *s, if (bits) *bits = curbits; + if (energy) + *energy = qenergy * (IQ*IQ); return cost * lambda + curbits; } @@ -1601,12 +1693,13 @@ static float get_band_cost_UQUAD_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits) + int *bits, float *energy) { const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; int i; float cost = 0; + float qenergy = 0.0f; int curbits = 0; int qc1, qc2, qc3, qc4; @@ -1659,6 +1752,9 @@ static float get_band_cost_UQUAD_mips(struct AACEncContext *s, curbits += uquad_sign_bits[curidx]; vec = &p_codes[curidx*4]; + qenergy += vec[0]*vec[0] + vec[1]*vec[1] + + vec[2]*vec[2] + vec[3]*vec[3]; + __asm__ volatile ( ".set push \n\t" ".set noreorder \n\t" @@ -1696,6 +1792,8 @@ static float get_band_cost_UQUAD_mips(struct AACEncContext *s, if (bits) *bits = curbits; + if (energy) + *energy = qenergy * (IQ*IQ); return cost * lambda + curbits; } @@ -1703,12 +1801,13 @@ static float get_band_cost_SPAIR_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits) + int *bits, float *energy) { const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; int i; float cost = 0; + float qenergy = 0.0f; int qc1, qc2, qc3, qc4; int curbits = 0; @@ -1780,6 +1879,9 @@ static float get_band_cost_SPAIR_mips(struct AACEncContext *s, vec = &p_codes[curidx*2]; vec2 = &p_codes[curidx2*2]; + qenergy += vec[0]*vec[0] + vec[1]*vec[1] + + vec2[0]*vec2[0] + vec2[1]*vec2[1]; + __asm__ volatile ( ".set push \n\t" 
".set noreorder \n\t" @@ -1814,6 +1916,8 @@ static float get_band_cost_SPAIR_mips(struct AACEncContext *s, if (bits) *bits = curbits; + if (energy) + *energy = qenergy * (IQ*IQ); return cost * lambda + curbits; } @@ -1821,12 +1925,13 @@ static float get_band_cost_UPAIR7_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits) + int *bits, float *energy) { const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; int i; float cost = 0; + float qenergy = 0.0f; int qc1, qc2, qc3, qc4; int curbits = 0; @@ -1910,6 +2015,9 @@ static float get_band_cost_UPAIR7_mips(struct AACEncContext *s, curbits += upair7_sign_bits[curidx2]; vec2 = &p_codes[curidx2*2]; + qenergy += vec[0]*vec[0] + vec[1]*vec[1] + + vec2[0]*vec2[0] + vec2[1]*vec2[1]; + __asm__ volatile ( ".set push \n\t" ".set noreorder \n\t" @@ -1947,6 +2055,8 @@ static float get_band_cost_UPAIR7_mips(struct AACEncContext *s, if (bits) *bits = curbits; + if (energy) + *energy = qenergy * (IQ*IQ); return cost * lambda + curbits; } @@ -1954,12 +2064,13 @@ static float get_band_cost_UPAIR12_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits) + int *bits, float *energy) { const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; int i; float cost = 0; + float qenergy = 0.0f; int qc1, qc2, qc3, qc4; int curbits = 0; @@ -2043,6 +2154,9 @@ static float get_band_cost_UPAIR12_mips(struct AACEncContext *s, vec = &p_codes[curidx*2]; vec2 = &p_codes[curidx2*2]; + qenergy += vec[0]*vec[0] + vec[1]*vec[1] + + vec2[0]*vec2[0] + 
vec2[1]*vec2[1]; + __asm__ volatile ( ".set push \n\t" ".set noreorder \n\t" @@ -2080,6 +2194,8 @@ static float get_band_cost_UPAIR12_mips(struct AACEncContext *s, if (bits) *bits = curbits; + if (energy) + *energy = qenergy * (IQ*IQ); return cost * lambda + curbits; } @@ -2087,13 +2203,14 @@ static float get_band_cost_ESC_mips(struct AACEncContext *s, PutBitContext *pb, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits) + int *bits, float *energy) { const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512]; const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512]; const float CLIPPED_ESCAPE = 165140.0f * IQ; int i; float cost = 0; + float qenergy = 0.0f; int qc1, qc2, qc3, qc4; int curbits = 0; @@ -2103,7 +2220,7 @@ static float get_band_cost_ESC_mips(struct AACEncContext *s, for (i = 0; i < size; i += 4) { const float *vec, *vec2; int curidx, curidx2; - float t1, t2, t3, t4; + float t1, t2, t3, t4, V; float di1, di2, di3, di4; int cond0, cond1, cond2, cond3; int c1, c2, c3, c4; @@ -2175,38 +2292,54 @@ static float get_band_cost_ESC_mips(struct AACEncContext *s, if (cond0) { if (t1 >= CLIPPED_ESCAPE) { di1 = t1 - CLIPPED_ESCAPE; + qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE; } else { - di1 = t1 - c1 * cbrtf(c1) * IQ; + di1 = t1 - (V = c1 * cbrtf(c1) * IQ); + qenergy += V*V; } - } else - di1 = t1 - vec[0] * IQ; + } else { + di1 = t1 - (V = vec[0] * IQ); + qenergy += V*V; + } if (cond1) { if (t2 >= CLIPPED_ESCAPE) { di2 = t2 - CLIPPED_ESCAPE; + qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE; } else { - di2 = t2 - c2 * cbrtf(c2) * IQ; + di2 = t2 - (V = c2 * cbrtf(c2) * IQ); + qenergy += V*V; } - } else - di2 = t2 - vec[1] * IQ; + } else { + di2 = t2 - (V = vec[1] * IQ); + qenergy += V*V; + } if (cond2) { if (t3 >= CLIPPED_ESCAPE) { di3 = t3 - CLIPPED_ESCAPE; + qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE; } else { - di3 = t3 - c3 * 
cbrtf(c3) * IQ; + di3 = t3 - (V = c3 * cbrtf(c3) * IQ); + qenergy += V*V; } - } else - di3 = t3 - vec2[0] * IQ; + } else { + di3 = t3 - (V = vec2[0] * IQ); + qenergy += V*V; + } if (cond3) { if (t4 >= CLIPPED_ESCAPE) { di4 = t4 - CLIPPED_ESCAPE; + qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE; } else { - di4 = t4 - c4 * cbrtf(c4) * IQ; + di4 = t4 - (V = c4 * cbrtf(c4) * IQ); + qenergy += V*V; } - } else - di4 = t4 - vec2[1]*IQ; + } else { + di4 = t4 - (V = vec2[1]*IQ); + qenergy += V*V; + } cost += di1 * di1 + di2 * di2 + di3 * di3 + di4 * di4; @@ -2221,7 +2354,7 @@ static float (*const get_band_cost_arr[])(struct AACEncContext *s, PutBitContext *pb, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits) = { + int *bits, float *energy) = { get_band_cost_ZERO_mips, get_band_cost_SQUAD_mips, get_band_cost_SQUAD_mips, @@ -2242,17 +2375,87 @@ static float (*const get_band_cost_arr[])(struct AACEncContext *s, #define get_band_cost( \ s, pb, in, scaled, size, scale_idx, cb, \ - lambda, uplim, bits) \ + lambda, uplim, bits, energy) \ get_band_cost_arr[cb]( \ s, pb, in, scaled, size, scale_idx, cb, \ - lambda, uplim, bits) + lambda, uplim, bits, energy) static float quantize_band_cost(struct AACEncContext *s, const float *in, const float *scaled, int size, int scale_idx, int cb, const float lambda, const float uplim, - int *bits, int rtz) + int *bits, float *energy, int rtz) { - return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits); + return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits, energy); +} + +static float find_form_factor(int group_len, int swb_size, float thresh, const float *scaled, float nzslope) { + const float iswb_size = 1.0f / swb_size; + const float iswb_sizem1 = 1.0f / (swb_size - 1); + const float ethresh = thresh, iethresh = 1.0f / ethresh; + float form = 0.0f, weight = 0.0f; + int w2, i; + for (w2 = 0; w2 < group_len; w2++) { + 
float e = 0.0f, e2 = 0.0f, var = 0.0f, maxval = 0.0f; + float nzl = 0; + for (i = 0; i < swb_size; i+=4) { + float s1 = fabsf(scaled[w2*128+i ]); + float s2 = fabsf(scaled[w2*128+i+1]); + float s3 = fabsf(scaled[w2*128+i+2]); + float s4 = fabsf(scaled[w2*128+i+3]); + maxval = FFMAX(maxval, FFMAX(FFMAX(s1, s2), FFMAX(s3, s4))); + e += (s1+s2)+(s3+s4); + s1 *= s1; + s2 *= s2; + s3 *= s3; + s4 *= s4; + e2 += (s1+s2)+(s3+s4); + /* We really don't want a hard non-zero-line count, since + * even below-threshold lines do add up towards band spectral power. + * So, fall steeply towards zero, but smoothly + */ + if (s1 >= ethresh) { + nzl += 1.0f; + } else { + nzl += powf(s1 * iethresh, nzslope); + } + if (s2 >= ethresh) { + nzl += 1.0f; + } else { + nzl += powf(s2 * iethresh, nzslope); + } + if (s3 >= ethresh) { + nzl += 1.0f; + } else { + nzl += powf(s3 * iethresh, nzslope); + } + if (s4 >= ethresh) { + nzl += 1.0f; + } else { + nzl += powf(s4 * iethresh, nzslope); + } + } + if (e2 > thresh) { + float frm; + e *= iswb_size; + + /** compute variance */ + for (i = 0; i < swb_size; i++) { + float d = fabsf(scaled[w2*128+i]) - e; + var += d*d; + } + var = sqrtf(var * iswb_sizem1); + + e2 *= iswb_size; + frm = e / FFMIN(e+4*var,maxval); + form += e2 * sqrtf(frm) / FFMAX(0.5f,nzl); + weight += e2; + } + } + if (weight > 0) { + return form / weight; + } else { + return 1.0f; + } } #include "libavcodec/aaccoder_twoloop.h" @@ -2305,25 +2508,25 @@ static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe) sce0->ics.swb_sizes[g], sce0->sf_idx[(w+w2)*16+g], sce0->band_type[(w+w2)*16+g], - lambda / band0->threshold, INFINITY, NULL, 0); + lambda / band0->threshold, INFINITY, NULL, NULL, 0); dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128], R34, sce1->ics.swb_sizes[g], sce1->sf_idx[(w+w2)*16+g], sce1->band_type[(w+w2)*16+g], - lambda / band1->threshold, INFINITY, NULL, 0); + lambda / band1->threshold, INFINITY, NULL, NULL, 0); dist2 += quantize_band_cost(s, M, 
M34, sce0->ics.swb_sizes[g], sce0->sf_idx[(w+w2)*16+g], sce0->band_type[(w+w2)*16+g], - lambda / maxthr, INFINITY, NULL, 0); + lambda / maxthr, INFINITY, NULL, NULL, 0); dist2 += quantize_band_cost(s, S, S34, sce1->ics.swb_sizes[g], sce1->sf_idx[(w+w2)*16+g], sce1->band_type[(w+w2)*16+g], - lambda / minthr, INFINITY, NULL, 0); + lambda / minthr, INFINITY, NULL, NULL, 0); } cpe->ms_mask[w*16+g] = dist2 < dist1; } diff --git a/libavcodec/psymodel.c b/libavcodec/psymodel.c index 824eefb79e..f7bca6890c 100644 --- a/libavcodec/psymodel.c +++ b/libavcodec/psymodel.c @@ -109,25 +109,21 @@ av_cold struct FFPsyPreprocessContext* ff_psy_preprocess_init(AVCodecContext *av return NULL; ctx->avctx = avctx; + /* AAC has its own LP method */ + if (avctx->codec_id != AV_CODEC_ID_AAC) { if (avctx->cutoff > 0) cutoff_coeff = 2.0 * avctx->cutoff / avctx->sample_rate; - if (!cutoff_coeff && avctx->codec_id == AV_CODEC_ID_AAC) - cutoff_coeff = 2.0 * AAC_CUTOFF(avctx) / avctx->sample_rate; - if (cutoff_coeff && cutoff_coeff < 0.98) ctx->fcoeffs = ff_iir_filter_init_coeffs(avctx, FF_FILTER_TYPE_BUTTERWORTH, FF_FILTER_MODE_LOWPASS, FILT_ORDER, cutoff_coeff, 0.0, 0.0); if (ctx->fcoeffs) { - ctx->fstate = av_mallocz_array(sizeof(ctx->fstate[0]), avctx->channels); - if (!ctx->fstate) { - av_free(ctx); - return NULL; - } + ctx->fstate = av_mallocz(sizeof(ctx->fstate[0]) * avctx->channels); for (i = 0; i < avctx->channels; i++) ctx->fstate[i] = ff_iir_filter_init_state(FILT_ORDER); } + } ff_iir_filter_init(&ctx->fiir); diff --git a/libavcodec/psymodel.h b/libavcodec/psymodel.h index a04cc4d226..565117db73 100644 --- a/libavcodec/psymodel.h +++ b/libavcodec/psymodel.h @@ -29,7 +29,20 @@ /** maximum number of channels */ #define PSY_MAX_CHANS 20 -#define AAC_CUTOFF(s) ((s)->bit_rate ? 
FFMIN3(4000 + (s)->bit_rate/8, 12000 + (s)->bit_rate/32, (s)->sample_rate / 2) : ((s)->sample_rate / 2)) +/* cutoff for VBR is purposely increased, since LP filtering actually + * hinders VBR performance rather than the opposite + */ +#define AAC_CUTOFF_FROM_BITRATE(bit_rate,channels,sample_rate) (bit_rate ? FFMIN3(FFMIN3( \ + FFMAX(bit_rate/channels/5, bit_rate/channels*15/32 - 5500), \ + 3000 + bit_rate/channels/4, \ + 12000 + bit_rate/channels/16), \ + 22000, \ + sample_rate / 2): (sample_rate / 2)) +#define AAC_CUTOFF(s) ( \ + (s->flags & CODEC_FLAG_QSCALE) \ + ? s->sample_rate / 2 \ + : AAC_CUTOFF_FROM_BITRATE(s->bit_rate, s->channels, s->sample_rate) \ +) /** * single band psychoacoustic information diff --git a/tests/fate/aac.mak b/tests/fate/aac.mak index d6a355e45b..30f0d9b1c3 100644 --- a/tests/fate/aac.mak +++ b/tests/fate/aac.mak @@ -146,7 +146,7 @@ fate-aac-aref-encode: CMD = enc_dec_pcm adts wav s16le $(REF) -strict -2 -c:a aa fate-aac-aref-encode: CMP = stddev fate-aac-aref-encode: REF = ./tests/data/asynth-44100-2.wav fate-aac-aref-encode: CMP_SHIFT = -4096 -fate-aac-aref-encode: CMP_TARGET = 584 +fate-aac-aref-encode: CMP_TARGET = 1127 fate-aac-aref-encode: SIZE_TOLERANCE = 2464 fate-aac-aref-encode: FUZZ = 6 @@ -155,51 +155,52 @@ fate-aac-ln-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-ref fate-aac-ln-encode: CMP = stddev fate-aac-ln-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav fate-aac-ln-encode: CMP_SHIFT = -4096 -fate-aac-ln-encode: CMP_TARGET = 68 +fate-aac-ln-encode: CMP_TARGET = 80 fate-aac-ln-encode: SIZE_TOLERANCE = 3560 +fate-aac-ln-encode: FUZZ = 30 FATE_AAC_ENCODE += fate-aac-ln-encode-128k -fate-aac-ln-encode-128k: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_is 0 -aac_pns 0 -b:a 128k +fate-aac-ln-encode-128k: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2
-c:a aac -aac_is 0 -aac_pns 0 -b:a 128k -cutoff 22050 fate-aac-ln-encode-128k: CMP = stddev fate-aac-ln-encode-128k: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav fate-aac-ln-encode-128k: CMP_SHIFT = -4096 -fate-aac-ln-encode-128k: CMP_TARGET = 638 +fate-aac-ln-encode-128k: CMP_TARGET = 745 fate-aac-ln-encode-128k: SIZE_TOLERANCE = 3560 fate-aac-ln-encode-128k: FUZZ = 5 FATE_AAC_ENCODE += fate-aac-pns-encode -fate-aac-pns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_pns 1 -aac_is 0 -b:a 128k +fate-aac-pns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_pns 1 -aac_is 0 -b:a 128k -cutoff 22050 fate-aac-pns-encode: CMP = stddev fate-aac-pns-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav fate-aac-pns-encode: CMP_SHIFT = -4096 -fate-aac-pns-encode: CMP_TARGET = 623.77 +fate-aac-pns-encode: CMP_TARGET = 695 fate-aac-pns-encode: SIZE_TOLERANCE = 3560 fate-aac-pns-encode: FUZZ = 25 FATE_AAC_ENCODE += fate-aac-tns-encode -fate-aac-tns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_tns 1 -aac_is 0 -aac_pns 0 -b:a 128k +fate-aac-tns-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_tns 1 -aac_is 0 -aac_pns 0 -b:a 128k -cutoff 22050 fate-aac-tns-encode: CMP = stddev fate-aac-tns-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav fate-aac-tns-encode: CMP_SHIFT = -4096 -fate-aac-tns-encode: CMP_TARGET = 644.50 +fate-aac-tns-encode: CMP_TARGET = 768 fate-aac-tns-encode: FUZZ = 2.8 fate-aac-tns-encode: SIZE_TOLERANCE = 3560 FATE_AAC_ENCODE += fate-aac-is-encode -fate-aac-is-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_pns 
0 -aac_is 1 -b:a 128k +fate-aac-is-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -c:a aac -aac_pns 0 -aac_is 1 -b:a 128k -cutoff 22050 fate-aac-is-encode: CMP = stddev fate-aac-is-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav fate-aac-is-encode: CMP_SHIFT = -4096 -fate-aac-is-encode: CMP_TARGET = 614.04 +fate-aac-is-encode: CMP_TARGET = 582 fate-aac-is-encode: SIZE_TOLERANCE = 3560 fate-aac-is-encode: FUZZ = 1 FATE_AAC_ENCODE += fate-aac-pred-encode -fate-aac-pred-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -profile:a aac_main -c:a aac -aac_is 0 -aac_pns 0 -b:a 128k +fate-aac-pred-encode: CMD = enc_dec_pcm adts wav s16le $(TARGET_SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav -strict -2 -profile:a aac_main -c:a aac -aac_is 0 -aac_pns 0 -b:a 128k -cutoff 22050 fate-aac-pred-encode: CMP = stddev fate-aac-pred-encode: REF = $(SAMPLES)/audio-reference/luckynight_2ch_44kHz_s16.wav fate-aac-pred-encode: CMP_SHIFT = -4096 -fate-aac-pred-encode: CMP_TARGET = 657 +fate-aac-pred-encode: CMP_TARGET = 790 fate-aac-pred-encode: FUZZ = 5 fate-aac-pred-encode: SIZE_TOLERANCE = 3560