From 8c75e5fdd33c4857305aeb45619497d3b6bf2eb4 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Tue, 13 Dec 2022 11:46:02 +0100 Subject: [PATCH] avfilter/af_afir: improve output when IR switching at runtime Also improve normalization and add more gtype modes --- doc/filters.texi | 6 + libavfilter/af_afir.c | 148 +++++++++++----------- libavfilter/af_afir.h | 22 ++-- libavfilter/afir_template.c | 238 +++++++++++++++++++++++------------- 4 files changed, 253 insertions(+), 161 deletions(-) diff --git a/doc/filters.texi b/doc/filters.texi index 920695a65b..9b29b0a431 100644 --- a/doc/filters.texi +++ b/doc/filters.texi @@ -1631,6 +1631,12 @@ select DC gain, limited application. @item gn select gain to noise approach, this is most popular one. + +@item ac +select AC gain. + +@item rms +select RMS gain. @end table @item irgain diff --git a/libavfilter/af_afir.c b/libavfilter/af_afir.c index 83b9a1ba02..dfbc9d7cf1 100644 --- a/libavfilter/af_afir.c +++ b/libavfilter/af_afir.c @@ -105,8 +105,9 @@ static void draw_line(AVFrame *out, int x0, int y0, int x1, int y1, uint32_t col static int fir_channel(AVFilterContext *ctx, AVFrame *out, int ch) { AudioFIRContext *s = ctx->priv; + const int min_part_size = s->min_part_size; - for (int offset = 0; offset < out->nb_samples; offset += s->min_part_size) { + for (int offset = 0; offset < out->nb_samples; offset += min_part_size) { switch (s->format) { case AV_SAMPLE_FMT_FLTP: fir_quantum_float(ctx, out, ch, offset); @@ -126,9 +127,8 @@ static int fir_channels(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) const int start = (out->ch_layout.nb_channels * jobnr) / nb_jobs; const int end = (out->ch_layout.nb_channels * (jobnr+1)) / nb_jobs; - for (int ch = start; ch < end; ch++) { + for (int ch = start; ch < end; ch++) fir_channel(ctx, out, ch); - } return 0; } @@ -143,7 +143,7 @@ static int fir_frame(AudioFIRContext *s, AVFrame *in, AVFilterLink *outlink) av_frame_free(&in); return AVERROR(ENOMEM); } - out->pts = in->pts; + out->pts = s->pts = in->pts; s->in = in; ff_filter_execute(ctx, fir_channels, out, NULL, @@ -156,7 +156,7 @@ static int fir_frame(AudioFIRContext *s, AVFrame *in, AVFilterLink *outlink) } static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg, - int offset, int nb_partitions, int part_size) + int offset, int nb_partitions, int part_size, int index) { AudioFIRContext *s = ctx->priv; const size_t cpu_align = av_cpu_max_align(); @@ -165,8 +165,9 @@ static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg, int ret; seg->tx = av_calloc(ctx->inputs[0]->ch_layout.nb_channels, sizeof(*seg->tx)); + seg->ctx = av_calloc(ctx->inputs[0]->ch_layout.nb_channels, sizeof(*seg->ctx)); seg->itx = av_calloc(ctx->inputs[0]->ch_layout.nb_channels, sizeof(*seg->itx)); - if (!seg->tx || !seg->itx) + if (!seg->tx || !seg->ctx || !seg->itx) return AVERROR(ENOMEM); seg->fft_length = part_size * 2 + 2; @@ -177,9 +178,10 @@ static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg, seg->input_size = offset + s->min_part_size; seg->input_offset = offset; + seg->loading = av_calloc(ctx->inputs[0]->ch_layout.nb_channels, sizeof(*seg->loading)); seg->part_index = av_calloc(ctx->inputs[0]->ch_layout.nb_channels, sizeof(*seg->part_index)); seg->output_offset = av_calloc(ctx->inputs[0]->ch_layout.nb_channels, sizeof(*seg->output_offset)); - if (!seg->part_index || !seg->output_offset) + if (!seg->part_index || !seg->output_offset || !seg->loading) return AVERROR(ENOMEM); switch (s->format) { @@ -197,12 +199,12 @@ static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg, break; } - ret = av_tx_init(&seg->ctx, &seg->ctx_fn, tx_type, - 0, 2 * part_size, &cscale, 0); - if (ret < 0) - return ret; - for (int ch = 0; ch < ctx->inputs[0]->ch_layout.nb_channels && part_size >= 1; ch++) { + ret = av_tx_init(&seg->ctx[ch], &seg->ctx_fn, tx_type, + 0, 2 * part_size, &cscale, 0); + if (ret < 0) + return ret; + ret = av_tx_init(&seg->tx[ch], &seg->tx_fn, tx_type, 0, 2 * part_size, &scale, 0); if (ret < 0) @@ -215,13 +217,17 @@ static int init_segment(AVFilterContext *ctx, AudioFIRSegment *seg, seg->sumin = ff_get_audio_buffer(ctx->inputs[0], seg->fft_length); seg->sumout = ff_get_audio_buffer(ctx->inputs[0], seg->fft_length); - seg->blockin = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions * seg->block_size); - seg->blockout = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions * seg->block_size); + seg->blockin = ff_get_audio_buffer(ctx->inputs[0], seg->block_size * seg->nb_partitions); + seg->blockout = ff_get_audio_buffer(ctx->inputs[0], seg->block_size * seg->nb_partitions); + seg->tempin = ff_get_audio_buffer(ctx->inputs[0], seg->block_size); + seg->tempout = ff_get_audio_buffer(ctx->inputs[0], seg->block_size); seg->buffer = ff_get_audio_buffer(ctx->inputs[0], seg->part_size); - seg->coeff = ff_get_audio_buffer(ctx->inputs[1 + s->selir], seg->nb_partitions * seg->coeff_size * 2); + seg->coeff = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions * seg->coeff_size * 2); seg->input = ff_get_audio_buffer(ctx->inputs[0], seg->input_size); seg->output = ff_get_audio_buffer(ctx->inputs[0], seg->part_size); - if (!seg->buffer || !seg->sumin || !seg->sumout || !seg->blockin || !seg->blockout || !seg->coeff || !seg->input || !seg->output) + seg->loaded = ff_get_audio_buffer(ctx->inputs[0], seg->nb_partitions); + if (!seg->buffer || !seg->sumin || !seg->sumout || !seg->blockin || !seg->blockout || + !seg->coeff || !seg->input || !seg->output || !seg->loaded || !seg->tempin || !seg->tempout) return AVERROR(ENOMEM); return 0; @@ -231,25 +237,30 @@ static void uninit_segment(AVFilterContext *ctx, AudioFIRSegment *seg) { AudioFIRContext *s = ctx->priv; - av_tx_uninit(&seg->ctx); + if (seg->ctx) { + for (int ch = 0; ch < s->nb_channels; ch++) + av_tx_uninit(&seg->ctx[ch]); + } + av_freep(&seg->ctx); if (seg->tx) { - for (int ch = 0; ch < s->nb_channels; ch++) { + for (int ch = 0; ch < s->nb_channels; ch++) av_tx_uninit(&seg->tx[ch]); - } } av_freep(&seg->tx); if (seg->itx) { - for (int ch = 0; ch < s->nb_channels; ch++) { + for (int ch = 0; ch < s->nb_channels; ch++) av_tx_uninit(&seg->itx[ch]); - } } av_freep(&seg->itx); + av_freep(&seg->loading); av_freep(&seg->output_offset); av_freep(&seg->part_index); + av_frame_free(&seg->tempin); + av_frame_free(&seg->tempout); av_frame_free(&seg->blockin); av_frame_free(&seg->blockout); av_frame_free(&seg->sumin); @@ -258,38 +269,42 @@ static void uninit_segment(AVFilterContext *ctx, AudioFIRSegment *seg) av_frame_free(&seg->coeff); av_frame_free(&seg->input); av_frame_free(&seg->output); + av_frame_free(&seg->loaded); seg->input_size = 0; } -static int convert_coeffs(AVFilterContext *ctx) +static int convert_coeffs(AVFilterContext *ctx, int selir) { AudioFIRContext *s = ctx->priv; - int ret, i, cur_nb_taps; + const int prev_selir = s->prev_selir; + int ret, nb_taps, cur_nb_taps, prev_nb_taps; - if (!s->nb_taps) { + if (!s->nb_taps[selir]) { int part_size, max_part_size; int left, offset = 0; - s->nb_taps = ff_inlink_queued_samples(ctx->inputs[1 + s->selir]); - if (s->nb_taps <= 0) + s->nb_taps[selir] = ff_inlink_queued_samples(ctx->inputs[1 + selir]); + if (s->nb_taps[selir] <= 0) return AVERROR(EINVAL); - if (s->minp > s->maxp) { + if (s->minp > s->maxp) s->maxp = s->minp; - } - left = s->nb_taps; + if (s->nb_segments) + goto skip; + + left = s->nb_taps[selir]; part_size = 1 << av_log2(s->minp); max_part_size = 1 << av_log2(s->maxp); s->min_part_size = part_size; - for (i = 0; left > 0; i++) { + for (int i = 0; left > 0; i++) { int step = part_size == max_part_size ? INT_MAX : 1 + (i == 0); int nb_partitions = FFMIN(step, (left + part_size - 1) / part_size); s->nb_segments = i + 1; - ret = init_segment(ctx, &s->seg[i], offset, nb_partitions, part_size); + ret = init_segment(ctx, &s->seg[i], offset, nb_partitions, part_size, i); if (ret < 0) return ret; offset += nb_partitions * part_size; @@ -299,8 +314,9 @@ static int convert_coeffs(AVFilterContext *ctx) } } - if (!s->ir[s->selir]) { - ret = ff_inlink_consume_samples(ctx->inputs[1 + s->selir], s->nb_taps, s->nb_taps, &s->ir[s->selir]); +skip: + if (!s->ir[selir]) { + ret = ff_inlink_consume_samples(ctx->inputs[1 + selir], s->nb_taps[selir], s->nb_taps[selir], &s->ir[selir]); if (ret < 0) return ret; if (ret == 0) @@ -318,34 +334,21 @@ static int convert_coeffs(AVFilterContext *ctx) } } - s->gain = 1; - cur_nb_taps = s->ir[s->selir]->nb_samples; + cur_nb_taps = s->ir[selir]->nb_samples; + prev_nb_taps = s->ir[prev_selir]->nb_samples; + nb_taps = FFMAX(cur_nb_taps, prev_nb_taps); - switch (s->format) { - case AV_SAMPLE_FMT_FLTP: - ret = get_power_float(ctx, s, cur_nb_taps); - break; - case AV_SAMPLE_FMT_DBLP: - ret = get_power_double(ctx, s, cur_nb_taps); - break; + if (!s->norm_ir || s->norm_ir->nb_samples < nb_taps) { + av_frame_free(&s->norm_ir); + s->norm_ir = ff_get_audio_buffer(ctx->inputs[0], FFALIGN(nb_taps, 8)); + if (!s->norm_ir) + return AVERROR(ENOMEM); } - if (ret < 0) - return ret; - av_log(ctx, AV_LOG_DEBUG, "nb_taps: %d\n", cur_nb_taps); av_log(ctx, AV_LOG_DEBUG, "nb_segments: %d\n", s->nb_segments); - switch (s->format) { - case AV_SAMPLE_FMT_FLTP: - convert_channels_float(ctx, s); - break; - case AV_SAMPLE_FMT_DBLP: - convert_channels_double(ctx, s); - break; - } - - s->have_coeffs = 1; + s->have_coeffs[selir] = 1; return 0; } @@ -394,8 +397,8 @@ static int activate(AVFilterContext *ctx) } } - if (!s->have_coeffs && s->eof_coeffs[s->selir]) { - ret = convert_coeffs(ctx); + if (!s->have_coeffs[s->selir] && s->eof_coeffs[s->selir]) { + ret = convert_coeffs(ctx, s->selir); if (ret < 0) return ret; } @@ -409,7 +412,7 @@ static int activate(AVFilterContext *ctx) if (ret < 0) return ret; - if (s->response && s->have_coeffs) { + if (s->response && s->have_coeffs[s->selir]) { int64_t old_pts = s->video->pts; int64_t new_pts = av_rescale_q(s->pts, ctx->inputs[0]->time_base, ctx->outputs[1]->time_base); @@ -520,9 +523,8 @@ FF_ENABLE_DEPRECATION_WARNINGS return ret; outlink->ch_layout.nb_channels = ctx->inputs[0]->ch_layout.nb_channels; - s->nb_channels = outlink->ch_layout.nb_channels; - s->nb_coef_channels = ctx->inputs[1 + s->selir]->ch_layout.nb_channels; s->format = outlink->format; + s->nb_channels = outlink->ch_layout.nb_channels; return 0; } @@ -531,15 +533,14 @@ static av_cold void uninit(AVFilterContext *ctx) { AudioFIRContext *s = ctx->priv; - for (int i = 0; i < s->nb_segments; i++) { + for (int i = 0; i < s->nb_segments; i++) uninit_segment(ctx, &s->seg[i]); - } av_freep(&s->fdsp); - for (int i = 0; i < s->nb_irs; i++) { + av_frame_free(&s->norm_ir); + for (int i = 0; i < s->nb_irs; i++) av_frame_free(&s->ir[i]); - } av_frame_free(&s->video); } @@ -569,6 +570,8 @@ static av_cold int init(AVFilterContext *ctx) AVFilterPad pad, vpad; int ret; + s->prev_selir = FFMIN(s->nb_irs - 1, s->selir); + pad = (AVFilterPad) { .name = "main", .type = AVMEDIA_TYPE_AUDIO, @@ -631,16 +634,21 @@ static int process_command(AVFilterContext *ctx, int flags) { AudioFIRContext *s = ctx->priv; - int prev_ir = s->selir; - int ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags); + int ret; + s->prev_selir = s->selir; + ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags); if (ret < 0) return ret; s->selir = FFMIN(s->nb_irs - 1, s->selir); + if (s->selir != s->prev_selir) { + for (int n = 0; n < s->nb_segments; n++) { + AudioFIRSegment *seg = &s->seg[n]; - if (prev_ir != s->selir) { - s->have_coeffs = 0; + for (int ch = 0; ch < s->nb_channels; ch++) + seg->loading[ch] = 0; + } } return 0; @@ -655,11 +663,13 @@ static const AVOption afir_options[] = { { "dry", "set dry gain", OFFSET(dry_gain), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 10, AF }, { "wet", "set wet gain", OFFSET(wet_gain), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 10, AF }, { "length", "set IR length", OFFSET(length), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, AF }, - { "gtype", "set IR auto gain type",OFFSET(gtype), AV_OPT_TYPE_INT, {.i64=0}, -1, 2, AF, "gtype" }, + { "gtype", "set IR auto gain type",OFFSET(gtype), AV_OPT_TYPE_INT, {.i64=0}, -1, 4, AF, "gtype" }, { "none", "without auto gain", 0, AV_OPT_TYPE_CONST, {.i64=-1}, 0, 0, AF, "gtype" }, { "peak", "peak gain", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "gtype" }, { "dc", "DC gain", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, "gtype" }, { "gn", "gain to noise", 0, AV_OPT_TYPE_CONST, {.i64=2}, 0, 0, AF, "gtype" }, + { "ac", "AC gain", 0, AV_OPT_TYPE_CONST, {.i64=3}, 0, 0, AF, "gtype" }, + { "rms", "RMS gain", 0, AV_OPT_TYPE_CONST, {.i64=4}, 0, 0, AF, "gtype" }, { "irgain", "set IR gain", OFFSET(ir_gain), AV_OPT_TYPE_FLOAT, {.dbl=1}, 0, 1, AF }, { "irfmt", "set IR format", OFFSET(ir_format), AV_OPT_TYPE_INT, {.i64=1}, 0, 1, AF, "irfmt" }, { "mono", "single channel", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "irfmt" }, diff --git a/libavfilter/af_afir.h b/libavfilter/af_afir.h index 6a071eddf7..3bc6abfef9 100644 --- a/libavfilter/af_afir.h +++ b/libavfilter/af_afir.h @@ -37,6 +37,8 @@ typedef struct AudioFIRSegment { int input_size; int input_offset; + int *selir; + int *loading; int *output_offset; int *part_index; @@ -44,15 +46,20 @@ typedef struct AudioFIRSegment { AVFrame *sumout; AVFrame *blockin; AVFrame *blockout; + AVFrame *tempin; + AVFrame *tempout; AVFrame *buffer; AVFrame *coeff; AVFrame *input; AVFrame *output; + AVFrame *loaded; - AVTXContext *ctx, **tx, **itx; + AVTXContext **ctx, **tx, **itx; av_tx_fn ctx_fn, tx_fn, itx_fn; } AudioFIRSegment; +#define MAX_IR_STREAMS 32 + typedef struct AudioFIRContext { const AVClass *class; @@ -70,24 +77,23 @@ typedef struct AudioFIRContext { int minp; int maxp; int nb_irs; + int prev_selir; int selir; int precision; int format; - double gain; - - int eof_coeffs[32]; - int have_coeffs; - int nb_taps; + int eof_coeffs[MAX_IR_STREAMS]; + int have_coeffs[MAX_IR_STREAMS]; + int nb_taps[MAX_IR_STREAMS]; int nb_channels; - int nb_coef_channels; int one2many; AudioFIRSegment seg[1024]; int nb_segments; AVFrame *in; - AVFrame *ir[32]; + AVFrame *ir[MAX_IR_STREAMS]; + AVFrame *norm_ir; AVFrame *video; int min_part_size; int64_t pts; diff --git a/libavfilter/afir_template.c b/libavfilter/afir_template.c index fea0627b6b..821be95785 100644 --- a/libavfilter/afir_template.c +++ b/libavfilter/afir_template.c @@ -18,6 +18,7 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ +#include "libavutil/tx.h" #include "avfilter.h" #include "formats.h" #include "internal.h" @@ -26,17 +27,23 @@ #undef ctype #undef ftype #undef SQRT +#undef HYPOT #undef SAMPLE_FORMAT +#undef TX_TYPE #if DEPTH == 32 #define SAMPLE_FORMAT float #define SQRT sqrtf +#define HYPOT hypotf #define ctype AVComplexFloat #define ftype float +#define TX_TYPE AV_TX_FLOAT_RDFT #else #define SAMPLE_FORMAT double #define SQRT sqrt +#define HYPOT hypot #define ctype AVComplexDouble #define ftype double +#define TX_TYPE AV_TX_DOUBLE_RDFT #endif #define fn3(a,b) a##_##b @@ -66,7 +73,7 @@ static void fn(draw_response)(AVFilterContext *ctx, AVFrame *out) double w = i * M_PI / (s->w - 1); double div, real_num = 0., imag_num = 0., real = 0., imag = 0.; - for (x = 0; x < s->nb_taps; x++) { + for (x = 0; x < s->nb_taps[s->selir]; x++) { real += cos(-x * w) * src[x]; imag += sin(-x * w) * src[x]; real_num += cos(-x * w) * src[x] * x; @@ -132,111 +139,162 @@ end: av_free(mag); } -static void fn(convert_channels)(AVFilterContext *ctx, AudioFIRContext *s) +static int fn(get_power)(AVFilterContext *ctx, AudioFIRContext *s, + int cur_nb_taps, int ch) { - for (int ch = 0; ch < ctx->inputs[1 + s->selir]->ch_layout.nb_channels; ch++) { - ftype *time = (ftype *)s->ir[s->selir]->extended_data[!s->one2many * ch]; - int toffset = 0; - - for (int i = FFMAX(1, s->length * s->nb_taps); i < s->nb_taps; i++) - time[i] = 0; - - av_log(ctx, AV_LOG_DEBUG, "channel: %d\n", ch); - - for (int segment = 0; segment < s->nb_segments; segment++) { - AudioFIRSegment *seg = &s->seg[segment]; - ftype *blockin = (ftype *)seg->blockin->extended_data[ch]; - ftype *blockout = (ftype *)seg->blockout->extended_data[ch]; - ctype *coeff = (ctype *)seg->coeff->extended_data[ch]; - - av_log(ctx, AV_LOG_DEBUG, "segment: %d\n", segment); - - for (int i = 0; i < seg->nb_partitions; i++) { - const int coffset = i * seg->coeff_size; - const int remaining = s->nb_taps - toffset; - const int size = remaining >= seg->part_size ? seg->part_size : remaining; - - memset(blockin, 0, sizeof(*blockin) * seg->fft_length); - memcpy(blockin, time + toffset, size * sizeof(*blockin)); - - seg->ctx_fn(seg->ctx, blockout, blockin, sizeof(ftype)); - - for (int n = 0; n < seg->part_size + 1; n++) { - coeff[coffset + n].re = blockout[2 * n]; - coeff[coffset + n].im = blockout[2 * n + 1]; - } - - toffset += size; - } - - av_log(ctx, AV_LOG_DEBUG, "nb_partitions: %d\n", seg->nb_partitions); - av_log(ctx, AV_LOG_DEBUG, "partition size: %d\n", seg->part_size); - av_log(ctx, AV_LOG_DEBUG, "block size: %d\n", seg->block_size); - av_log(ctx, AV_LOG_DEBUG, "fft_length: %d\n", seg->fft_length); - av_log(ctx, AV_LOG_DEBUG, "coeff_size: %d\n", seg->coeff_size); - av_log(ctx, AV_LOG_DEBUG, "input_size: %d\n", seg->input_size); - av_log(ctx, AV_LOG_DEBUG, "input_offset: %d\n", seg->input_offset); - } - } -} - -static int fn(get_power)(AVFilterContext *ctx, AudioFIRContext *s, int cur_nb_taps) -{ - ftype power = 0; - int ch; + ftype ch_gain = 1; switch (s->gtype) { case -1: - /* nothing to do */ + ch_gain = 1; break; case 0: - for (ch = 0; ch < ctx->inputs[1 + s->selir]->ch_layout.nb_channels; ch++) { - ftype *time = (ftype *)s->ir[s->selir]->extended_data[!s->one2many * ch]; + { + ftype *time = (ftype *)s->norm_ir->extended_data[ch]; + ftype sum = 0; for (int i = 0; i < cur_nb_taps; i++) - power += FFABS(time[i]); + sum += FFABS(time[i]); + ch_gain = 1. / sum; } - s->gain = ctx->inputs[1 + s->selir]->ch_layout.nb_channels / power; break; case 1: - for (ch = 0; ch < ctx->inputs[1 + s->selir]->ch_layout.nb_channels; ch++) { - ftype *time = (ftype *)s->ir[s->selir]->extended_data[!s->one2many * ch]; + { + ftype *time = (ftype *)s->norm_ir->extended_data[ch]; + ftype sum = 0; for (int i = 0; i < cur_nb_taps; i++) - power += time[i]; + sum += time[i]; + ch_gain = 1. / sum; } - s->gain = ctx->inputs[1 + s->selir]->ch_layout.nb_channels / power; break; case 2: - for (ch = 0; ch < ctx->inputs[1 + s->selir]->ch_layout.nb_channels; ch++) { - ftype *time = (ftype *)s->ir[s->selir]->extended_data[!s->one2many * ch]; + { + ftype *time = (ftype *)s->norm_ir->extended_data[ch]; + ftype sum = 0; for (int i = 0; i < cur_nb_taps; i++) - power += time[i] * time[i]; + sum += time[i] * time[i]; + ch_gain = 1. / SQRT(sum); + } + break; + case 3: + case 4: + { + ftype *inc, *outc, scale; + AVTXContext *tx; + av_tx_fn tx_fn; + int ret, size; + + size = 1 << av_ceil_log2_c(cur_nb_taps); + inc = av_calloc(size + 2, sizeof(SAMPLE_FORMAT)); + outc = av_calloc(size + 2, sizeof(SAMPLE_FORMAT)); + if (!inc || !outc) { + av_free(outc); + av_free(inc); + break; + } + + scale = 1.; + ret = av_tx_init(&tx, &tx_fn, TX_TYPE, 0, size, &scale, 0); + if (ret < 0) { + av_free(outc); + av_free(inc); + break; + } + + { + ftype power, *time = (ftype *)s->norm_ir->extended_data[ch]; + memcpy(inc, time, cur_nb_taps * sizeof(SAMPLE_FORMAT)); + tx_fn(tx, outc, inc, sizeof(SAMPLE_FORMAT)); + + power = 0; + if (s->gtype == 3) { + for (int i = 0; i < size / 2 + 1; i++) + power = FFMAX(power, HYPOT(outc[i * 2], outc[i * 2 + 1])); + } else { + ftype sum = 0; + for (int i = 0; i < size / 2 + 1; i++) + sum += HYPOT(outc[i * 2], outc[i * 2 + 1]); + power = SQRT(sum / (size / 2 + 1)); + } + + ch_gain = 1. / power; + } + + av_tx_uninit(&tx); + av_free(outc); + av_free(inc); } - s->gain = SQRT(ch / power); break; default: return AVERROR_BUG; } - s->gain = FFMIN(s->gain * s->ir_gain, 1.); - - av_log(ctx, AV_LOG_DEBUG, "power %f, gain %f\n", power, s->gain); - - for (int ch = 0; ch < ctx->inputs[1 + s->selir]->ch_layout.nb_channels; ch++) { - ftype *time = (ftype *)s->ir[s->selir]->extended_data[!s->one2many * ch]; + if (ch_gain != 1. || s->ir_gain != 1.) { + ftype *time = (ftype *)s->norm_ir->extended_data[ch]; + ftype gain = ch_gain * s->ir_gain; + av_log(ctx, AV_LOG_DEBUG, "ch%d gain %f\n", ch, gain); #if DEPTH == 32 - s->fdsp->vector_fmul_scalar(time, time, s->gain, FFALIGN(cur_nb_taps, 4)); + s->fdsp->vector_fmul_scalar(time, time, gain, FFALIGN(cur_nb_taps, 4)); #else - s->fdsp->vector_dmul_scalar(time, time, s->gain, FFALIGN(cur_nb_taps, 8)); + s->fdsp->vector_dmul_scalar(time, time, gain, FFALIGN(cur_nb_taps, 8)); #endif } return 0; } +static void fn(convert_channel)(AVFilterContext *ctx, AudioFIRContext *s, int ch, + AudioFIRSegment *seg) +{ + const int coeff_partition = seg->loading[ch]; + const int coffset = coeff_partition * seg->coeff_size; + const int selir = s->selir; + const int nb_taps = s->nb_taps[selir]; + ftype *tsrc = (ftype *)s->ir[selir]->extended_data[!s->one2many * ch]; + ftype *time = (ftype *)s->norm_ir->extended_data[ch]; + ftype *tempin = (ftype *)seg->tempin->extended_data[ch]; + ftype *tempout = (ftype *)seg->tempout->extended_data[ch]; + ctype *coeff = (ctype *)seg->coeff->extended_data[ch]; + int *loaded = (int *)seg->loaded->extended_data[ch]; + const int remaining = nb_taps - (seg->input_offset + coeff_partition * seg->part_size); + const int size = remaining >= seg->part_size ? seg->part_size : remaining; + + if (loaded[coeff_partition] == selir + 1) + return; + loaded[coeff_partition] = selir + 1; + + memcpy(time, tsrc, sizeof(*time) * nb_taps); + for (int i = FFMAX(1, s->length * nb_taps); i < nb_taps; i++) + time[i] = 0; + +#if DEPTH == 32 + get_power_float(ctx, s, nb_taps, ch); +#else + get_power_double(ctx, s, nb_taps, ch); +#endif + + av_log(ctx, AV_LOG_DEBUG, "channel: %d\n", ch); + + memset(tempin, 0, sizeof(*tempin) * seg->fft_length); + memcpy(tempin, time + seg->input_offset + coeff_partition * seg->part_size, + size * sizeof(*tempin)); + + seg->ctx_fn(seg->ctx[ch], tempout, tempin, sizeof(*tempin)); + + memcpy(coeff + coffset, tempout, (seg->part_size + 1) * sizeof(*coeff)); + + av_log(ctx, AV_LOG_DEBUG, "nb_partitions: %d\n", seg->nb_partitions); + av_log(ctx, AV_LOG_DEBUG, "partition size: %d\n", seg->part_size); + av_log(ctx, AV_LOG_DEBUG, "block size: %d\n", seg->block_size); + av_log(ctx, AV_LOG_DEBUG, "fft_length: %d\n", seg->fft_length); + av_log(ctx, AV_LOG_DEBUG, "coeff_size: %d\n", seg->coeff_size); + av_log(ctx, AV_LOG_DEBUG, "input_size: %d\n", seg->input_size); + av_log(ctx, AV_LOG_DEBUG, "input_offset: %d\n", seg->input_offset); +} + static void fn(fir_fadd)(AudioFIRContext *s, ftype *dst, const ftype *src, int nb_samples) { if ((nb_samples & 15) == 0 && nb_samples >= 8) { @@ -256,11 +314,12 @@ static int fn(fir_quantum)(AVFilterContext *ctx, AVFrame *out, int ch, int offse AudioFIRContext *s = ctx->priv; const ftype *in = (const ftype *)s->in->extended_data[ch] + offset; ftype *blockin, *blockout, *buf, *ptr = (ftype *)out->extended_data[ch] + offset; - const int nb_samples = FFMIN(s->min_part_size, out->nb_samples - offset); const int min_part_size = s->min_part_size; + const int nb_samples = FFMIN(min_part_size, out->nb_samples - offset); + const int nb_segments = s->nb_segments; const float dry_gain = s->dry_gain; - for (int segment = 0; segment < s->nb_segments; segment++) { + for (int segment = 0; segment < nb_segments; segment++) { AudioFIRSegment *seg = &s->seg[segment]; ftype *src = (ftype *)seg->input->extended_data[ch]; ftype *dst = (ftype *)seg->output->extended_data[ch]; @@ -272,6 +331,7 @@ static int fn(fir_quantum)(AVFilterContext *ctx, AVFrame *out, int ch, int offse const int part_size = seg->part_size; int j; + seg->part_index[ch] = seg->part_index[ch] % nb_partitions;; if (min_part_size >= 8) { #if DEPTH == 32 s->fdsp->vector_fmul_scalar(src + input_offset, in, dry_gain, FFALIGN(nb_samples, 4)); @@ -286,7 +346,7 @@ static int fn(fir_quantum)(AVFilterContext *ctx, AVFrame *out, int ch, int offse } output_offset[0] += min_part_size; - if (output_offset[0] == part_size) { + if (output_offset[0] >= part_size) { output_offset[0] = 0; } else { memmove(src, src + min_part_size, (seg->input_size - min_part_size) * sizeof(*src)); @@ -300,26 +360,36 @@ static int fn(fir_quantum)(AVFilterContext *ctx, AVFrame *out, int ch, int offse blockin = (ftype *)seg->blockin->extended_data[ch] + seg->part_index[ch] * seg->block_size; blockout = (ftype *)seg->blockout->extended_data[ch] + seg->part_index[ch] * seg->block_size; memset(blockin + part_size, 0, sizeof(*blockin) * (seg->fft_length - part_size)); - memcpy(blockin, src, sizeof(*src) * part_size); seg->tx_fn(seg->tx[ch], blockout, blockin, sizeof(ftype)); j = seg->part_index[ch]; + if (seg->loading[ch] < nb_partitions) { +#if DEPTH == 32 + convert_channel_float(ctx, s, ch, seg); +#else + convert_channel_double(ctx, s, ch, seg); +#endif + seg->loading[ch]++; + } for (int i = 0; i < nb_partitions; i++) { - const int coffset = j * seg->coeff_size; - const ftype *blockout = (const ftype *)seg->blockout->extended_data[ch] + i * seg->block_size; - const ctype *coeff = (const ctype *)seg->coeff->extended_data[ch * !s->one2many] + coffset; + const int input_partition = i; + const int coeff_partition = j; + const int coffset = coeff_partition * seg->coeff_size; + const ftype *blockout = (const ftype *)seg->blockout->extended_data[ch] + input_partition * seg->block_size; + const ctype *coeff = ((const ctype *)seg->coeff->extended_data[ch]) + coffset; + + if (j == 0) + j = nb_partitions; + j--; #if DEPTH == 32 s->afirdsp.fcmul_add(sumin, blockout, (const ftype *)coeff, part_size); #else s->afirdsp.dcmul_add(sumin, blockout, (const ftype *)coeff, part_size); #endif - if (j == 0) - j = nb_partitions; - j--; } seg->itx_fn(seg->itx[ch], sumout, sumin, sizeof(ctype)); @@ -332,7 +402,7 @@ static int fn(fir_quantum)(AVFilterContext *ctx, AVFrame *out, int ch, int offse buf = (ftype *)seg->buffer->extended_data[ch]; memcpy(buf, sumout + part_size, part_size * sizeof(*buf)); - seg->part_index[ch] = (seg->part_index[ch] + 1) % nb_partitions;; + seg->part_index[ch] = (seg->part_index[ch] + 1) % nb_partitions; memmove(src, src + min_part_size, (seg->input_size - min_part_size) * sizeof(*src));