avfilter/af_adynamicequalizer: add adaptive detection mode

2023-11-08 14:31:50 +01:00
parent 82be1e5c0d
commit 08e97dae20
3 changed files with 149 additions and 0 deletions
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -1100,6 +1100,8 @@ Disable using automatically gathered threshold value.
 Stop picking threshold value.
@item on
 Start picking threshold value.
@item adaptive
 Adaptively pick threshold value by calculating sliding-window entropy.
@end table
@item precision
--- a/libavfilter/adynamicequalizer_template.c
+++ b/libavfilter/adynamicequalizer_template.c
@@ -27,7 +27,11 @@
 #undef CLIP
 #undef SAMPLE_FORMAT
 #undef FABS
 #undef FLOG
 #undef FEXP
 #undef FLOG2
 #undef FLOG10
 #undef FEXP2
 #undef FEXP10
 #undef EPSILON
 #if DEPTH == 32
@@ -41,7 +45,11 @@
 #define FMAX fmaxf
 #define CLIP av_clipf
 #define FABS fabsf
 #define FLOG logf
 #define FEXP expf
 #define FLOG2 log2f
 #define FLOG10 log10f
 #define FEXP2 exp2f
 #define FEXP10 ff_exp10f
 #define EPSILON (1.f / (1 << 23))
 #define ftype float
@@ -56,7 +64,11 @@
 #define FMAX fmax
 #define CLIP av_clipd
 #define FABS fabs
 #define FLOG log
 #define FEXP exp
 #define FLOG2 log2
 #define FLOG10 log10
 #define FEXP2 exp2
 #define FEXP10 ff_exp10
 #define EPSILON (1.0 / (1LL << 53))
 #define ftype double
@@ -150,6 +162,92 @@ static int fn(filter_prepare)(AVFilterContext *ctx)
    return 0;
 }
 /*
  * PEAKS(empty_value, op, sample, psample)
  *
  * One update step of a sliding-window extremum tracker stored in a ring
  * buffer. The macro is not self-contained: it expands to statements that
  * read and modify these locals of the enclosing function:
  *   ss    - ring buffer of candidate values
  *   front - index of the current extremum (the oldest candidate)
  *   back  - index of the most recently stored candidate
  *   n     - ring size used for index wrap-around
  *   empty - non-zero while the ring holds no candidate
  * Candidates run from front towards back through decreasing indices
  * (wrapping from 0 to n - 1).
  *
  * Arguments:
  *   empty_value - value written to slots that become unused; a slot holding
  *                 it is treated as free, so real samples must differ from it
  *   op          - comparison operator; "sample op candidate" means that the
  *                 new sample displaces the candidate
  *   sample      - value entering the window
  *   psample     - value leaving the window
  *
  * The macro never stores sample itself. It leaves back on the slot where
  * the caller has to write it (queue_sample does "ss[back] = x" right after
  * the expansion).
  */
 #define PEAKS(empty_value,op,sample, psample)\
    /* 1: retire the extremum when it is the value leaving the window */ \
    if (!empty && (psample) == ss[front]) {                              \
        ss[front] = (empty_value);                                       \
        if (back != front) {                                             \
            front--;                                                     \
            if (front < 0)                                               \
                front = n - 1;                                           \
        }                                                                \
        /* front meeting back still leaves one candidate in that slot;   \
         * only a cleared slot means the ring is really empty */         \
        empty = front == back && ss[front] == (empty_value);             \
    }                                                                    \
                                                                         \
    /* 2: a sample that displaces the extremum displaces everything */   \
    if (!empty && (sample) op ss[front]) {                               \
        while (1) {                                                      \
            ss[front] = (empty_value);                                   \
            if (back == front) {                                         \
                empty = 1;                                               \
                break;                                                   \
            }                                                            \
            front--;                                                     \
            if (front < 0)                                               \
                front = n - 1;                                           \
        }                                                                \
    }                                                                    \
                                                                         \
    /* 3: otherwise discard the newest candidates the sample displaces */\
    while (!empty && (sample) op ss[back]) {                             \
        ss[back] = (empty_value);                                        \
        if (back == front) {                                             \
            empty = 1;                                                   \
            break;                                                       \
        }                                                                \
        back++;                                                          \
        if (back >= n)                                                   \
            back = 0;                                                    \
    }                                                                    \
                                                                         \
    /* 4: step back to the next free slot for the incoming sample */     \
    if (!empty) {                                                        \
        back--;                                                          \
        if (back < 0)                                                    \
            back = n - 1;                                                \
    }
 /**
  * Feed one detector sample into the per-channel sliding-window state.
  *
  * Updates, for a window of at most nb_samples values:
  *  - the running sum of the samples and of their base-2 logarithms
  *    (fn(cc->sum), fn(cc->log_sum)),
  *  - the circular history cc->queue, used to know which value leaves the
  *    window,
  *  - the ring of maximum candidates cc->dqueue, maintained by PEAKS with
  *    the '>' operator; ss[cc->front] holds the current window maximum.
  *
  * @param cc          channel state to update
  * @param x           new sample; expected to be strictly positive since its
  *                    logarithm is accumulated and ZERO marks unused slots in
  *                    cc->dqueue (the visible caller clamps it to EPSILON)
  * @param nb_samples  window length in samples; cc->queue and cc->dqueue must
  *                    hold at least this many elements
  */
 static void fn(queue_sample)(ChannelContext *cc,
                             const ftype x,
                             const int nb_samples)
 {
    ftype *ss = cc->dqueue;
    ftype *qq = cc->queue;
    int front = cc->front;
    int back = cc->back;
    int empty, n, pos = cc->position;
    /* value stored nb_samples calls ago (still zero while filling up) */
    ftype px = qq[pos];
    fn(cc->sum) += x;
    fn(cc->log_sum) += FLOG2(x);
    /* only a full window has an old sample to take out of the sums */
    if (cc->size >= nb_samples) {
        fn(cc->sum) -= px;
        fn(cc->log_sum) -= FLOG2(px);
    }
    qq[pos] = x;
    pos++;
    if (pos >= nb_samples)
        pos = 0;
    cc->position = pos;
    if (cc->size < nb_samples)
        cc->size++;
    /* The candidate ring has to wrap at its fixed capacity. Wrapping at the
     * current fill level (cc->size) moves the wrap point on every call while
     * the window is still filling, which lets back run into front and
     * overwrite the stored maximum. */
    n = nb_samples;
    /* front == back is ambiguous: the slot content tells empty from one */
    empty = (front == back) && (ss[front] == ZERO);
    PEAKS(ZERO, >, x, px)
    /* PEAKS leaves back on the slot reserved for the new sample */
    ss[back] = x;
    cc->front = front;
    cc->back = back;
 }
 /**
  * Report the current window maximum and a score for the window.
  *
  * The score is the ratio of the geometric mean (exp2 of the averaged log2
  * sum) to the arithmetic mean of the queued samples, passed through
  * LIN2LOG.
  *
  * @param cc     channel state filled by queue_sample
  * @param score  receives the LIN2LOG-converted mean ratio
  * @return the value stored at the front of cc->dqueue
  */
 static ftype fn(get_peak)(ChannelContext *cc, ftype *score)
 {
    const ftype *maxima = cc->dqueue;
    const ftype count = cc->size;
    const ftype geometric_mean = FEXP2(fn(cc->log_sum) / count);
    const ftype arithmetic_mean = fn(cc->sum) / count;
    const ftype mean_ratio = geometric_mean / arithmetic_mean;

    *score = LIN2LOG(mean_ratio);

    return maxima[cc->front];
 }
 static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
 {
    AudioDynamicEqualizerContext *s = ctx->priv;
@@ -157,6 +255,7 @@ static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int n
    AVFrame *in = td->in;
    AVFrame *out = td->out;
    const ftype sample_rate = in->sample_rate;
    const int isample_rate = in->sample_rate;
    const ftype makeup = s->makeup;
    const ftype ratio = s->ratio;
    const ftype range = s->range;
@@ -197,6 +296,27 @@ static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int n
            fn(cc->new_threshold_log) = FMAX(fn(cc->new_threshold_log), LIN2LOG(new_threshold));
        }
    } else if (detection == DET_ADAPTIVE) {
        for (int ch = start; ch < end; ch++) {
            const ftype *src = (const ftype *)in->extended_data[ch];
            ChannelContext *cc = &s->cc[ch];
            ftype *tstate = fn(cc->tstate);
            ftype score, peak;
            for (int n = 0; n < in->nb_samples; n++) {
                ftype detect = FMAX(FABS(fn(get_svf)(src[n], dm, da, tstate)), EPSILON);
                fn(queue_sample)(cc, detect, isample_rate);
            }
            peak = fn(get_peak)(cc, &score);
            if (score >= -3.5) {
                fn(cc->threshold_log) = LIN2LOG(peak);
            } else if (cc->detection == DET_UNSET) {
                fn(cc->threshold_log) = s->threshold_log;
            }
            cc->detection = detection;
        }
    } else if (detection == DET_DISABLED) {
        for (int ch = start; ch < end; ch++) {
            ChannelContext *cc = &s->cc[ch];
--- a/libavfilter/af_adynamicequalizer.c
+++ b/libavfilter/af_adynamicequalizer.c
@@ -29,6 +29,7 @@ enum DetectionModes {
    DET_DISABLED,
    DET_OFF,
    DET_ON,
    DET_ADAPTIVE,
    NB_DMODES,
 };
@@ -50,6 +51,8 @@ typedef struct ChannelContext {
    double detect_double;
    double threshold_log_double;
    double new_threshold_log_double;
    double log_sum_double;
    double sum_double;
    float fa_float[3], fm_float[3];
    float dstate_float[2];
    float fstate_float[2];
@@ -58,6 +61,14 @@ typedef struct ChannelContext {
    float detect_float;
    float threshold_log_float;
    float new_threshold_log_float;
    float log_sum_float;
    float sum_float;
    void *dqueue;
    void *queue;
    int position;
    int size;
    int front;
    int back;
    int detection;
    int init;
 } ChannelContext;
@@ -86,6 +97,7 @@ typedef struct AudioDynamicEqualizerContext {
    int dftype;
    int precision;
    int format;
    int nb_channels;
    int (*filter_prepare)(AVFilterContext *ctx);
    int (*filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
@@ -140,6 +152,7 @@ static int config_input(AVFilterLink *inlink)
    s->cc = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->cc));
    if (!s->cc)
        return AVERROR(ENOMEM);
    s->nb_channels = inlink->ch_layout.nb_channels;
    switch (s->format) {
    case AV_SAMPLE_FMT_DBLP:
@@ -152,6 +165,14 @@ static int config_input(AVFilterLink *inlink)
        break;
    }
    for (int ch = 0; ch < s->nb_channels; ch++) {
        ChannelContext *cc = &s->cc[ch];
        cc->queue = av_calloc(inlink->sample_rate, sizeof(double));
        cc->dqueue = av_calloc(inlink->sample_rate, sizeof(double));
        if (!cc->queue || !cc->dqueue)
            return AVERROR(ENOMEM);
    }
    return 0;
 }
@@ -189,6 +210,11 @@ static av_cold void uninit(AVFilterContext *ctx)
 {
    AudioDynamicEqualizerContext *s = ctx->priv;
    for (int ch = 0; ch < s->nb_channels; ch++) {
        ChannelContext *cc = &s->cc[ch];
        av_freep(&cc->queue);
        av_freep(&cc->dqueue);
    }
    av_freep(&s->cc);
 }
@@ -226,6 +252,7 @@ static const AVOption adynamicequalizer_options[] = {
    {   "disabled", 0,                         0,                  AV_OPT_TYPE_CONST,  {.i64=DET_DISABLED}, 0, 0,   FLAGS, "auto" },
    {   "off",      0,                         0,                  AV_OPT_TYPE_CONST,  {.i64=DET_OFF},      0, 0,   FLAGS, "auto" },
    {   "on",       0,                         0,                  AV_OPT_TYPE_CONST,  {.i64=DET_ON},       0, 0,   FLAGS, "auto" },
    {   "adaptive", 0,                         0,                  AV_OPT_TYPE_CONST,  {.i64=DET_ADAPTIVE}, 0, 0,   FLAGS, "auto" },
    { "precision", "set processing precision", OFFSET(precision),  AV_OPT_TYPE_INT,    {.i64=0},        0, 2,       AF, "precision" },
    {   "auto",  "set auto processing precision",                  0, AV_OPT_TYPE_CONST, {.i64=0},      0, 0,       AF, "precision" },
    {   "float", "set single-floating point processing precision", 0, AV_OPT_TYPE_CONST, {.i64=1},      0, 0,       AF, "precision" },