aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul B Mahol <onemda@gmail.com>2023-11-08 14:31:50 +0100
committerPaul B Mahol <onemda@gmail.com>2023-11-17 00:17:54 +0100
commit08e97dae205d10806a0360bfc62f654d629dda93 (patch)
treee70399bfe0e7fe00c3e85448d4369e72b50a689e
parent82be1e5c0d828eef0b69307a61bc14f5b23ed595 (diff)
downloadffmpeg-08e97dae205d10806a0360bfc62f654d629dda93.tar.gz
avfilter/af_adynamicequalizer: add adaptive detection mode
-rw-r--r--doc/filters.texi2
-rw-r--r--libavfilter/adynamicequalizer_template.c120
-rw-r--r--libavfilter/af_adynamicequalizer.c27
3 files changed, 149 insertions, 0 deletions
diff --git a/doc/filters.texi b/doc/filters.texi
index 13c18a2574..d83a3fb91e 100644
--- a/doc/filters.texi
+++ b/doc/filters.texi
@@ -1100,6 +1100,8 @@ Disable using automatically gathered threshold value.
Stop picking threshold value.
@item on
Start picking threshold value.
+@item adaptive
+Adaptively pick the threshold value by calculating sliding-window entropy.
@end table
@item precision
diff --git a/libavfilter/adynamicequalizer_template.c b/libavfilter/adynamicequalizer_template.c
index 653d51c3a9..4eb2489cd6 100644
--- a/libavfilter/adynamicequalizer_template.c
+++ b/libavfilter/adynamicequalizer_template.c
@@ -27,7 +27,11 @@
#undef CLIP
#undef SAMPLE_FORMAT
#undef FABS
+#undef FLOG
+#undef FEXP
+#undef FLOG2
#undef FLOG10
+#undef FEXP2
#undef FEXP10
#undef EPSILON
#if DEPTH == 32
@@ -41,7 +45,11 @@
#define FMAX fmaxf
#define CLIP av_clipf
#define FABS fabsf
+#define FLOG logf
+#define FEXP expf
+#define FLOG2 log2f
#define FLOG10 log10f
+#define FEXP2 exp2f
#define FEXP10 ff_exp10f
#define EPSILON (1.f / (1 << 23))
#define ftype float
@@ -56,7 +64,11 @@
#define FMAX fmax
#define CLIP av_clipd
#define FABS fabs
+#define FLOG log
+#define FEXP exp
+#define FLOG2 log2
#define FLOG10 log10
+#define FEXP2 exp2
#define FEXP10 ff_exp10
#define EPSILON (1.0 / (1LL << 53))
#define ftype double
@@ -150,6 +162,92 @@ static int fn(filter_prepare)(AVFilterContext *ctx)
return 0;
}
+#define PEAKS(empty_value,op,sample, psample) /* Maintain the circular candidate queue ss[] for a new value `sample`; `psample` is the value leaving the window. Uses ss, front, back, n and empty from the expansion site. */ \
+ if (!empty && psample == ss[front]) { /* the outgoing value is the front entry: remove it */ \
+ ss[front] = empty_value; \
+ if (back != front) { \
+ front--; \
+ if (front < 0) /* wrap around the n used slots */ \
+ front = n - 1; \
+ } \
+ empty = front == back; \
+ } \
+ \
+ if (!empty && sample op ss[front]) { /* new sample beats the front entry: clear every entry */ \
+ while (1) { \
+ ss[front] = empty_value; \
+ if (back == front) { \
+ empty = 1; \
+ break; \
+ } \
+ front--; \
+ if (front < 0) \
+ front = n - 1; \
+ } \
+ } \
+ \
+ while (!empty && sample op ss[back]) { /* discard back entries that the new sample beats */ \
+ ss[back] = empty_value; \
+ if (back == front) { \
+ empty = 1; \
+ break; \
+ } \
+ back++; \
+ if (back >= n) \
+ back = 0; \
+ } \
+ \
+ if (!empty) { /* entries remain: step back to the next slot; the expansion site stores the new sample at ss[back] */ \
+ back--; \
+ if (back < 0) \
+ back = n - 1; \
+ }
+
+static void fn(queue_sample)(ChannelContext *cc, /* Push one sample into the per-channel sliding window: updates running sums, the sample ring buffer and the candidate queue. */
+ const ftype x, /* new sample; FLOG2(x) is taken, and the visible caller clamps it to at least EPSILON */
+ const int nb_samples) /* window length in samples; the visible caller passes the input sample rate */
+{
+ ftype *ss = cc->dqueue; /* candidate queue maintained by PEAKS */
+ ftype *qq = cc->queue; /* ring buffer of the samples currently in the window */
+ int front = cc->front;
+ int back = cc->back;
+ int empty, n, pos = cc->position;
+ ftype px = qq[pos]; /* value about to be overwritten at the write position */
+
+ fn(cc->sum) += x; /* running sum of the window */
+ fn(cc->log_sum) += FLOG2(x); /* running sum of log2 of the window */
+ if (cc->size >= nb_samples) { /* window already full: remove the outgoing value from both sums */
+ fn(cc->sum) -= px;
+ fn(cc->log_sum) -= FLOG2(px);
+ }
+
+ qq[pos] = x;
+ pos++;
+ if (pos >= nb_samples) /* wrap the write position */
+ pos = 0;
+ cc->position = pos;
+
+ if (cc->size < nb_samples) /* size grows until it reaches nb_samples */
+ cc->size++;
+ n = cc->size; /* wrap limit used by PEAKS for front/back */
+
+ empty = (front == back) && (ss[front] == ZERO); /* a single slot holding ZERO is treated as an empty queue */
+ PEAKS(ZERO, >, x, px)
+
+ ss[back] = x; /* store the new sample at the back slot chosen by PEAKS */
+
+ cc->front = front;
+ cc->back = back;
+}
+
+static ftype fn(get_peak)(ChannelContext *cc, ftype *score) /* Returns the front entry of the candidate queue and writes a score for the current window to *score. */
+{
+ ftype s, *ss = cc->dqueue;
+ s = FEXP2(fn(cc->log_sum) / cc->size) / (fn(cc->sum) / cc->size); /* exp2 of the mean log2 over the mean, i.e. geometric mean over arithmetic mean; NOTE(review): cc->size == 0 would give 0/0 here - confirm callers always queue a sample first */
+ *score = LIN2LOG(s); /* LIN2LOG is defined outside this view - presumably a linear-to-log conversion; confirm */
+ return ss[cc->front];
+}
+
static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
AudioDynamicEqualizerContext *s = ctx->priv;
@@ -157,6 +255,7 @@ static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int n
AVFrame *in = td->in;
AVFrame *out = td->out;
const ftype sample_rate = in->sample_rate;
+ const int isample_rate = in->sample_rate;
const ftype makeup = s->makeup;
const ftype ratio = s->ratio;
const ftype range = s->range;
@@ -197,6 +296,27 @@ static int fn(filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int n
fn(cc->new_threshold_log) = FMAX(fn(cc->new_threshold_log), LIN2LOG(new_threshold));
}
+ } else if (detection == DET_ADAPTIVE) {
+ for (int ch = start; ch < end; ch++) {
+ const ftype *src = (const ftype *)in->extended_data[ch];
+ ChannelContext *cc = &s->cc[ch];
+ ftype *tstate = fn(cc->tstate);
+ ftype score, peak;
+
+ for (int n = 0; n < in->nb_samples; n++) {
+ ftype detect = FMAX(FABS(fn(get_svf)(src[n], dm, da, tstate)), EPSILON);
+ fn(queue_sample)(cc, detect, isample_rate);
+ }
+
+ peak = fn(get_peak)(cc, &score);
+
+ if (score >= -3.5) {
+ fn(cc->threshold_log) = LIN2LOG(peak);
+ } else if (cc->detection == DET_UNSET) {
+ fn(cc->threshold_log) = s->threshold_log;
+ }
+ cc->detection = detection;
+ }
} else if (detection == DET_DISABLED) {
for (int ch = start; ch < end; ch++) {
ChannelContext *cc = &s->cc[ch];
diff --git a/libavfilter/af_adynamicequalizer.c b/libavfilter/af_adynamicequalizer.c
index 1926ae8ec1..611e542c1b 100644
--- a/libavfilter/af_adynamicequalizer.c
+++ b/libavfilter/af_adynamicequalizer.c
@@ -29,6 +29,7 @@ enum DetectionModes {
DET_DISABLED,
DET_OFF,
DET_ON,
+ DET_ADAPTIVE,
NB_DMODES,
};
@@ -50,6 +51,8 @@ typedef struct ChannelContext {
double detect_double;
double threshold_log_double;
double new_threshold_log_double;
+ double log_sum_double;
+ double sum_double;
float fa_float[3], fm_float[3];
float dstate_float[2];
float fstate_float[2];
@@ -58,6 +61,14 @@ typedef struct ChannelContext {
float detect_float;
float threshold_log_float;
float new_threshold_log_float;
+ float log_sum_float;
+ float sum_float;
+ void *dqueue;
+ void *queue;
+ int position;
+ int size;
+ int front;
+ int back;
int detection;
int init;
} ChannelContext;
@@ -86,6 +97,7 @@ typedef struct AudioDynamicEqualizerContext {
int dftype;
int precision;
int format;
+ int nb_channels;
int (*filter_prepare)(AVFilterContext *ctx);
int (*filter_channels)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
@@ -140,6 +152,7 @@ static int config_input(AVFilterLink *inlink)
s->cc = av_calloc(inlink->ch_layout.nb_channels, sizeof(*s->cc));
if (!s->cc)
return AVERROR(ENOMEM);
+ s->nb_channels = inlink->ch_layout.nb_channels;
switch (s->format) {
case AV_SAMPLE_FMT_DBLP:
@@ -152,6 +165,14 @@ static int config_input(AVFilterLink *inlink)
break;
}
+ for (int ch = 0; ch < s->nb_channels; ch++) {
+ ChannelContext *cc = &s->cc[ch];
+ cc->queue = av_calloc(inlink->sample_rate, sizeof(double));
+ cc->dqueue = av_calloc(inlink->sample_rate, sizeof(double));
+ if (!cc->queue || !cc->dqueue)
+ return AVERROR(ENOMEM);
+ }
+
return 0;
}
@@ -189,6 +210,11 @@ static av_cold void uninit(AVFilterContext *ctx)
{
AudioDynamicEqualizerContext *s = ctx->priv;
+ for (int ch = 0; ch < s->nb_channels; ch++) {
+ ChannelContext *cc = &s->cc[ch];
+ av_freep(&cc->queue);
+ av_freep(&cc->dqueue);
+ }
av_freep(&s->cc);
}
@@ -226,6 +252,7 @@ static const AVOption adynamicequalizer_options[] = {
{ "disabled", 0, 0, AV_OPT_TYPE_CONST, {.i64=DET_DISABLED}, 0, 0, FLAGS, "auto" },
{ "off", 0, 0, AV_OPT_TYPE_CONST, {.i64=DET_OFF}, 0, 0, FLAGS, "auto" },
{ "on", 0, 0, AV_OPT_TYPE_CONST, {.i64=DET_ON}, 0, 0, FLAGS, "auto" },
+ { "adaptive", 0, 0, AV_OPT_TYPE_CONST, {.i64=DET_ADAPTIVE}, 0, 0, FLAGS, "auto" },
{ "precision", "set processing precision", OFFSET(precision), AV_OPT_TYPE_INT, {.i64=0}, 0, 2, AF, "precision" },
{ "auto", "set auto processing precision", 0, AV_OPT_TYPE_CONST, {.i64=0}, 0, 0, AF, "precision" },
{ "float", "set single-floating point processing precision", 0, AV_OPT_TYPE_CONST, {.i64=1}, 0, 0, AF, "precision" },