author     Paul B Mahol <onemda@gmail.com>   2022-11-06 14:03:27 +0100
committer  Paul B Mahol <onemda@gmail.com>   2022-11-07 08:32:48 +0100
commit     7027101904a483e1c6a6db89b47b3362c37155a0 (patch)
tree       5ab15a46a604629cf2417fb638cc773d1b2fb357 /libavfilter/af_speechnorm.c
parent     b48d2320f118f8a687a7e65f57adecbe3479e7a0 (diff)
download   ffmpeg-7027101904a483e1c6a6db89b47b3362c37155a0.tar.gz
avfilter/af_speechnorm: implement rms option
Diffstat (limited to 'libavfilter/af_speechnorm.c')
-rw-r--r--  libavfilter/af_speechnorm.c  33
1 file changed, 27 insertions(+), 6 deletions(-)
diff --git a/libavfilter/af_speechnorm.c b/libavfilter/af_speechnorm.c
index c9bd8d5cac..fd6b7d9a32 100644
--- a/libavfilter/af_speechnorm.c
+++ b/libavfilter/af_speechnorm.c
@@ -46,6 +46,7 @@ typedef struct PeriodItem {
int size;
int type;
double max_peak;
+ double rms_sum;
} PeriodItem;
typedef struct ChannelContext {
@@ -54,6 +55,7 @@ typedef struct ChannelContext {
PeriodItem pi[MAX_ITEMS];
double gain_state;
double pi_max_peak;
+ double pi_rms_sum;
int pi_start;
int pi_end;
int pi_size;
@@ -62,6 +64,7 @@ typedef struct ChannelContext {
typedef struct SpeechNormalizerContext {
const AVClass *class;
+ double rms_value;
double peak_value;
double max_expansion;
double max_compression;
@@ -110,6 +113,8 @@ static const AVOption speechnorm_options[] = {
{ "i", "set inverted filtering", OFFSET(invert), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
{ "link", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
{ "l", "set linked channels filtering", OFFSET(link), AV_OPT_TYPE_BOOL, {.i64=0}, 0, 1, FLAGS },
+ { "rms", "set the RMS value", OFFSET(rms_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0.0, 1.0, FLAGS },
+ { "m", "set the RMS value", OFFSET(rms_value), AV_OPT_TYPE_DOUBLE, {.dbl=0.0}, 0.0, 1.0, FLAGS },
{ NULL }
};
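Since the new pair registers through the standard AVOptions table, it can be set like any other filter option. Below is a minimal sketch of programmatic use under that assumption; the make_speechnorm() helper, the 0.3 RMS target and the 0.95 peak are illustrative values, not part of this commit.

#include <libavfilter/avfilter.h>
#include <libavutil/opt.h>

/* Hypothetical helper: create a speechnorm instance with the new
 * RMS-based cap enabled. Option values are examples only. */
static int make_speechnorm(AVFilterGraph *graph, AVFilterContext **out)
{
    const AVFilter *f = avfilter_get_by_name("speechnorm");
    AVFilterContext *ctx;

    if (!f)
        return AVERROR_FILTER_NOT_FOUND;
    ctx = avfilter_graph_alloc_filter(graph, f, "norm");
    if (!ctx)
        return AVERROR(ENOMEM);

    /* "rms" (alias "m") accepts 0.0..1.0; the default 0.0 keeps the
     * previous peak-only behaviour. */
    av_opt_set_double(ctx, "rms",  0.3,  AV_OPT_SEARCH_CHILDREN);
    av_opt_set_double(ctx, "peak", 0.95, AV_OPT_SEARCH_CHILDREN);

    *out = ctx;
    return avfilter_init_str(ctx, NULL);
}

The same configuration should correspond to the filtergraph string speechnorm=rms=0.3:peak=0.95.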
@@ -161,12 +166,16 @@ static void consume_pi(ChannelContext *cc, int nb_samples)
}
}
-static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state)
+static double next_gain(AVFilterContext *ctx, double pi_max_peak, int bypass, double state,
+ double pi_rms_sum, int pi_size)
{
SpeechNormalizerContext *s = ctx->priv;
- const double expansion = FFMIN(s->max_expansion, s->peak_value / pi_max_peak);
const double compression = 1. / s->max_compression;
const int type = s->invert ? pi_max_peak <= s->threshold_value : pi_max_peak >= s->threshold_value;
+ double expansion = FFMIN(s->max_expansion, s->peak_value / pi_max_peak);
+
+ if (s->rms_value > DBL_EPSILON)
+ expansion = FFMIN(expansion, s->rms_value / sqrt(pi_rms_sum / pi_size));
if (bypass) {
return 1.;
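Restated outside the diff: the change only tightens the expansion factor. When rms_value is non-zero, the half-period's RMS, sqrt(pi_rms_sum / pi_size), becomes a second ceiling alongside the peak target. A standalone sketch of just that computation, with the bypass, type and compression branches omitted (names mirror the diff, but this is not the filter's full next_gain()):

#include <float.h>
#include <math.h>

#define FFMIN(a, b) ((a) < (b) ? (a) : (b))

/* Illustration of the new expansion cap only. */
static double expansion_cap(double max_expansion, double peak_value,
                            double rms_value, double pi_max_peak,
                            double pi_rms_sum, int pi_size)
{
    double expansion = FFMIN(max_expansion, peak_value / pi_max_peak);

    if (rms_value > DBL_EPSILON) /* rms == 0 disables the extra cap */
        expansion = FFMIN(expansion, rms_value / sqrt(pi_rms_sum / pi_size));
    return expansion;
}

The compression path and the invert/threshold handling are untouched; only the expansion ceiling gains the extra RMS term.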
@@ -187,13 +196,15 @@ static void next_pi(AVFilterContext *ctx, ChannelContext *cc, int bypass)
av_assert1(cc->pi[start].size > 0);
av_assert0(cc->pi[start].type > 0 || s->eof);
cc->pi_size = cc->pi[start].size;
+ cc->pi_rms_sum = cc->pi[start].rms_sum;
cc->pi_max_peak = cc->pi[start].max_peak;
av_assert1(cc->pi_start != cc->pi_end || s->eof);
start++;
if (start >= MAX_ITEMS)
start = 0;
cc->pi_start = start;
- cc->gain_state = next_gain(ctx, cc->pi_max_peak, bypass, cc->gain_state);
+ cc->gain_state = next_gain(ctx, cc->pi_max_peak, bypass, cc->gain_state,
+ cc->pi_rms_sum, cc->pi_size);
}
}
@@ -209,7 +220,8 @@ static double min_gain(AVFilterContext *ctx, ChannelContext *cc, int max_size)
while (size <= max_size) {
if (idx == cc->pi_end)
break;
- gain_state = next_gain(ctx, cc->pi[idx].max_peak, 0, gain_state);
+ gain_state = next_gain(ctx, cc->pi[idx].max_peak, 0, gain_state,
+ cc->pi[idx].rms_sum, cc->pi[idx].size);
min_gain = FFMIN(min_gain, gain_state);
size += cc->pi[idx].size;
idx++;
@@ -236,11 +248,13 @@ static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc,
\
while (n < nb_samples) { \
ptype new_max_peak; \
+ ptype new_rms_sum; \
int new_size; \
\
if ((cc->state != (src[n] >= zero)) || \
(pi[pi_end].size > max_period)) { \
ptype max_peak = pi[pi_end].max_peak; \
+ ptype rms_sum = pi[pi_end].rms_sum; \
int state = cc->state; \
\
cc->state = src[n] >= zero; \
@@ -251,10 +265,13 @@ static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc,
pi_end++; \
if (pi_end >= MAX_ITEMS) \
pi_end = 0; \
- if (cc->state != state) \
+ if (cc->state != state) { \
pi[pi_end].max_peak = DBL_MIN; \
- else \
+ pi[pi_end].rms_sum = 0.0; \
+ } else { \
pi[pi_end].max_peak = max_peak; \
+ pi[pi_end].rms_sum = rms_sum; \
+ } \
pi[pi_end].type = 0; \
pi[pi_end].size = 0; \
av_assert1(pi_end != cc->pi_start); \
@@ -262,10 +279,12 @@ static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc,
} \
\
new_max_peak = pi[pi_end].max_peak; \
+ new_rms_sum = pi[pi_end].rms_sum; \
new_size = pi[pi_end].size; \
if (cc->state) { \
while (src[n] >= zero) { \
new_max_peak = FFMAX(new_max_peak, src[n]); \
+ new_rms_sum += src[n] * src[n]; \
new_size++; \
n++; \
if (n >= nb_samples) \
@@ -274,6 +293,7 @@ static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc,
} else { \
while (src[n] < zero) { \
new_max_peak = FFMAX(new_max_peak, -src[n]); \
+ new_rms_sum += src[n] * src[n]; \
new_size++; \
n++; \
if (n >= nb_samples) \
@@ -282,6 +302,7 @@ static void analyze_channel_## name (AVFilterContext *ctx, ChannelContext *cc,
} \
\
pi[pi_end].max_peak = new_max_peak; \
+ pi[pi_end].rms_sum = new_rms_sum; \
pi[pi_end].size = new_size; \
} \
cc->pi_end = pi_end; \
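The analysis macro above accumulates src[n] * src[n] into rms_sum for each half-period alongside the running peak, so sqrt(rms_sum / size) in next_gain() is that half-period's RMS. As with max_peak, the sum is carried over when a period is split for exceeding max_period and reset to 0.0 only on a genuine sign change. For illustration only, an equivalent accumulation in plain C (the period_rms() helper is hypothetical, not part of the filter):

#include <math.h>
#include <stddef.h>

static double period_rms(const double *src, size_t size)
{
    double rms_sum = 0.0;

    for (size_t i = 0; i < size; i++)
        rms_sum += src[i] * src[i]; /* same sum the macro keeps per period */
    return size ? sqrt(rms_sum / size) : 0.0;
}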