aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorClaudio Freire <klaussfreire@gmail.com>2015-03-03 03:43:06 -0300
committerMichael Niedermayer <michaelni@gmx.at>2015-03-03 13:57:42 +0100
commit6394acaf36da3106f4793bda32730f8ff6b0ddb1 (patch)
tree142051ea207b4ff45e1c53773a7067a2946959fb
parent4851db80a4f80ddade1d50d2ec741375c763f001 (diff)
downloadffmpeg-6394acaf36da3106f4793bda32730f8ff6b0ddb1.tar.gz
AAC: Fix M/S stereo encoding
This patch fixes a pointer arithmetic bug in adjust_frame_information that resulted in heavily corrupted audio when using M/S encoding. Also, a backup copy of untransformed coefficients has to be kept around or attempts at re-processing the frame (which happens when hevavily overspending bits during transients) will result in re-encoding of the coefficients and subsequent corruption of the resulting stream. A/B testing shows the bug as corrected, but still cannot prove that M/S coding is a win at least in numbers. Limited listening tests do show improvement on M/S encoded samples in lower bitrates, but they're hidden among the other artifacts that remain to be corrected in the encoder. Some of the regressions flagged in the report do show poor stereo image (but not buggy), so M/S encoding is clearly not good enough yet to be defaulted to auto. In numbers, Patched against Unpatched, stereo_mode auto: Files: 114 Bitrates: 6 Tests: 683 Serious Regressions: 0 (0%) Regressions: 0 (0%) Improvements: 227 (33%) Big improvements: 92 (13%) Worst regression - mybloodrusts.wv - 256k - StdDev: 28.61 pSNR: -0.43 maxdiff: 1372.00 Best improvement - 60.wv - 384k - StdDev: -369.57 pSNR: 45.02 maxdiff: -13322.00 Average - StdDev: -80.56 pSNR: 2.49 maxdiff: -8858.00 Patched against Unpatched stereo_mode ms_off shows no difference. Patched stereo_mode auto vs Unpatched stereo_mode ms_off shows a small average improvement, just not too significant: Serious Regressions: 0 (0%) Regressions: 10 (1%) Improvements: 45 (6%) Big improvements: 2 (0%) Worst regression - Illinois.wv - 256k - StdDev: 33.20 pSNR: -2.03 maxdiff: 477.00 Best improvement - song_of_circomstances.flac - 384k - StdDev: -3.97 pSNR: 7.61 maxdiff: -826.00 Average - StdDev: -10.25 pSNR: 0.20 maxdiff: -281.00 Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r--libavcodec/aac.h3
-rw-r--r--libavcodec/aaccoder.c6
-rw-r--r--libavcodec/aacenc.c38
3 files changed, 31 insertions, 16 deletions
diff --git a/libavcodec/aac.h b/libavcodec/aac.h
index e9c373fc34..b25b40c9c6 100644
--- a/libavcodec/aac.h
+++ b/libavcodec/aac.h
@@ -233,7 +233,8 @@ typedef struct SingleChannelElement {
float sf[120]; ///< scalefactors
int sf_idx[128]; ///< scalefactor indices (used by encoder)
uint8_t zeroes[128]; ///< band is not coded (used by encoder)
- DECLARE_ALIGNED(32, float, coeffs)[1024]; ///< coefficients for IMDCT
+ DECLARE_ALIGNED(32, float, pcoeffs)[1024]; ///< coefficients for IMDCT, pristine
+ DECLARE_ALIGNED(32, float, coeffs)[1024]; ///< coefficients for IMDCT, maybe processed
DECLARE_ALIGNED(32, float, saved)[1536]; ///< overlap
DECLARE_ALIGNED(32, float, ret_buf)[2048]; ///< PCM output buffer
DECLARE_ALIGNED(16, float, ltp_state)[3072]; ///< time signal for LTP
diff --git a/libavcodec/aaccoder.c b/libavcodec/aaccoder.c
index b4d2009838..64eee3236b 100644
--- a/libavcodec/aaccoder.c
+++ b/libavcodec/aaccoder.c
@@ -1069,10 +1069,10 @@ static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
float minthr = FFMIN(band0->threshold, band1->threshold);
float maxthr = FFMAX(band0->threshold, band1->threshold);
for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
- M[i] = (sce0->coeffs[start+w2*128+i]
- + sce1->coeffs[start+w2*128+i]) * 0.5;
+ M[i] = (sce0->pcoeffs[start+w2*128+i]
+ + sce1->pcoeffs[start+w2*128+i]) * 0.5;
S[i] = M[i]
- - sce1->coeffs[start+w2*128+i];
+ - sce1->pcoeffs[start+w2*128+i];
}
abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
diff --git a/libavcodec/aacenc.c b/libavcodec/aacenc.c
index 9c910b7f7f..7c286aac20 100644
--- a/libavcodec/aacenc.c
+++ b/libavcodec/aacenc.c
@@ -260,6 +260,7 @@ static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
for (i = 0; i < 1024; i += 128)
s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
+ memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
}
/**
@@ -311,20 +312,23 @@ static void adjust_frame_information(ChannelElement *cpe, int chans)
start = 0;
maxsfb = 0;
cpe->ch[ch].pulse.num_pulse = 0;
- for (w = 0; w < ics->num_windows*16; w += 16) {
- for (g = 0; g < ics->num_swb; g++) {
- //apply M/S
- if (cpe->common_window && !ch && cpe->ms_mask[w + g]) {
- for (i = 0; i < ics->swb_sizes[g]; i++) {
- cpe->ch[0].coeffs[start+i] = (cpe->ch[0].coeffs[start+i] + cpe->ch[1].coeffs[start+i]) / 2.0;
- cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].coeffs[start+i];
+ for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
+ for (w2 = 0; w2 < ics->group_len[w]; w2++) {
+ start = (w+w2) * 128;
+ for (g = 0; g < ics->num_swb; g++) {
+ //apply M/S
+ if (cpe->common_window && !ch && cpe->ms_mask[w*16 + g]) {
+ for (i = 0; i < ics->swb_sizes[g]; i++) {
+ cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
+ cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
+ }
}
+ start += ics->swb_sizes[g];
}
- start += ics->swb_sizes[g];
+ for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
+ ;
+ maxsfb = FFMAX(maxsfb, cmaxsfb);
}
- for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w+cmaxsfb-1]; cmaxsfb--)
- ;
- maxsfb = FFMAX(maxsfb, cmaxsfb);
}
ics->max_sfb = maxsfb;
@@ -507,7 +511,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
AACEncContext *s = avctx->priv_data;
float **samples = s->planar_samples, *samples2, *la, *overlap;
ChannelElement *cpe;
- int i, ch, w, g, chans, tag, start_ch, ret;
+ int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0;
int chan_el_counter[4];
FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
@@ -630,6 +634,7 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
if (cpe->common_window) {
put_ics_info(s, &cpe->ch[0].ics);
encode_ms_info(&s->pb, cpe);
+ if (cpe->ms_mode) ms_mode = 1;
}
}
for (ch = 0; ch < chans; ch++) {
@@ -644,6 +649,15 @@ static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
s->psy.bitres.bits = frame_bits / s->channels;
break;
}
+ if (ms_mode) {
+ for (i = 0; i < s->chan_map[0]; i++) {
+ // Must restore coeffs
+ chans = tag == TYPE_CPE ? 2 : 1;
+ cpe = &s->cpe[i];
+ for (ch = 0; ch < chans; ch++)
+ memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
+ }
+ }
s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;