diff options
author | Robert Swain <robert.swain@gmail.com> | 2008-08-22 18:21:22 +0000 |
---|---|---|
committer | Robert Swain <robert.swain@gmail.com> | 2008-08-22 18:21:22 +0000 |
commit | b0f5852a13cb9888e997b87ec3604bcff88c0c36 (patch) | |
tree | b1bbad3c5fae036aa6da407ef5516d686f6ab0b5 /libavcodec/aac.c | |
parent | db38c38624dfd2504207746cf10e78b34064b25d (diff) | |
download | ffmpeg-b0f5852a13cb9888e997b87ec3604bcff88c0c36.tar.gz |
Use ff_imdct_half() and vector_fmul_window() for IMDCT and windowing. Reduce
buffer sizes accordingly. This produces a ~10% overall decoding performance
improvement.
Originally committed as revision 14908 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/aac.c')
-rw-r--r-- | libavcodec/aac.c | 46 |
1 files changed, 22 insertions, 24 deletions
diff --git a/libavcodec/aac.c b/libavcodec/aac.c index 22913b5804..d122ef9ec5 100644 --- a/libavcodec/aac.c +++ b/libavcodec/aac.c @@ -1166,11 +1166,11 @@ static void imdct_and_windowing(AACContext * ac, SingleChannelElement * sce) { float * in = sce->coeffs; float * out = sce->ret; float * saved = sce->saved; - const float * lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; const float * swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; const float * lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; const float * swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; float * buf = ac->buf_mdct; + DECLARE_ALIGNED(16, float, temp[128]); int i; // imdct @@ -1179,12 +1179,10 @@ static void imdct_and_windowing(AACContext * ac, SingleChannelElement * sce) { av_log(ac->avccontext, AV_LOG_WARNING, "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. " "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n"); - for (i = 0; i < 2048; i += 256) { - ff_imdct_calc(&ac->mdct_small, buf + i, in + i/2); - ac->dsp.vector_fmul_reverse(ac->revers + i/2, buf + i + 128, swindow, 128); - } + for (i = 0; i < 1024; i += 128) + ff_imdct_half(&ac->mdct_small, buf + i, in + i); } else - ff_imdct_calc(&ac->mdct, buf, in); + ff_imdct_half(&ac->mdct, buf, in); /* window overlapping * NOTE: To simplify the overlapping code, all 'meaningless' short to long @@ -1194,38 +1192,38 @@ static void imdct_and_windowing(AACContext * ac, SingleChannelElement * sce) { */ if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { - ac->dsp.vector_fmul_add_add(out, buf, lwindow_prev, saved, ac->add_bias, 1024, 1); + ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, ac->add_bias, 512); } else { for (i = 0; i < 
448; i++) out[i] = saved[i] + ac->add_bias; if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { - ac->dsp.vector_fmul_add_add(out + 448 + 0*128, buf + 0*128, swindow_prev, saved + 448 , ac->add_bias, 128, 1); - ac->dsp.vector_fmul_add_add(out + 448 + 1*128, buf + 2*128, swindow, ac->revers + 0*128, ac->add_bias, 128, 1); - ac->dsp.vector_fmul_add_add(out + 448 + 2*128, buf + 4*128, swindow, ac->revers + 1*128, ac->add_bias, 128, 1); - ac->dsp.vector_fmul_add_add(out + 448 + 3*128, buf + 6*128, swindow, ac->revers + 2*128, ac->add_bias, 128, 1); - ac->dsp.vector_fmul_add_add(out + 448 + 4*128, buf + 8*128, swindow, ac->revers + 3*128, ac->add_bias, 64, 1); + ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, ac->add_bias, 64); + ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, ac->add_bias, 64); + ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, ac->add_bias, 64); + ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, ac->add_bias, 64); + ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, ac->add_bias, 64); + memcpy( out + 448 + 4*128, temp, 64 * sizeof(float)); } else { - ac->dsp.vector_fmul_add_add(out + 448, buf + 448, swindow_prev, saved + 448, ac->add_bias, 128, 1); + ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, ac->add_bias, 64); for (i = 576; i < 1024; i++) - out[i] = buf[i] + saved[i] + ac->add_bias; + out[i] = buf[i-512] + ac->add_bias; } } // buffer update if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { - ac->dsp.vector_fmul_add_add(saved, buf + 1024 + 64, swindow + 64, ac->revers + 3*128+64, 0, 64, 1); - ac->dsp.vector_fmul_add_add(saved + 64, buf + 1024 + 2*128, swindow, ac->revers + 4*128, 0, 128, 1); - ac->dsp.vector_fmul_add_add(saved + 192, buf + 1024 + 4*128, swindow, ac->revers + 5*128, 0, 128, 1); - ac->dsp.vector_fmul_add_add(saved + 320, buf + 1024 + 
6*128, swindow, ac->revers + 6*128, 0, 128, 1); - memcpy( saved + 448, ac->revers + 7*128, 128 * sizeof(float)); - memset( saved + 576, 0, 448 * sizeof(float)); + for (i = 0; i < 64; i++) + saved[i] = temp[64 + i] - ac->add_bias; + ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64); + ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64); + ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64); + memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { - memcpy(saved, buf + 1024, 448 * sizeof(float)); - ac->dsp.vector_fmul_reverse(saved + 448, buf + 1024 + 448, swindow, 128); - memset(saved + 576, 0, 448 * sizeof(float)); + memcpy( saved, buf + 512, 448 * sizeof(float)); + memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); } else { // LONG_STOP or ONLY_LONG - ac->dsp.vector_fmul_reverse(saved, buf + 1024, lwindow, 1024); + memcpy( saved, buf + 512, 512 * sizeof(float)); } } |