aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Rostislav Pehlivanov <atomnuker@gmail.com> 2017-07-12 04:49:21 +0100
committer Rostislav Pehlivanov <atomnuker@gmail.com> 2017-07-13 19:53:52 +0100
commit 035c755b4ef3b25daadc9e5a81a845dde6b3930c (patch)
tree 96b2198f723e1fa7ce86a009bcca3b2d7ea06365
parent 264f6c6f9537b68327d68357046fb1d732f01526 (diff)
download ffmpeg-035c755b4ef3b25daadc9e5a81a845dde6b3930c.tar.gz
opusenc: use float_dsp for transient mdcts
vector_fmul_reverse requires padding the window at the front.

Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
-rw-r--r-- libavcodec/opus_celt.h 4
-rw-r--r-- libavcodec/opusenc.c 8
-rw-r--r-- libavcodec/opustab.c 10
-rw-r--r-- libavcodec/opustab.h 3
4 files changed, 14 insertions, 11 deletions
diff --git a/libavcodec/opus_celt.h b/libavcodec/opus_celt.h
index b80ade84f2..31299912bd 100644
--- a/libavcodec/opus_celt.h
+++ b/libavcodec/opus_celt.h
@@ -75,8 +75,8 @@ typedef struct CeltBlock {
DECLARE_ALIGNED(32, float, coeffs)[CELT_MAX_FRAME_SIZE];
/* Used by the encoder */
- DECLARE_ALIGNED(32, float, overlap)[120];
- DECLARE_ALIGNED(32, float, samples)[CELT_MAX_FRAME_SIZE];
+ DECLARE_ALIGNED(32, float, overlap)[FFALIGN(CELT_OVERLAP, 16)];
+ DECLARE_ALIGNED(32, float, samples)[FFALIGN(CELT_MAX_FRAME_SIZE, 16)];
/* postfilter parameters */
int pf_period_new;
diff --git a/libavcodec/opusenc.c b/libavcodec/opusenc.c
index 8aba291e7e..6cefd33884 100644
--- a/libavcodec/opusenc.c
+++ b/libavcodec/opusenc.c
@@ -210,17 +210,15 @@ static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
int i, t, ch;
float *win = s->scratch;
- /* I think I can use s->dsp->vector_fmul_window for transients at least */
if (f->transient) {
for (ch = 0; ch < f->channels; ch++) {
CeltBlock *b = &f->block[ch];
float *src1 = b->overlap;
for (t = 0; t < f->blocks; t++) {
float *src2 = &b->samples[CELT_OVERLAP*t];
- for (i = 0; i < CELT_OVERLAP; i++) {
- win[ i] = src1[i]*ff_celt_window[i];
- win[CELT_OVERLAP + i] = src2[i]*ff_celt_window[CELT_OVERLAP - i - 1];
- }
+ s->dsp->vector_fmul(win, src1, ff_celt_window, CELT_OVERLAP);
+ s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2,
+ ff_celt_window - 8, CELT_OVERLAP + 8);
src1 = src2;
s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks);
}
diff --git a/libavcodec/opustab.c b/libavcodec/opustab.c
index 635cc363e2..b31705297e 100644
--- a/libavcodec/opustab.c
+++ b/libavcodec/opustab.c
@@ -1096,7 +1096,9 @@ const float ff_celt_postfilter_taps[3][3] = {
{ 0.7998046875f, 0.1000976562f, 0.0 }
};
-DECLARE_ALIGNED(32, const float, ff_celt_window)[120] = {
+DECLARE_ALIGNED(32, static const float, ff_celt_window_padded)[136] = {
+ 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
+ 0.00000000f, 0.00000000f, 0.00000000f, 0.00000000f,
6.7286966e-05f, 0.00060551348f, 0.0016815970f, 0.0032947962f, 0.0054439943f,
0.0081276923f, 0.011344001f, 0.015090633f, 0.019364886f, 0.024163635f,
0.029483315f, 0.035319905f, 0.041668911f, 0.048525347f, 0.055883718f,
@@ -1120,9 +1122,13 @@ DECLARE_ALIGNED(32, const float, ff_celt_window)[120] = {
0.99499004f, 0.99592297f, 0.99672162f, 0.99739874f, 0.99796667f,
0.99843728f, 0.99882195f, 0.99913147f, 0.99937606f, 0.99956527f,
0.99970802f, 0.99981248f, 0.99988613f, 0.99993565f, 0.99996697f,
- 0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.0000000f,
+ 0.99998518f, 0.99999457f, 0.99999859f, 0.99999982f, 1.00000000f,
+ 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f, 1.00000000f,
+ 1.00000000f, 1.00000000f, 1.00000000f,
};
+const float *ff_celt_window = &ff_celt_window_padded[8];
+
/* square of the window, used for the postfilter */
const float ff_celt_window2[120] = {
4.5275357e-09f, 3.66647e-07f, 2.82777e-06f, 1.08557e-05f, 2.96371e-05f, 6.60594e-05f,
diff --git a/libavcodec/opustab.h b/libavcodec/opustab.h
index b4589869ef..bce5a42830 100644
--- a/libavcodec/opustab.h
+++ b/libavcodec/opustab.h
@@ -154,8 +154,7 @@ extern const uint32_t ff_celt_pvq_u[1272];
extern const float ff_celt_postfilter_taps[3][3];
extern const float ff_celt_window2[120];
-
-DECLARE_ALIGNED(32, extern const float, ff_celt_window)[120];
+extern const float *ff_celt_window;
extern const uint32_t * const ff_celt_pvq_u_row[15];