diff options
author | Rostislav Pehlivanov <atomnuker@gmail.com> | 2017-05-10 06:47:44 +0100 |
---|---|---|
committer | Rostislav Pehlivanov <atomnuker@gmail.com> | 2017-05-16 11:22:07 +0100 |
commit | 8e7e74df93d18c903164a67c861a428bd4244cb1 (patch) | |
tree | 53bc9d9c22ee0a41bbac22560acafc0025829e60 | |
parent | e6ec482b429b241de0fb3088d87e28777d70ded5 (diff) | |
download | ffmpeg-8e7e74df93d18c903164a67c861a428bd4244cb1.tar.gz |
opus_pvq: port to allow for SIMD functions
Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
-rw-r--r-- | libavcodec/opus_celt.c | 17 | ||||
-rw-r--r-- | libavcodec/opus_celt.h | 6 | ||||
-rw-r--r-- | libavcodec/opus_pvq.c | 118 | ||||
-rw-r--r-- | libavcodec/opus_pvq.h | 32 | ||||
-rw-r--r-- | libavcodec/opusenc.c | 12 |
5 files changed, 107 insertions, 78 deletions
diff --git a/libavcodec/opus_celt.c b/libavcodec/opus_celt.c index aee8ddc616..feb604d9af 100644 --- a/libavcodec/opus_celt.c +++ b/libavcodec/opus_celt.c @@ -753,15 +753,15 @@ static void celt_decode_bands(CeltFrame *f, OpusRangeCoder *rc) } if (f->dual_stereo) { - cm[0] = ff_celt_decode_band(f, rc, i, X, NULL, band_size, b / 2, f->blocks, + cm[0] = f->pvq->decode_band(f->pvq, f, rc, i, X, NULL, band_size, b / 2, f->blocks, effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size, norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]); - cm[1] = ff_celt_decode_band(f, rc, i, Y, NULL, band_size, b/2, f->blocks, + cm[1] = f->pvq->decode_band(f->pvq, f, rc, i, Y, NULL, band_size, b/2, f->blocks, effective_lowband != -1 ? norm2 + (effective_lowband << f->size) : NULL, f->size, norm2 + band_offset, 0, 1.0f, lowband_scratch, cm[1]); } else { - cm[0] = ff_celt_decode_band(f, rc, i, X, Y, band_size, b, f->blocks, + cm[0] = f->pvq->decode_band(f->pvq, f, rc, i, X, Y, band_size, b, f->blocks, effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size, norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]|cm[1]); cm[1] = cm[0]; @@ -984,6 +984,8 @@ void ff_celt_free(CeltFrame **f) for (i = 0; i < FF_ARRAY_ELEMS(frm->imdct); i++) ff_mdct15_uninit(&frm->imdct[i]); + ff_celt_pvq_uninit(&frm->pvq); + av_freep(&frm->dsp); av_freep(f); } @@ -1006,11 +1008,12 @@ int ff_celt_init(AVCodecContext *avctx, CeltFrame **f, int output_channels) frm->avctx = avctx; frm->output_channels = output_channels; - for (i = 0; i < FF_ARRAY_ELEMS(frm->imdct); i++) { - ret = ff_mdct15_init(&frm->imdct[i], 1, i + 3, -1.0f); - if (ret < 0) + for (i = 0; i < FF_ARRAY_ELEMS(frm->imdct); i++) + if ((ret = ff_mdct15_init(&frm->imdct[i], 1, i + 3, -1.0f)) < 0) goto fail; - } + + if ((ret = ff_celt_pvq_init(&frm->pvq)) < 0) + goto fail; frm->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT); if (!frm->dsp) { diff --git a/libavcodec/opus_celt.h b/libavcodec/opus_celt.h index f0d55d600b..b80ade84f2 100644 --- a/libavcodec/opus_celt.h +++ b/libavcodec/opus_celt.h @@ -27,6 +27,7 @@ #include <float.h> #include "opus.h" +#include "opus_pvq.h" #include "mdct15.h" #include "libavutil/float_dsp.h" @@ -43,6 +44,8 @@ #define CELT_POSTFILTER_MINPERIOD 15 #define CELT_ENERGY_SILENCE (-28.0f) +typedef struct CeltPVQ CeltPVQ; + enum CeltSpread { CELT_SPREAD_NONE, CELT_SPREAD_LIGHT, @@ -92,6 +95,7 @@ struct CeltFrame { MDCT15Context *imdct[4]; AVFloatDSPContext *dsp; CeltBlock block[2]; + CeltPVQ *pvq; int channels; int output_channels; @@ -125,8 +129,6 @@ struct CeltFrame { int fine_priority[CELT_MAX_BANDS]; int pulses [CELT_MAX_BANDS]; int tf_change [CELT_MAX_BANDS]; - - DECLARE_ALIGNED(32, float, scratch)[22 * 8]; // MAX(ff_celt_freq_range) * 1<<CELT_MAX_LOG_BLOCKS }; /* LCG for noise generation */ diff --git a/libavcodec/opus_pvq.c b/libavcodec/opus_pvq.c index fa349c47da..2ac66a0ede 100644 --- a/libavcodec/opus_pvq.c +++ b/libavcodec/opus_pvq.c @@ -363,7 +363,7 @@ static inline float celt_decode_pulses(OpusRangeCoder *rc, int *y, uint32_t N, u * Faster than libopus's search, operates entirely in the signed domain. * Slightly worse/better depending on N, K and the input vector. */ -static int celt_pvq_search(float *X, int *y, int K, int N) +static float ppp_pvq_search_c(float *X, int *y, int K, int N) { int i, y_norm = 0; float res = 0.0f, xy_norm = 0.0f; @@ -408,17 +408,17 @@ static int celt_pvq_search(float *X, int *y, int K, int N) y[max_idx] += phase; } - return y_norm; + return (float)y_norm; } static uint32_t celt_alg_quant(OpusRangeCoder *rc, float *X, uint32_t N, uint32_t K, enum CeltSpread spread, uint32_t blocks, float gain, - void *scratch) + CeltPVQ *pvq) { - int *y = scratch; + int *y = pvq->qcoeff; celt_exp_rotation(X, N, blocks, K, spread, 1); - gain /= sqrtf(celt_pvq_search(X, y, K, N)); + gain /= sqrtf(pvq->pvq_search(X, y, K, N)); celt_encode_pulses(rc, y, N, K); celt_normalize_residual(y, X, N, gain); celt_exp_rotation(X, N, blocks, K, spread, 0); @@ -429,9 +429,9 @@ static uint32_t celt_alg_quant(OpusRangeCoder *rc, float *X, uint32_t N, uint32_ the final normalised signal in the current band. */ static uint32_t celt_alg_unquant(OpusRangeCoder *rc, float *X, uint32_t N, uint32_t K, enum CeltSpread spread, uint32_t blocks, float gain, - void *scratch) + CeltPVQ *pvq) { - int *y = scratch; + int *y = pvq->qcoeff; gain /= sqrtf(celt_decode_pulses(rc, y, N, K)); celt_normalize_residual(y, X, N, gain); @@ -477,19 +477,16 @@ static void celt_stereo_ms_decouple(float *X, float *Y, int N) } } -#define QUANT_FN(name) uint32_t (*name)(CeltFrame *f, OpusRangeCoder *rc, \ - const int band, float *X, float *Y, \ - int N, int b, uint32_t blocks, \ - float *lowband, int duration, \ - float *lowband_out, int level, \ - float gain, float *lowband_scratch, \ - int fill) - -static av_always_inline uint32_t quant_band_template(CeltFrame *f, OpusRangeCoder *rc, const int band, - float *X, float *Y, int N, int b, uint32_t blocks, - float *lowband, int duration, float *lowband_out, - int level, float gain, float *lowband_scratch, - int fill, int quant) +static av_always_inline uint32_t quant_band_template(CeltPVQ *pvq, CeltFrame *f, + OpusRangeCoder *rc, + const int band, float *X, + float *Y, int N, int b, + uint32_t blocks, float *lowband, + int duration, float *lowband_out, + int level, float gain, + float *lowband_scratch, + int fill, int quant, + QUANT_FN(*rec)) { int i; const uint8_t *cache; @@ -505,7 +502,6 @@ static av_always_inline uint32_t quant_band_template(CeltFrame *f, OpusRangeCode float mid = 0, side = 0; int longblocks = (B0 == 1); uint32_t cm = 0; - QUANT_FN(rec) = quant ? ff_celt_encode_band : ff_celt_decode_band; if (N == 1) { float *x = X; @@ -565,7 +561,7 @@ static av_always_inline uint32_t quant_band_template(CeltFrame *f, OpusRangeCode /* Reorganize the samples in time order instead of frequency order */ if (B0 > 1 && (quant || lowband)) - celt_deinterleave_hadamard(f->scratch, quant ? X : lowband, + celt_deinterleave_hadamard(pvq->hadamard_tmp, quant ? X : lowband, N_B >> recombine, B0 << recombine, longblocks); } @@ -702,7 +698,7 @@ static av_always_inline uint32_t quant_band_template(CeltFrame *f, OpusRangeCode sign = 1 - 2 * sign; /* We use orig_fill here because we want to fold the side, but if itheta==16384, we'll have cleared the low bits of fill. */ - cm = rec(f, rc, band, x2, NULL, N, mbits, blocks, lowband, duration, + cm = rec(pvq, f, rc, band, x2, NULL, N, mbits, blocks, lowband, duration, lowband_out, level, gain, lowband_scratch, orig_fill); /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse), and there's no need to worry about mixing with the other channel. */ @@ -755,7 +751,7 @@ static av_always_inline uint32_t quant_band_template(CeltFrame *f, OpusRangeCode if (mbits >= sbits) { /* In stereo mode, we do not apply a scaling to the mid * because we need the normalized mid for folding later */ - cm = rec(f, rc, band, X, NULL, N, mbits, blocks, lowband, + cm = rec(pvq, f, rc, band, X, NULL, N, mbits, blocks, lowband, duration, next_lowband_out1, next_level, stereo ? 1.0f : (gain * mid), lowband_scratch, fill); rebalance = mbits - (rebalance - f->remaining2); @@ -764,14 +760,14 @@ static av_always_inline uint32_t quant_band_template(CeltFrame *f, OpusRangeCode /* For a stereo split, the high bits of fill are always zero, * so no folding will be done to the side. */ - cmt = rec(f, rc, band, Y, NULL, N, sbits, blocks, next_lowband2, + cmt = rec(pvq, f, rc, band, Y, NULL, N, sbits, blocks, next_lowband2, duration, NULL, next_level, gain * side, NULL, fill >> blocks); cm |= cmt << ((B0 >> 1) & (stereo - 1)); } else { /* For a stereo split, the high bits of fill are always zero, * so no folding will be done to the side. */ - cm = rec(f, rc, band, Y, NULL, N, sbits, blocks, next_lowband2, + cm = rec(pvq, f, rc, band, Y, NULL, N, sbits, blocks, next_lowband2, duration, NULL, next_level, gain * side, NULL, fill >> blocks); cm <<= ((B0 >> 1) & (stereo - 1)); rebalance = sbits - (rebalance - f->remaining2); @@ -780,7 +776,7 @@ static av_always_inline uint32_t quant_band_template(CeltFrame *f, OpusRangeCode /* In stereo mode, we do not apply a scaling to the mid because * we need the normalized mid for folding later */ - cm |= rec(f, rc, band, X, NULL, N, mbits, blocks, lowband, duration, + cm |= rec(pvq, f, rc, band, X, NULL, N, mbits, blocks, lowband, duration, next_lowband_out1, next_level, stereo ? 1.0f : (gain * mid), lowband_scratch, fill); } @@ -802,10 +798,10 @@ static av_always_inline uint32_t quant_band_template(CeltFrame *f, OpusRangeCode /* Finally do the actual (de)quantization */ if (quant) { cm = celt_alg_quant(rc, X, N, (q < 8) ? q : (8 + (q & 7)) << ((q >> 3) - 1), - f->spread, blocks, gain, f->scratch); + f->spread, blocks, gain, pvq); } else { cm = celt_alg_unquant(rc, X, N, (q < 8) ? q : (8 + (q & 7)) << ((q >> 3) - 1), - f->spread, blocks, gain, f->scratch); + f->spread, blocks, gain, pvq); } } else { /* If there's no pulse, fill the band anyway */ @@ -845,7 +841,7 @@ static av_always_inline uint32_t quant_band_template(CeltFrame *f, OpusRangeCode /* Undo the sample reorganization going from time order to frequency order */ if (B0 > 1) - celt_interleave_hadamard(f->scratch, X, N_B >> recombine, + celt_interleave_hadamard(pvq->hadamard_tmp, X, N_B >> recombine, B0 << recombine, longblocks); /* Undo time-freq changes that we did earlier */ @@ -876,33 +872,28 @@ static av_always_inline uint32_t quant_band_template(CeltFrame *f, OpusRangeCode return cm; } -uint32_t ff_celt_decode_band(CeltFrame *f, OpusRangeCoder *rc, const int band, - float *X, float *Y, int N, int b, uint32_t blocks, - float *lowband, int duration, float *lowband_out, - int level, float gain, float *lowband_scratch, - int fill) + +static QUANT_FN(pvq_decode_band) { - return quant_band_template(f, rc, band, X, Y, N, b, blocks, lowband, duration, - lowband_out, level, gain, lowband_scratch, fill, 0); + return quant_band_template(pvq, f, rc, band, X, Y, N, b, blocks, lowband, duration, + lowband_out, level, gain, lowband_scratch, fill, 0, + pvq->decode_band); } -uint32_t ff_celt_encode_band(CeltFrame *f, OpusRangeCoder *rc, const int band, - float *X, float *Y, int N, int b, uint32_t blocks, - float *lowband, int duration, float *lowband_out, - int level, float gain, float *lowband_scratch, - int fill) +static QUANT_FN(pvq_encode_band) { - return quant_band_template(f, rc, band, X, Y, N, b, blocks, lowband, duration, - lowband_out, level, gain, lowband_scratch, fill, 1); + return quant_band_template(pvq, f, rc, band, X, Y, N, b, blocks, lowband, duration, + lowband_out, level, gain, lowband_scratch, fill, 1, + pvq->encode_band); } -float ff_celt_quant_band_cost(CeltFrame *f, OpusRangeCoder *rc, int band, float *bits, - float lambda) +static float pvq_band_cost(CeltPVQ *pvq, CeltFrame *f, OpusRangeCoder *rc, int band, + float *bits, float lambda) { int i, b = 0; uint32_t cm[2] = { (1 << f->blocks) - 1, (1 << f->blocks) - 1 }; const int band_size = ff_celt_freq_range[band] << f->size; - float buf[352], lowband_scratch[176], norm1[176], norm2[176]; + float buf[176 * 2], lowband_scratch[176], norm1[176], norm2[176]; float dist, cost, err_x = 0.0f, err_y = 0.0f; float *X = buf; float *X_orig = f->block[0].coeffs + (ff_celt_freq_bands[band] << f->size); @@ -921,14 +912,14 @@ float ff_celt_quant_band_cost(CeltFrame *f, OpusRangeCoder *rc, int band, float } if (f->dual_stereo) { - ff_celt_encode_band(f, rc, band, X, NULL, band_size, b / 2, f->blocks, NULL, - f->size, norm1, 0, 1.0f, lowband_scratch, cm[0]); + pvq->encode_band(pvq, f, rc, band, X, NULL, band_size, b / 2, f->blocks, NULL, + f->size, norm1, 0, 1.0f, lowband_scratch, cm[0]); - ff_celt_encode_band(f, rc, band, Y, NULL, band_size, b / 2, f->blocks, NULL, - f->size, norm2, 0, 1.0f, lowband_scratch, cm[1]); + pvq->encode_band(pvq, f, rc, band, Y, NULL, band_size, b / 2, f->blocks, NULL, + f->size, norm2, 0, 1.0f, lowband_scratch, cm[1]); } else { - ff_celt_encode_band(f, rc, band, X, Y, band_size, b, f->blocks, NULL, f->size, - norm1, 0, 1.0f, lowband_scratch, cm[0] | cm[1]); + pvq->encode_band(pvq, f, rc, band, X, Y, band_size, b, f->blocks, NULL, f->size, + norm1, 0, 1.0f, lowband_scratch, cm[0] | cm[1]); } for (i = 0; i < band_size; i++) { @@ -944,3 +935,24 @@ float ff_celt_quant_band_cost(CeltFrame *f, OpusRangeCoder *rc, int band, float return lambda*dist*cost; } + +int av_cold ff_celt_pvq_init(CeltPVQ **pvq) +{ + CeltPVQ *s = av_malloc(sizeof(CeltPVQ)); + if (!s) + return AVERROR(ENOMEM); + + s->pvq_search = ppp_pvq_search_c; + s->decode_band = pvq_decode_band; + s->encode_band = pvq_encode_band; + s->band_cost = pvq_band_cost; + + *pvq = s; + + return 0; +} + +void av_cold ff_celt_pvq_uninit(CeltPVQ **pvq) +{ + av_freep(pvq); +} diff --git a/libavcodec/opus_pvq.h b/libavcodec/opus_pvq.h index 045015406b..6691494838 100644 --- a/libavcodec/opus_pvq.h +++ b/libavcodec/opus_pvq.h @@ -23,22 +23,28 @@ #ifndef AVCODEC_OPUS_PVQ_H #define AVCODEC_OPUS_PVQ_H -#include "opus.h" #include "opus_celt.h" -/* Decodes a band using PVQ */ -uint32_t ff_celt_decode_band(CeltFrame *f, OpusRangeCoder *rc, const int band, - float *X, float *Y, int N, int b, uint32_t blocks, - float *lowband, int duration, float *lowband_out, int level, - float gain, float *lowband_scratch, int fill); +#define QUANT_FN(name) uint32_t (name)(struct CeltPVQ *pvq, CeltFrame *f, \ + OpusRangeCoder *rc, const int band, float *X, \ + float *Y, int N, int b, uint32_t blocks, \ + float *lowband, int duration, \ + float *lowband_out, int level, float gain, \ + float *lowband_scratch, int fill) -/* Encodes a band using PVQ */ -uint32_t ff_celt_encode_band(CeltFrame *f, OpusRangeCoder *rc, const int band, - float *X, float *Y, int N, int b, uint32_t blocks, - float *lowband, int duration, float *lowband_out, int level, - float gain, float *lowband_scratch, int fill); +struct CeltPVQ { + DECLARE_ALIGNED(32, int, qcoeff )[176]; + DECLARE_ALIGNED(32, float, hadamard_tmp)[176]; -float ff_celt_quant_band_cost(CeltFrame *f, OpusRangeCoder *rc, int band, - float *bits, float lambda); + float (*pvq_search)(float *X, int *y, int K, int N); + + QUANT_FN(*decode_band); + QUANT_FN(*encode_band); + float (*band_cost)(struct CeltPVQ *pvq, CeltFrame *f, OpusRangeCoder *rc, + int band, float *bits, float lambda); +}; + +int ff_celt_pvq_init (struct CeltPVQ **pvq); +void ff_celt_pvq_uninit(struct CeltPVQ **pvq); #endif /* AVCODEC_OPUS_PVQ_H */ diff --git a/libavcodec/opusenc.c b/libavcodec/opusenc.c index 41e1a3fb38..303f11f7e7 100644 --- a/libavcodec/opusenc.c +++ b/libavcodec/opusenc.c @@ -55,6 +55,7 @@ typedef struct OpusEncContext { AudioFrameQueue afq; AVFloatDSPContext *dsp; MDCT15Context *mdct[CELT_BLOCK_NB]; + CeltPVQ *pvq; struct FFBufQueue bufqueue; enum OpusMode mode; @@ -797,15 +798,15 @@ static void celt_quant_bands(OpusRangeCoder *rc, CeltFrame *f) } if (f->dual_stereo) { - cm[0] = ff_celt_encode_band(f, rc, i, X, NULL, band_size, b / 2, f->blocks, + cm[0] = f->pvq->encode_band(f->pvq, f, rc, i, X, NULL, band_size, b / 2, f->blocks, effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size, norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]); - cm[1] = ff_celt_encode_band(f, rc, i, Y, NULL, band_size, b / 2, f->blocks, + cm[1] = f->pvq->encode_band(f->pvq, f, rc, i, Y, NULL, band_size, b / 2, f->blocks, effective_lowband != -1 ? norm2 + (effective_lowband << f->size) : NULL, f->size, norm2 + band_offset, 0, 1.0f, lowband_scratch, cm[1]); } else { - cm[0] = ff_celt_encode_band(f, rc, i, X, Y, band_size, b, f->blocks, + cm[0] = f->pvq->encode_band(f->pvq, f, rc, i, X, Y, band_size, b, f->blocks, effective_lowband != -1 ? norm + (effective_lowband << f->size) : NULL, f->size, norm + band_offset, 0, 1.0f, lowband_scratch, cm[0] | cm[1]); cm[1] = cm[0]; @@ -883,6 +884,7 @@ static void ff_opus_psy_celt_frame_setup(OpusEncContext *s, CeltFrame *f, int in f->avctx = s->avctx; f->dsp = s->dsp; + f->pvq = s->pvq; f->start_band = (s->mode == OPUS_MODE_HYBRID) ? 17 : 0; f->end_band = ff_celt_band_end[s->bandwidth]; f->channels = s->channels; @@ -1019,6 +1021,7 @@ static av_cold int opus_encode_end(AVCodecContext *avctx) for (i = 0; i < CELT_BLOCK_NB; i++) ff_mdct15_uninit(&s->mdct[i]); + ff_celt_pvq_uninit(&s->pvq); av_freep(&s->dsp); av_freep(&s->frame); av_freep(&s->rc); @@ -1075,6 +1078,9 @@ static av_cold int opus_encode_init(AVCodecContext *avctx) ff_af_queue_init(avctx, &s->afq); + if ((ret = ff_celt_pvq_init(&s->pvq)) < 0) + return ret; + if (!(s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT))) return AVERROR(ENOMEM); |