diff options
author | Nedeljko Babic <nbabic@mips.com> | 2012-06-04 18:02:56 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-06-11 21:12:39 +0200 |
commit | 3827a86eacd04d9d7b356f769be553f7b8cca361 (patch) | |
tree | 5666cb5c2bb96e22bd58a7043fb50a44f8cac677 /libavcodec | |
parent | 751dcd92435d8f38fd3f2035d515b10049483c49 (diff) | |
download | ffmpeg-3827a86eacd04d9d7b356f769be553f7b8cca361.tar.gz |
Optimization of AMR NB and WB decoders for MIPS
AMR NB and WB decoders are optimized for MIPS architecture.
Appropriate Makefiles are changed accordingly.
Configure script is changed in order to support optimizations.
Optimizations are enabled by default when compiling is done for
mips architecture.
Appropriate cflags are automatically set.
Support for several mips CPUs is added in configure script.
New ffmpeg options are added for disabling optimizations.
The FFMPEG option --disable-mipsfpu disables MIPS floating point
optimizations.
The FFMPEG option --disable-mips32r2 disables MIPS32R2
optimizations.
The FFMPEG option --disable-mipsdspr1 disables MIPS DSP ASE R1
optimizations.
The FFMPEG option --disable-mipsdspr2 disables MIPS DSP ASE R2
optimizations.
Signed-off-by: Nedeljko Babic <nbabic@mips.com>
Reviewed-by: Vitor Sessak <vitor1001@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/acelp_filters.c | 9 | ||||
-rw-r--r-- | libavcodec/acelp_filters.h | 33 | ||||
-rw-r--r-- | libavcodec/acelp_vectors.c | 8 | ||||
-rw-r--r-- | libavcodec/acelp_vectors.h | 24 | ||||
-rw-r--r-- | libavcodec/amrnbdec.c | 54 | ||||
-rw-r--r-- | libavcodec/amrwbdec.c | 58 | ||||
-rw-r--r-- | libavcodec/celp_filters.c | 9 | ||||
-rw-r--r-- | libavcodec/celp_filters.h | 49 | ||||
-rw-r--r-- | libavcodec/celp_math.c | 8 | ||||
-rw-r--r-- | libavcodec/celp_math.h | 19 | ||||
-rw-r--r-- | libavcodec/lsp.c | 4 | ||||
-rw-r--r-- | libavcodec/mips/Makefile | 12 | ||||
-rw-r--r-- | libavcodec/mips/acelp_filters_mips.c | 210 | ||||
-rw-r--r-- | libavcodec/mips/acelp_vectors_mips.c | 96 | ||||
-rw-r--r-- | libavcodec/mips/amrwbdec_mips.c | 185 | ||||
-rw-r--r-- | libavcodec/mips/amrwbdec_mips.h | 62 | ||||
-rw-r--r-- | libavcodec/mips/celp_filters_mips.c | 281 | ||||
-rw-r--r-- | libavcodec/mips/celp_math_mips.c | 84 | ||||
-rw-r--r-- | libavcodec/mips/lsp_mips.h | 108 |
19 files changed, 1274 insertions, 39 deletions
diff --git a/libavcodec/acelp_filters.c b/libavcodec/acelp_filters.c index 1ce5eed5e2..831d672cda 100644 --- a/libavcodec/acelp_filters.c +++ b/libavcodec/acelp_filters.c @@ -142,3 +142,12 @@ void ff_tilt_compensation(float *mem, float tilt, float *samples, int size) samples[0] -= tilt * *mem; *mem = new_tilt_mem; } + +void ff_acelp_filter_init(ACELPFContext *c) +{ + c->acelp_interpolatef = ff_acelp_interpolatef; + c->acelp_apply_order_2_transfer_function = ff_acelp_apply_order_2_transfer_function; + + if(HAVE_MIPSFPU) + ff_acelp_filter_init_mips(c); +} diff --git a/libavcodec/acelp_filters.h b/libavcodec/acelp_filters.h index e807aed7b9..56197bcc18 100644 --- a/libavcodec/acelp_filters.h +++ b/libavcodec/acelp_filters.h @@ -25,6 +25,39 @@ #include <stdint.h> +typedef struct ACELPFContext { + /** + * Floating point version of ff_acelp_interpolate() + */ + void (*acelp_interpolatef)(float *out, const float *in, + const float *filter_coeffs, int precision, + int frac_pos, int filter_length, int length); + + /** + * Apply an order 2 rational transfer function in-place. + * + * @param out output buffer for filtered speech samples + * @param in input buffer containing speech data (may be the same as out) + * @param zero_coeffs z^-1 and z^-2 coefficients of the numerator + * @param pole_coeffs z^-1 and z^-2 coefficients of the denominator + * @param gain scale factor for final output + * @param mem intermediate values used by filter (should be 0 initially) + * @param n number of samples (should be a multiple of eight) + */ + void (*acelp_apply_order_2_transfer_function)(float *out, const float *in, + const float zero_coeffs[2], + const float pole_coeffs[2], + float gain, + float mem[2], int n); + +}ACELPFContext; + +/** + * Initialize ACELPFContext. + */ +void ff_acelp_filter_init(ACELPFContext *c); +void ff_acelp_filter_init_mips(ACELPFContext *c); + /** * low-pass Finite Impulse Response filter coefficients. 
* diff --git a/libavcodec/acelp_vectors.c b/libavcodec/acelp_vectors.c index 6a544a912d..c7036477a9 100644 --- a/libavcodec/acelp_vectors.c +++ b/libavcodec/acelp_vectors.c @@ -260,3 +260,11 @@ void ff_clear_fixed_vector(float *out, const AMRFixed *in, int size) } while (x < size && repeats); } } + +void ff_acelp_vectors_init(ACELPVContext *c) +{ + c->weighted_vector_sumf = ff_weighted_vector_sumf; + + if(HAVE_MIPSFPU) + ff_acelp_vectors_init_mips(c); +} diff --git a/libavcodec/acelp_vectors.h b/libavcodec/acelp_vectors.h index f3bc781446..d92f288de4 100644 --- a/libavcodec/acelp_vectors.h +++ b/libavcodec/acelp_vectors.h @@ -25,6 +25,30 @@ #include <stdint.h> +typedef struct ACELPVContext { + /** + * float implementation of weighted sum of two vectors. + * @param[out] out result of addition + * @param in_a first vector + * @param in_b second vector + * @param weight_coeff_a first vector weight coefficient + * @param weight_coeff_a second vector weight coefficient + * @param length vectors length (should be a multiple of two) + * + * @note It is safe to pass the same buffer for out and in_a or in_b. + */ + void (*weighted_vector_sumf)(float *out, const float *in_a, const float *in_b, + float weight_coeff_a, float weight_coeff_b, + int length); + +}ACELPVContext; + +/** + * Initialize ACELPVContext. 
+ */ +void ff_acelp_vectors_init(ACELPVContext *c); +void ff_acelp_vectors_init_mips(ACELPVContext *c); + /** Sparse representation for the algebraic codebook (fixed) vector */ typedef struct { int n; diff --git a/libavcodec/amrnbdec.c b/libavcodec/amrnbdec.c index 6b658c0a1a..46e4856beb 100644 --- a/libavcodec/amrnbdec.c +++ b/libavcodec/amrnbdec.c @@ -136,6 +136,11 @@ typedef struct AMRContext { float samples_in[LP_FILTER_ORDER + AMR_SUBFRAME_SIZE]; ///< floating point samples + ACELPFContext acelpf_ctx; ///< context for filters for ACELP-based codecs + ACELPVContext acelpv_ctx; ///< context for vector operations for ACELP-based codecs + CELPFContext celpf_ctx; ///< context for filters for CELP-based codecs + CELPMContext celpm_ctx; ///< context for fixed point math operations + } AMRContext; /** Double version of ff_weighted_vector_sumf() */ @@ -171,6 +176,11 @@ static av_cold int amrnb_decode_init(AVCodecContext *avctx) avcodec_get_frame_defaults(&p->avframe); avctx->coded_frame = &p->avframe; + ff_acelp_filter_init(&p->acelpf_ctx); + ff_acelp_vectors_init(&p->acelpv_ctx); + ff_celp_filter_init(&p->celpf_ctx); + ff_celp_math_init(&p->celpm_ctx); + return 0; } @@ -214,15 +224,16 @@ static enum Mode unpack_bitstream(AMRContext *p, const uint8_t *buf, * Interpolate the LSF vector (used for fixed gain smoothing). * The interpolation is done over all four subframes even in MODE_12k2. 
* + * @param[in] ctx The Context * @param[in,out] lsf_q LSFs in [0,1] for each subframe * @param[in] lsf_new New LSFs in [0,1] for subframe 4 */ -static void interpolate_lsf(float lsf_q[4][LP_FILTER_ORDER], float *lsf_new) +static void interpolate_lsf(ACELPVContext *ctx, float lsf_q[4][LP_FILTER_ORDER], float *lsf_new) { int i; for (i = 0; i < 4; i++) - ff_weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new, + ctx->weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new, 0.25 * (3 - i), 0.25 * (i + 1), LP_FILTER_ORDER); } @@ -266,7 +277,7 @@ static void lsf2lsp_for_mode12k2(AMRContext *p, double lsp[LP_FILTER_ORDER], ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER); if (update) - interpolate_lsf(p->lsf_q, lsf_q); + interpolate_lsf(&p->acelpv_ctx, p->lsf_q, lsf_q); ff_acelp_lsf2lspd(lsp, lsf_q, LP_FILTER_ORDER); } @@ -329,7 +340,7 @@ static void lsf2lsp_3(AMRContext *p) ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER); // store data for computing the next frame's LSFs - interpolate_lsf(p->lsf_q, lsf_q); + interpolate_lsf(&p->acelpv_ctx, p->lsf_q, lsf_q); memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(*lsf_r)); ff_acelp_lsf2lspd(p->lsp[3], lsf_q, LP_FILTER_ORDER); @@ -395,7 +406,8 @@ static void decode_pitch_vector(AMRContext *p, /* Calculate the pitch vector by interpolating the past excitation at the pitch lag using a b60 hamming windowed sinc function. 
*/ - ff_acelp_interpolatef(p->excitation, p->excitation + 1 - pitch_lag_int, + p->acelpf_ctx.acelp_interpolatef(p->excitation, + p->excitation + 1 - pitch_lag_int, ff_b60_sinc, 6, pitch_lag_frac + 6 - 6*(pitch_lag_frac > 0), 10, AMR_SUBFRAME_SIZE); @@ -780,12 +792,12 @@ static int synthesis(AMRContext *p, float *lpc, for (i = 0; i < AMR_SUBFRAME_SIZE; i++) p->pitch_vector[i] *= 0.25; - ff_weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector, + p->acelpv_ctx.weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector, p->pitch_gain[4], fixed_gain, AMR_SUBFRAME_SIZE); // emphasize pitch vector contribution if (p->pitch_gain[4] > 0.5 && !overflow) { - float energy = ff_dot_productf(excitation, excitation, + float energy = p->celpm_ctx.dot_productf(excitation, excitation, AMR_SUBFRAME_SIZE); float pitch_factor = p->pitch_gain[4] * @@ -800,7 +812,8 @@ static int synthesis(AMRContext *p, float *lpc, AMR_SUBFRAME_SIZE); } - ff_celp_lp_synthesis_filterf(samples, lpc, excitation, AMR_SUBFRAME_SIZE, + p->celpf_ctx.celp_lp_synthesis_filterf(samples, lpc, excitation, + AMR_SUBFRAME_SIZE, LP_FILTER_ORDER); // detect overflow @@ -846,10 +859,11 @@ static void update_state(AMRContext *p) /** * Get the tilt factor of a formant filter from its transfer function * + * @param p The Context * @param lpc_n LP_FILTER_ORDER coefficients of the numerator * @param lpc_d LP_FILTER_ORDER coefficients of the denominator */ -static float tilt_factor(float *lpc_n, float *lpc_d) +static float tilt_factor(AMRContext *p, float *lpc_n, float *lpc_d) { float rh0, rh1; // autocorrelation at lag 0 and 1 @@ -859,11 +873,12 @@ static float tilt_factor(float *lpc_n, float *lpc_d) hf[0] = 1.0; memcpy(hf + 1, lpc_n, sizeof(float) * LP_FILTER_ORDER); - ff_celp_lp_synthesis_filterf(hf, lpc_d, hf, AMR_TILT_RESPONSE, + p->celpf_ctx.celp_lp_synthesis_filterf(hf, lpc_d, hf, + AMR_TILT_RESPONSE, LP_FILTER_ORDER); - rh0 = ff_dot_productf(hf, hf, AMR_TILT_RESPONSE); - rh1 = ff_dot_productf(hf, hf + 1, 
AMR_TILT_RESPONSE - 1); + rh0 = p->celpm_ctx.dot_productf(hf, hf, AMR_TILT_RESPONSE); + rh1 = p->celpm_ctx.dot_productf(hf, hf + 1, AMR_TILT_RESPONSE - 1); // The spec only specifies this check for 12.2 and 10.2 kbit/s // modes. But in the ref source the tilt is always non-negative. @@ -883,7 +898,7 @@ static void postfilter(AMRContext *p, float *lpc, float *buf_out) int i; float *samples = p->samples_in + LP_FILTER_ORDER; // Start of input - float speech_gain = ff_dot_productf(samples, samples, + float speech_gain = p->celpm_ctx.dot_productf(samples, samples, AMR_SUBFRAME_SIZE); float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER]; // Output of pole filter @@ -904,16 +919,16 @@ static void postfilter(AMRContext *p, float *lpc, float *buf_out) } memcpy(pole_out, p->postfilter_mem, sizeof(float) * LP_FILTER_ORDER); - ff_celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples, + p->celpf_ctx.celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples, AMR_SUBFRAME_SIZE, LP_FILTER_ORDER); memcpy(p->postfilter_mem, pole_out + AMR_SUBFRAME_SIZE, sizeof(float) * LP_FILTER_ORDER); - ff_celp_lp_zero_synthesis_filterf(buf_out, lpc_n, + p->celpf_ctx.celp_lp_zero_synthesis_filterf(buf_out, lpc_n, pole_out + LP_FILTER_ORDER, AMR_SUBFRAME_SIZE, LP_FILTER_ORDER); - ff_tilt_compensation(&p->tilt_mem, tilt_factor(lpc_n, lpc_d), buf_out, + ff_tilt_compensation(&p->tilt_mem, tilt_factor(p, lpc_n, lpc_d), buf_out, AMR_SUBFRAME_SIZE); ff_adaptive_gain_control(buf_out, buf_out, speech_gain, AMR_SUBFRAME_SIZE, @@ -990,7 +1005,7 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data, p->fixed_gain[4] = ff_amr_set_fixed_gain(fixed_gain_factor, - ff_dot_productf(p->fixed_vector, p->fixed_vector, + p->celpm_ctx.dot_productf(p->fixed_vector, p->fixed_vector, AMR_SUBFRAME_SIZE)/AMR_SUBFRAME_SIZE, p->prediction_error, energy_mean[p->cur_frame_mode], energy_pred_fac); @@ -1034,7 +1049,8 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data, 
update_state(p); } - ff_acelp_apply_order_2_transfer_function(buf_out, buf_out, highpass_zeros, + p->acelpf_ctx.acelp_apply_order_2_transfer_function(buf_out, + buf_out, highpass_zeros, highpass_poles, highpass_gain * AMR_SAMPLE_SCALE, p->high_pass_mem, AMR_BLOCK_SIZE); @@ -1045,7 +1061,7 @@ static int amrnb_decode_frame(AVCodecContext *avctx, void *data, * for fixed_gain_smooth. * The specification has an incorrect formula: the reference decoder uses * qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71. */ - ff_weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3], + p->acelpv_ctx.weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3], 0.84, 0.16, LP_FILTER_ORDER); *got_frame_ptr = 1; diff --git a/libavcodec/amrwbdec.c b/libavcodec/amrwbdec.c index 9b8b306af9..beb3bd79a3 100644 --- a/libavcodec/amrwbdec.c +++ b/libavcodec/amrwbdec.c @@ -38,6 +38,7 @@ #include "amr.h" #include "amrwbdata.h" +#include "mips/amrwbdec_mips.h" typedef struct { AVFrame avframe; ///< AVFrame for decoded samples @@ -82,6 +83,11 @@ typedef struct { AVLFG prng; ///< random number generator for white noise excitation uint8_t first_frame; ///< flag active during decoding of the first frame + ACELPFContext acelpf_ctx; ///< context for filters for ACELP-based codecs + ACELPVContext acelpv_ctx; ///< context for vector operations for ACELP-based codecs + CELPFContext celpf_ctx; ///< context for filters for CELP-based codecs + CELPMContext celpm_ctx; ///< context for fixed point math operations + } AMRWBContext; static av_cold int amrwb_decode_init(AVCodecContext *avctx) @@ -105,6 +111,11 @@ static av_cold int amrwb_decode_init(AVCodecContext *avctx) avcodec_get_frame_defaults(&ctx->avframe); avctx->coded_frame = &ctx->avframe; + ff_acelp_filter_init(&ctx->acelpf_ctx); + ff_acelp_vectors_init(&ctx->acelpv_ctx); + ff_celp_filter_init(&ctx->celpf_ctx); + ff_celp_math_init(&ctx->celpm_ctx); + return 0; } @@ -319,7 +330,8 @@ static void decode_pitch_vector(AMRWBContext *ctx, /* Calculate 
the pitch vector by interpolating the past excitation at the pitch lag using a hamming windowed sinc function */ - ff_acelp_interpolatef(exc, exc + 1 - pitch_lag_int, + ctx->acelpf_ctx.acelp_interpolatef(exc, + exc + 1 - pitch_lag_int, ac_inter, 4, pitch_lag_frac + (pitch_lag_frac > 0 ? 0 : 4), LP_ORDER, AMRWB_SFR_SIZE + 1); @@ -578,15 +590,17 @@ static void pitch_sharpening(AMRWBContext *ctx, float *fixed_vector) * * @param[in] p_vector, f_vector Pitch and fixed excitation vectors * @param[in] p_gain, f_gain Pitch and fixed gains + * @param[in] ctx The context */ // XXX: There is something wrong with the precision here! The magnitudes // of the energies are not correct. Please check the reference code carefully static float voice_factor(float *p_vector, float p_gain, - float *f_vector, float f_gain) + float *f_vector, float f_gain, + CELPMContext *ctx) { - double p_ener = (double) ff_dot_productf(p_vector, p_vector, + double p_ener = (double) ctx->dot_productf(p_vector, p_vector, AMRWB_SFR_SIZE) * p_gain * p_gain; - double f_ener = (double) ff_dot_productf(f_vector, f_vector, + double f_ener = (double) ctx->dot_productf(f_vector, f_vector, AMRWB_SFR_SIZE) * f_gain * f_gain; return (p_ener - f_ener) / (p_ener + f_ener); @@ -749,13 +763,13 @@ static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation, float fixed_gain, const float *fixed_vector, float *samples) { - ff_weighted_vector_sumf(excitation, ctx->pitch_vector, fixed_vector, + ctx->acelpv_ctx.weighted_vector_sumf(excitation, ctx->pitch_vector, fixed_vector, ctx->pitch_gain[0], fixed_gain, AMRWB_SFR_SIZE); /* emphasize pitch vector contribution in low bitrate modes */ if (ctx->pitch_gain[0] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) { int i; - float energy = ff_dot_productf(excitation, excitation, + float energy = ctx->celpm_ctx.dot_productf(excitation, excitation, AMRWB_SFR_SIZE); // XXX: Weird part in both ref code and spec. 
A unknown parameter @@ -769,7 +783,7 @@ static void synthesis(AMRWBContext *ctx, float *lpc, float *excitation, energy, AMRWB_SFR_SIZE); } - ff_celp_lp_synthesis_filterf(samples, lpc, excitation, + ctx->celpf_ctx.celp_lp_synthesis_filterf(samples, lpc, excitation, AMRWB_SFR_SIZE, LP_ORDER); } @@ -801,8 +815,9 @@ static void de_emphasis(float *out, float *in, float m, float mem[1]) * @param[out] out Buffer for interpolated signal * @param[in] in Current signal data (length 0.8*o_size) * @param[in] o_size Output signal length + * @param[in] ctx The context */ -static void upsample_5_4(float *out, const float *in, int o_size) +static void upsample_5_4(float *out, const float *in, int o_size, CELPMContext *ctx) { const float *in0 = in - UPS_FIR_SIZE + 1; int i, j, k; @@ -815,7 +830,8 @@ static void upsample_5_4(float *out, const float *in, int o_size) i++; for (k = 1; k < 5; k++) { - out[i] = ff_dot_productf(in0 + int_part, upsample_fir[4 - frac_part], + out[i] = ctx->dot_productf(in0 + int_part, + upsample_fir[4 - frac_part], UPS_MEM_SIZE); int_part++; frac_part--; @@ -842,8 +858,8 @@ static float find_hb_gain(AMRWBContext *ctx, const float *synth, if (ctx->fr_cur_mode == MODE_23k85) return qua_hb_gain[hb_idx] * (1.0f / (1 << 14)); - tilt = ff_dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1) / - ff_dot_productf(synth, synth, AMRWB_SFR_SIZE); + tilt = ctx->celpm_ctx.dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1) / + ctx->celpm_ctx.dot_productf(synth, synth, AMRWB_SFR_SIZE); /* return gain bounded by [0.1, 1.0] */ return av_clipf((1.0 - FFMAX(0.0, tilt)) * (1.25 - 0.25 * wsp), 0.1, 1.0); @@ -862,7 +878,7 @@ static void scaled_hb_excitation(AMRWBContext *ctx, float *hb_exc, const float *synth_exc, float hb_gain) { int i; - float energy = ff_dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE); + float energy = ctx->celpm_ctx.dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE); /* Generate a white-noise excitation */ for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) @@ -993,7 
+1009,7 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples, float e_isf[LP_ORDER_16k]; // ISF vector for extrapolation double e_isp[LP_ORDER_16k]; - ff_weighted_vector_sumf(e_isf, isf_past, isf, isfp_inter[subframe], + ctx->acelpv_ctx.weighted_vector_sumf(e_isf, isf_past, isf, isfp_inter[subframe], 1.0 - isfp_inter[subframe], LP_ORDER); extrapolate_isf(e_isf); @@ -1007,7 +1023,7 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples, lpc_weighting(hb_lpc, ctx->lp_coef[subframe], 0.6, LP_ORDER); } - ff_celp_lp_synthesis_filterf(samples, hb_lpc, exc, AMRWB_SFR_SIZE_16k, + ctx->celpf_ctx.celp_lp_synthesis_filterf(samples, hb_lpc, exc, AMRWB_SFR_SIZE_16k, (mode == MODE_6k60) ? LP_ORDER_16k : LP_ORDER); } @@ -1022,6 +1038,8 @@ static void hb_synthesis(AMRWBContext *ctx, int subframe, float *samples, * * @remark It is safe to pass the same array in in and out parameters */ + +#ifndef hb_fir_filter static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1], float mem[HB_FIR_SIZE], const float *in) { @@ -1039,6 +1057,7 @@ static void hb_fir_filter(float *out, const float fir_coef[HB_FIR_SIZE + 1], memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float)); } +#endif /* hb_fir_filter */ /** * Update context state before the next subframe. 
@@ -1155,14 +1174,15 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, ctx->fixed_gain[0] = ff_amr_set_fixed_gain(fixed_gain_factor, - ff_dot_productf(ctx->fixed_vector, ctx->fixed_vector, + ctx->celpm_ctx.dot_productf(ctx->fixed_vector, ctx->fixed_vector, AMRWB_SFR_SIZE) / AMRWB_SFR_SIZE, ctx->prediction_error, ENERGY_MEAN, energy_pred_fac); /* Calculate voice factor and store tilt for next subframe */ voice_fac = voice_factor(ctx->pitch_vector, ctx->pitch_gain[0], - ctx->fixed_vector, ctx->fixed_gain[0]); + ctx->fixed_vector, ctx->fixed_gain[0], + &ctx->celpm_ctx); ctx->tilt_coef = voice_fac * 0.25 + 0.25; /* Construct current excitation */ @@ -1188,15 +1208,15 @@ static int amrwb_decode_frame(AVCodecContext *avctx, void *data, de_emphasis(&ctx->samples_up[UPS_MEM_SIZE], &ctx->samples_az[LP_ORDER], PREEMPH_FAC, ctx->demph_mem); - ff_acelp_apply_order_2_transfer_function(&ctx->samples_up[UPS_MEM_SIZE], + ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(&ctx->samples_up[UPS_MEM_SIZE], &ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_31_poles, hpf_31_gain, ctx->hpf_31_mem, AMRWB_SFR_SIZE); upsample_5_4(sub_buf, &ctx->samples_up[UPS_FIR_SIZE], - AMRWB_SFR_SIZE_16k); + AMRWB_SFR_SIZE_16k, &ctx->celpm_ctx); /* High frequency band (6.4 - 7.0 kHz) generation part */ - ff_acelp_apply_order_2_transfer_function(hb_samples, + ctx->acelpf_ctx.acelp_apply_order_2_transfer_function(hb_samples, &ctx->samples_up[UPS_MEM_SIZE], hpf_zeros, hpf_400_poles, hpf_400_gain, ctx->hpf_400_mem, AMRWB_SFR_SIZE); diff --git a/libavcodec/celp_filters.c b/libavcodec/celp_filters.c index 04ede491ac..8047a78452 100644 --- a/libavcodec/celp_filters.c +++ b/libavcodec/celp_filters.c @@ -205,3 +205,12 @@ void ff_celp_lp_zero_synthesis_filterf(float *out, const float *filter_coeffs, out[n] += filter_coeffs[i-1] * in[n-i]; } } + +void ff_celp_filter_init(CELPFContext *c) +{ + c->celp_lp_synthesis_filterf = ff_celp_lp_synthesis_filterf; + c->celp_lp_zero_synthesis_filterf = 
ff_celp_lp_zero_synthesis_filterf; + + if(HAVE_MIPSFPU) + ff_celp_filter_init_mips(c); +} diff --git a/libavcodec/celp_filters.h b/libavcodec/celp_filters.h index f7e8fbddd3..f644ec325e 100644 --- a/libavcodec/celp_filters.h +++ b/libavcodec/celp_filters.h @@ -25,6 +25,55 @@ #include <stdint.h> +typedef struct CELPFContext { + /** + * LP synthesis filter. + * @param[out] out pointer to output buffer + * - the array out[-filter_length, -1] must + * contain the previous result of this filter + * @param filter_coeffs filter coefficients. + * @param in input signal + * @param buffer_length amount of data to process + * @param filter_length filter length (10 for 10th order LP filter). Must be + * greater than 4 and even. + * + * @note Output buffer must contain filter_length samples of past + * speech data before pointer. + * + * Routine applies 1/A(z) filter to given speech data. + */ + void (*celp_lp_synthesis_filterf)(float *out, const float *filter_coeffs, + const float *in, int buffer_length, + int filter_length); + + /** + * LP zero synthesis filter. + * @param[out] out pointer to output buffer + * @param filter_coeffs filter coefficients. + * @param in input signal + * - the array in[-filter_length, -1] must + * contain the previous input of this filter + * @param buffer_length amount of data to process (should be a multiple of eight) + * @param filter_length filter length (10 for 10th order LP filter; + * should be a multiple of two) + * + * @note Output buffer must contain filter_length samples of past + * speech data before pointer. + * + * Routine applies A(z) filter to given speech data. + */ + void (*celp_lp_zero_synthesis_filterf)(float *out, const float *filter_coeffs, + const float *in, int buffer_length, + int filter_length); + +}CELPFContext; + +/** + * Initialize CELPFContext. 
+ */ +void ff_celp_filter_init(CELPFContext *c); +void ff_celp_filter_init_mips(CELPFContext *c); + /** * Circularly convolve fixed vector with a phase dispersion impulse * response filter (D.6.2 of G.729 and 6.1.5 of AMR). diff --git a/libavcodec/celp_math.c b/libavcodec/celp_math.c index d85277f209..443bd7f0e7 100644 --- a/libavcodec/celp_math.c +++ b/libavcodec/celp_math.c @@ -218,3 +218,11 @@ float ff_dot_productf(const float* a, const float* b, int length) return sum; } + +void ff_celp_math_init(CELPMContext *c) +{ + c->dot_productf = ff_dot_productf; + + if(HAVE_MIPSFPU) + ff_celp_math_init_mips(c); +} diff --git a/libavcodec/celp_math.h b/libavcodec/celp_math.h index ec62a9ea09..16cc19ccd7 100644 --- a/libavcodec/celp_math.h +++ b/libavcodec/celp_math.h @@ -25,6 +25,25 @@ #include <stdint.h> +typedef struct CELPMContext { + /** + * Return the dot product. + * @param a input data array + * @param b input data array + * @param length number of elements + * + * @return dot product = sum of elementwise products + */ + float (*dot_productf)(const float* a, const float* b, int length); + +}CELPMContext; + +/** + * Initialize CELPMContext. + */ +void ff_celp_math_init(CELPMContext *c); +void ff_celp_math_init_mips(CELPMContext *c); + /** * fixed-point implementation of cosine in [0; PI) domain. 
* @param arg fixed-point cosine argument, 0 <= arg < 0x4000 diff --git a/libavcodec/lsp.c b/libavcodec/lsp.c index 7fda12ee62..a5a86c87e4 100644 --- a/libavcodec/lsp.c +++ b/libavcodec/lsp.c @@ -28,6 +28,8 @@ #include "mathops.h" #include "lsp.h" #include "celp_math.h" +#include "libavcodec/mips/lsp_mips.h" + void ff_acelp_reorder_lsf(int16_t* lsfq, int lsfq_min_distance, int lsfq_min, int lsfq_max, int lp_order) { @@ -162,6 +164,7 @@ void ff_acelp_lp_decode(int16_t* lp_1st, int16_t* lp_2nd, const int16_t* lsp_2nd ff_acelp_lsp2lpc(lp_2nd, lsp_2nd, lp_order >> 1); } +#ifndef ff_lsp2polyf void ff_lsp2polyf(const double *lsp, double *f, int lp_half_order) { int i, j; @@ -178,6 +181,7 @@ void ff_lsp2polyf(const double *lsp, double *f, int lp_half_order) f[1] += val; } } +#endif /* ff_lsp2polyf */ void ff_acelp_lspd2lpc(const double *lsp, float *lpc, int lp_half_order) { diff --git a/libavcodec/mips/Makefile b/libavcodec/mips/Makefile index 37899b1f7a..24a95b54c6 100644 --- a/libavcodec/mips/Makefile +++ b/libavcodec/mips/Makefile @@ -1,3 +1,13 @@ MMI-OBJS += mips/dsputil_mmi.o \ mips/idct_mmi.o \ - mips/mpegvideo_mmi.o \ + mips/mpegvideo_mmi.o + +MIPSFPU-OBJS-$(CONFIG_AMRNB_DECODER) += mips/acelp_filters_mips.o \ + mips/celp_filters_mips.o \ + mips/celp_math_mips.o \ + mips/acelp_vectors_mips.o +MIPSFPU-OBJS-$(CONFIG_AMRWB_DECODER) += mips/acelp_filters_mips.o \ + mips/celp_filters_mips.o \ + mips/amrwbdec_mips.o \ + mips/celp_math_mips.o \ + mips/acelp_vectors_mips.o diff --git a/libavcodec/mips/acelp_filters_mips.c b/libavcodec/mips/acelp_filters_mips.c new file mode 100644 index 0000000000..be686c287a --- /dev/null +++ b/libavcodec/mips/acelp_filters_mips.c @@ -0,0 +1,210 @@ + /* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nedeljko Babic (nbabic@mips.com) + * + * various filters for ACELP-based codecs optimized for MIPS + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Reference: libavcodec/acelp_filters.c + */ +#include "libavutil/attributes.h" +#include "libavcodec/acelp_filters.h" + +static void ff_acelp_interpolatef_mips(float *out, const float *in, + const float *filter_coeffs, int precision, + int frac_pos, int filter_length, int length) +{ + int n, i; + int prec = precision * 4; + int fc_offset = precision - frac_pos; + float in_val_p, in_val_m, fc_val_p, fc_val_m; + + for (n = 0; n < length; n++) { + /** + * four pointers are defined in order to minimize number of + * computations done in inner loop + */ + const float *p_in_p = &in[n]; + const float *p_in_m = &in[n-1]; + const float *p_filter_coeffs_p = &filter_coeffs[frac_pos]; + const float *p_filter_coeffs_m = filter_coeffs + fc_offset; + float v = 0; + + for (i = 0; i < filter_length;i++) { + __asm__ __volatile__ ( + "lwc1 %[in_val_p], 0(%[p_in_p]) \n\t" + "lwc1 %[fc_val_p], 0(%[p_filter_coeffs_p]) \n\t" + "lwc1 %[in_val_m], 0(%[p_in_m]) \n\t" + "lwc1 %[fc_val_m], 0(%[p_filter_coeffs_m]) \n\t" + "addiu %[p_in_p], %[p_in_p], 4 \n\t" + "madd.s %[v],%[v], %[in_val_p],%[fc_val_p] \n\t" + "addiu %[p_in_m], %[p_in_m], -4 \n\t" + "addu %[p_filter_coeffs_p], %[p_filter_coeffs_p], %[prec] \n\t" + "addu %[p_filter_coeffs_m], %[p_filter_coeffs_m], %[prec] \n\t" + "madd.s %[v],%[v],%[in_val_m], %[fc_val_m] \n\t" + + : [v] "=&f" (v),[p_in_p] "+r" (p_in_p), [p_in_m] "+r" (p_in_m), + [p_filter_coeffs_p] "+r" (p_filter_coeffs_p), + [in_val_p] "=&f" (in_val_p), [in_val_m] "=&f" (in_val_m), + [fc_val_p] "=&f" (fc_val_p), [fc_val_m] "=&f" (fc_val_m), + [p_filter_coeffs_m] "+r" (p_filter_coeffs_m) + : [prec] "r" (prec) + ); + } + out[n] = v; + } +} + +static void 
ff_acelp_apply_order_2_transfer_function_mips(float *out, const float *in, + const float zero_coeffs[2], + const float pole_coeffs[2], + float gain, float mem[2], int n) +{ + /** + * loop is unrolled eight times + */ + + __asm__ __volatile__ ( + "lwc1 $f0, 0(%[mem]) \n\t" + "blez %[n], ff_acelp_apply_order_2_transfer_function_end%= \n\t" + "lwc1 $f1, 4(%[mem]) \n\t" + "lwc1 $f2, 0(%[pole_coeffs]) \n\t" + "lwc1 $f3, 4(%[pole_coeffs]) \n\t" + "lwc1 $f4, 0(%[zero_coeffs]) \n\t" + "lwc1 $f5, 4(%[zero_coeffs]) \n\t" + + "ff_acelp_apply_order_2_transfer_function_madd%=: \n\t" + + "lwc1 $f6, 0(%[in]) \n\t" + "mul.s $f9, $f3, $f1 \n\t" + "mul.s $f7, $f2, $f0 \n\t" + "msub.s $f7, $f7, %[gain], $f6 \n\t" + "sub.s $f7, $f7, $f9 \n\t" + "madd.s $f8, $f7, $f4, $f0 \n\t" + "madd.s $f8, $f8, $f5, $f1 \n\t" + "lwc1 $f11, 4(%[in]) \n\t" + "mul.s $f12, $f3, $f0 \n\t" + "mul.s $f13, $f2, $f7 \n\t" + "msub.s $f13, $f13, %[gain], $f11 \n\t" + "sub.s $f13, $f13, $f12 \n\t" + "madd.s $f14, $f13, $f4, $f7 \n\t" + "madd.s $f14, $f14, $f5, $f0 \n\t" + "swc1 $f8, 0(%[out]) \n\t" + "lwc1 $f6, 8(%[in]) \n\t" + "mul.s $f9, $f3, $f7 \n\t" + "mul.s $f15, $f2, $f13 \n\t" + "msub.s $f15, $f15, %[gain], $f6 \n\t" + "sub.s $f15, $f15, $f9 \n\t" + "madd.s $f8, $f15, $f4, $f13 \n\t" + "madd.s $f8, $f8, $f5, $f7 \n\t" + "swc1 $f14, 4(%[out]) \n\t" + "lwc1 $f11, 12(%[in]) \n\t" + "mul.s $f12, $f3, $f13 \n\t" + "mul.s $f16, $f2, $f15 \n\t" + "msub.s $f16, $f16, %[gain], $f11 \n\t" + "sub.s $f16, $f16, $f12 \n\t" + "madd.s $f14, $f16, $f4, $f15 \n\t" + "madd.s $f14, $f14, $f5, $f13 \n\t" + "swc1 $f8, 8(%[out]) \n\t" + "lwc1 $f6, 16(%[in]) \n\t" + "mul.s $f9, $f3, $f15 \n\t" + "mul.s $f7, $f2, $f16 \n\t" + "msub.s $f7, $f7, %[gain], $f6 \n\t" + "sub.s $f7, $f7, $f9 \n\t" + "madd.s $f8, $f7, $f4, $f16 \n\t" + "madd.s $f8, $f8, $f5, $f15 \n\t" + "swc1 $f14, 12(%[out]) \n\t" + "lwc1 $f11, 20(%[in]) \n\t" + "mul.s $f12, $f3, $f16 \n\t" + "mul.s $f13, $f2, $f7 \n\t" + "msub.s $f13, $f13, %[gain], $f11 \n\t" + 
"sub.s $f13, $f13, $f12 \n\t" + "madd.s $f14, $f13, $f4, $f7 \n\t" + "madd.s $f14, $f14, $f5, $f16 \n\t" + "swc1 $f8, 16(%[out]) \n\t" + "lwc1 $f6, 24(%[in]) \n\t" + "mul.s $f9, $f3, $f7 \n\t" + "mul.s $f15, $f2, $f13 \n\t" + "msub.s $f15, $f15, %[gain], $f6 \n\t" + "sub.s $f1, $f15, $f9 \n\t" + "madd.s $f8, $f1, $f4, $f13 \n\t" + "madd.s $f8, $f8, $f5, $f7 \n\t" + "swc1 $f14, 20(%[out]) \n\t" + "lwc1 $f11, 28(%[in]) \n\t" + "mul.s $f12, $f3, $f13 \n\t" + "mul.s $f16, $f2, $f1 \n\t" + "msub.s $f16, $f16, %[gain], $f11 \n\t" + "sub.s $f0, $f16, $f12 \n\t" + "madd.s $f14, $f0, $f4, $f1 \n\t" + "madd.s $f14, $f14, $f5, $f13 \n\t" + "swc1 $f8, 24(%[out]) \n\t" + "addiu %[out], 32 \n\t" + "addiu %[in], 32 \n\t" + "addiu %[n], -8 \n\t" + "swc1 $f14, -4(%[out]) \n\t" + "bnez %[n], ff_acelp_apply_order_2_transfer_function_madd%= \n\t" + "swc1 $f1, 4(%[mem]) \n\t" + "swc1 $f0, 0(%[mem]) \n\t" + + "ff_acelp_apply_order_2_transfer_function_end%=: \n\t" + + : [out] "+r" (out), + [in] "+r" (in), [gain] "+f" (gain), + [n] "+r" (n), [mem] "+r" (mem) + : [zero_coeffs] "r" (zero_coeffs), + [pole_coeffs] "r" (pole_coeffs) + : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", + "$f6", "$f7", "$f8", "$f9", "$f10", "$f11", + "$f12", "$f13", "$f14", "$f15", "$f16" + ); +} + +void ff_acelp_filter_init_mips(ACELPFContext *c) +{ + c->acelp_interpolatef = ff_acelp_interpolatef_mips; + c->acelp_apply_order_2_transfer_function = ff_acelp_apply_order_2_transfer_function_mips; +} diff --git a/libavcodec/mips/acelp_vectors_mips.c b/libavcodec/mips/acelp_vectors_mips.c new file mode 100644 index 0000000000..d62b37798c --- /dev/null +++ b/libavcodec/mips/acelp_vectors_mips.c @@ -0,0 +1,96 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nedeljko Babic (nbabic@mips.com) + * + * adaptive and fixed codebook vector operations for ACELP-based codecs + * optimized for MIPS + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Reference: libavcodec/acelp_vectors.c + */ +#include "libavcodec/acelp_vectors.h" + +static void ff_weighted_vector_sumf_mips( + float *out, const float *in_a, const float *in_b, + float weight_coeff_a, float weight_coeff_b, int length) +{ + const float *a_end = in_a + length; + + /* loop unrolled two times */ + __asm__ __volatile__ ( + "blez %[length], ff_weighted_vector_sumf_end%= \n\t" + + "ff_weighted_vector_sumf_madd%=: \n\t" + "lwc1 $f0, 0(%[in_a]) \n\t" + "lwc1 $f3, 4(%[in_a]) \n\t" + "lwc1 $f1, 0(%[in_b]) \n\t" + "lwc1 $f4, 4(%[in_b]) \n\t" + "mul.s $f2, %[weight_coeff_a], $f0 \n\t" + "mul.s $f5, %[weight_coeff_a], $f3 \n\t" + "madd.s $f2, $f2, %[weight_coeff_b], $f1 \n\t" + "madd.s $f5, $f5, %[weight_coeff_b], $f4 \n\t" + "addiu %[in_a], 8 \n\t" + "addiu %[in_b], 8 \n\t" + "swc1 $f2, 0(%[out]) \n\t" + "swc1 $f5, 4(%[out]) \n\t" + "addiu %[out], 8 \n\t" + "bne %[in_a], %[a_end], ff_weighted_vector_sumf_madd%= \n\t" + + "ff_weighted_vector_sumf_end%=: \n\t" + + : [out] "+r" (out), [in_a] "+r" (in_a), [in_b] "+r" (in_b) + : [weight_coeff_a] "f" (weight_coeff_a), + [weight_coeff_b] "f" (weight_coeff_b), + [length] "r" (length), [a_end]"r"(a_end) + : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5" + ); +} + +void ff_acelp_vectors_init_mips(ACELPVContext *c) +{ + c->weighted_vector_sumf = ff_weighted_vector_sumf_mips; +} diff --git a/libavcodec/mips/amrwbdec_mips.c b/libavcodec/mips/amrwbdec_mips.c new file mode 100644 index 0000000000..ad08b63095 --- /dev/null +++ b/libavcodec/mips/amrwbdec_mips.c @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nedeljko Babic (nbabic@mips.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Reference: libavcodec/amrwbdec.c + */ +#include "libavutil/avutil.h" +#include "libavcodec/amrwbdata.h" +#include "amrwbdec_mips.h" + +void hb_fir_filter_mips(float *out, const float fir_coef[HB_FIR_SIZE + 1], + float mem[HB_FIR_SIZE], const float *in) +{ + int i; + float data[AMRWB_SFR_SIZE_16k + HB_FIR_SIZE]; // past and current samples + + memcpy(data, mem, HB_FIR_SIZE * sizeof(float)); + memcpy(data + HB_FIR_SIZE, in, AMRWB_SFR_SIZE_16k * sizeof(float)); + + for (i = 0; i < AMRWB_SFR_SIZE_16k; i++) { + float output; + float * p_data = (data+i); + + /** + * inner loop is entirely unrolled and instructions are scheduled + * to minimize pipeline stall + */ + __asm__ __volatile__( + "mtc1 $zero, %[output] \n\t" + "lwc1 $f0, 0(%[p_data]) \n\t" + "lwc1 $f1, 0(%[fir_coef]) \n\t" + "lwc1 $f2, 4(%[p_data]) \n\t" + "madd.s %[output], %[output], $f0, $f1 \n\t" + "lwc1 $f3, 4(%[fir_coef]) \n\t" + "lwc1 $f4, 8(%[p_data]) \n\t" + "madd.s %[output], %[output], $f2, $f3 \n\t" + "lwc1 $f5, 8(%[fir_coef]) \n\t" + + "lwc1 $f0, 12(%[p_data]) \n\t" + "lwc1 $f1, 12(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f4, $f5 \n\t" + "lwc1 $f2, 16(%[p_data]) \n\t" + "madd.s %[output], %[output], $f0, $f1 \n\t" + "lwc1 $f3, 16(%[fir_coef]) \n\t" + "lwc1 $f4, 20(%[p_data]) \n\t" + "lwc1 $f5, 20(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f2, $f3 \n\t" + + "lwc1 $f0, 24(%[p_data]) \n\t" + "lwc1 $f1, 24(%[fir_coef]) \n\t" + "lwc1 $f2, 28(%[p_data]) \n\t" + "madd.s %[output], %[output], $f4, 
$f5 \n\t" + "lwc1 $f3, 28(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f0, $f1 \n\t" + "lwc1 $f4, 32(%[p_data]) \n\t" + "madd.s %[output], %[output], $f2, $f3 \n\t" + "lwc1 $f5, 32(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f4, $f5 \n\t" + + "lwc1 $f0, 36(%[p_data]) \n\t" + "lwc1 $f1, 36(%[fir_coef]) \n\t" + "lwc1 $f2, 40(%[p_data]) \n\t" + "lwc1 $f3, 40(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f0, $f1 \n\t" + "lwc1 $f4, 44(%[p_data]) \n\t" + "lwc1 $f5, 44(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f2, $f3 \n\t" + + "lwc1 $f0, 48(%[p_data]) \n\t" + "lwc1 $f1, 48(%[fir_coef]) \n\t" + "lwc1 $f2, 52(%[p_data]) \n\t" + "madd.s %[output], %[output], $f4, $f5 \n\t" + "lwc1 $f3, 52(%[fir_coef]) \n\t" + "lwc1 $f4, 56(%[p_data]) \n\t" + "madd.s %[output], %[output], $f0, $f1 \n\t" + "lwc1 $f5, 56(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f2, $f3 \n\t" + + "lwc1 $f0, 60(%[p_data]) \n\t" + "lwc1 $f1, 60(%[fir_coef]) \n\t" + "lwc1 $f2, 64(%[p_data]) \n\t" + "madd.s %[output], %[output], $f4, $f5 \n\t" + "lwc1 $f3, 64(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f0, $f1 \n\t" + "lwc1 $f4, 68(%[p_data]) \n\t" + "madd.s %[output], %[output], $f2, $f3 \n\t" + "lwc1 $f5, 68(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f4, $f5 \n\t" + + "lwc1 $f0, 72(%[p_data]) \n\t" + "lwc1 $f1, 72(%[fir_coef]) \n\t" + "lwc1 $f2, 76(%[p_data]) \n\t" + "lwc1 $f3, 76(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f0, $f1 \n\t" + "lwc1 $f4, 80(%[p_data]) \n\t" + "lwc1 $f5, 80(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f2, $f3 \n\t" + + "lwc1 $f0, 84(%[p_data]) \n\t" + "lwc1 $f1, 84(%[fir_coef]) \n\t" + "lwc1 $f2, 88(%[p_data]) \n\t" + "madd.s %[output], %[output], $f4, $f5 \n\t" + "lwc1 $f3, 88(%[fir_coef]) \n\t" + "lwc1 $f4, 92(%[p_data]) \n\t" + "madd.s %[output], %[output], $f0, $f1 \n\t" + "lwc1 $f5, 92(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f2, $f3 \n\t" + + "lwc1 $f0, 96(%[p_data]) \n\t" + "lwc1 $f1, 
96(%[fir_coef]) \n\t" + "lwc1 $f2, 100(%[p_data]) \n\t" + "madd.s %[output], %[output], $f4, $f5 \n\t" + "lwc1 $f3, 100(%[fir_coef]) \n\t" + "lwc1 $f4, 104(%[p_data]) \n\t" + "madd.s %[output], %[output], $f0, $f1 \n\t" + "lwc1 $f5, 104(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f2, $f3 \n\t" + + "lwc1 $f0, 108(%[p_data]) \n\t" + "lwc1 $f1, 108(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f4, $f5 \n\t" + "lwc1 $f2, 112(%[p_data]) \n\t" + "lwc1 $f3, 112(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f0, $f1 \n\t" + "lwc1 $f4, 116(%[p_data]) \n\t" + "lwc1 $f5, 116(%[fir_coef]) \n\t" + "lwc1 $f0, 120(%[p_data]) \n\t" + "madd.s %[output], %[output], $f2, $f3 \n\t" + "lwc1 $f1, 120(%[fir_coef]) \n\t" + "madd.s %[output], %[output], $f4, $f5 \n\t" + "madd.s %[output], %[output], $f0, $f1 \n\t" + + : [output]"=&f"(output) + : [fir_coef]"r"(fir_coef), [p_data]"r"(p_data) + : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5" + ); + out[i] = output; + } + memcpy(mem, data + AMRWB_SFR_SIZE_16k, HB_FIR_SIZE * sizeof(float)); +} diff --git a/libavcodec/mips/amrwbdec_mips.h b/libavcodec/mips/amrwbdec_mips.h new file mode 100644 index 0000000000..a469918d2c --- /dev/null +++ b/libavcodec/mips/amrwbdec_mips.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nedeljko Babic (nbabic@mips.com) + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Reference: libavcodec/amrwbdec.c + */ +#ifndef AVCODEC_AMRWBDEC_MIPS_H +#define AVCODEC_AMRWBDEC_MIPS_H +#include "config.h" + +#if HAVE_MIPSFPU && HAVE_INLINE_ASM +void hb_fir_filter_mips(float *out, const float fir_coef[], + float mem[], const float *in); +#define hb_fir_filter hb_fir_filter_mips +#endif + +#endif /* AVCODEC_AMRWBDEC_MIPS_H */ diff --git a/libavcodec/mips/celp_filters_mips.c b/libavcodec/mips/celp_filters_mips.c new file mode 100644 index 0000000000..a31b81db17 --- /dev/null +++ b/libavcodec/mips/celp_filters_mips.c @@ -0,0 +1,281 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. 
BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nedeljko Babic (nbabic@mips.com) + * + * various filters for CELP-based codecs optimized for MIPS + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Reference: libavcodec/celp_filters.c + */ +#include "libavutil/attributes.h" +#include "libavutil/common.h" +#include "libavcodec/celp_filters.h" + +static void ff_celp_lp_synthesis_filterf_mips(float *out, + const float *filter_coeffs, + const float* in, int buffer_length, + int filter_length) +{ + int i,n; + + float out0, out1, out2, out3; + float old_out0, old_out1, old_out2, old_out3; + float a,b,c; + const float *p_filter_coeffs; + float *p_out; + + a = filter_coeffs[0]; + b = filter_coeffs[1]; + c = filter_coeffs[2]; + b -= filter_coeffs[0] * filter_coeffs[0]; + c -= filter_coeffs[1] * filter_coeffs[0]; + c -= filter_coeffs[0] * b; + + old_out0 = out[-4]; + old_out1 = out[-3]; + old_out2 = out[-2]; + old_out3 = out[-1]; + for (n = 0; n <= buffer_length - 4; n+=4) { + p_filter_coeffs = filter_coeffs; + p_out = out; + + out0 = in[0]; + out1 = in[1]; + out2 = in[2]; + out3 = in[3]; + + __asm__ __volatile__( + "lwc1 $f2, 8(%[filter_coeffs]) \n\t" + "lwc1 $f1, 4(%[filter_coeffs]) \n\t" + "lwc1 $f0, 0(%[filter_coeffs]) \n\t" + "nmsub.s %[out0], %[out0], $f2, %[old_out1] \n\t" + "nmsub.s %[out1], %[out1], $f2, %[old_out2] \n\t" + "nmsub.s %[out2], %[out2], $f2, %[old_out3] \n\t" + "lwc1 $f3, 12(%[filter_coeffs]) \n\t" + "nmsub.s %[out0], %[out0], $f1, %[old_out2] \n\t" + "nmsub.s %[out1], %[out1], $f1, %[old_out3] \n\t" + "nmsub.s %[out2], %[out2], $f3, %[old_out2] \n\t" + "nmsub.s %[out0], %[out0], $f0, %[old_out3] \n\t" + "nmsub.s %[out3], %[out3], $f3, %[old_out3] \n\t" + "nmsub.s %[out1], %[out1], $f3, %[old_out1] \n\t" + "nmsub.s %[out0], %[out0], $f3, %[old_out0] \n\t" + + : [out0]"+f"(out0), [out1]"+f"(out1), + [out2]"+f"(out2), [out3]"+f"(out3) + : [old_out0]"f"(old_out0), [old_out1]"f"(old_out1), + 
[old_out2]"f"(old_out2), [old_out3]"f"(old_out3), + [filter_coeffs]"r"(filter_coeffs) + : "$f0", "$f1", "$f2", "$f3", "$f4" + ); + + for (i = 5; i <= filter_length; i += 2) { + __asm__ __volatile__( + "lwc1 %[old_out3], -20(%[p_out]) \n\t" + "lwc1 $f5, 16(%[p_filter_coeffs]) \n\t" + "addiu %[p_out], -8 \n\t" + "addiu %[p_filter_coeffs], 8 \n\t" + "nmsub.s %[out1], %[out1], $f5, %[old_out0] \n\t" + "nmsub.s %[out3], %[out3], $f5, %[old_out2] \n\t" + "lwc1 $f4, 12(%[p_filter_coeffs]) \n\t" + "lwc1 %[old_out2], -16(%[p_out]) \n\t" + "nmsub.s %[out0], %[out0], $f5, %[old_out3] \n\t" + "nmsub.s %[out2], %[out2], $f5, %[old_out1] \n\t" + "nmsub.s %[out1], %[out1], $f4, %[old_out3] \n\t" + "nmsub.s %[out3], %[out3], $f4, %[old_out1] \n\t" + "mov.s %[old_out1], %[old_out3] \n\t" + "nmsub.s %[out0], %[out0], $f4, %[old_out2] \n\t" + "nmsub.s %[out2], %[out2], $f4, %[old_out0] \n\t" + + : [out0]"+f"(out0), [out1]"+f"(out1), + [out2]"+f"(out2), [out3]"+f"(out3), [old_out0]"+f"(old_out0), + [old_out1]"+f"(old_out1), [old_out2]"+f"(old_out2), + [old_out3]"+f"(old_out3),[p_filter_coeffs]"+r"(p_filter_coeffs), + [p_out]"+r"(p_out) + : + : "$f4", "$f5" + ); + FFSWAP(float, old_out0, old_out2); + } + + __asm__ __volatile__( + "nmsub.s %[out3], %[out3], %[a], %[out2] \n\t" + "nmsub.s %[out2], %[out2], %[a], %[out1] \n\t" + "nmsub.s %[out3], %[out3], %[b], %[out1] \n\t" + "nmsub.s %[out1], %[out1], %[a], %[out0] \n\t" + "nmsub.s %[out2], %[out2], %[b], %[out0] \n\t" + "nmsub.s %[out3], %[out3], %[c], %[out0] \n\t" + + : [out0]"+f"(out0), [out1]"+f"(out1), + [out2]"+f"(out2), [out3]"+f"(out3) + : [a]"f"(a), [b]"f"(b), [c]"f"(c) + ); + + out[0] = out0; + out[1] = out1; + out[2] = out2; + out[3] = out3; + + old_out0 = out0; + old_out1 = out1; + old_out2 = out2; + old_out3 = out3; + + out += 4; + in += 4; + } + + out -= n; + in -= n; + for (; n < buffer_length; n++) { + float out_val, out_val_i, fc_val; + p_filter_coeffs = filter_coeffs; + p_out = &out[n]; + out_val = in[n]; + for (i = 
1; i <= filter_length; i++) { + __asm__ __volatile__( + "lwc1 %[fc_val], 0(%[p_filter_coeffs]) \n\t" + "lwc1 %[out_val_i], -4(%[p_out]) \n\t" + "addiu %[p_filter_coeffs], 4 \n\t" + "addiu %[p_out], -4 \n\t" + "nmsub.s %[out_val], %[out_val], %[fc_val], %[out_val_i] \n\t" + + : [fc_val]"=&f"(fc_val), [out_val]"+f"(out_val), + [out_val_i]"=&f"(out_val_i), [p_out]"+r"(p_out), + [p_filter_coeffs]"+r"(p_filter_coeffs) + ); + } + out[n] = out_val; + } +} + +static void ff_celp_lp_zero_synthesis_filterf_mips(float *out, + const float *filter_coeffs, + const float *in, int buffer_length, + int filter_length) +{ + int i,n; + float sum_out8, sum_out7, sum_out6, sum_out5, sum_out4, fc_val; + float sum_out3, sum_out2, sum_out1; + const float *p_filter_coeffs, *p_in; + + for (n = 0; n < buffer_length; n+=8) { + p_in = &in[n]; + p_filter_coeffs = filter_coeffs; + sum_out8 = in[n+7]; + sum_out7 = in[n+6]; + sum_out6 = in[n+5]; + sum_out5 = in[n+4]; + sum_out4 = in[n+3]; + sum_out3 = in[n+2]; + sum_out2 = in[n+1]; + sum_out1 = in[n]; + i = filter_length; + + /* i is always greater than 0 + * outer loop is unrolled eight times so there is less memory access + * inner loop is unrolled two times + */ + __asm__ __volatile__( + "filt_lp_inner%=: \n\t" + "lwc1 %[fc_val], 0(%[p_filter_coeffs]) \n\t" + "lwc1 $f7, 6*4(%[p_in]) \n\t" + "lwc1 $f6, 5*4(%[p_in]) \n\t" + "lwc1 $f5, 4*4(%[p_in]) \n\t" + "lwc1 $f4, 3*4(%[p_in]) \n\t" + "lwc1 $f3, 2*4(%[p_in]) \n\t" + "lwc1 $f2, 4(%[p_in]) \n\t" + "lwc1 $f1, 0(%[p_in]) \n\t" + "lwc1 $f0, -4(%[p_in]) \n\t" + "addiu %[i], -2 \n\t" + "madd.s %[sum_out8], %[sum_out8], %[fc_val], $f7 \n\t" + "madd.s %[sum_out7], %[sum_out7], %[fc_val], $f6 \n\t" + "madd.s %[sum_out6], %[sum_out6], %[fc_val], $f5 \n\t" + "madd.s %[sum_out5], %[sum_out5], %[fc_val], $f4 \n\t" + "madd.s %[sum_out4], %[sum_out4], %[fc_val], $f3 \n\t" + "madd.s %[sum_out3], %[sum_out3], %[fc_val], $f2 \n\t" + "madd.s %[sum_out2], %[sum_out2], %[fc_val], $f1 \n\t" + "madd.s %[sum_out1], 
%[sum_out1], %[fc_val], $f0 \n\t" + "lwc1 %[fc_val], 4(%[p_filter_coeffs]) \n\t" + "lwc1 $f7, -8(%[p_in]) \n\t" + "addiu %[p_filter_coeffs], 8 \n\t" + "addiu %[p_in], -8 \n\t" + "madd.s %[sum_out8], %[sum_out8], %[fc_val], $f6 \n\t" + "madd.s %[sum_out7], %[sum_out7], %[fc_val], $f5 \n\t" + "madd.s %[sum_out6], %[sum_out6], %[fc_val], $f4 \n\t" + "madd.s %[sum_out5], %[sum_out5], %[fc_val], $f3 \n\t" + "madd.s %[sum_out4], %[sum_out4], %[fc_val], $f2 \n\t" + "madd.s %[sum_out3], %[sum_out3], %[fc_val], $f1 \n\t" + "madd.s %[sum_out2], %[sum_out2], %[fc_val], $f0 \n\t" + "madd.s %[sum_out1], %[sum_out1], %[fc_val], $f7 \n\t" + "bgtz %[i], filt_lp_inner%= \n\t" + + : [sum_out8]"+f"(sum_out8), [sum_out7]"+f"(sum_out7), + [sum_out6]"+f"(sum_out6), [sum_out5]"+f"(sum_out5), + [sum_out4]"+f"(sum_out4), [sum_out3]"+f"(sum_out3), + [sum_out2]"+f"(sum_out2), [sum_out1]"+f"(sum_out1), + [fc_val]"=&f"(fc_val), [p_filter_coeffs]"+r"(p_filter_coeffs), + [p_in]"+r"(p_in), [i]"+r"(i) + : + : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7" + ); + + out[n+7] = sum_out8; + out[n+6] = sum_out7; + out[n+5] = sum_out6; + out[n+4] = sum_out5; + out[n+3] = sum_out4; + out[n+2] = sum_out3; + out[n+1] = sum_out2; + out[n] = sum_out1; + } +} + +void ff_celp_filter_init_mips(CELPFContext *c) +{ + c->celp_lp_synthesis_filterf = ff_celp_lp_synthesis_filterf_mips; + c->celp_lp_zero_synthesis_filterf = ff_celp_lp_zero_synthesis_filterf_mips; +} diff --git a/libavcodec/mips/celp_math_mips.c b/libavcodec/mips/celp_math_mips.c new file mode 100644 index 0000000000..0af4171bca --- /dev/null +++ b/libavcodec/mips/celp_math_mips.c @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nedeljko Babic (nbabic@mips.com) + * + * Math operations optimized for MIPS + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/** + * @file + * Reference: libavcodec/celp_math.c + */ +#include "libavcodec/celp_math.h" + +static float ff_dot_productf_mips(const float* a, const float* b, + int length) +{ + float sum; + const float* a_end = a + length; + + __asm__ __volatile__ ( + "mtc1 $zero, %[sum] \n\t" + "blez %[length], ff_dot_productf_end%= \n\t" + "ff_dot_productf_madd%=: \n\t" + "lwc1 $f2, 0(%[a]) \n\t" + "lwc1 $f1, 0(%[b]) \n\t" + "addiu %[a], %[a], 4 \n\t" + "addiu %[b], %[b], 4 \n\t" + "madd.s %[sum], %[sum], $f1, $f2 \n\t" + "bne %[a], %[a_end], ff_dot_productf_madd%= \n\t" + "ff_dot_productf_end%=: \n\t" + + : [sum] "=&f" (sum), [a] "+r" (a), [b] "+r" (b) + : [a_end]"r"(a_end), [length] "r" (length) + : "$f1", "$f2" + ); + return sum; +} + +void ff_celp_math_init_mips(CELPMContext *c) +{ + c->dot_productf = ff_dot_productf_mips; +} diff --git a/libavcodec/mips/lsp_mips.h b/libavcodec/mips/lsp_mips.h new file mode 100644 index 0000000000..f875392099 --- /dev/null +++ b/libavcodec/mips/lsp_mips.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2012 + * MIPS Technologies, Inc., California. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
Neither the name of the MIPS Technologies, Inc., nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * Author: Nedeljko Babic (nbabic@mips.com) + * + * LSP routines for ACELP-based codecs optimized for MIPS + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */

/**
 * @file
 * Reference: libavcodec/lsp.c
 */
#ifndef AVCODEC_LSP_MIPS_H
#define AVCODEC_LSP_MIPS_H

#if HAVE_MIPSFPU && HAVE_INLINE_ASM
/**
 * Compute polynomial coefficients from line spectral pairs
 * (MIPS FPU version of ff_lsp2polyf).
 *
 * Starting from f[0] = 1 and f[1] = -2 * lsp[0], every further step
 * i = 2..lp_half_order takes val = -2 * lsp[2 * (i - 1)] and updates
 *   f[i]  = val * f[i-1] + 2 * f[i-2]
 *   f[j] += val * f[j-1] + f[j-2]      for j = i-1 down to 2
 *   f[1] += val
 *
 * @param lsp            input values; every second element is used
 * @param f              output coefficients, f[0..lp_half_order] are written
 * @param lp_half_order  number of steps (highest index written in f)
 */
static av_always_inline void ff_lsp2polyf_mips(const double *lsp, double *f, int lp_half_order)
{
    int i, j = 0;
    double *p_fi = f;   /* advanced by one element per step inside the asm */
    double *p_f  = 0;   /* working pointer, set from p_fi inside the asm   */

    f[0] = 1.0;
    f[1] = -2 * lsp[0];
    lsp -= 2;           /* so that lsp[2*i] below addresses element 2*(i-1) */

    for (i = 2; i <= lp_half_order; i++) {
        double tmp, f_j_2, f_j_1, f_j;
        double val = lsp[2*i];

        /* The asm turns val into -2 * val, then reads and writes f[]
         * through p_f (ldc1 / sdc1).  The "memory" clobber keeps the
         * compiler from caching f[] values (e.g. f[1], updated in C right
         * after the asm) in registers across it.
         *
         * NOTE(review): the look-ahead "ldc1 f_j_2" loads read one element
         * below f (f[-1]) on the first step and on the last pass of the
         * inner loop; the loaded value is not used -- confirm that this
         * read is always inside the caller's buffer. */
        __asm__ __volatile__(
            "move %[p_f], %[p_fi] \n\t"
            "add.d %[val], %[val], %[val] \n\t"
            "addiu %[p_fi], 8 \n\t"
            "ldc1 %[f_j_1], 0(%[p_f]) \n\t"
            "ldc1 %[f_j], 8(%[p_f]) \n\t"
            "neg.d %[val], %[val] \n\t"
            "add.d %[tmp], %[f_j_1], %[f_j_1] \n\t"
            "madd.d %[tmp], %[tmp], %[f_j], %[val] \n\t"
            "addiu %[j], %[i], -2 \n\t"
            "ldc1 %[f_j_2], -8(%[p_f]) \n\t"
            "sdc1 %[tmp], 16(%[p_f]) \n\t"
            "beqz %[j], ff_lsp2polyf_lp_j_end%= \n\t"
            "ff_lsp2polyf_lp_j%=: \n\t"
            "add.d %[tmp], %[f_j], %[f_j_2] \n\t"
            "madd.d %[tmp], %[tmp], %[f_j_1], %[val] \n\t"
            "mov.d %[f_j], %[f_j_1] \n\t"
            "addiu %[j], -1 \n\t"
            "mov.d %[f_j_1], %[f_j_2] \n\t"
            "ldc1 %[f_j_2], -16(%[p_f]) \n\t"
            "sdc1 %[tmp], 8(%[p_f]) \n\t"
            "addiu %[p_f], -8 \n\t"
            "bgtz %[j], ff_lsp2polyf_lp_j%= \n\t"
            "ff_lsp2polyf_lp_j_end%=: \n\t"

            : [f_j_2]"=&f"(f_j_2), [f_j_1]"=&f"(f_j_1), [val]"+f"(val),
              [tmp]"=&f"(tmp), [f_j]"=&f"(f_j), [p_f]"+r"(p_f),
              [j]"+r"(j), [p_fi]"+r"(p_fi)
            : [i]"r"(i)
            : "memory"
        );
        f[1] += val;
    }
}
#define ff_lsp2polyf ff_lsp2polyf_mips
#endif /* HAVE_MIPSFPU && HAVE_INLINE_ASM */
#endif /* AVCODEC_LSP_MIPS_H */ |