diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2012-05-11 11:17:36 +0200 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2014-02-08 00:55:42 +0100 |
commit | 481a46a462ca762600ee4ad4c3b1e93d21b1fa35 (patch) | |
tree | f81182d2736a1ae14a14b83eb35b70541d344509 /libavcodec | |
parent | a7574a36afa1ccf5b170e4e827050710bd280899 (diff) | |
download | ffmpeg-481a46a462ca762600ee4ad4c3b1e93d21b1fa35.tar.gz |
dcadsp: add int8x8_fmul_int32 to DSP context
It is currently declared as a macro that is set to one of several inlinable functions,
among which are a Neon and a default C implementation.
Add a DSP parameter to each inline function, unused except by the
default C implementation, which calls a function from the DSP context.
On an Arrandale CPU, gain for an inlined SSE2 function vs. a call:
- Win32: 29 to 26 cycles
- Win64: 25 to 23 cycles
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/arm/dca.h | 3 | ||||
-rw-r--r-- | libavcodec/dcadec.c | 10 | ||||
-rw-r--r-- | libavcodec/dcadsp.c | 9 | ||||
-rw-r--r-- | libavcodec/dcadsp.h | 1 |
4 files changed, 16 insertions, 7 deletions
diff --git a/libavcodec/arm/dca.h b/libavcodec/arm/dca.h index 35971a8c3f..86d20517a4 100644 --- a/libavcodec/arm/dca.h +++ b/libavcodec/arm/dca.h @@ -81,7 +81,8 @@ static inline int decode_blockcodes(int code1, int code2, int levels, #if HAVE_NEON_INLINE && HAVE_ASM_MOD_Y #define int8x8_fmul_int32 int8x8_fmul_int32 -static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int scale) +static inline void int8x8_fmul_int32(av_unused DCADSPContext *dsp, + float *dst, const int8_t *src, int scale) { __asm__ ("vcvt.f32.s32 %2, %2, #4 \n" "vld1.8 {d0}, [%1,:64] \n" diff --git a/libavcodec/dcadec.c b/libavcodec/dcadec.c index 9f7ee54bd4..969e1f2341 100644 --- a/libavcodec/dcadec.c +++ b/libavcodec/dcadec.c @@ -1247,12 +1247,10 @@ static const uint8_t abits_sizes[7] = { 7, 10, 12, 13, 15, 17, 19 }; static const uint8_t abits_levels[7] = { 3, 5, 7, 9, 13, 17, 25 }; #ifndef int8x8_fmul_int32 -static inline void int8x8_fmul_int32(float *dst, const int8_t *src, int scale) +static inline void int8x8_fmul_int32(DCADSPContext *dsp, float *dst, + const int8_t *src, int scale) { - float fscale = scale / 16.0; - int i; - for (i = 0; i < 8; i++) - dst[i] = src[i] * fscale; + dsp->int8x8_fmul_int32(dst, src, scale); } #endif @@ -1380,7 +1378,7 @@ static int dca_subsubframe(DCAContext *s, int base_channel, int block_index) s->debug_flag |= 0x01; } - int8x8_fmul_int32(subband_samples[k][l], + int8x8_fmul_int32(&s->dcadsp, subband_samples[k][l], &high_freq_vq[hfvq][subsubframe * 8], s->scale_factor[k][l][0]); } diff --git a/libavcodec/dcadsp.c b/libavcodec/dcadsp.c index abeba2492b..a82548f83c 100644 --- a/libavcodec/dcadsp.c +++ b/libavcodec/dcadsp.c @@ -24,6 +24,14 @@ #include "libavutil/intreadwrite.h" #include "dcadsp.h" +static void int8x8_fmul_int32_c(float *dst, const int8_t *src, int scale) +{ + float fscale = scale / 16.0; + int i; + for (i = 0; i < 8; i++) + dst[i] = src[i] * fscale; +} + static void dca_lfe_fir_c(float *out, const float *in, const float *coefs, int 
decifactor, float scale) { @@ -78,5 +86,6 @@ av_cold void ff_dcadsp_init(DCADSPContext *s) { s->lfe_fir = dca_lfe_fir_c; s->qmf_32_subbands = dca_qmf_32_subbands; + s->int8x8_fmul_int32 = int8x8_fmul_int32_c; if (ARCH_ARM) ff_dcadsp_init_arm(s); } diff --git a/libavcodec/dcadsp.h b/libavcodec/dcadsp.h index d86c1f32a6..3d1a097b36 100644 --- a/libavcodec/dcadsp.h +++ b/libavcodec/dcadsp.h @@ -31,6 +31,7 @@ typedef struct DCADSPContext { int *synth_buf_offset, float synth_buf2[32], const float window[512], float *samples_out, float raXin[32], float scale); + void (*int8x8_fmul_int32)(float *dst, const int8_t *src, int scale); } DCADSPContext; void ff_dcadsp_init(DCADSPContext *s); |