aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/apedec.c
diff options
context:
space:
mode:
authorLoren Merritt <lorenm@u.washington.edu>2009-12-05 15:09:10 +0000
committerLoren Merritt <lorenm@u.washington.edu>2009-12-05 15:09:10 +0000
commitb1159ad92818cd8f0885d252b0800f5960fe7241 (patch)
treea9d4177c61a9a89b4ac78a4a5b8a95f962a858a0 /libavcodec/apedec.c
parente470691aa8798004bf5589871865a765cb791014 (diff)
downloadffmpeg-b1159ad92818cd8f0885d252b0800f5960fe7241.tar.gz
refactor and optimize scalarproduct
29-105% faster apply_filter, 6-90% faster ape decoding on core2 (Any x86 other than core2 probably gets much less, since this is mostly due to ssse3 cachesplit avoidance and I haven't written the full gamut of other cachesplit modes.) 9-123% faster ape decoding on G4. Originally committed as revision 20739 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/apedec.c')
-rw-r--r--libavcodec/apedec.c12
1 files changed, 3 insertions, 9 deletions
diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c
index b8d1e9e254..c27d0863ef 100644
--- a/libavcodec/apedec.c
+++ b/libavcodec/apedec.c
@@ -648,22 +648,16 @@ static void init_filter(APEContext * ctx, APEFilter *f, int16_t * buf, int order
do_init_filter(&f[1], buf + order * 3 + HISTORY_SIZE, order);
}
-static inline void do_apply_filter(APEContext * ctx, int version, APEFilter *f, int32_t *data, int count, int order, int fracbits)
+static void do_apply_filter(APEContext * ctx, int version, APEFilter *f, int32_t *data, int count, int order, int fracbits)
{
int res;
int absres;
while (count--) {
/* round fixedpoint scalar product */
- res = (ctx->dsp.scalarproduct_int16(f->delay - order, f->coeffs, order, 0) + (1 << (fracbits - 1))) >> fracbits;
-
- if (*data < 0)
- ctx->dsp.add_int16(f->coeffs, f->adaptcoeffs - order, order);
- else if (*data > 0)
- ctx->dsp.sub_int16(f->coeffs, f->adaptcoeffs - order, order);
-
+ res = ctx->dsp.scalarproduct_and_madd_int16(f->coeffs, f->delay - order, f->adaptcoeffs - order, order, APESIGN(*data));
+ res = (res + (1 << (fracbits - 1))) >> fracbits;
res += *data;
-
*data++ = res;
/* Update the output history */