diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2009-12-05 15:09:10 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2009-12-05 15:09:10 +0000 |
commit | b1159ad92818cd8f0885d252b0800f5960fe7241 (patch) | |
tree | a9d4177c61a9a89b4ac78a4a5b8a95f962a858a0 /libavcodec/apedec.c | |
parent | e470691aa8798004bf5589871865a765cb791014 (diff) | |
download | ffmpeg-b1159ad92818cd8f0885d252b0800f5960fe7241.tar.gz |
refactor and optimize scalarproduct
29-105% faster apply_filter, 6-90% faster ape decoding on core2
(Any x86 other than core2 probably gets much less, since this is mostly due to ssse3 cachesplit avoidance and I haven't written the full gamut of other cachesplit modes.)
9-123% faster ape decoding on G4.
Originally committed as revision 20739 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/apedec.c')
-rw-r--r-- | libavcodec/apedec.c | 12 |
1 files changed, 3 insertions, 9 deletions
```diff
diff --git a/libavcodec/apedec.c b/libavcodec/apedec.c
index b8d1e9e254..c27d0863ef 100644
--- a/libavcodec/apedec.c
+++ b/libavcodec/apedec.c
@@ -648,22 +648,16 @@ static void init_filter(APEContext * ctx, APEFilter *f, int16_t * buf, int order
     do_init_filter(&f[1], buf + order * 3 + HISTORY_SIZE, order);
 }
 
-static inline void do_apply_filter(APEContext * ctx, int version, APEFilter *f, int32_t *data, int count, int order, int fracbits)
+static void do_apply_filter(APEContext * ctx, int version, APEFilter *f, int32_t *data, int count, int order, int fracbits)
 {
     int res;
     int absres;
 
     while (count--) {
         /* round fixedpoint scalar product */
-        res = (ctx->dsp.scalarproduct_int16(f->delay - order, f->coeffs, order, 0) + (1 << (fracbits - 1))) >> fracbits;
-
-        if (*data < 0)
-            ctx->dsp.add_int16(f->coeffs, f->adaptcoeffs - order, order);
-        else if (*data > 0)
-            ctx->dsp.sub_int16(f->coeffs, f->adaptcoeffs - order, order);
-
+        res = ctx->dsp.scalarproduct_and_madd_int16(f->coeffs, f->delay - order, f->adaptcoeffs - order, order, APESIGN(*data));
+        res = (res + (1 << (fracbits - 1))) >> fracbits;
         res += *data;
-
         *data++ = res;
 
         /* Update the output history */
```