diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2009-12-05 15:09:10 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2009-12-05 15:09:10 +0000 |
commit | b1159ad92818cd8f0885d252b0800f5960fe7241 (patch) | |
tree | a9d4177c61a9a89b4ac78a4a5b8a95f962a858a0 /libavcodec/dsputil.h | |
parent | e470691aa8798004bf5589871865a765cb791014 (diff) | |
download | ffmpeg-b1159ad92818cd8f0885d252b0800f5960fe7241.tar.gz |
refactor and optimize scalarproduct
29-105% faster apply_filter, 6-90% faster ape decoding on core2
(Any x86 other than core2 probably gets much less, since this is mostly due to ssse3 cachesplit avoidance and I haven't written the full gamut of other cachesplit modes.)
9-123% faster ape decoding on G4.
Originally committed as revision 20739 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/dsputil.h')
-rw-r--r-- | libavcodec/dsputil.h | 18 |
1 files changed, 7 insertions, 11 deletions
```diff
diff --git a/libavcodec/dsputil.h b/libavcodec/dsputil.h
index 4079396ff5..e483276070 100644
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -560,23 +560,19 @@ typedef struct DSPContext {
     void (*x8_setup_spatial_compensation)(uint8_t *src, uint8_t *dst, int linesize,
            int * range, int * sum, int edges);
 
-    /* ape functions */
-    /**
-     * Add contents of the second vector to the first one.
-     * @param len length of vectors, should be multiple of 16
-     */
-    void (*add_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
-    /**
-     * Add contents of the second vector to the first one.
-     * @param len length of vectors, should be multiple of 16
-     */
-    void (*sub_int16)(int16_t *v1/*align 16*/, int16_t *v2, int len);
     /**
      * Calculate scalar product of two vectors.
      * @param len length of vectors, should be multiple of 16
      * @param shift number of bits to discard from product
      */
     int32_t (*scalarproduct_int16)(int16_t *v1, int16_t *v2/*align 16*/, int len, int shift);
+    /* ape functions */
+    /**
+     * Calculate scalar product of v1 and v2,
+     * and v1[i] += v3[i] * mul
+     * @param len length of vectors, should be multiple of 16
+     */
+    int32_t (*scalarproduct_and_madd_int16)(int16_t *v1/*align 16*/, int16_t *v2, int16_t *v3, int len, int mul);
 
     /* rv30 functions */
     qpel_mc_func put_rv30_tpel_pixels_tab[4][16];
```