aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec/dsputil.c
diff options
context:
space:
mode:
authorLoren Merritt <lorenm@u.washington.edu>2009-12-05 15:09:10 +0000
committerLoren Merritt <lorenm@u.washington.edu>2009-12-05 15:09:10 +0000
commitb1159ad92818cd8f0885d252b0800f5960fe7241 (patch)
treea9d4177c61a9a89b4ac78a4a5b8a95f962a858a0 /libavcodec/dsputil.c
parente470691aa8798004bf5589871865a765cb791014 (diff)
downloadffmpeg-b1159ad92818cd8f0885d252b0800f5960fe7241.tar.gz
refactor and optimize scalarproduct
29-105% faster apply_filter, 6-90% faster ape decoding on core2 (Any x86 other than core2 probably gets much less, since this is mostly due to ssse3 cachesplit avoidance and I haven't written the full gamut of other cachesplit modes.) 9-123% faster ape decoding on G4. Originally committed as revision 20739 to svn://svn.ffmpeg.org/ffmpeg/trunk
Diffstat (limited to 'libavcodec/dsputil.c')
-rw-r--r--libavcodec/dsputil.c25
1 files changed, 11 insertions, 14 deletions
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c
index a04b8a400a..ffa8cecd4a 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -4298,18 +4298,6 @@ void ff_float_to_int16_interleave_c(int16_t *dst, const float **src, long len, i
}
}
-static void add_int16_c(int16_t * v1, int16_t * v2, int order)
-{
- while (order--)
- *v1++ += *v2++;
-}
-
-static void sub_int16_c(int16_t * v1, int16_t * v2, int order)
-{
- while (order--)
- *v1++ -= *v2++;
-}
-
static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int shift)
{
int res = 0;
@@ -4320,6 +4308,16 @@ static int32_t scalarproduct_int16_c(int16_t * v1, int16_t * v2, int order, int
return res;
}
+static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
+{
+ int res = 0;
+ while (order--) {
+ res += *v1 * *v2++;
+ *v1++ += mul * *v3++;
+ }
+ return res;
+}
+
#define W0 2048
#define W1 2841 /* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676 /* 2048*sqrt (2)*cos (2*pi/16) */
@@ -4848,9 +4846,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c->vector_clipf = vector_clipf_c;
c->float_to_int16 = ff_float_to_int16_c;
c->float_to_int16_interleave = ff_float_to_int16_interleave_c;
- c->add_int16 = add_int16_c;
- c->sub_int16 = sub_int16_c;
c->scalarproduct_int16 = scalarproduct_int16_c;
+ c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
c->scalarproduct_float = scalarproduct_float_c;
c->butterflies_float = butterflies_float_c;
c->vector_fmul_scalar = vector_fmul_scalar_c;