aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Loren Merritt <lorenm@u.washington.edu> 2007-09-27 02:42:00 +0000
committer Loren Merritt <lorenm@u.washington.edu> 2007-09-27 02:42:00 +0000
commit dc44d4ad64cddb32d7895d1fdc8a18c5fe49f0e8 (patch)
tree 0d720000a984240ba3ef6377d8892bb736865f3e
parent 6b19786b1187e0c194be3b9e76091e6f86a6e821 (diff)
download ffmpeg-dc44d4ad64cddb32d7895d1fdc8a18c5fe49f0e8.tar.gz
Unroll encode_residual_lpc(). The speedup varies between 1.2x and 1.8x depending on the LPC order.
Originally committed as revision 10596 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- libavcodec/flacenc.c 85
-rw-r--r-- libavcodec/utils.c 2
2 files changed, 84 insertions, 3 deletions
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index e2357affeb..f5766e85a4 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -834,15 +834,83 @@ static void encode_residual_fixed(int32_t *res, const int32_t *smp, int n,
}
}
+#define LPC1(x) {\
+ int s = smp[i-(x)+1];\
+ p1 += c*s;\
+ c = coefs[(x)-2];\
+ p0 += c*s;\
+}
+
+static av_always_inline void encode_residual_lpc_unrolled(
+ int32_t *res, const int32_t *smp, int n,
+ int order, const int32_t *coefs, int shift, int big)
+{
+ int i;
+ for(i=order; i<n; i+=2) {
+ int c = coefs[order-1];
+ int p0 = c * smp[i-order];
+ int p1 = 0;
+ if(big) {
+ switch(order) {
+ case 32: LPC1(32)
+ case 31: LPC1(31)
+ case 30: LPC1(30)
+ case 29: LPC1(29)
+ case 28: LPC1(28)
+ case 27: LPC1(27)
+ case 26: LPC1(26)
+ case 25: LPC1(25)
+ case 24: LPC1(24)
+ case 23: LPC1(23)
+ case 22: LPC1(22)
+ case 21: LPC1(21)
+ case 20: LPC1(20)
+ case 19: LPC1(19)
+ case 18: LPC1(18)
+ case 17: LPC1(17)
+ case 16: LPC1(16)
+ case 15: LPC1(15)
+ case 14: LPC1(14)
+ case 13: LPC1(13)
+ case 12: LPC1(12)
+ case 11: LPC1(11)
+ case 10: LPC1(10)
+ case 9: LPC1( 9)
+ LPC1( 8)
+ LPC1( 7)
+ LPC1( 6)
+ LPC1( 5)
+ LPC1( 4)
+ LPC1( 3)
+ LPC1( 2)
+ }
+ } else {
+ switch(order) {
+ case 8: LPC1( 8)
+ case 7: LPC1( 7)
+ case 6: LPC1( 6)
+ case 5: LPC1( 5)
+ case 4: LPC1( 4)
+ case 3: LPC1( 3)
+ case 2: LPC1( 2)
+ }
+ }
+ p1 += c * smp[i];
+ res[i ] = smp[i ] - (p0 >> shift);
+ res[i+1] = smp[i+1] - (p1 >> shift);
+ }
+}
+
static void encode_residual_lpc(int32_t *res, const int32_t *smp, int n,
int order, const int32_t *coefs, int shift)
{
- int i, j;
-
+ int i;
for(i=0; i<order; i++) {
res[i] = smp[i];
}
+#ifdef CONFIG_SMALL
for(i=order; i<n; i+=2) {
+ int j;
int32_t c = coefs[0];
int32_t p0 = 0, p1 = c*smp[i];
for(j=1; j<order; j++) {
@@ -855,6 +923,19 @@ static void encode_residual_lpc(int32_t *res, const int32_t *smp, int n,
res[i+0] = smp[i+0] - (p0 >> shift);
res[i+1] = smp[i+1] - (p1 >> shift);
}
+#else
+ switch(order) {
+ case 1: encode_residual_lpc_unrolled(res, smp, n, 1, coefs, shift, 0); break;
+ case 2: encode_residual_lpc_unrolled(res, smp, n, 2, coefs, shift, 0); break;
+ case 3: encode_residual_lpc_unrolled(res, smp, n, 3, coefs, shift, 0); break;
+ case 4: encode_residual_lpc_unrolled(res, smp, n, 4, coefs, shift, 0); break;
+ case 5: encode_residual_lpc_unrolled(res, smp, n, 5, coefs, shift, 0); break;
+ case 6: encode_residual_lpc_unrolled(res, smp, n, 6, coefs, shift, 0); break;
+ case 7: encode_residual_lpc_unrolled(res, smp, n, 7, coefs, shift, 0); break;
+ case 8: encode_residual_lpc_unrolled(res, smp, n, 8, coefs, shift, 0); break;
+ default: encode_residual_lpc_unrolled(res, smp, n, order, coefs, shift, 1); break;
+ }
+#endif
}
static int encode_residual(FlacEncodeContext *ctx, int ch)
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 3d4c2f2f5f..0a90b4aa73 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -670,7 +670,7 @@ static const AVOption options[]={
{"context", "context model", OFFSET(context_model), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
{"slice_flags", NULL, OFFSET(slice_flags), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
{"xvmc_acceleration", NULL, OFFSET(xvmc_acceleration), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"mbd", "macroblock decision algorithm (high quality mode)", OFFSET(mb_decision), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "mbd"},
+{"mbd", "macroblock decision algorithm (high quality mode)", OFFSET(mb_decision), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|A|E, "mbd"},
{"simple", "use mbcmp (default)", 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_SIMPLE, INT_MIN, INT_MAX, V|E, "mbd"},
{"bits", "use fewest bits", 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_BITS, INT_MIN, INT_MAX, V|E, "mbd"},
{"rd", "use best rate distortion", 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_RD, INT_MIN, INT_MAX, V|E, "mbd"},