diff options
author | Loren Merritt <lorenm@u.washington.edu> | 2007-09-27 02:42:00 +0000 |
---|---|---|
committer | Loren Merritt <lorenm@u.washington.edu> | 2007-09-27 02:42:00 +0000 |
commit | dc44d4ad64cddb32d7895d1fdc8a18c5fe49f0e8 (patch) | |
tree | 0d720000a984240ba3ef6377d8892bb736865f3e | |
parent | 6b19786b1187e0c194be3b9e76091e6f86a6e821 (diff) | |
download | ffmpeg-dc44d4ad64cddb32d7895d1fdc8a18c5fe49f0e8.tar.gz |
unroll encode_residual_lpc(). speedup varies between 1.2x and 1.8x depending on lpc order.
Originally committed as revision 10596 to svn://svn.ffmpeg.org/ffmpeg/trunk
-rw-r--r-- | libavcodec/flacenc.c | 85 | ||||
-rw-r--r-- | libavcodec/utils.c | 2 |
2 files changed, 84 insertions, 3 deletions
diff --git a/libavcodec/flacenc.c b/libavcodec/flacenc.c
index e2357affeb..f5766e85a4 100644
--- a/libavcodec/flacenc.c
+++ b/libavcodec/flacenc.c
@@ -834,15 +834,83 @@ static void encode_residual_fixed(int32_t *res, const int32_t *smp, int n,
     }
 }
 
+#define LPC1(x) {\
+    int s = smp[i-(x)+1];\
+    p1 += c*s;\
+    c = coefs[(x)-2];\
+    p0 += c*s;\
+}
+
+static av_always_inline void encode_residual_lpc_unrolled(
+    int32_t *res, const int32_t *smp, int n,
+    int order, const int32_t *coefs, int shift, int big)
+{
+    int i;
+    for(i=order; i<n; i+=2) {
+        int c = coefs[order-1];
+        int p0 = c * smp[i-order];
+        int p1 = 0;
+        if(big) {
+            switch(order) {
+                case 32: LPC1(32)
+                case 31: LPC1(31)
+                case 30: LPC1(30)
+                case 29: LPC1(29)
+                case 28: LPC1(28)
+                case 27: LPC1(27)
+                case 26: LPC1(26)
+                case 25: LPC1(25)
+                case 24: LPC1(24)
+                case 23: LPC1(23)
+                case 22: LPC1(22)
+                case 21: LPC1(21)
+                case 20: LPC1(20)
+                case 19: LPC1(19)
+                case 18: LPC1(18)
+                case 17: LPC1(17)
+                case 16: LPC1(16)
+                case 15: LPC1(15)
+                case 14: LPC1(14)
+                case 13: LPC1(13)
+                case 12: LPC1(12)
+                case 11: LPC1(11)
+                case 10: LPC1(10)
+                case  9: LPC1( 9)
+                         LPC1( 8)
+                         LPC1( 7)
+                         LPC1( 6)
+                         LPC1( 5)
+                         LPC1( 4)
+                         LPC1( 3)
+                         LPC1( 2)
+            }
+        } else {
+            switch(order) {
+                case  8: LPC1( 8)
+                case  7: LPC1( 7)
+                case  6: LPC1( 6)
+                case  5: LPC1( 5)
+                case  4: LPC1( 4)
+                case  3: LPC1( 3)
+                case  2: LPC1( 2)
+            }
+        }
+        p1 += c * smp[i];
+        res[i  ] = smp[i  ] - (p0 >> shift);
+        res[i+1] = smp[i+1] - (p1 >> shift);
+    }
+}
+
 static void encode_residual_lpc(int32_t *res, const int32_t *smp, int n,
                                 int order, const int32_t *coefs, int shift)
 {
-    int i, j;
-
+    int i;
     for(i=0; i<order; i++) {
         res[i] = smp[i];
     }
+#ifdef CONFIG_SMALL
     for(i=order; i<n; i+=2) {
+        int j;
         int32_t c = coefs[0];
         int32_t p0 = 0, p1 = c*smp[i];
         for(j=1; j<order; j++) {
@@ -855,6 +923,19 @@ static void encode_residual_lpc(int32_t *res, const int32_t *smp, int n,
         res[i+0] = smp[i+0] - (p0 >> shift);
         res[i+1] = smp[i+1] - (p1 >> shift);
     }
+#else
+    switch(order) {
+        case  1: encode_residual_lpc_unrolled(res, smp, n, 1, coefs, shift, 0); break;
+        case  2: encode_residual_lpc_unrolled(res, smp, n, 2, coefs, shift, 0); break;
+        case  3: encode_residual_lpc_unrolled(res, smp, n, 3, coefs, shift, 0); break;
+        case  4: encode_residual_lpc_unrolled(res, smp, n, 4, coefs, shift, 0); break;
+        case  5: encode_residual_lpc_unrolled(res, smp, n, 5, coefs, shift, 0); break;
+        case  6: encode_residual_lpc_unrolled(res, smp, n, 6, coefs, shift, 0); break;
+        case  7: encode_residual_lpc_unrolled(res, smp, n, 7, coefs, shift, 0); break;
+        case  8: encode_residual_lpc_unrolled(res, smp, n, 8, coefs, shift, 0); break;
+        default: encode_residual_lpc_unrolled(res, smp, n, order, coefs, shift, 1); break;
+    }
+#endif
 }
 
 static int encode_residual(FlacEncodeContext *ctx, int ch)
diff --git a/libavcodec/utils.c b/libavcodec/utils.c
index 3d4c2f2f5f..0a90b4aa73 100644
--- a/libavcodec/utils.c
+++ b/libavcodec/utils.c
@@ -670,7 +670,7 @@ static const AVOption options[]={
 {"context", "context model", OFFSET(context_model), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
 {"slice_flags", NULL, OFFSET(slice_flags), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
 {"xvmc_acceleration", NULL, OFFSET(xvmc_acceleration), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"mbd", "macroblock decision algorithm (high quality mode)", OFFSET(mb_decision), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "mbd"},
+{"mbd", "macroblock decision algorithm (high quality mode)", OFFSET(mb_decision), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|A|E, "mbd"},
 {"simple", "use mbcmp (default)", 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_SIMPLE, INT_MIN, INT_MAX, V|E, "mbd"},
 {"bits", "use fewest bits", 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_BITS, INT_MIN, INT_MAX, V|E, "mbd"},
 {"rd", "use best rate distortion", 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_RD, INT_MIN, INT_MAX, V|E, "mbd"}, |