aboutsummaryrefslogtreecommitdiffstats
path: root/libavcodec
diff options
context:
space:
mode:
authorRémi Denis-Courmont <remi@remlab.net>2022-09-20 20:42:13 +0300
committerJames Almer <jamrial@gmail.com>2022-09-22 13:27:43 -0300
commit08edacc248bce3f8946d75e97188d189c74a6de6 (patch)
treeaa401517247543a9ad6f31117e393ffd568dc0d0 /libavcodec
parentdb73ae0dc114aa6fae08e69f977944f056a24995 (diff)
downloadffmpeg-08edacc248bce3f8946d75e97188d189c74a6de6.tar.gz
lavc/aacpsdsp: precompute constant factors
The input complex factors are constant across iterations of the outer loop. This substitutes 4 loads, 2 additions and 2 subtractions per iteration of the inner loop with another 4 loads. Thus effectively 4 arithmetic operations per iteration of the inner loop are avoided, i.e. 24 operations per iteration of the outer loop, or 24 * (n - 1) operations in total. If the inner loop is not unrolled by the compiler, this might also save some pointer arithmetic, as most instruction sets do not have addressing modes with negated register offsets (12 - j). Unless the compiler is optimising for code size, this is unlikely though.
Diffstat (limited to 'libavcodec')
-rw-r--r--libavcodec/aacpsdsp_template.c25
1 files changed, 14 insertions, 11 deletions
diff --git a/libavcodec/aacpsdsp_template.c b/libavcodec/aacpsdsp_template.c
index e644037587..e3cbf3feec 100644
--- a/libavcodec/aacpsdsp_template.c
+++ b/libavcodec/aacpsdsp_template.c
@@ -47,21 +47,24 @@ static void ps_hybrid_analysis_c(INTFLOAT (*out)[2], INTFLOAT (*in)[2],
const INTFLOAT (*filter)[8][2],
ptrdiff_t stride, int n)
{
- int i, j;
+ INT64FLOAT inre0[6], inre1[6], inim0[6], inim1[6];
- for (i = 0; i < n; i++) {
+ for (int j = 0; j < 6; j++) {
+ inre0[j] = in[j][0] + in[12 - j][0];
+ inre1[j] = in[j][1] - in[12 - j][1];
+ inim0[j] = in[j][1] + in[12 - j][1];
+ inim1[j] = in[j][0] - in[12 - j][0];
+ }
+
+ for (int i = 0; i < n; i++) {
INT64FLOAT sum_re = (INT64FLOAT)filter[i][6][0] * in[6][0];
INT64FLOAT sum_im = (INT64FLOAT)filter[i][6][0] * in[6][1];
- for (j = 0; j < 6; j++) {
- INT64FLOAT in0_re = in[j][0];
- INT64FLOAT in0_im = in[j][1];
- INT64FLOAT in1_re = in[12-j][0];
- INT64FLOAT in1_im = in[12-j][1];
- sum_re += (INT64FLOAT)filter[i][j][0] * (in0_re + in1_re) -
- (INT64FLOAT)filter[i][j][1] * (in0_im - in1_im);
- sum_im += (INT64FLOAT)filter[i][j][0] * (in0_im + in1_im) +
- (INT64FLOAT)filter[i][j][1] * (in0_re - in1_re);
+ for (int j = 0; j < 6; j++) {
+ sum_re += (INT64FLOAT)filter[i][j][0] * inre0[j] -
+ (INT64FLOAT)filter[i][j][1] * inre1[j];
+ sum_im += (INT64FLOAT)filter[i][j][0] * inim0[j] +
+ (INT64FLOAT)filter[i][j][1] * inim1[j];
}
#if USE_FIXED
out[i * stride][0] = (int)((sum_re + 0x40000000) >> 31);