about | summary | refs | log | tree | commit | diff | stats
path: root/libavutil/tx.c
diff options
context:
space:
mode:
author Lynne <dev@lynne.ee> 2022-08-16 01:11:40 +0200
committer Lynne <dev@lynne.ee> 2022-08-16 01:22:38 +0200
commit ae66a9db7bc19f00daaad96b3c15cbffe6280a93 (patch)
tree 1c044b8f6d388b624308d8f1be559dfc9ded6708 /libavutil/tx.c
parent 412922cc6fa790897ef6bb2be5d6f9a5f030754d (diff)
download ffmpeg-ae66a9db7bc19f00daaad96b3c15cbffe6280a93.tar.gz
lavu/tx: optimize and simplify inverse MDCTs
Convert the input from a scatter to a gather instead, which is faster and better for SIMD. Also, add a pre-shuffled exptab version to avoid gathering there at all. This doubles the exptab size, but the speedup makes it worth it. In SIMD, the exptab will likely be purged to a higher cache anyway because of the FFT in the middle, and the amount of loads stays identical. For a 960-point inverse MDCT, the speedup is 10%. This makes it possible to write sane and fast SIMD versions of inverse MDCTs.
Diffstat (limited to 'libavutil/tx.c')
-rw-r--r-- libavutil/tx.c 4
1 file changed, 1 insertion, 3 deletions
diff --git a/libavutil/tx.c b/libavutil/tx.c
index 4cc3a98751..e6fcf9f451 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -44,7 +44,6 @@ int ff_tx_gen_compound_mapping(AVTXContext *s, int n, int m)
int *in_map, *out_map;
const int inv = s->inv;
const int len = n*m; /* Will not be equal to s->len for MDCTs */
- const int mdct = TYPE_IS(MDCT, s->type);
int m_inv, n_inv;
/* Make sure the numbers are coprime */
@@ -63,8 +62,7 @@ int ff_tx_gen_compound_mapping(AVTXContext *s, int n, int m)
/* Ruritanian map for input, CRT map for output, can be swapped */
for (int j = 0; j < m; j++) {
for (int i = 0; i < n; i++) {
- /* Shifted by 1 to simplify MDCTs */
- in_map[j*n + i] = ((i*m + j*n) % len) << mdct;
+ in_map[j*n + i] = (i*m + j*n) % len;
out_map[(i*m*m_inv + j*n*n_inv) % len] = i*m + j;
}
}