diff options
author | Lynne <dev@lynne.ee> | 2021-04-10 03:53:38 +0200 |
---|---|---|
committer | Lynne <dev@lynne.ee> | 2021-04-24 17:19:17 +0200 |
commit | 1978b143ebdffb885fbfed1f9c0c40c7ba36b3de (patch) | |
tree | f109899732987e76ef3ab6acec57d00e35efde37 /libavutil | |
parent | ff71671d88ef23e1a539e1a99eadd766c27f3ed3 (diff) | |
download | ffmpeg-1978b143ebdffb885fbfed1f9c0c40c7ba36b3de.tar.gz |
checkasm: add av_tx FFT SIMD testing code
This unfortunately required changes to the transform code itself,
because checkasm reuses the same context for both the C and the SIMD versions.
The reverse lookup table (revtab) therefore had to be duplicated, so that the C version keeps its own copy (revtab_c).
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/tx.c | 15 | ||||
-rw-r--r-- | libavutil/tx_priv.h | 5 | ||||
-rw-r--r-- | libavutil/tx_template.c | 18 |
3 files changed, 22 insertions, 16 deletions
diff --git a/libavutil/tx.c b/libavutil/tx.c index 6d0e854084..dcfb257899 100644 --- a/libavutil/tx.c +++ b/libavutil/tx.c @@ -106,22 +106,24 @@ int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup) { const int m = s->m, inv = s->inv; - if (!(s->revtab = av_malloc(m*sizeof(*s->revtab)))) + if (!(s->revtab = av_malloc(s->m*sizeof(*s->revtab)))) + return AVERROR(ENOMEM); + if (!(s->revtab_c = av_malloc(m*sizeof(*s->revtab_c)))) return AVERROR(ENOMEM); /* Default */ for (int i = 0; i < m; i++) { int k = -split_radix_permutation(i, m, inv) & (m - 1); if (invert_lookup) - s->revtab[i] = k; + s->revtab[i] = s->revtab_c[i] = k; else - s->revtab[k] = i; + s->revtab[i] = s->revtab_c[k] = i; } return 0; } -int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s) +int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s, int *revtab) { int nb_inplace_idx = 0; @@ -130,7 +132,7 @@ int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s) /* The first coefficient is always already in-place */ for (int src = 1; src < s->m; src++) { - int dst = s->revtab[src]; + int dst = revtab[src]; int found = 0; if (dst <= src) @@ -146,7 +148,7 @@ int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s) break; } } - dst = s->revtab[dst]; + dst = revtab[dst]; } while (dst != src && !found); if (!found) @@ -215,6 +217,7 @@ av_cold void av_tx_uninit(AVTXContext **ctx) av_free((*ctx)->pfatab); av_free((*ctx)->exptab); av_free((*ctx)->revtab); + av_free((*ctx)->revtab_c); av_free((*ctx)->inplace_idx); av_free((*ctx)->tmp); diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h index b889f6d3b4..88589fcbb4 100644 --- a/libavutil/tx_priv.h +++ b/libavutil/tx_priv.h @@ -122,6 +122,9 @@ struct AVTXContext { int *revtab; /* Input mapping for power of two transforms */ int *inplace_idx; /* Required indices to revtab for in-place transforms */ + int *revtab_c; /* Revtab for only the C transforms, needed because + * checkasm makes us reuse the same context. 
*/ + av_tx_fn top_tx; /* Used for computing transforms derived from other * transforms, like full-length iMDCTs and RDFTs. * NOTE: Do NOT use this to mix assembly with C code. */ @@ -147,7 +150,7 @@ int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup); * specific order, allows the revtab to be done in-place. AVTXContext->revtab * must already exist. */ -int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s); +int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s, int *revtab); /* * This generates a parity-based revtab of length len and direction inv. diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c index a68a84dcd5..cad66a8bc0 100644 --- a/libavutil/tx_template.c +++ b/libavutil/tx_template.c @@ -593,7 +593,7 @@ static void compound_fft_##N##xM(AVTXContext *s, void *_out, \ for (int i = 0; i < m; i++) { \ for (int j = 0; j < N; j++) \ fft##N##in[j] = in[in_map[i*N + j]]; \ - fft##N(s->tmp + s->revtab[i], fft##N##in, m); \ + fft##N(s->tmp + s->revtab_c[i], fft##N##in, m); \ } \ \ for (int i = 0; i < N; i++) \ @@ -624,16 +624,16 @@ static void split_radix_fft(AVTXContext *s, void *_out, void *_in, do { tmp = out[src]; - dst = s->revtab[src]; + dst = s->revtab_c[src]; do { FFSWAP(FFTComplex, tmp, out[dst]); - dst = s->revtab[dst]; + dst = s->revtab_c[dst]; } while (dst != src); /* Can be > as well, but is less predictable */ out[dst] = tmp; } while ((src = *inplace_idx++)); } else { for (int i = 0; i < m; i++) - out[i] = in[s->revtab[i]]; + out[i] = in[s->revtab_c[i]]; } fft_dispatch[mb](out); @@ -685,7 +685,7 @@ static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \ FFTComplex tmp = { in2[-k*stride], in1[k*stride] }; \ CMUL3(fft##N##in[j], tmp, exp[k >> 1]); \ } \ - fft##N(s->tmp + s->revtab[i], fft##N##in, m); \ + fft##N(s->tmp + s->revtab_c[i], fft##N##in, m); \ } \ \ for (int i = 0; i < N; i++) \ @@ -733,7 +733,7 @@ static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \ CMUL(fft##N##in[j].im, 
fft##N##in[j].re, tmp.re, tmp.im, \ exp[k >> 1].re, exp[k >> 1].im); \ } \ - fft##N(s->tmp + s->revtab[i], fft##N##in, m); \ + fft##N(s->tmp + s->revtab_c[i], fft##N##in, m); \ } \ \ for (int i = 0; i < N; i++) \ @@ -772,7 +772,7 @@ static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src, for (int i = 0; i < m; i++) { FFTComplex tmp = { in2[-2*i*stride], in1[2*i*stride] }; - CMUL3(z[s->revtab[i]], tmp, exp[i]); + CMUL3(z[s->revtab_c[i]], tmp, exp[i]); } fftp(z); @@ -806,7 +806,7 @@ static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src, tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]); tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]); } - CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im, + CMUL(z[s->revtab_c[i]].im, z[s->revtab_c[i]].re, tmp.re, tmp.im, exp[i].re, exp[i].im); } @@ -1005,7 +1005,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx, if (flags & AV_TX_INPLACE) { if (is_mdct) /* In-place MDCTs are not supported yet */ return AVERROR(ENOSYS); - if ((err = ff_tx_gen_ptwo_inplace_revtab_idx(s))) + if ((err = ff_tx_gen_ptwo_inplace_revtab_idx(s, s->revtab_c))) return err; } for (int i = 4; i <= av_log2(m); i++) |