diff options
author | Lynne <dev@lynne.ee> | 2022-10-01 12:21:28 +0200 |
---|---|---|
committer | Lynne <dev@lynne.ee> | 2022-11-24 15:58:33 +0100 |
commit | 6ddd10c3e2d63d1ad1ea1034b0e3862107a27063 (patch) | |
tree | 9136f37ee4edffca309e6813728019351225d6eb /libavutil | |
parent | dd77e61182865e396195a19b1e6ec697717cef56 (diff) | |
download | ffmpeg-6ddd10c3e2d63d1ad1ea1034b0e3862107a27063.tar.gz |
lavu/tx: allow codelets to specify a minimum number of matching factors
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/tx.c | 30 |
-rw-r--r-- | libavutil/tx_priv.h | 11 |
-rw-r--r-- | libavutil/tx_template.c | 18 |
3 files changed, 40 insertions, 19 deletions
diff --git a/libavutil/tx.c b/libavutil/tx.c index 13fb54f916..a1173f6137 100644 --- a/libavutil/tx.c +++ b/libavutil/tx.c @@ -409,42 +409,38 @@ static int cmp_matches(TXCodeletMatch *a, TXCodeletMatch *b) /* We want all factors to completely cover the length */ static inline int check_cd_factors(const FFTXCodelet *cd, int len) { - int all_flag = 0; + int matches = 0, any_flag = 0; - for (int i = 0; i < TX_MAX_SUB; i++) { + for (int i = 0; i < TX_MAX_FACTORS; i++) { int factor = cd->factors[i]; - /* Conditions satisfied */ - if (len == 1) - return 1; - - /* No more factors */ - if (!factor) { - break; - } else if (factor == TX_FACTOR_ANY) { - all_flag = 1; + if (factor == TX_FACTOR_ANY) { + any_flag = 1; + matches++; continue; - } - - if (factor == 2) { /* Fast path */ + } else if (len <= 1 || !factor) { + break; + } else if (factor == 2) { /* Fast path */ int bits_2 = ff_ctz(len); if (!bits_2) - return 0; /* Factor not supported */ + continue; /* Factor not supported */ len >>= bits_2; + matches++; } else { int res = len % factor; if (res) - return 0; /* Factor not supported */ + continue; /* Factor not supported */ while (!res) { len /= factor; res = len % factor; } + matches++; } } - return all_flag || (len == 1); + return (cd->nb_factors <= matches) && (any_flag || len == 1); } av_cold int ff_tx_init_subtx(AVTXContext *s, enum AVTXType type, diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h index d9e38ba19b..80d045f6af 100644 --- a/libavutil/tx_priv.h +++ b/libavutil/tx_priv.h @@ -71,7 +71,8 @@ typedef void TXComplex; .function = TX_FN_NAME(fn, suffix), \ .type = TX_TYPE(tx_type), \ .flags = FF_TX_ALIGNED | FF_TX_OUT_OF_PLACE | cd_flags, \ - .factors = { f1, f2 }, \ + .factors = { (f1), (f2) }, \ + .nb_factors = !!(f1) + !!(f2), \ .min_len = len_min, \ .max_len = len_max, \ .init = init_fn, \ @@ -163,6 +164,9 @@ typedef struct FFTXCodeletOptions { invert the lookup direction for the map generated */ } FFTXCodeletOptions; +/* Maximum number of factors a 
codelet may have. Arbitrary. */ +#define TX_MAX_FACTORS 16 + /* Maximum amount of subtransform functions, subtransforms and factors. Arbitrary. */ #define TX_MAX_SUB 4 @@ -175,13 +179,16 @@ typedef struct FFTXCodelet { uint64_t flags; /* A combination of AVTXFlags and codelet * flags that describe its properties. */ - int factors[TX_MAX_SUB]; /* Length factors */ + int factors[TX_MAX_FACTORS]; /* Length factors. MUST be coprime. */ #define TX_FACTOR_ANY -1 /* When used alone, signals that the codelet * supports all factors. Otherwise, if other * factors are present, it signals that whatever * remains will be supported, as long as the * other factors are a component of the length */ + int nb_factors; /* Minimum number of factors that have to + * be a modulo of the length. Must not be 0. */ + int min_len; /* Minimum length of transform, must be >= 1 */ int max_len; /* Maximum length of transform */ #define TX_LEN_UNLIMITED -1 /* Special length value to permit all lengths */ diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c index 228209521b..c157719d73 100644 --- a/libavutil/tx_template.c +++ b/libavutil/tx_template.c @@ -518,6 +518,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \ .flags = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \ AV_TX_UNALIGNED | FF_TX_PRESHUFFLE, \ .factors[0] = n, \ + .nb_factors = 1, \ .min_len = n, \ .max_len = n, \ .init = TX_NAME(ff_tx_fft_factor_init), \ @@ -534,6 +535,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft##n##_fwd_def) = { \ .flags = AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | \ AV_TX_UNALIGNED | FF_TX_FORWARD_ONLY, \ .factors[0] = n, \ + .nb_factors = 1, \ .min_len = n, \ .max_len = n, \ .init = TX_NAME(ff_tx_fft_factor_init), \ @@ -614,6 +616,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft##n##_ns_def) = { \ .flags = FF_TX_OUT_OF_PLACE | AV_TX_INPLACE | \ AV_TX_UNALIGNED | FF_TX_PRESHUFFLE, \ .factors[0] = 2, \ + .nb_factors = 1, \ .min_len = n, \ .max_len = n, \ .init = TX_NAME(ff_tx_fft_sr_codelet_init), \ 
@@ -814,6 +817,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_def) = { .type = TX_TYPE(FFT), .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE, .factors[0] = TX_FACTOR_ANY, + .nb_factors = 1, .min_len = 2, .max_len = TX_LEN_UNLIMITED, .init = TX_NAME(ff_tx_fft_init), @@ -827,6 +831,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_small_def) = { .type = TX_TYPE(FFT), .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | AV_TX_INPLACE, .factors[0] = TX_FACTOR_ANY, + .nb_factors = 1, .min_len = 2, .max_len = 65536, .init = TX_NAME(ff_tx_fft_inplace_small_init), @@ -840,6 +845,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_inplace_def) = { .type = TX_TYPE(FFT), .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | AV_TX_INPLACE, .factors[0] = TX_FACTOR_ANY, + .nb_factors = 1, .min_len = 2, .max_len = TX_LEN_UNLIMITED, .init = TX_NAME(ff_tx_fft_init), @@ -927,6 +933,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_naive_small_def) = { .type = TX_TYPE(FFT), .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE, .factors[0] = TX_FACTOR_ANY, + .nb_factors = 1, .min_len = 2, .max_len = 1024, .init = TX_NAME(ff_tx_fft_init_naive_small), @@ -940,6 +947,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_naive_def) = { .type = TX_TYPE(FFT), .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE, .factors[0] = TX_FACTOR_ANY, + .nb_factors = 1, .min_len = 2, .max_len = TX_LEN_UNLIMITED, .init = NULL, @@ -1007,6 +1015,7 @@ static const FFTXCodelet TX_NAME(ff_tx_fft_pfa_##N##xM_def) = { \ .type = TX_TYPE(FFT), \ .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE, \ .factors = { N, TX_FACTOR_ANY }, \ + .nb_factors = 2, \ .min_len = N*2, \ .max_len = TX_LEN_UNLIMITED, \ .init = TX_NAME(ff_tx_fft_pfa_init), \ @@ -1089,6 +1098,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_fwd_def) = { .type = TX_TYPE(MDCT), .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, .factors = { 2, TX_FACTOR_ANY }, /* MDCTs need an even length */ + .nb_factors = 2, .min_len = 2, .max_len = 
TX_LEN_UNLIMITED, .init = TX_NAME(ff_tx_mdct_naive_init), @@ -1102,6 +1112,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_naive_inv_def) = { .type = TX_TYPE(MDCT), .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY, .factors = { 2, TX_FACTOR_ANY }, + .nb_factors = 2, .min_len = 2, .max_len = TX_LEN_UNLIMITED, .init = TX_NAME(ff_tx_mdct_naive_init), @@ -1234,6 +1245,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_fwd_def) = { .type = TX_TYPE(MDCT), .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, .factors = { 2, TX_FACTOR_ANY }, + .nb_factors = 2, .min_len = 2, .max_len = TX_LEN_UNLIMITED, .init = TX_NAME(ff_tx_mdct_init), @@ -1247,6 +1259,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_def) = { .type = TX_TYPE(MDCT), .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY, .factors = { 2, TX_FACTOR_ANY }, + .nb_factors = 2, .min_len = 2, .max_len = TX_LEN_UNLIMITED, .init = TX_NAME(ff_tx_mdct_init), @@ -1299,6 +1312,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_inv_full_def) = { .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | AV_TX_FULL_IMDCT, .factors = { 2, TX_FACTOR_ANY }, + .nb_factors = 2, .min_len = 2, .max_len = TX_LEN_UNLIMITED, .init = TX_NAME(ff_tx_mdct_inv_full_init), @@ -1396,6 +1410,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_inv_def) = { \ .type = TX_TYPE(MDCT), \ .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY, \ .factors = { N, TX_FACTOR_ANY }, \ + .nb_factors = 2, \ .min_len = N*2, \ .max_len = TX_LEN_UNLIMITED, \ .init = TX_NAME(ff_tx_mdct_pfa_init), \ @@ -1463,6 +1478,7 @@ static const FFTXCodelet TX_NAME(ff_tx_mdct_pfa_##N##xM_fwd_def) = { \ .type = TX_TYPE(MDCT), \ .flags = AV_TX_UNALIGNED | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, \ .factors = { N, TX_FACTOR_ANY }, \ + .nb_factors = 2, \ .min_len = N*2, \ .max_len = TX_LEN_UNLIMITED, \ .init = TX_NAME(ff_tx_mdct_pfa_init), \ @@ -1583,6 +1599,7 @@ static const FFTXCodelet 
TX_NAME(ff_tx_rdft_r2c_def) = { .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | FF_TX_FORWARD_ONLY, .factors = { 2, TX_FACTOR_ANY }, + .nb_factors = 2, .min_len = 2, .max_len = TX_LEN_UNLIMITED, .init = TX_NAME(ff_tx_rdft_init), @@ -1597,6 +1614,7 @@ static const FFTXCodelet TX_NAME(ff_tx_rdft_c2r_def) = { .flags = AV_TX_UNALIGNED | AV_TX_INPLACE | FF_TX_OUT_OF_PLACE | FF_TX_INVERSE_ONLY, .factors = { 2, TX_FACTOR_ANY }, + .nb_factors = 2, .min_len = 2, .max_len = TX_LEN_UNLIMITED, .init = TX_NAME(ff_tx_rdft_init), |