x86/tx_float: add 15xN PFA FFT AVX SIMD

~4x faster than the C version. The shuffles in the 15pt dim1 are seriously expensive. Not happy with it, but I'm content. Can be easily converted to pure AVX by removing all vpermpd/vpermps instructions.
author: Lynne <dev@lynne.ee> 2022-09-19 05:53:01 +0200
committer: Lynne <dev@lynne.ee> 2022-09-23 12:35:27 +0200
commit: ace42cf581f8c06872bfb58cf575d9e8bd398c0a (patch)
tree: 217d6653d5664d47f95c327fdb09d63e01dffcb3 /tests
parent: 3241e9225c7adfb2d8d24cfd05a7a8db8ddbd023 (diff)
download: ffmpeg-ace42cf581f8c06872bfb58cf575d9e8bd398c0a.tar.gz
1 file changed, 2 insertions, 2 deletions
diff --git a/tests/checkasm/av_tx.c b/tests/checkasm/av_tx.c
index 1fa6da45ac..aa8fc6b4e9 100644
--- a/tests/checkasm/av_tx.c
+++ b/tests/checkasm/av_tx.c
@@ -24,7 +24,7 @@
 
 #include <stdlib.h>
 
-#define EPS 0.00005
+#define EPS 0.0005
 
 #define SCALE_NOOP(x) (x)
 #define SCALE_INT20(x) (av_clip64(lrintf((x) * 2147483648.0), INT32_MIN, INT32_MAX) >> 12)
@@ -40,7 +40,7 @@
     } while (0)
 
 static const int check_lens[] = {
-    2, 4, 8, 16, 32, 64, 1024, 16384,
+    2, 4, 8, 16, 32, 64, 120, 960, 1024, 1920, 16384,
 };
 
 static AVTXContext *tx_refs[AV_TX_NB][2 /* Direction */][FF_ARRAY_ELEMS(check_lens)] = { 0 };
author	Lynne <dev@lynne.ee>	2022-09-19 05:53:01 +0200
committer	Lynne <dev@lynne.ee>	2022-09-23 12:35:27 +0200
commit	ace42cf581f8c06872bfb58cf575d9e8bd398c0a (patch)
tree	217d6653d5664d47f95c327fdb09d63e01dffcb3 /tests
parent	3241e9225c7adfb2d8d24cfd05a7a8db8ddbd023 (diff)
download	ffmpeg-ace42cf581f8c06872bfb58cf575d9e8bd398c0a.tar.gz