checkasm: aacpsdsp: Tolerate extra intermediate precision in stereo_interpolate

The stereo_interpolate functions add h_step to the values h BUF_SIZE times. Within the stereo_interpolate C functions, the values h (h0-h3, h00-h13) are declared as local float variables, but the compiler is free to keep them in a register with extra precision. If the accumulation is rounded to 32 bit float precision after each step, the less significant bits of h_step end up ignored and the sum can deviate, affecting the end result more than the currently set EPS. By clearing the log2(BUF_SIZE) lower bits of h_step, we make sure that the accumulation shouldn't differ significantly, regardless of any extra precision in the accmulating register/variable. This fixes the aacpsdsp checkasm test when built with clang for mingw/x86_32. Signed-off-by: Martin Storsjö <martin@martin.st>
author: Martin Storsjö <martin@martin.st> 2019-12-04 13:04:41 +0200
committer: Martin Storsjö <martin@martin.st> 2019-12-18 15:15:29 +0200
commit: aad0e26f9312d380e613e312a3b307609296fe58 (patch)
tree: f19909bd36c30a86c13c44202900d76afff53b0d /tests/checkasm
parent: e10654de2b0db0f3b4828a3cc066d6fa21d02516 (diff)
download: ffmpeg-aad0e26f9312d380e613e312a3b307609296fe58.tar.gz
1 files changed, 18 insertions, 0 deletions
diff --git a/tests/checkasm/aacpsdsp.c b/tests/checkasm/aacpsdsp.c
index ea68b39fa9..2ceef4341f 100644
--- a/tests/checkasm/aacpsdsp.c
+++ b/tests/checkasm/aacpsdsp.c
@@ -17,6 +17,7 @@
  */
 
 #include "libavcodec/aacpsdsp.h"
+#include "libavutil/intfloat.h"
 
 #include "checkasm.h"
 
@@ -34,6 +35,16 @@
 
 #define EPS 0.005
 
+static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
+{
+    int i;
+    for (i = 0; i < len; i++) {
+        union av_intfloat32 u = { .f = buf[i] };
+        u.i &= (0xffffffff << bits);
+        buf[i] = u.f;
+    }
+}
+
 static void test_add_squares(void)
 {
     LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
@@ -198,6 +209,13 @@ static void test_stereo_interpolate(PSDSPContext *psdsp)
 
             randomize((INTFLOAT *)h, 2 * 4);
             randomize((INTFLOAT *)h_step, 2 * 4);
+            // Clear the least significant 14 bits of h_step, to avoid
+            // divergence when accumulating h_step BUF_SIZE times into
+            // a float variable which may or may not have extra intermediate
+            // precision. Therefore clear roughly log2(BUF_SIZE) less
+            // significant bits, to get the same result regardless of any
+            // extra precision in the accumulator.
+            clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);
 
             call_ref(l0, r0, h, h_step, BUF_SIZE);
             call_new(l1, r1, h, h_step, BUF_SIZE);
author	Martin Storsjö <martin@martin.st>	2019-12-04 13:04:41 +0200
committer	Martin Storsjö <martin@martin.st>	2019-12-18 15:15:29 +0200
commit	aad0e26f9312d380e613e312a3b307609296fe58 (patch)
tree	f19909bd36c30a86c13c44202900d76afff53b0d /tests/checkasm
parent	e10654de2b0db0f3b4828a3cc066d6fa21d02516 (diff)
download	ffmpeg-aad0e26f9312d380e613e312a3b307609296fe58.tar.gz