avcodec/simple_idct10: improve precision

omse goes from 0.03060703 (which fails dct-test) to 0.01663750. This also actually improves the error when decoding the sample generated by fate-vsynth3-dnxhd1080i-10bit using simple_idct10 compared to FAANI, which goes (when resampled to yuv422p) from: stddev: 0.06 PSNR: 72.28 MAXDIFF: 1 to identical. Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
author: Christophe Gisquet <christophe.gisquet@gmail.com> 2015-10-12 19:37:45 +0200
committer: Michael Niedermayer <michael@niedermayer.cc> 2015-10-13 02:10:51 +0200
commit: 2fd14dd8eb66dc5dd14254d0b758fb80d44b3140 (patch)
tree: 34a086df2390e67180c11276193f258ae4b540e3 /libavcodec
parent: e9a68b0316ab127098ac4c24a6762ce68980bd23 (diff)
download: ffmpeg-2fd14dd8eb66dc5dd14254d0b758fb80d44b3140.tar.gz
2 files changed, 38 insertions, 16 deletions
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c
index eeb627999c..4d6d20df5a 100644
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -36,6 +36,11 @@
 
 #define BIT_DEPTH 10
 #include "simple_idct_template.c"
+
+#define EXTRA_SHIFT  2
+#include "simple_idct_template.c"
+
+#undef EXTRA_SHIFT
 #undef BIT_DEPTH
 
 #define BIT_DEPTH 12
@@ -230,10 +235,10 @@ void ff_prores_idct(int16_t *block, const int16_t *qmat)
         block[i] *= qmat[i];
 
     for (i = 0; i < 8; i++)
-        idctRowCondDC_10(block + i*8, 2);
+        idctRowCondDC_extrashift_10(block + i*8, 2);
 
     for (i = 0; i < 8; i++) {
         block[i] += 8192;
-        idctSparseCol_10(block + i);
+        idctSparseCol_extrashift_10(block + i);
     }
 }
diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c
index 789db8d0ac..0585679b6d 100644
--- a/libavcodec/simple_idct_template.c
+++ b/libavcodec/simple_idct_template.c
@@ -66,19 +66,26 @@
 
 #elif BIT_DEPTH == 10 || BIT_DEPTH == 12
 
-#if BIT_DEPTH == 10
-#define W1 (22725*4)  // 90901
-#define W2 (21407*4) //  85627
-#define W3 (19265*4) //  77062
-#define W4 (16384*4) //  65535
-#define W5 (12873*4) //  51491
-#define W6 ( 8867*4) //  35468
-#define W7 ( 4520*4) //  18081
-
-#define ROW_SHIFT 15
-#define COL_SHIFT 20
-#define DC_SHIFT 1
-#else
+# if BIT_DEPTH == 10
+#define W1 22725 // 90901
+#define W2 21407 //  85627
+#define W3 19265 //  77062
+#define W4 16384 //  65535
+#define W5 12873 //  51491
+#define W6  8867 //  35468
+#define W7  4520 //  18081
+
+#   ifdef EXTRA_SHIFT
+#define ROW_SHIFT 13
+#define COL_SHIFT 18
+#define DC_SHIFT  1
+#   else
+#define ROW_SHIFT 12
+#define COL_SHIFT 19
+#define DC_SHIFT  2
+#   endif
+
+# else
 #define W1 45451
 #define W2 42813
 #define W3 38531
@@ -90,7 +97,7 @@
 #define ROW_SHIFT 16
 #define COL_SHIFT 17
 #define DC_SHIFT -1
-#endif
+# endif
 
 #define MUL(a, b)    ((a) * (b))
 #define MAC(a, b, c) ((a) += (b) * (c))
@@ -101,7 +108,11 @@
 
 #endif
 
+#ifdef EXTRA_SHIFT
+static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift)
+#else
 static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
+#endif
 {
     int a0, a1, a2, a3, b0, b1, b2, b3;
 
@@ -236,6 +247,9 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
         }                                               \
     } while (0)
 
+#ifdef EXTRA_SHIFT
+static inline void FUNC(idctSparseCol_extrashift)(int16_t *col)
+#else
 static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
                                           int16_t *col)
 {
@@ -285,6 +299,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
 }
 
 static inline void FUNC(idctSparseCol)(int16_t *col)
+#endif
 {
     int a0, a1, a2, a3, b0, b1, b2, b3;
 
@@ -300,6 +315,7 @@ static inline void FUNC(idctSparseCol)(int16_t *col)
     col[56] = ((a0 - b0) >> COL_SHIFT);
 }
 
+#ifndef EXTRA_SHIFT
 void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block)
 {
     pixel *dest = (pixel *)dest_;
@@ -338,3 +354,4 @@ void FUNC(ff_simple_idct)(int16_t *block)
     for (i = 0; i < 8; i++)
         FUNC(idctSparseCol)(block + i);
 }
+#endif
author	Christophe Gisquet <christophe.gisquet@gmail.com>	2015-10-12 19:37:45 +0200
committer	Michael Niedermayer <michael@niedermayer.cc>	2015-10-13 02:10:51 +0200
commit	2fd14dd8eb66dc5dd14254d0b758fb80d44b3140 (patch)
tree	34a086df2390e67180c11276193f258ae4b540e3 /libavcodec
parent	e9a68b0316ab127098ac4c24a6762ce68980bd23 (diff)
download	ffmpeg-2fd14dd8eb66dc5dd14254d0b758fb80d44b3140.tar.gz