diff options
author | Christophe Gisquet <christophe.gisquet@gmail.com> | 2015-10-12 19:37:45 +0200 |
---|---|---|
committer | Michael Niedermayer <michael@niedermayer.cc> | 2015-10-13 02:10:51 +0200 |
commit | 2fd14dd8eb66dc5dd14254d0b758fb80d44b3140 (patch) | |
tree | 34a086df2390e67180c11276193f258ae4b540e3 /libavcodec | |
parent | e9a68b0316ab127098ac4c24a6762ce68980bd23 (diff) | |
download | ffmpeg-2fd14dd8eb66dc5dd14254d0b758fb80d44b3140.tar.gz |
avcodec/simple_idct10: improve precision
omse goes from 0.03060703 (which fails for dct-test) to 0.01663750.
This also actually improves the error of decoding the sample generated
by fate-vsynth3-dnxhd1080i-10bit using simple_idct10 to FAANI, which
goes (when resampled to yuv422p) from:
stddev: 0.06 PSNR: 72.28 MAXDIFF: 1
to identical.
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
Diffstat (limited to 'libavcodec')
-rw-r--r-- | libavcodec/simple_idct.c | 9 | ||||
-rw-r--r-- | libavcodec/simple_idct_template.c | 45 |
2 files changed, 38 insertions, 16 deletions
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c index eeb627999c..4d6d20df5a 100644 --- a/libavcodec/simple_idct.c +++ b/libavcodec/simple_idct.c @@ -36,6 +36,11 @@ #define BIT_DEPTH 10 #include "simple_idct_template.c" + +#define EXTRA_SHIFT 2 +#include "simple_idct_template.c" + +#undef EXTRA_SHIFT #undef BIT_DEPTH #define BIT_DEPTH 12 @@ -230,10 +235,10 @@ void ff_prores_idct(int16_t *block, const int16_t *qmat) block[i] *= qmat[i]; for (i = 0; i < 8; i++) - idctRowCondDC_10(block + i*8, 2); + idctRowCondDC_extrashift_10(block + i*8, 2); for (i = 0; i < 8; i++) { block[i] += 8192; - idctSparseCol_10(block + i); + idctSparseCol_extrashift_10(block + i); } } diff --git a/libavcodec/simple_idct_template.c b/libavcodec/simple_idct_template.c index 789db8d0ac..0585679b6d 100644 --- a/libavcodec/simple_idct_template.c +++ b/libavcodec/simple_idct_template.c @@ -66,19 +66,26 @@ #elif BIT_DEPTH == 10 || BIT_DEPTH == 12 -#if BIT_DEPTH == 10 -#define W1 (22725*4) // 90901 -#define W2 (21407*4) // 85627 -#define W3 (19265*4) // 77062 -#define W4 (16384*4) // 65535 -#define W5 (12873*4) // 51491 -#define W6 ( 8867*4) // 35468 -#define W7 ( 4520*4) // 18081 - -#define ROW_SHIFT 15 -#define COL_SHIFT 20 -#define DC_SHIFT 1 -#else +# if BIT_DEPTH == 10 +#define W1 22725 // 90901 +#define W2 21407 // 85627 +#define W3 19265 // 77062 +#define W4 16384 // 65535 +#define W5 12873 // 51491 +#define W6 8867 // 35468 +#define W7 4520 // 18081 + +# ifdef EXTRA_SHIFT +#define ROW_SHIFT 13 +#define COL_SHIFT 18 +#define DC_SHIFT 1 +# else +#define ROW_SHIFT 12 +#define COL_SHIFT 19 +#define DC_SHIFT 2 +# endif + +# else #define W1 45451 #define W2 42813 #define W3 38531 @@ -90,7 +97,7 @@ #define ROW_SHIFT 16 #define COL_SHIFT 17 #define DC_SHIFT -1 -#endif +# endif #define MUL(a, b) ((a) * (b)) #define MAC(a, b, c) ((a) += (b) * (c)) @@ -101,7 +108,11 @@ #endif +#ifdef EXTRA_SHIFT +static inline void FUNC(idctRowCondDC_extrashift)(int16_t *row, int extra_shift) +#else 
static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) +#endif { int a0, a1, a2, a3, b0, b1, b2, b3; @@ -236,6 +247,9 @@ static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) } \ } while (0) +#ifdef EXTRA_SHIFT +static inline void FUNC(idctSparseCol_extrashift)(int16_t *col) +#else static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, int16_t *col) { @@ -285,6 +299,7 @@ static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, } static inline void FUNC(idctSparseCol)(int16_t *col) +#endif { int a0, a1, a2, a3, b0, b1, b2, b3; @@ -300,6 +315,7 @@ static inline void FUNC(idctSparseCol)(int16_t *col) col[56] = ((a0 - b0) >> COL_SHIFT); } +#ifndef EXTRA_SHIFT void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block) { pixel *dest = (pixel *)dest_; @@ -338,3 +354,4 @@ void FUNC(ff_simple_idct)(int16_t *block) for (i = 0; i < 8; i++) FUNC(idctSparseCol)(block + i); } +#endif |