diff options
author | Ganesh Ajjanagadde <gajjanagadde@gmail.com> | 2016-01-11 17:09:44 -0500 |
---|---|---|
committer | Ganesh Ajjanagadde <gajjanagadde@gmail.com> | 2016-01-11 17:20:38 -0500 |
commit | 07a11ebcab9b31e9fc784029e5d24e6fbf486ff3 (patch) | |
tree | 4fe2631f3261cf2bb6306f930372a21ba1b28d89 | |
parent | f6e1c96730ebbcebbd0341329d51d3d3a36b4fa1 (diff) | |
download | ffmpeg-07a11ebcab9b31e9fc784029e5d24e6fbf486ff3.tar.gz |
lavc/cbrt_tablegen: speed up tablegen
This exploits an approach based on the sieve of Eratosthenes, a popular
method for generating prime numbers.
Tables are identical to previous ones.
Tested with FATE with/without --enable-hardcoded-tables.
Sample benchmark (Haswell, GNU/Linux+gcc):
prev:
7860100 decicycles in cbrt_tableinit, 1 runs, 0 skips
7777490 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
7582339 decicycles in cbrt_tableinit, 256 runs, 0 skips
7563556 decicycles in cbrt_tableinit, 512 runs, 0 skips
new:
2099480 decicycles in cbrt_tableinit, 1 runs, 0 skips
2044470 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
1796544 decicycles in cbrt_tableinit, 256 runs, 0 skips
1791631 decicycles in cbrt_tableinit, 512 runs, 0 skips
Both small and large run count given as this is called once so small run
count may give a better picture, small numbers are fairly consistent,
and there is a consistent downward trend from small to large runs,
at which point it stabilizes to a new value.
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
-rw-r--r-- | libavcodec/cbrt_tablegen.h | 39 |
1 files changed, 29 insertions, 10 deletions
diff --git a/libavcodec/cbrt_tablegen.h b/libavcodec/cbrt_tablegen.h index 59b5a1d236..21e4b9a117 100644 --- a/libavcodec/cbrt_tablegen.h +++ b/libavcodec/cbrt_tablegen.h @@ -26,12 +26,13 @@ #include <stdint.h> #include <math.h> #include "libavutil/attributes.h" +#include "libavutil/intfloat.h" #include "libavcodec/aac_defines.h" #if USE_FIXED -#define CBRT(x) lrint((x).f * 8192) +#define CBRT(x) lrint((x) * 8192) #else -#define CBRT(x) x.i +#define CBRT(x) av_float2int((float)(x)) #endif #if CONFIG_HARDCODED_TABLES @@ -47,16 +48,34 @@ static uint32_t cbrt_tab[1 << 13]; static av_cold void AAC_RENAME(cbrt_tableinit)(void) { + static double cbrt_tab_dbl[1 << 13]; if (!cbrt_tab[(1<<13) - 1]) { - int i; - for (i = 0; i < 1<<13; i++) { - union { - float f; - uint32_t i; - } f; - f.f = cbrt(i) * i; - cbrt_tab[i] = CBRT(f); + int i, j, k; + double cbrt_val; + + for (i = 1; i < 1<<13; i++) + cbrt_tab_dbl[i] = 1; + + /* have to take care of non-squarefree numbers */ + for (i = 2; i < 90; i++) { + if (cbrt_tab_dbl[i] == 1) { + cbrt_val = i * cbrt(i); + for (k = i; k < 1<<13; k *= i) + for (j = k; j < 1<<13; j += k) + cbrt_tab_dbl[j] *= cbrt_val; + } } + + for (i = 91; i <= 8191; i+= 2) { + if (cbrt_tab_dbl[i] == 1) { + cbrt_val = i * cbrt(i); + for (j = i; j < 1<<13; j += i) + cbrt_tab_dbl[j] *= cbrt_val; + } + } + + for (i = 0; i < 1<<13; i++) + cbrt_tab[i] = CBRT(cbrt_tab_dbl[i]); } } #endif /* CONFIG_HARDCODED_TABLES */ |