diff options
author | Michael Niedermayer <michaelni@gmx.at> | 2012-02-04 07:52:31 +0100 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2012-02-04 20:23:04 +0100 |
commit | 26585d2a7f5f13bec3ed8623119bf625fdb728f4 (patch) | |
tree | 04dc2bb53b3adabbab0092092117e467dd0f6333 /libavutil/adler32.c | |
parent | 5b0103590ce4124abf6389a6905ca21eb14bb94a (diff) | |
download | ffmpeg-26585d2a7f5f13bec3ed8623119bf625fdb728f4.tar.gz |
adler32: rewrite using integer SIMD.
About twice as fast as before.
The non-CONFIG_SMALL case is also dropped, as it is not faster than the
CONFIG_SMALL case.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/adler32.c')
-rw-r--r-- | libavutil/adler32.c | 41 |
1 files changed, 31 insertions, 10 deletions
diff --git a/libavutil/adler32.c b/libavutil/adler32.c index 571242e1cb..c5f96db5e5 100644 --- a/libavutil/adler32.c +++ b/libavutil/adler32.c @@ -24,6 +24,7 @@ #include "config.h" #include "adler32.h" #include "common.h" +#include "intreadwrite.h" #define BASE 65521L /* largest prime smaller than 65536 */ @@ -38,22 +39,42 @@ unsigned long av_adler32_update(unsigned long adler, const uint8_t * buf, unsigned long s2 = adler >> 16; while (len > 0) { - unsigned len2 = FFMIN((len-1) & ~15, 2048); +#if HAVE_FAST_64BIT && HAVE_FAST_UNALIGNED && !CONFIG_SMALL + unsigned len2 = FFMIN((len-1) & ~7, 23*8); if (len2) { + uint64_t a1= 0; + uint64_t a2= 0; + uint64_t b1= 0; + uint64_t b2= 0; len -= len2; - -#if CONFIG_SMALL - while (len2 >= 4) { - DO4(buf); - len2 -= 4; + s2 += s1*len2; + while (len2 >= 8) { + uint64_t v = AV_RN64(buf); + a2 += a1; + b2 += b1; + a1 += v &0x00FF00FF00FF00FF; + b1 += (v>>8)&0x00FF00FF00FF00FF; + len2 -= 8; + buf+=8; + } + s1 += ((a1+b1)*0x1000100010001)>>48; + s2 += ((((a2&0xFFFF0000FFFF)+(b2&0xFFFF0000FFFF)+((a2>>16)&0xFFFF0000FFFF)+((b2>>16)&0xFFFF0000FFFF))*0x800000008)>>32) +#if HAVE_BIGENDIAN + + 2*((b1*0x1000200030004)>>48) + + ((a1*0x1000100010001)>>48) + + 2*((a1*0x0000100020003)>>48); +#else + + 2*((a1*0x4000300020001)>>48) + + ((b1*0x1000100010001)>>48) + + 2*((b1*0x3000200010000)>>48); +#endif } #else - while (len2 >= 16) { - DO16(buf); - len2 -= 16; + while (len > 4 && s2 < (1U << 31)) { + DO4(buf); + len -= 4; } #endif - } DO1(buf); len--; s1 %= BASE; s2 %= BASE; |