aboutsummaryrefslogtreecommitdiffstats
path: root/libavutil/adler32.c
diff options
context:
space:
mode:
author: Michael Niedermayer <michaelni@gmx.at> 2012-02-04 07:52:31 +0100
committer: Michael Niedermayer <michaelni@gmx.at> 2012-02-04 20:23:04 +0100
commit: 26585d2a7f5f13bec3ed8623119bf625fdb728f4 (patch)
tree: 04dc2bb53b3adabbab0092092117e467dd0f6333 /libavutil/adler32.c
parent: 5b0103590ce4124abf6389a6905ca21eb14bb94a (diff)
download: ffmpeg-26585d2a7f5f13bec3ed8623119bf625fdb728f4.tar.gz
adler32: rewrite using integer SIMD.
About twice as fast as before. The non-CONFIG_SMALL case is also dropped, as it is not faster than the CONFIG_SMALL case. Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/adler32.c')
-rw-r--r--libavutil/adler32.c41
1 files changed, 31 insertions, 10 deletions
diff --git a/libavutil/adler32.c b/libavutil/adler32.c
index 571242e1cb..c5f96db5e5 100644
--- a/libavutil/adler32.c
+++ b/libavutil/adler32.c
@@ -24,6 +24,7 @@
#include "config.h"
#include "adler32.h"
#include "common.h"
+#include "intreadwrite.h"
#define BASE 65521L /* largest prime smaller than 65536 */
@@ -38,22 +39,42 @@ unsigned long av_adler32_update(unsigned long adler, const uint8_t * buf,
unsigned long s2 = adler >> 16;
while (len > 0) {
- unsigned len2 = FFMIN((len-1) & ~15, 2048);
+#if HAVE_FAST_64BIT && HAVE_FAST_UNALIGNED && !CONFIG_SMALL
+ unsigned len2 = FFMIN((len-1) & ~7, 23*8);
if (len2) {
+ uint64_t a1= 0;
+ uint64_t a2= 0;
+ uint64_t b1= 0;
+ uint64_t b2= 0;
len -= len2;
-
-#if CONFIG_SMALL
- while (len2 >= 4) {
- DO4(buf);
- len2 -= 4;
+ s2 += s1*len2;
+ while (len2 >= 8) {
+ uint64_t v = AV_RN64(buf);
+ a2 += a1;
+ b2 += b1;
+ a1 += v &0x00FF00FF00FF00FF;
+ b1 += (v>>8)&0x00FF00FF00FF00FF;
+ len2 -= 8;
+ buf+=8;
+ }
+ s1 += ((a1+b1)*0x1000100010001)>>48;
+ s2 += ((((a2&0xFFFF0000FFFF)+(b2&0xFFFF0000FFFF)+((a2>>16)&0xFFFF0000FFFF)+((b2>>16)&0xFFFF0000FFFF))*0x800000008)>>32)
+#if HAVE_BIGENDIAN
+ + 2*((b1*0x1000200030004)>>48)
+ + ((a1*0x1000100010001)>>48)
+ + 2*((a1*0x0000100020003)>>48);
+#else
+ + 2*((a1*0x4000300020001)>>48)
+ + ((b1*0x1000100010001)>>48)
+ + 2*((b1*0x3000200010000)>>48);
+#endif
}
#else
- while (len2 >= 16) {
- DO16(buf);
- len2 -= 16;
+ while (len > 4 && s2 < (1U << 31)) {
+ DO4(buf);
+ len -= 4;
}
#endif
- }
DO1(buf); len--;
s1 %= BASE;
s2 %= BASE;