diff options
author | James Almer <jamrial@gmail.com> | 2017-03-03 00:25:54 -0300 |
---|---|---|
committer | James Almer <jamrial@gmail.com> | 2017-03-03 13:36:49 -0300 |
commit | e2b7ae4b198c1dc001b3b28476608eaf4daf726c (patch) | |
tree | fdfb0ae3c815e99b262fabcbd9c3bfde98149908 | |
parent | d8094a303ba36344015a44d629bafc6d7094b4ac (diff) | |
download | ffmpeg-e2b7ae4b198c1dc001b3b28476608eaf4daf726c.tar.gz |
avutil/md5: fix misaligned reads
This makes ubsan happy and also considerably increases performance on
big endian systems.
Tested on an IBM POWER7 at 3.55 GHz.
Before:
2.24user 0.14system 0:02.39elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
2.26user 0.11system 0:02.38elapsed 99%CPU (0avgtext+0avgdata 2688maxresident)k
2.23user 0.15system 0:02.38elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
2.25user 0.12system 0:02.38elapsed 100%CPU (0avgtext+0avgdata 2624maxresident)k
2.20user 0.15system 0:02.36elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
After:
1.86user 0.13system 0:02.00elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
1.89user 0.11system 0:02.01elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
1.85user 0.14system 0:02.00elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
1.84user 0.15system 0:01.99elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
1.89user 0.13system 0:02.02elapsed 99%CPU (0avgtext+0avgdata 2688maxresident)k
Tested-by: Nicolas George <george@nsup.org>
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: James Almer <jamrial@gmail.com>
-rw-r--r-- | libavutil/md5.c | 15 |
1 files changed, 5 insertions, 10 deletions
diff --git a/libavutil/md5.c b/libavutil/md5.c
index 8c36aa80c4..d3698dcb1d 100644
--- a/libavutil/md5.c
+++ b/libavutil/md5.c
@@ -86,14 +86,14 @@ static const uint32_t T[64] = { // T[i]= fabs(sin(i+1)<<32)
  \
         if (i < 32) { \
             if (i < 16) \
-                a += (d ^ (b & (c ^ d))) + X[ i & 15]; \
+                a += (d ^ (b & (c ^ d))) + AV_RL32(X+( i & 15));\
             else \
-                a += ((d & b) | (~d & c)) + X[(1 + 5*i) & 15]; \
+                a += ((d & b) | (~d & c)) + AV_RL32(X+((1 + 5*i) & 15));\
         } else { \
             if (i < 48) \
-                a += (b ^ c ^ d) + X[(5 + 3*i) & 15]; \
+                a += (b ^ c ^ d) + AV_RL32(X+((5 + 3*i) & 15));\
             else \
-                a += (c ^ (b | ~d)) + X[( 7*i) & 15]; \
+                a += (c ^ (b | ~d)) + AV_RL32(X+(( 7*i) & 15));\
         } \
         a = b + (a << t | a >> (32 - t)); \
     } while (0)
@@ -112,11 +112,6 @@ static void body(uint32_t ABCD[4], uint32_t *src, int nblocks)
 
         X = src + n * 16;
 
-#if HAVE_BIGENDIAN
-        for (i = 0; i < 16; i++)
-            X[i] = av_bswap32(X[i]);
-#endif
-
 #if CONFIG_SMALL
         for (i = 0; i < 64; i++) {
             CORE(i, a, b, c, d);
@@ -173,7 +168,7 @@ void av_md5_update(AVMD5 *ctx, const uint8_t *src, int len)
     }
 
     end = src + (len & ~63);
-    if (HAVE_BIGENDIAN || (!HAVE_FAST_UNALIGNED && ((intptr_t)src & 3))) {
+    if (!HAVE_FAST_UNALIGNED && ((intptr_t)src & 3)) {
         while (src < end) {
             memcpy(ctx->block, src, 64);
             body(ctx->ABCD, (uint32_t *) ctx->block, 1);