aboutsummaryrefslogtreecommitdiffstats
path: root/libavutil/md5.c
diff options
context:
space:
mode:
author: Giorgio Vazzana <mywing81@gmail.com> 2013-05-18 13:53:52 +0200
committer: Michael Niedermayer <michaelni@gmx.at> 2013-05-20 04:27:34 +0200
commit: d0a34aeedff187873e1b42e0902d9dfbba08a1f4 (patch)
tree: ff7decbc6941a6e51dc4f7f99e6f00b82b703f1a /libavutil/md5.c
parent: b7be8ea92a4b23de5d622097ba451c4b266e6563 (diff)
download: ffmpeg-d0a34aeedff187873e1b42e0902d9dfbba08a1f4.tar.gz
md5: optimize second round by using 4-operation form of G()
The 4-operation form is preferred over the 3-operation form because it breaks a long dependency chain, thus allowing a superscalar processor to execute more operations in parallel.
The idea was taken from: http://www.zorinaq.com/papers/md5-amd64.html

AMD Athlon(tm) II X3 450 Processor, x86_64

$ for i in $(seq 1 4); do ./avutil_md5_test2; done
size: 1048576 runs: 1024 time: 5.821 +- 0.019
size: 1048576 runs: 1024 time: 5.822 +- 0.019
size: 1048576 runs: 1024 time: 5.841 +- 0.018
size: 1048576 runs: 1024 time: 5.821 +- 0.018

$ for i in $(seq 1 4); do ./avutil_md5_test2; done
size: 1048576 runs: 1024 time: 5.646 +- 0.019
size: 1048576 runs: 1024 time: 5.646 +- 0.018
size: 1048576 runs: 1024 time: 5.642 +- 0.019
size: 1048576 runs: 1024 time: 5.641 +- 0.019

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil/md5.c')
-rw-r--r-- libavutil/md5.c 2
1 files changed, 1 insertions, 1 deletions
diff --git a/libavutil/md5.c b/libavutil/md5.c
index 7375ce55a5..e3c4981217 100644
--- a/libavutil/md5.c
+++ b/libavutil/md5.c
@@ -84,7 +84,7 @@ static const uint32_t T[64] = { // T[i]= fabs(sin(i+1)<<32)
\
if (i < 32) { \
if (i < 16) a += (d ^ (b & (c ^ d))) + X[ i & 15]; \
- else a += (c ^ (d & (c ^ b))) + X[(1 + 5*i) & 15]; \
+ else a += ((d & b) | (~d & c))+ X[(1 + 5*i) & 15]; \
} else { \
if (i < 48) a += (b ^ c ^ d) + X[(5 + 3*i) & 15]; \
else a += (c ^ (b | ~d)) + X[( 7*i) & 15]; \