aboutsummaryrefslogtreecommitdiffstats
path: root/libavutil
diff options
context:
space:
mode:
author: James Almer <jamrial@gmail.com>, 2013-09-09 20:16:40 -0300
committer: Michael Niedermayer <michaelni@gmx.at>, 2013-09-11 21:55:59 +0200
commit: bbcaf25d4d2130fa9c34c314628f9fd2f706b61b (patch)
tree: 1ae1db6c3053abb62cba18bffae176dcf7f55d52 /libavutil
parent: 7e4fe5162ab94a413e04caae19193c5e7a4c6478 (diff)
download: ffmpeg-bbcaf25d4d2130fa9c34c314628f9fd2f706b61b.tar.gz
lavu/sha512: Fully unroll the transform function loops
crypto_bench SHA-512 results using an AMD Athlon X2 7750+, mingw32-w64 GCC 4.7.3 x86_64

Before:
lavu SHA-512 size: 1048576 runs: 1024 time: 12.737 +- 0.147
After:
lavu SHA-512 size: 1048576 runs: 1024 time: 11.670 +- 0.173

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil')
-rw-r--r-- libavutil/sha512.c | 45
1 file changed, 25 insertions, 20 deletions
diff --git a/libavutil/sha512.c b/libavutil/sha512.c
index 84136037db..66a864f1a6 100644
--- a/libavutil/sha512.c
+++ b/libavutil/sha512.c
@@ -150,27 +150,32 @@ static void sha512_transform(uint64_t *state, const uint8_t buffer[128])
a = T1 + T2;
}
#else
- for (i = 0; i < 16 - 7;) {
- ROUND512_0_TO_15(a, b, c, d, e, f, g, h);
- ROUND512_0_TO_15(h, a, b, c, d, e, f, g);
- ROUND512_0_TO_15(g, h, a, b, c, d, e, f);
- ROUND512_0_TO_15(f, g, h, a, b, c, d, e);
- ROUND512_0_TO_15(e, f, g, h, a, b, c, d);
- ROUND512_0_TO_15(d, e, f, g, h, a, b, c);
- ROUND512_0_TO_15(c, d, e, f, g, h, a, b);
- ROUND512_0_TO_15(b, c, d, e, f, g, h, a);
- }
- for (; i < 80 - 7;) {
- ROUND512_16_TO_80(a, b, c, d, e, f, g, h);
- ROUND512_16_TO_80(h, a, b, c, d, e, f, g);
- ROUND512_16_TO_80(g, h, a, b, c, d, e, f);
- ROUND512_16_TO_80(f, g, h, a, b, c, d, e);
- ROUND512_16_TO_80(e, f, g, h, a, b, c, d);
- ROUND512_16_TO_80(d, e, f, g, h, a, b, c);
- ROUND512_16_TO_80(c, d, e, f, g, h, a, b);
- ROUND512_16_TO_80(b, c, d, e, f, g, h, a);
- }
+#define R512_0 \
+ ROUND512_0_TO_15(a, b, c, d, e, f, g, h); \
+ ROUND512_0_TO_15(h, a, b, c, d, e, f, g); \
+ ROUND512_0_TO_15(g, h, a, b, c, d, e, f); \
+ ROUND512_0_TO_15(f, g, h, a, b, c, d, e); \
+ ROUND512_0_TO_15(e, f, g, h, a, b, c, d); \
+ ROUND512_0_TO_15(d, e, f, g, h, a, b, c); \
+ ROUND512_0_TO_15(c, d, e, f, g, h, a, b); \
+ ROUND512_0_TO_15(b, c, d, e, f, g, h, a)
+
+ i = 0;
+ R512_0; R512_0;
+
+#define R512_16 \
+ ROUND512_16_TO_80(a, b, c, d, e, f, g, h); \
+ ROUND512_16_TO_80(h, a, b, c, d, e, f, g); \
+ ROUND512_16_TO_80(g, h, a, b, c, d, e, f); \
+ ROUND512_16_TO_80(f, g, h, a, b, c, d, e); \
+ ROUND512_16_TO_80(e, f, g, h, a, b, c, d); \
+ ROUND512_16_TO_80(d, e, f, g, h, a, b, c); \
+ ROUND512_16_TO_80(c, d, e, f, g, h, a, b); \
+ ROUND512_16_TO_80(b, c, d, e, f, g, h, a)
+
+ R512_16; R512_16; R512_16; R512_16;
+ R512_16; R512_16; R512_16; R512_16;
#endif
state[0] += a;
state[1] += b;