about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author James Almer <jamrial@gmail.com> 2013-09-09 05:42:21 -0300
committer Michael Niedermayer <michaelni@gmx.at> 2013-09-09 11:18:43 +0200
commit 452ac2aaecf7210a2912d9156869c6314142a794 (patch)
tree f45f94d6fc0e1c032e38c3f69537a952f72a79c8
parent b4e1630d4d25e7611bcd0c048deb52379abd1fc6 (diff)
download ffmpeg-452ac2aaecf7210a2912d9156869c6314142a794.tar.gz
lavu/ripemd: Fully unroll the transform function loops
crypto_bench RIPEMD-160 results using an AMD Athlon X2 7750+, mingw32-w64 GCC 4.8.1 x86_64

Before:
lavu RIPEMD-160 size: 1048576 runs: 1024 time: 12.342 +- 0.199

After:
lavu RIPEMD-160 size: 1048576 runs: 1024 time: 10.143 +- 0.192

Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
-rw-r--r-- libavutil/ripemd.c 129
1 file changed, 70 insertions, 59 deletions
diff --git a/libavutil/ripemd.c b/libavutil/ripemd.c
index d737c381a0..37b42df2b3 100644
--- a/libavutil/ripemd.c
+++ b/libavutil/ripemd.c
@@ -128,37 +128,42 @@ static void ripemd128_transform(uint32_t *state, const uint8_t buffer[64], int e
for (n = 0; n < 16; n++)
block[n] = AV_RL32(buffer + 4 * n);
+ n = 0;
- for (n = 0; n < 16;) {
- ROUND128_0_TO_15(a,b,c,d,e,f,g,h);
- ROUND128_0_TO_15(d,a,b,c,h,e,f,g);
- ROUND128_0_TO_15(c,d,a,b,g,h,e,f);
- ROUND128_0_TO_15(b,c,d,a,f,g,h,e);
- }
+#define R128_0 \
+ ROUND128_0_TO_15(a,b,c,d,e,f,g,h); \
+ ROUND128_0_TO_15(d,a,b,c,h,e,f,g); \
+ ROUND128_0_TO_15(c,d,a,b,g,h,e,f); \
+ ROUND128_0_TO_15(b,c,d,a,f,g,h,e)
+
+ R128_0; R128_0; R128_0; R128_0;
SWAP(a,e)
- for (; n < 32;) {
- ROUND128_16_TO_31(a,b,c,d,e,f,g,h);
- ROUND128_16_TO_31(d,a,b,c,h,e,f,g);
- ROUND128_16_TO_31(c,d,a,b,g,h,e,f);
- ROUND128_16_TO_31(b,c,d,a,f,g,h,e);
- }
+#define R128_16 \
+ ROUND128_16_TO_31(a,b,c,d,e,f,g,h); \
+ ROUND128_16_TO_31(d,a,b,c,h,e,f,g); \
+ ROUND128_16_TO_31(c,d,a,b,g,h,e,f); \
+ ROUND128_16_TO_31(b,c,d,a,f,g,h,e)
+
+ R128_16; R128_16; R128_16; R128_16;
SWAP(b,f)
- for (; n < 48;) {
- ROUND128_32_TO_47(a,b,c,d,e,f,g,h);
- ROUND128_32_TO_47(d,a,b,c,h,e,f,g);
- ROUND128_32_TO_47(c,d,a,b,g,h,e,f);
- ROUND128_32_TO_47(b,c,d,a,f,g,h,e);
- }
+#define R128_32 \
+ ROUND128_32_TO_47(a,b,c,d,e,f,g,h); \
+ ROUND128_32_TO_47(d,a,b,c,h,e,f,g); \
+ ROUND128_32_TO_47(c,d,a,b,g,h,e,f); \
+ ROUND128_32_TO_47(b,c,d,a,f,g,h,e)
+
+ R128_32; R128_32; R128_32; R128_32;
SWAP(c,g)
- for (; n < 64;) {
- ROUND128_48_TO_63(a,b,c,d,e,f,g,h);
- ROUND128_48_TO_63(d,a,b,c,h,e,f,g);
- ROUND128_48_TO_63(c,d,a,b,g,h,e,f);
- ROUND128_48_TO_63(b,c,d,a,f,g,h,e);
- }
+#define R128_48 \
+ ROUND128_48_TO_63(a,b,c,d,e,f,g,h); \
+ ROUND128_48_TO_63(d,a,b,c,h,e,f,g); \
+ ROUND128_48_TO_63(c,d,a,b,g,h,e,f); \
+ ROUND128_48_TO_63(b,c,d,a,f,g,h,e)
+
+ R128_48; R128_48; R128_48; R128_48;
SWAP(d,h)
if (ext) {
@@ -222,54 +227,60 @@ static void ripemd160_transform(uint32_t *state, const uint8_t buffer[64], int e
for (n = 0; n < 16; n++)
block[n] = AV_RL32(buffer + 4 * n);
+ n = 0;
- for (n = 0; n < 16 - 1;) {
- ROUND160_0_TO_15(a,b,c,d,e,f,g,h,i,j);
- ROUND160_0_TO_15(e,a,b,c,d,j,f,g,h,i);
- ROUND160_0_TO_15(d,e,a,b,c,i,j,f,g,h);
- ROUND160_0_TO_15(c,d,e,a,b,h,i,j,f,g);
- ROUND160_0_TO_15(b,c,d,e,a,g,h,i,j,f);
- }
+#define R160_0 \
+ ROUND160_0_TO_15(a,b,c,d,e,f,g,h,i,j); \
+ ROUND160_0_TO_15(e,a,b,c,d,j,f,g,h,i); \
+ ROUND160_0_TO_15(d,e,a,b,c,i,j,f,g,h); \
+ ROUND160_0_TO_15(c,d,e,a,b,h,i,j,f,g); \
+ ROUND160_0_TO_15(b,c,d,e,a,g,h,i,j,f)
+
+ R160_0; R160_0; R160_0;
ROUND160_0_TO_15(a,b,c,d,e,f,g,h,i,j);
SWAP(a,f)
- for (; n < 32 - 1;) {
- ROUND160_16_TO_31(e,a,b,c,d,j,f,g,h,i);
- ROUND160_16_TO_31(d,e,a,b,c,i,j,f,g,h);
- ROUND160_16_TO_31(c,d,e,a,b,h,i,j,f,g);
- ROUND160_16_TO_31(b,c,d,e,a,g,h,i,j,f);
- ROUND160_16_TO_31(a,b,c,d,e,f,g,h,i,j);
- }
+#define R160_16 \
+ ROUND160_16_TO_31(e,a,b,c,d,j,f,g,h,i); \
+ ROUND160_16_TO_31(d,e,a,b,c,i,j,f,g,h); \
+ ROUND160_16_TO_31(c,d,e,a,b,h,i,j,f,g); \
+ ROUND160_16_TO_31(b,c,d,e,a,g,h,i,j,f); \
+ ROUND160_16_TO_31(a,b,c,d,e,f,g,h,i,j)
+
+ R160_16; R160_16; R160_16;
ROUND160_16_TO_31(e,a,b,c,d,j,f,g,h,i);
SWAP(b,g)
- for (; n < 48 - 1;) {
- ROUND160_32_TO_47(d,e,a,b,c,i,j,f,g,h);
- ROUND160_32_TO_47(c,d,e,a,b,h,i,j,f,g);
- ROUND160_32_TO_47(b,c,d,e,a,g,h,i,j,f);
- ROUND160_32_TO_47(a,b,c,d,e,f,g,h,i,j);
- ROUND160_32_TO_47(e,a,b,c,d,j,f,g,h,i);
- }
+#define R160_32 \
+ ROUND160_32_TO_47(d,e,a,b,c,i,j,f,g,h); \
+ ROUND160_32_TO_47(c,d,e,a,b,h,i,j,f,g); \
+ ROUND160_32_TO_47(b,c,d,e,a,g,h,i,j,f); \
+ ROUND160_32_TO_47(a,b,c,d,e,f,g,h,i,j); \
+ ROUND160_32_TO_47(e,a,b,c,d,j,f,g,h,i)
+
+ R160_32; R160_32; R160_32;
ROUND160_32_TO_47(d,e,a,b,c,i,j,f,g,h);
SWAP(c,h)
- for (; n < 64 - 1;) {
- ROUND160_48_TO_63(c,d,e,a,b,h,i,j,f,g);
- ROUND160_48_TO_63(b,c,d,e,a,g,h,i,j,f);
- ROUND160_48_TO_63(a,b,c,d,e,f,g,h,i,j);
- ROUND160_48_TO_63(e,a,b,c,d,j,f,g,h,i);
- ROUND160_48_TO_63(d,e,a,b,c,i,j,f,g,h);
- }
+#define R160_48 \
+ ROUND160_48_TO_63(c,d,e,a,b,h,i,j,f,g); \
+ ROUND160_48_TO_63(b,c,d,e,a,g,h,i,j,f); \
+ ROUND160_48_TO_63(a,b,c,d,e,f,g,h,i,j); \
+ ROUND160_48_TO_63(e,a,b,c,d,j,f,g,h,i); \
+ ROUND160_48_TO_63(d,e,a,b,c,i,j,f,g,h)
+
+ R160_48; R160_48; R160_48;
ROUND160_48_TO_63(c,d,e,a,b,h,i,j,f,g);
SWAP(d,i)
- for (; n < 75;) {
- ROUND160_64_TO_79(b,c,d,e,a,g,h,i,j,f);
- ROUND160_64_TO_79(a,b,c,d,e,f,g,h,i,j);
- ROUND160_64_TO_79(e,a,b,c,d,j,f,g,h,i);
- ROUND160_64_TO_79(d,e,a,b,c,i,j,f,g,h);
- ROUND160_64_TO_79(c,d,e,a,b,h,i,j,f,g);
- }
+#define R160_64 \
+ ROUND160_64_TO_79(b,c,d,e,a,g,h,i,j,f); \
+ ROUND160_64_TO_79(a,b,c,d,e,f,g,h,i,j); \
+ ROUND160_64_TO_79(e,a,b,c,d,j,f,g,h,i); \
+ ROUND160_64_TO_79(d,e,a,b,c,i,j,f,g,h); \
+ ROUND160_64_TO_79(c,d,e,a,b,h,i,j,f,g)
+
+ R160_64; R160_64; R160_64;
ROUND160_64_TO_79(b,c,d,e,a,g,h,i,j,f);
SWAP(e,j)