diff options
author | James Almer <jamrial@gmail.com> | 2013-09-09 05:42:21 -0300 |
---|---|---|
committer | Michael Niedermayer <michaelni@gmx.at> | 2013-09-09 11:18:43 +0200 |
commit | 452ac2aaecf7210a2912d9156869c6314142a794 (patch) | |
tree | f45f94d6fc0e1c032e38c3f69537a952f72a79c8 /libavutil | |
parent | b4e1630d4d25e7611bcd0c048deb52379abd1fc6 (diff) | |
download | ffmpeg-452ac2aaecf7210a2912d9156869c6314142a794.tar.gz |
lavu/ripemd: Fully unroll the transform function loops
crypto_bench RIPEMD-160 results using an AMD Athlon X2 7750+, mingw32-w64 GCC 4.8.1 x86_64
Before:
lavu RIPEMD-160 size: 1048576 runs: 1024 time: 12.342 +- 0.199
After:
lavu RIPEMD-160 size: 1048576 runs: 1024 time: 10.143 +- 0.192
Signed-off-by: James Almer <jamrial@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
Diffstat (limited to 'libavutil')
-rw-r--r-- | libavutil/ripemd.c | 129 |
1 files changed, 70 insertions, 59 deletions
```diff
diff --git a/libavutil/ripemd.c b/libavutil/ripemd.c
index d737c381a0..37b42df2b3 100644
--- a/libavutil/ripemd.c
+++ b/libavutil/ripemd.c
@@ -128,37 +128,42 @@ static void ripemd128_transform(uint32_t *state, const uint8_t buffer[64], int e
 
     for (n = 0; n < 16; n++)
         block[n] = AV_RL32(buffer + 4 * n);
+    n = 0;
 
-    for (n = 0; n < 16;) {
-        ROUND128_0_TO_15(a,b,c,d,e,f,g,h);
-        ROUND128_0_TO_15(d,a,b,c,h,e,f,g);
-        ROUND128_0_TO_15(c,d,a,b,g,h,e,f);
-        ROUND128_0_TO_15(b,c,d,a,f,g,h,e);
-    }
+#define R128_0 \
+    ROUND128_0_TO_15(a,b,c,d,e,f,g,h); \
+    ROUND128_0_TO_15(d,a,b,c,h,e,f,g); \
+    ROUND128_0_TO_15(c,d,a,b,g,h,e,f); \
+    ROUND128_0_TO_15(b,c,d,a,f,g,h,e)
+
+    R128_0; R128_0; R128_0; R128_0;
     SWAP(a,e)
 
-    for (; n < 32;) {
-        ROUND128_16_TO_31(a,b,c,d,e,f,g,h);
-        ROUND128_16_TO_31(d,a,b,c,h,e,f,g);
-        ROUND128_16_TO_31(c,d,a,b,g,h,e,f);
-        ROUND128_16_TO_31(b,c,d,a,f,g,h,e);
-    }
+#define R128_16 \
+    ROUND128_16_TO_31(a,b,c,d,e,f,g,h); \
+    ROUND128_16_TO_31(d,a,b,c,h,e,f,g); \
+    ROUND128_16_TO_31(c,d,a,b,g,h,e,f); \
+    ROUND128_16_TO_31(b,c,d,a,f,g,h,e)
+
+    R128_16; R128_16; R128_16; R128_16;
     SWAP(b,f)
 
-    for (; n < 48;) {
-        ROUND128_32_TO_47(a,b,c,d,e,f,g,h);
-        ROUND128_32_TO_47(d,a,b,c,h,e,f,g);
-        ROUND128_32_TO_47(c,d,a,b,g,h,e,f);
-        ROUND128_32_TO_47(b,c,d,a,f,g,h,e);
-    }
+#define R128_32 \
+    ROUND128_32_TO_47(a,b,c,d,e,f,g,h); \
+    ROUND128_32_TO_47(d,a,b,c,h,e,f,g); \
+    ROUND128_32_TO_47(c,d,a,b,g,h,e,f); \
+    ROUND128_32_TO_47(b,c,d,a,f,g,h,e)
+
+    R128_32; R128_32; R128_32; R128_32;
     SWAP(c,g)
 
-    for (; n < 64;) {
-        ROUND128_48_TO_63(a,b,c,d,e,f,g,h);
-        ROUND128_48_TO_63(d,a,b,c,h,e,f,g);
-        ROUND128_48_TO_63(c,d,a,b,g,h,e,f);
-        ROUND128_48_TO_63(b,c,d,a,f,g,h,e);
-    }
+#define R128_48 \
+    ROUND128_48_TO_63(a,b,c,d,e,f,g,h); \
+    ROUND128_48_TO_63(d,a,b,c,h,e,f,g); \
+    ROUND128_48_TO_63(c,d,a,b,g,h,e,f); \
+    ROUND128_48_TO_63(b,c,d,a,f,g,h,e)
+
+    R128_48; R128_48; R128_48; R128_48;
     SWAP(d,h)
 
     if (ext) {
@@ -222,54 +227,60 @@ static void ripemd160_transform(uint32_t *state, const uint8_t buffer[64], int e
 
     for (n = 0; n < 16; n++)
         block[n] = AV_RL32(buffer + 4 * n);
+    n = 0;
 
-    for (n = 0; n < 16 - 1;) {
-        ROUND160_0_TO_15(a,b,c,d,e,f,g,h,i,j);
-        ROUND160_0_TO_15(e,a,b,c,d,j,f,g,h,i);
-        ROUND160_0_TO_15(d,e,a,b,c,i,j,f,g,h);
-        ROUND160_0_TO_15(c,d,e,a,b,h,i,j,f,g);
-        ROUND160_0_TO_15(b,c,d,e,a,g,h,i,j,f);
-    }
+#define R160_0 \
+    ROUND160_0_TO_15(a,b,c,d,e,f,g,h,i,j); \
+    ROUND160_0_TO_15(e,a,b,c,d,j,f,g,h,i); \
+    ROUND160_0_TO_15(d,e,a,b,c,i,j,f,g,h); \
+    ROUND160_0_TO_15(c,d,e,a,b,h,i,j,f,g); \
+    ROUND160_0_TO_15(b,c,d,e,a,g,h,i,j,f)
+
+    R160_0; R160_0; R160_0;
     ROUND160_0_TO_15(a,b,c,d,e,f,g,h,i,j);
     SWAP(a,f)
 
-    for (; n < 32 - 1;) {
-        ROUND160_16_TO_31(e,a,b,c,d,j,f,g,h,i);
-        ROUND160_16_TO_31(d,e,a,b,c,i,j,f,g,h);
-        ROUND160_16_TO_31(c,d,e,a,b,h,i,j,f,g);
-        ROUND160_16_TO_31(b,c,d,e,a,g,h,i,j,f);
-        ROUND160_16_TO_31(a,b,c,d,e,f,g,h,i,j);
-    }
+#define R160_16 \
+    ROUND160_16_TO_31(e,a,b,c,d,j,f,g,h,i); \
+    ROUND160_16_TO_31(d,e,a,b,c,i,j,f,g,h); \
+    ROUND160_16_TO_31(c,d,e,a,b,h,i,j,f,g); \
+    ROUND160_16_TO_31(b,c,d,e,a,g,h,i,j,f); \
+    ROUND160_16_TO_31(a,b,c,d,e,f,g,h,i,j)
+
+    R160_16; R160_16; R160_16;
     ROUND160_16_TO_31(e,a,b,c,d,j,f,g,h,i);
     SWAP(b,g)
 
-    for (; n < 48 - 1;) {
-        ROUND160_32_TO_47(d,e,a,b,c,i,j,f,g,h);
-        ROUND160_32_TO_47(c,d,e,a,b,h,i,j,f,g);
-        ROUND160_32_TO_47(b,c,d,e,a,g,h,i,j,f);
-        ROUND160_32_TO_47(a,b,c,d,e,f,g,h,i,j);
-        ROUND160_32_TO_47(e,a,b,c,d,j,f,g,h,i);
-    }
+#define R160_32 \
+    ROUND160_32_TO_47(d,e,a,b,c,i,j,f,g,h); \
+    ROUND160_32_TO_47(c,d,e,a,b,h,i,j,f,g); \
+    ROUND160_32_TO_47(b,c,d,e,a,g,h,i,j,f); \
+    ROUND160_32_TO_47(a,b,c,d,e,f,g,h,i,j); \
+    ROUND160_32_TO_47(e,a,b,c,d,j,f,g,h,i)
+
+    R160_32; R160_32; R160_32;
     ROUND160_32_TO_47(d,e,a,b,c,i,j,f,g,h);
     SWAP(c,h)
 
-    for (; n < 64 - 1;) {
-        ROUND160_48_TO_63(c,d,e,a,b,h,i,j,f,g);
-        ROUND160_48_TO_63(b,c,d,e,a,g,h,i,j,f);
-        ROUND160_48_TO_63(a,b,c,d,e,f,g,h,i,j);
-        ROUND160_48_TO_63(e,a,b,c,d,j,f,g,h,i);
-        ROUND160_48_TO_63(d,e,a,b,c,i,j,f,g,h);
-    }
+#define R160_48 \
+    ROUND160_48_TO_63(c,d,e,a,b,h,i,j,f,g); \
+    ROUND160_48_TO_63(b,c,d,e,a,g,h,i,j,f); \
+    ROUND160_48_TO_63(a,b,c,d,e,f,g,h,i,j); \
+    ROUND160_48_TO_63(e,a,b,c,d,j,f,g,h,i); \
+    ROUND160_48_TO_63(d,e,a,b,c,i,j,f,g,h)
+
+    R160_48; R160_48; R160_48;
     ROUND160_48_TO_63(c,d,e,a,b,h,i,j,f,g);
     SWAP(d,i)
 
-    for (; n < 75;) {
-        ROUND160_64_TO_79(b,c,d,e,a,g,h,i,j,f);
-        ROUND160_64_TO_79(a,b,c,d,e,f,g,h,i,j);
-        ROUND160_64_TO_79(e,a,b,c,d,j,f,g,h,i);
-        ROUND160_64_TO_79(d,e,a,b,c,i,j,f,g,h);
-        ROUND160_64_TO_79(c,d,e,a,b,h,i,j,f,g);
-    }
+#define R160_64 \
+    ROUND160_64_TO_79(b,c,d,e,a,g,h,i,j,f); \
+    ROUND160_64_TO_79(a,b,c,d,e,f,g,h,i,j); \
+    ROUND160_64_TO_79(e,a,b,c,d,j,f,g,h,i); \
+    ROUND160_64_TO_79(d,e,a,b,c,i,j,f,g,h); \
+    ROUND160_64_TO_79(c,d,e,a,b,h,i,j,f,g)
+
+    R160_64; R160_64; R160_64;
     ROUND160_64_TO_79(b,c,d,e,a,g,h,i,j,f);
     SWAP(e,j)
 
```