1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
|
// If we have SSSE3 support, pick off 16 bytes at a time for as long as we can,
// but make sure that we quit before seeing any == markers at the end of the
// string. Also, because we write four zeroes at the end of the output, ensure
// that there are at least 6 valid bytes of input data remaining to close the
// gap. 16 + 2 + 6 = 24 bytes:
while (srclen >= 24)
{
// Load string:
__m128i str = _mm_loadu_si128((__m128i *)c);
// The input consists of six character sets in the Base64 alphabet,
// which we need to map back to the 6-bit values they represent.
// There are three ranges, two singles, and then there's the rest.
//
// # From To Add Characters
// 1 [43] [62] +19 +
// 2 [47] [63] +16 /
// 3 [48..57] [52..61] +4 0..9
// 4 [65..90] [0..25] -65 A..Z
// 5 [97..122] [26..51] -71 a..z
// (6) Everything else => invalid input
const __m128i set1 = CMPEQ(str, '+');
const __m128i set2 = CMPEQ(str, '/');
const __m128i set3 = RANGE(str, '0', '9');
const __m128i set4 = RANGE(str, 'A', 'Z');
const __m128i set5 = RANGE(str, 'a', 'z');
const __m128i set6 = CMPEQ(str, '-');
const __m128i set7 = CMPEQ(str, '_');
__m128i delta = REPLACE(set1, 19);
delta = _mm_or_si128(delta, REPLACE(set2, 16));
delta = _mm_or_si128(delta, REPLACE(set3, 4));
delta = _mm_or_si128(delta, REPLACE(set4, -65));
delta = _mm_or_si128(delta, REPLACE(set5, -71));
delta = _mm_or_si128(delta, REPLACE(set6, 17));
delta = _mm_or_si128(delta, REPLACE(set7, -32));
// Check for invalid input: if any of the delta values are zero,
// fall back on bytewise code to do error checking and reporting:
if (_mm_movemask_epi8(CMPEQ(delta, 0))) {
break;
}
// Now simply add the delta values to the input:
str = _mm_add_epi8(str, delta);
// Reshuffle the input to packed 12-byte output format:
str = dec_reshuffle(str);
// Store back:
_mm_storeu_si128((__m128i *)o, str);
c += 16;
o += 12;
outl += 12;
srclen -= 16;
}
|