Restoring authorship annotation for <yazevnul@yandex-team.ru>. Commit 2 of 2.

author: yazevnul <yazevnul@yandex-team.ru> 2022-02-10 16:46:48 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:46:48 +0300
commit: 9abfb1a53b7f7b791444d1378e645d8fad9b06ed (patch)
tree: 49e222ea1c5804306084bb3ae065bb702625360f /contrib/libs/base64/avx2/codec_avx2.c
parent: 8cbc307de0221f84c80c42dcbe07d40727537e2c (diff)
download: ydb-9abfb1a53b7f7b791444d1378e645d8fad9b06ed.tar.gz
1 files changed, 191 insertions, 191 deletions
diff --git a/contrib/libs/base64/avx2/codec_avx2.c b/contrib/libs/base64/avx2/codec_avx2.c
index 7e0dc739d6..46c351c539 100644
--- a/contrib/libs/base64/avx2/codec_avx2.c
+++ b/contrib/libs/base64/avx2/codec_avx2.c
@@ -1,191 +1,191 @@
-#include <stdint.h> 
-#include <stddef.h> 
-#include <stdlib.h> 
- 
-#include "libbase64.h" 
-#include "codecs.h" 
- 
-#ifdef __AVX2__ 
-#include <immintrin.h> 
- 
-#define CMPGT(s,n)	_mm256_cmpgt_epi8((s), _mm256_set1_epi8(n)) 
-#define CMPEQ(s,n)	_mm256_cmpeq_epi8((s), _mm256_set1_epi8(n)) 
-#define REPLACE(s,n)	_mm256_and_si256((s), _mm256_set1_epi8(n)) 
-#define RANGE(s,a,b)	_mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a) - 1)) 
- 
-static inline __m256i 
-_mm256_bswap_epi32 (const __m256i in) 
-{ 
-	// _mm256_shuffle_epi8() works on two 128-bit lanes separately: 
-	return _mm256_shuffle_epi8(in, _mm256_setr_epi8( 
-		 3,  2,  1,  0, 
-		 7,  6,  5,  4, 
-		11, 10,  9,  8, 
-		15, 14, 13, 12, 
-		 3,  2,  1,  0, 
-		 7,  6,  5,  4, 
-		11, 10,  9,  8, 
-		15, 14, 13, 12)); 
-} 
- 
-static inline __m256i 
-enc_reshuffle (__m256i in) 
-{ 
-	// Spread out 32-bit words over both halves of the input register: 
-	in = _mm256_permutevar8x32_epi32(in, _mm256_setr_epi32( 
-		0, 1, 2, -1, 
-		3, 4, 5, -1)); 
- 
-	// Slice into 32-bit chunks and operate on all chunks in parallel. 
-	// All processing is done within the 32-bit chunk. First, shuffle: 
-	// before: [eeeeeeff|ccdddddd|bbbbcccc|aaaaaabb] 
-	// after:  [00000000|aaaaaabb|bbbbcccc|ccdddddd] 
-	in = _mm256_shuffle_epi8(in, _mm256_set_epi8( 
-		-1, 9, 10, 11, 
-		-1, 6,  7,  8, 
-		-1, 3,  4,  5, 
-		-1, 0,  1,  2, 
-		-1, 9, 10, 11, 
-		-1, 6,  7,  8, 
-		-1, 3,  4,  5, 
-		-1, 0,  1,  2)); 
- 
-	// cd      = [00000000|00000000|0000cccc|ccdddddd] 
-	const __m256i cd = _mm256_and_si256(in, _mm256_set1_epi32(0x00000FFF)); 
- 
-	// ab      = [0000aaaa|aabbbbbb|00000000|00000000] 
-	const __m256i ab = _mm256_and_si256(_mm256_slli_epi32(in, 4), _mm256_set1_epi32(0x0FFF0000)); 
- 
-	// merged  = [0000aaaa|aabbbbbb|0000cccc|ccdddddd] 
-	const __m256i merged = _mm256_or_si256(ab, cd); 
- 
-	// bd      = [00000000|00bbbbbb|00000000|00dddddd] 
-	const __m256i bd = _mm256_and_si256(merged, _mm256_set1_epi32(0x003F003F)); 
- 
-	// ac      = [00aaaaaa|00000000|00cccccc|00000000] 
-	const __m256i ac = _mm256_and_si256(_mm256_slli_epi32(merged, 2), _mm256_set1_epi32(0x3F003F00)); 
- 
-	// indices = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] 
-	const __m256i indices = _mm256_or_si256(ac, bd); 
- 
-	// return  = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] 
-	return _mm256_bswap_epi32(indices); 
-} 
- 
-static inline __m256i 
-enc_translate (const __m256i in) 
-{ 
-	// Translate values 0..63 to the Base64 alphabet. There are five sets: 
-	// #  From      To         Abs  Delta  Characters 
-	// 0  [0..25]   [65..90]   +65  +65    ABCDEFGHIJKLMNOPQRSTUVWXYZ 
-	// 1  [26..51]  [97..122]  +71   +6    abcdefghijklmnopqrstuvwxyz 
-	// 2  [52..61]  [48..57]    -4  -75    0123456789 
-	// 3  [62]      [43]       -19  -15    + 
-	// 4  [63]      [47]       -16   +3    / 
- 
-	// Create cumulative masks for characters in sets [1,2,3,4], [2,3,4], 
-	// [3,4], and [4]: 
-	const __m256i mask1 = CMPGT(in, 25); 
-	const __m256i mask2 = CMPGT(in, 51); 
-	const __m256i mask3 = CMPGT(in, 61); 
-	const __m256i mask4 = CMPEQ(in, 63); 
- 
-	// All characters are at least in cumulative set 0, so add 'A': 
-	__m256i out = _mm256_add_epi8(in, _mm256_set1_epi8(65)); 
- 
-	// For inputs which are also in any of the other cumulative sets, 
-	// add delta values against the previous set(s) to correct the shift: 
-	out = _mm256_add_epi8(out, REPLACE(mask1,  6)); 
-	out = _mm256_sub_epi8(out, REPLACE(mask2, 75)); 
-	out = _mm256_sub_epi8(out, REPLACE(mask3, 15)); 
-	out = _mm256_add_epi8(out, REPLACE(mask4,  3)); 
- 
-	return out; 
-} 
- 
-static inline __m256i 
-dec_reshuffle (__m256i in) 
-{ 
-	// Shuffle bytes to 32-bit bigendian: 
-	in = _mm256_bswap_epi32(in); 
- 
-	// Mask in a single byte per shift: 
-	__m256i mask = _mm256_set1_epi32(0x3F000000); 
- 
-	// Pack bytes together: 
-	__m256i out = _mm256_slli_epi32(_mm256_and_si256(in, mask), 2); 
-	mask = _mm256_srli_epi32(mask, 8); 
- 
-	out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 4)); 
-	mask = _mm256_srli_epi32(mask, 8); 
- 
-	out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 6)); 
-	mask = _mm256_srli_epi32(mask, 8); 
- 
-	out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 8)); 
- 
-	// Pack bytes together within 32-bit words, discarding words 3 and 7: 
-	out = _mm256_shuffle_epi8(out, _mm256_setr_epi8( 
-		 3,  2,  1, 
-		 7,  6,  5, 
-		11, 10,  9, 
-		15, 14, 13, 
-		-1, -1, -1, -1, 
-		 3,  2,  1, 
-		 7,  6,  5, 
-		11, 10,  9, 
-		15, 14, 13, 
-		-1, -1, -1, -1)); 
- 
-	// Pack 32-bit words together, squashing empty words 3 and 7: 
-	return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32( 
-		0, 1, 2, 4, 5, 6, -1, -1)); 
-} 
- 
-#endif	// __AVX2__ 
- 
-void 
-avx2_base64_stream_encode 
-	( struct avx2_base64_state	*state 
-	, const char		*src 
-	, size_t		 srclen 
-	, char			*out 
-	, size_t		*outlen 
-	) 
-{ 
-#if defined(__AVX2__) 
-	#include "enc_head.c" 
-	#include "enc_avx2.c" 
-	#include "enc_tail.c" 
-#else 
-    (void)state; 
-    (void)src; 
-    (void)srclen; 
-    (void)out; 
-    (void)outlen; 
-    abort(); 
-#endif 
-} 
- 
-int 
-avx2_base64_stream_decode 
-	( struct avx2_base64_state	*state 
-	, const char		*src 
-	, size_t		 srclen 
-	, char			*out 
-	, size_t		*outlen 
-	) 
-{ 
-#if defined(__AVX2__) 
-	#include "dec_head.c" 
-	#include "dec_avx2.c" 
-	#include "dec_tail.c" 
-#else 
-    (void)state; 
-    (void)src; 
-    (void)srclen; 
-    (void)out; 
-    (void)outlen; 
-    abort(); 
-#endif 
-} 
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "libbase64.h"
+#include "codecs.h"
+
+#ifdef __AVX2__
+#include <immintrin.h>
+
+#define CMPGT(s,n)	_mm256_cmpgt_epi8((s), _mm256_set1_epi8(n))
+#define CMPEQ(s,n)	_mm256_cmpeq_epi8((s), _mm256_set1_epi8(n))
+#define REPLACE(s,n)	_mm256_and_si256((s), _mm256_set1_epi8(n))
+#define RANGE(s,a,b)	_mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a) - 1))
+
+static inline __m256i
+_mm256_bswap_epi32 (const __m256i in)
+{
+	// _mm256_shuffle_epi8() works on two 128-bit lanes separately:
+	return _mm256_shuffle_epi8(in, _mm256_setr_epi8(
+		 3,  2,  1,  0,
+		 7,  6,  5,  4,
+		11, 10,  9,  8,
+		15, 14, 13, 12,
+		 3,  2,  1,  0,
+		 7,  6,  5,  4,
+		11, 10,  9,  8,
+		15, 14, 13, 12));
+}
+
+static inline __m256i
+enc_reshuffle (__m256i in)
+{
+	// Spread out 32-bit words over both halves of the input register:
+	in = _mm256_permutevar8x32_epi32(in, _mm256_setr_epi32(
+		0, 1, 2, -1,
+		3, 4, 5, -1));
+
+	// Slice into 32-bit chunks and operate on all chunks in parallel.
+	// All processing is done within the 32-bit chunk. First, shuffle:
+	// before: [eeeeeeff|ccdddddd|bbbbcccc|aaaaaabb]
+	// after:  [00000000|aaaaaabb|bbbbcccc|ccdddddd]
+	in = _mm256_shuffle_epi8(in, _mm256_set_epi8(
+		-1, 9, 10, 11,
+		-1, 6,  7,  8,
+		-1, 3,  4,  5,
+		-1, 0,  1,  2,
+		-1, 9, 10, 11,
+		-1, 6,  7,  8,
+		-1, 3,  4,  5,
+		-1, 0,  1,  2));
+
+	// cd      = [00000000|00000000|0000cccc|ccdddddd]
+	const __m256i cd = _mm256_and_si256(in, _mm256_set1_epi32(0x00000FFF));
+
+	// ab      = [0000aaaa|aabbbbbb|00000000|00000000]
+	const __m256i ab = _mm256_and_si256(_mm256_slli_epi32(in, 4), _mm256_set1_epi32(0x0FFF0000));
+
+	// merged  = [0000aaaa|aabbbbbb|0000cccc|ccdddddd]
+	const __m256i merged = _mm256_or_si256(ab, cd);
+
+	// bd      = [00000000|00bbbbbb|00000000|00dddddd]
+	const __m256i bd = _mm256_and_si256(merged, _mm256_set1_epi32(0x003F003F));
+
+	// ac      = [00aaaaaa|00000000|00cccccc|00000000]
+	const __m256i ac = _mm256_and_si256(_mm256_slli_epi32(merged, 2), _mm256_set1_epi32(0x3F003F00));
+
+	// indices = [00aaaaaa|00bbbbbb|00cccccc|00dddddd]
+	const __m256i indices = _mm256_or_si256(ac, bd);
+
+	// return  = [00dddddd|00cccccc|00bbbbbb|00aaaaaa]
+	return _mm256_bswap_epi32(indices);
+}
+
+static inline __m256i
+enc_translate (const __m256i in)
+{
+	// Translate values 0..63 to the Base64 alphabet. There are five sets:
+	// #  From      To         Abs  Delta  Characters
+	// 0  [0..25]   [65..90]   +65  +65    ABCDEFGHIJKLMNOPQRSTUVWXYZ
+	// 1  [26..51]  [97..122]  +71   +6    abcdefghijklmnopqrstuvwxyz
+	// 2  [52..61]  [48..57]    -4  -75    0123456789
+	// 3  [62]      [43]       -19  -15    +
+	// 4  [63]      [47]       -16   +3    /
+
+	// Create cumulative masks for characters in sets [1,2,3,4], [2,3,4],
+	// [3,4], and [4]:
+	const __m256i mask1 = CMPGT(in, 25);
+	const __m256i mask2 = CMPGT(in, 51);
+	const __m256i mask3 = CMPGT(in, 61);
+	const __m256i mask4 = CMPEQ(in, 63);
+
+	// All characters are at least in cumulative set 0, so add 'A':
+	__m256i out = _mm256_add_epi8(in, _mm256_set1_epi8(65));
+
+	// For inputs which are also in any of the other cumulative sets,
+	// add delta values against the previous set(s) to correct the shift:
+	out = _mm256_add_epi8(out, REPLACE(mask1,  6));
+	out = _mm256_sub_epi8(out, REPLACE(mask2, 75));
+	out = _mm256_sub_epi8(out, REPLACE(mask3, 15));
+	out = _mm256_add_epi8(out, REPLACE(mask4,  3));
+
+	return out;
+}
+
+static inline __m256i
+dec_reshuffle (__m256i in)
+{
+	// Shuffle bytes to 32-bit bigendian:
+	in = _mm256_bswap_epi32(in);
+
+	// Mask in a single byte per shift:
+	__m256i mask = _mm256_set1_epi32(0x3F000000);
+
+	// Pack bytes together:
+	__m256i out = _mm256_slli_epi32(_mm256_and_si256(in, mask), 2);
+	mask = _mm256_srli_epi32(mask, 8);
+
+	out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 4));
+	mask = _mm256_srli_epi32(mask, 8);
+
+	out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 6));
+	mask = _mm256_srli_epi32(mask, 8);
+
+	out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 8));
+
+	// Pack bytes together within 32-bit words, discarding words 3 and 7:
+	out = _mm256_shuffle_epi8(out, _mm256_setr_epi8(
+		 3,  2,  1,
+		 7,  6,  5,
+		11, 10,  9,
+		15, 14, 13,
+		-1, -1, -1, -1,
+		 3,  2,  1,
+		 7,  6,  5,
+		11, 10,  9,
+		15, 14, 13,
+		-1, -1, -1, -1));
+
+	// Pack 32-bit words together, squashing empty words 3 and 7:
+	return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(
+		0, 1, 2, 4, 5, 6, -1, -1));
+}
+
+#endif	// __AVX2__
+
+void
+avx2_base64_stream_encode
+	( struct avx2_base64_state	*state
+	, const char		*src
+	, size_t		 srclen
+	, char			*out
+	, size_t		*outlen
+	)
+{
+#if defined(__AVX2__)
+	#include "enc_head.c"
+	#include "enc_avx2.c"
+	#include "enc_tail.c"
+#else
+    (void)state;
+    (void)src;
+    (void)srclen;
+    (void)out;
+    (void)outlen;
+    abort();
+#endif
+}
+
+int
+avx2_base64_stream_decode
+	( struct avx2_base64_state	*state
+	, const char		*src
+	, size_t		 srclen
+	, char			*out
+	, size_t		*outlen
+	)
+{
+#if defined(__AVX2__)
+	#include "dec_head.c"
+	#include "dec_avx2.c"
+	#include "dec_tail.c"
+#else
+    (void)state;
+    (void)src;
+    (void)srclen;
+    (void)out;
+    (void)outlen;
+    abort();
+#endif
+}
author	yazevnul <yazevnul@yandex-team.ru>	2022-02-10 16:46:48 +0300
committer	Daniil Cherednik <dcherednik@yandex-team.ru>	2022-02-10 16:46:48 +0300
commit	9abfb1a53b7f7b791444d1378e645d8fad9b06ed (patch)
tree	49e222ea1c5804306084bb3ae065bb702625360f /contrib/libs/base64/avx2/codec_avx2.c
parent	8cbc307de0221f84c80c42dcbe07d40727537e2c (diff)
download	ydb-9abfb1a53b7f7b791444d1378e645d8fad9b06ed.tar.gz