aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/base64/neon32
diff options
context:
space:
mode:
author yazevnul <yazevnul@yandex-team.ru> 2022-02-10 16:46:46 +0300
committer Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:46:46 +0300
commit 8cbc307de0221f84c80c42dcbe07d40727537e2c (patch)
tree 625d5a673015d1df891e051033e9fcde5c7be4e5 /contrib/libs/base64/neon32
parent 30d1ef3941e0dc835be7609de5ebee66958f215a (diff)
download ydb-8cbc307de0221f84c80c42dcbe07d40727537e2c.tar.gz
Restoring authorship annotation for <yazevnul@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/base64/neon32')
-rw-r--r-- contrib/libs/base64/neon32/codec_neon32.c 320
-rw-r--r-- contrib/libs/base64/neon32/codecs.h 70
-rw-r--r-- contrib/libs/base64/neon32/dec_head.c 58
-rw-r--r-- contrib/libs/base64/neon32/dec_neon.c 154
-rw-r--r-- contrib/libs/base64/neon32/dec_tail.c 130
-rw-r--r-- contrib/libs/base64/neon32/dec_uint32.c 96
-rw-r--r-- contrib/libs/base64/neon32/enc_head.c 46
-rw-r--r-- contrib/libs/base64/neon32/enc_neon.c 46
-rw-r--r-- contrib/libs/base64/neon32/enc_tail.c 56
-rw-r--r-- contrib/libs/base64/neon32/enc_uint32.c 48
-rw-r--r-- contrib/libs/base64/neon32/lib.c 242
-rw-r--r-- contrib/libs/base64/neon32/libbase64.h 178
-rw-r--r-- contrib/libs/base64/neon32/ya.make 32
13 files changed, 738 insertions, 738 deletions
diff --git a/contrib/libs/base64/neon32/codec_neon32.c b/contrib/libs/base64/neon32/codec_neon32.c
index 2c9ae02f75..05fcfc3e63 100644
--- a/contrib/libs/base64/neon32/codec_neon32.c
+++ b/contrib/libs/base64/neon32/codec_neon32.c
@@ -1,160 +1,160 @@
-#if (defined(__ARM_NEON) && !defined(__ARM_NEON__))
-#define __ARM_NEON__
-#endif
-
-#include <stdint.h>
-#include <stddef.h>
-#include <stdlib.h>
-#ifdef __ARM_NEON__
-#include <arm_neon.h>
-#endif
-
-#include "libbase64.h"
-#include "codecs.h"
-
-#if (defined(__arm__) && defined(__ARM_NEON__))
-
-#define CMPGT(s,n) vcgtq_u8((s), vdupq_n_u8(n))
-#define CMPEQ(s,n) vceqq_u8((s), vdupq_n_u8(n))
-#define REPLACE(s,n) vandq_u8((s), vdupq_n_u8(n))
-#define RANGE(s,a,b) vandq_u8(vcgeq_u8((s), vdupq_n_u8(a)), vcleq_u8((s), vdupq_n_u8(b)))
-
-static inline uint8x16x4_t
-enc_reshuffle (uint8x16x3_t in)
-{
- uint8x16x4_t out;
-
- // Divide bits of three input bytes over four output bytes:
- out.val[0] = vshrq_n_u8(in.val[0], 2);
- out.val[1] = vorrq_u8(vshrq_n_u8(in.val[1], 4), vshlq_n_u8(in.val[0], 4));
- out.val[2] = vorrq_u8(vshrq_n_u8(in.val[2], 6), vshlq_n_u8(in.val[1], 2));
- out.val[3] = in.val[2];
-
- // Clear top two bits:
- out.val[0] = vandq_u8(out.val[0], vdupq_n_u8(0x3F));
- out.val[1] = vandq_u8(out.val[1], vdupq_n_u8(0x3F));
- out.val[2] = vandq_u8(out.val[2], vdupq_n_u8(0x3F));
- out.val[3] = vandq_u8(out.val[3], vdupq_n_u8(0x3F));
-
- return out;
-}
-
-static inline uint8x16x4_t
-enc_translate (uint8x16x4_t in)
-{
- uint8x16x4_t mask1, mask2, mask3, mask4, out;
-
- // Translate values 0..63 to the Base64 alphabet. There are five sets:
- // # From To Abs Delta Characters
- // 0 [0..25] [65..90] +65 +65 ABCDEFGHIJKLMNOPQRSTUVWXYZ
- // 1 [26..51] [97..122] +71 +6 abcdefghijklmnopqrstuvwxyz
- // 2 [52..61] [48..57] -4 -75 0123456789
- // 3 [62] [43] -19 -15 +
- // 4 [63] [47] -16 +3 /
-
- // Create cumulative masks for characters in sets [1,2,3,4], [2,3,4],
- // [3,4], and [4]:
- mask1.val[0] = CMPGT(in.val[0], 25);
- mask1.val[1] = CMPGT(in.val[1], 25);
- mask1.val[2] = CMPGT(in.val[2], 25);
- mask1.val[3] = CMPGT(in.val[3], 25);
-
- mask2.val[0] = CMPGT(in.val[0], 51);
- mask2.val[1] = CMPGT(in.val[1], 51);
- mask2.val[2] = CMPGT(in.val[2], 51);
- mask2.val[3] = CMPGT(in.val[3], 51);
-
- mask3.val[0] = CMPGT(in.val[0], 61);
- mask3.val[1] = CMPGT(in.val[1], 61);
- mask3.val[2] = CMPGT(in.val[2], 61);
- mask3.val[3] = CMPGT(in.val[3], 61);
-
- mask4.val[0] = CMPEQ(in.val[0], 63);
- mask4.val[1] = CMPEQ(in.val[1], 63);
- mask4.val[2] = CMPEQ(in.val[2], 63);
- mask4.val[3] = CMPEQ(in.val[3], 63);
-
- // All characters are at least in cumulative set 0, so add 'A':
- out.val[0] = vaddq_u8(in.val[0], vdupq_n_u8(65));
- out.val[1] = vaddq_u8(in.val[1], vdupq_n_u8(65));
- out.val[2] = vaddq_u8(in.val[2], vdupq_n_u8(65));
- out.val[3] = vaddq_u8(in.val[3], vdupq_n_u8(65));
-
- // For inputs which are also in any of the other cumulative sets,
- // add delta values against the previous set(s) to correct the shift:
- out.val[0] = vaddq_u8(out.val[0], REPLACE(mask1.val[0], 6));
- out.val[1] = vaddq_u8(out.val[1], REPLACE(mask1.val[1], 6));
- out.val[2] = vaddq_u8(out.val[2], REPLACE(mask1.val[2], 6));
- out.val[3] = vaddq_u8(out.val[3], REPLACE(mask1.val[3], 6));
-
- out.val[0] = vsubq_u8(out.val[0], REPLACE(mask2.val[0], 75));
- out.val[1] = vsubq_u8(out.val[1], REPLACE(mask2.val[1], 75));
- out.val[2] = vsubq_u8(out.val[2], REPLACE(mask2.val[2], 75));
- out.val[3] = vsubq_u8(out.val[3], REPLACE(mask2.val[3], 75));
-
- out.val[0] = vsubq_u8(out.val[0], REPLACE(mask3.val[0], 15));
- out.val[1] = vsubq_u8(out.val[1], REPLACE(mask3.val[1], 15));
- out.val[2] = vsubq_u8(out.val[2], REPLACE(mask3.val[2], 15));
- out.val[3] = vsubq_u8(out.val[3], REPLACE(mask3.val[3], 15));
-
- out.val[0] = vaddq_u8(out.val[0], REPLACE(mask4.val[0], 3));
- out.val[1] = vaddq_u8(out.val[1], REPLACE(mask4.val[1], 3));
- out.val[2] = vaddq_u8(out.val[2], REPLACE(mask4.val[2], 3));
- out.val[3] = vaddq_u8(out.val[3], REPLACE(mask4.val[3], 3));
-
- return out;
-}
-
-#endif
-
-// Stride size is so large on these NEON 32-bit functions
-// (48 bytes encode, 32 bytes decode) that we inline the
-// uint32 codec to stay performant on smaller inputs.
-
-void
-neon32_base64_stream_encode
- ( struct neon32_base64_state *state
- , const char *src
- , size_t srclen
- , char *out
- , size_t *outlen
- )
-{
-#if (defined(__arm__) && defined(__ARM_NEON__))
- #include "enc_head.c"
- #include "enc_neon.c"
- #include "enc_uint32.c"
- #include "enc_tail.c"
-#else
- (void)state;
- (void)src;
- (void)srclen;
- (void)out;
- (void)outlen;
- abort();
-#endif
-}
-
-int
-neon32_base64_stream_decode
- ( struct neon32_base64_state *state
- , const char *src
- , size_t srclen
- , char *out
- , size_t *outlen
- )
-{
-#if (defined(__arm__) && defined(__ARM_NEON__))
- #include "dec_head.c"
- #include "dec_neon.c"
- #include "dec_uint32.c"
- #include "dec_tail.c"
-#else
- (void)state;
- (void)src;
- (void)srclen;
- (void)out;
- (void)outlen;
- abort();
-#endif
-}
+#if (defined(__ARM_NEON) && !defined(__ARM_NEON__))
+#define __ARM_NEON__
+#endif
+
+#include <stdint.h>
+#include <stddef.h>
+#include <stdlib.h>
+#ifdef __ARM_NEON__
+#include <arm_neon.h>
+#endif
+
+#include "libbase64.h"
+#include "codecs.h"
+
+#if (defined(__arm__) && defined(__ARM_NEON__))
+
+#define CMPGT(s,n) vcgtq_u8((s), vdupq_n_u8(n))
+#define CMPEQ(s,n) vceqq_u8((s), vdupq_n_u8(n))
+#define REPLACE(s,n) vandq_u8((s), vdupq_n_u8(n))
+#define RANGE(s,a,b) vandq_u8(vcgeq_u8((s), vdupq_n_u8(a)), vcleq_u8((s), vdupq_n_u8(b)))
+
+static inline uint8x16x4_t
+enc_reshuffle (uint8x16x3_t in)
+{
+ uint8x16x4_t out;
+
+ // Divide bits of three input bytes over four output bytes:
+ out.val[0] = vshrq_n_u8(in.val[0], 2);
+ out.val[1] = vorrq_u8(vshrq_n_u8(in.val[1], 4), vshlq_n_u8(in.val[0], 4));
+ out.val[2] = vorrq_u8(vshrq_n_u8(in.val[2], 6), vshlq_n_u8(in.val[1], 2));
+ out.val[3] = in.val[2];
+
+ // Clear top two bits:
+ out.val[0] = vandq_u8(out.val[0], vdupq_n_u8(0x3F));
+ out.val[1] = vandq_u8(out.val[1], vdupq_n_u8(0x3F));
+ out.val[2] = vandq_u8(out.val[2], vdupq_n_u8(0x3F));
+ out.val[3] = vandq_u8(out.val[3], vdupq_n_u8(0x3F));
+
+ return out;
+}
+
+static inline uint8x16x4_t
+enc_translate (uint8x16x4_t in)
+{
+ uint8x16x4_t mask1, mask2, mask3, mask4, out;
+
+ // Translate values 0..63 to the Base64 alphabet. There are five sets:
+ // # From To Abs Delta Characters
+ // 0 [0..25] [65..90] +65 +65 ABCDEFGHIJKLMNOPQRSTUVWXYZ
+ // 1 [26..51] [97..122] +71 +6 abcdefghijklmnopqrstuvwxyz
+ // 2 [52..61] [48..57] -4 -75 0123456789
+ // 3 [62] [43] -19 -15 +
+ // 4 [63] [47] -16 +3 /
+
+ // Create cumulative masks for characters in sets [1,2,3,4], [2,3,4],
+ // [3,4], and [4]:
+ mask1.val[0] = CMPGT(in.val[0], 25);
+ mask1.val[1] = CMPGT(in.val[1], 25);
+ mask1.val[2] = CMPGT(in.val[2], 25);
+ mask1.val[3] = CMPGT(in.val[3], 25);
+
+ mask2.val[0] = CMPGT(in.val[0], 51);
+ mask2.val[1] = CMPGT(in.val[1], 51);
+ mask2.val[2] = CMPGT(in.val[2], 51);
+ mask2.val[3] = CMPGT(in.val[3], 51);
+
+ mask3.val[0] = CMPGT(in.val[0], 61);
+ mask3.val[1] = CMPGT(in.val[1], 61);
+ mask3.val[2] = CMPGT(in.val[2], 61);
+ mask3.val[3] = CMPGT(in.val[3], 61);
+
+ mask4.val[0] = CMPEQ(in.val[0], 63);
+ mask4.val[1] = CMPEQ(in.val[1], 63);
+ mask4.val[2] = CMPEQ(in.val[2], 63);
+ mask4.val[3] = CMPEQ(in.val[3], 63);
+
+ // All characters are at least in cumulative set 0, so add 'A':
+ out.val[0] = vaddq_u8(in.val[0], vdupq_n_u8(65));
+ out.val[1] = vaddq_u8(in.val[1], vdupq_n_u8(65));
+ out.val[2] = vaddq_u8(in.val[2], vdupq_n_u8(65));
+ out.val[3] = vaddq_u8(in.val[3], vdupq_n_u8(65));
+
+ // For inputs which are also in any of the other cumulative sets,
+ // add delta values against the previous set(s) to correct the shift:
+ out.val[0] = vaddq_u8(out.val[0], REPLACE(mask1.val[0], 6));
+ out.val[1] = vaddq_u8(out.val[1], REPLACE(mask1.val[1], 6));
+ out.val[2] = vaddq_u8(out.val[2], REPLACE(mask1.val[2], 6));
+ out.val[3] = vaddq_u8(out.val[3], REPLACE(mask1.val[3], 6));
+
+ out.val[0] = vsubq_u8(out.val[0], REPLACE(mask2.val[0], 75));
+ out.val[1] = vsubq_u8(out.val[1], REPLACE(mask2.val[1], 75));
+ out.val[2] = vsubq_u8(out.val[2], REPLACE(mask2.val[2], 75));
+ out.val[3] = vsubq_u8(out.val[3], REPLACE(mask2.val[3], 75));
+
+ out.val[0] = vsubq_u8(out.val[0], REPLACE(mask3.val[0], 15));
+ out.val[1] = vsubq_u8(out.val[1], REPLACE(mask3.val[1], 15));
+ out.val[2] = vsubq_u8(out.val[2], REPLACE(mask3.val[2], 15));
+ out.val[3] = vsubq_u8(out.val[3], REPLACE(mask3.val[3], 15));
+
+ out.val[0] = vaddq_u8(out.val[0], REPLACE(mask4.val[0], 3));
+ out.val[1] = vaddq_u8(out.val[1], REPLACE(mask4.val[1], 3));
+ out.val[2] = vaddq_u8(out.val[2], REPLACE(mask4.val[2], 3));
+ out.val[3] = vaddq_u8(out.val[3], REPLACE(mask4.val[3], 3));
+
+ return out;
+}
+
+#endif
+
+// Stride size is so large on these NEON 32-bit functions
+// (48 bytes encode, 64 bytes decode) that we inline the
+// uint32 codec to stay performant on smaller inputs.
+
+void
+neon32_base64_stream_encode
+ ( struct neon32_base64_state *state
+ , const char *src
+ , size_t srclen
+ , char *out
+ , size_t *outlen
+ )
+{
+#if (defined(__arm__) && defined(__ARM_NEON__))
+ #include "enc_head.c"
+ #include "enc_neon.c"
+ #include "enc_uint32.c"
+ #include "enc_tail.c"
+#else
+ (void)state;
+ (void)src;
+ (void)srclen;
+ (void)out;
+ (void)outlen;
+ abort();
+#endif
+}
+
+int
+neon32_base64_stream_decode
+ ( struct neon32_base64_state *state
+ , const char *src
+ , size_t srclen
+ , char *out
+ , size_t *outlen
+ )
+{
+#if (defined(__arm__) && defined(__ARM_NEON__))
+ #include "dec_head.c"
+ #include "dec_neon.c"
+ #include "dec_uint32.c"
+ #include "dec_tail.c"
+#else
+ (void)state;
+ (void)src;
+ (void)srclen;
+ (void)out;
+ (void)outlen;
+ abort();
+#endif
+}
diff --git a/contrib/libs/base64/neon32/codecs.h b/contrib/libs/base64/neon32/codecs.h
index 5c9ec309c2..23cca82c6f 100644
--- a/contrib/libs/base64/neon32/codecs.h
+++ b/contrib/libs/base64/neon32/codecs.h
@@ -1,35 +1,35 @@
-#pragma once
-
-// Define machine endianness. This is for GCC:
-#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
- #define BASE64_NEON32_LITTLE_ENDIAN 1
-#else
- #define BASE64_NEON32_LITTLE_ENDIAN 0
-#endif
-
-// This is for Clang:
-#ifdef __LITTLE_ENDIAN__
- #define BASE64_NEON32_LITTLE_ENDIAN 1
-#endif
-
-#ifdef __BIG_ENDIAN__
- #define BASE64_NEON32_LITTLE_ENDIAN 0
-#endif
-
-// Endian conversion functions
-#if BASE64_NEON32_LITTLE_ENDIAN
- #define cpu_to_be32(x) __builtin_bswap32(x)
- #define cpu_to_be64(x) __builtin_bswap64(x)
- #define be32_to_cpu(x) __builtin_bswap32(x)
- #define be64_to_cpu(x) __builtin_bswap64(x)
-#else
- #define cpu_to_be32(x) (x)
- #define cpu_to_be64(x) (x)
- #define be32_to_cpu(x) (x)
- #define be64_to_cpu(x) (x)
-#endif
-
-// These tables are used by all codecs
-// for fallback plain encoding/decoding:
-extern const uint8_t neon32_base64_table_enc[];
-extern const uint8_t neon32_base64_table_dec[];
+#pragma once
+
+// Define machine endianness. This is for GCC:
+#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+ #define BASE64_NEON32_LITTLE_ENDIAN 1
+#else
+ #define BASE64_NEON32_LITTLE_ENDIAN 0
+#endif
+
+// This is for Clang:
+#ifdef __LITTLE_ENDIAN__
+ #define BASE64_NEON32_LITTLE_ENDIAN 1
+#endif
+
+#ifdef __BIG_ENDIAN__
+ #define BASE64_NEON32_LITTLE_ENDIAN 0
+#endif
+
+// Endian conversion functions
+#if BASE64_NEON32_LITTLE_ENDIAN
+ #define cpu_to_be32(x) __builtin_bswap32(x)
+ #define cpu_to_be64(x) __builtin_bswap64(x)
+ #define be32_to_cpu(x) __builtin_bswap32(x)
+ #define be64_to_cpu(x) __builtin_bswap64(x)
+#else
+ #define cpu_to_be32(x) (x)
+ #define cpu_to_be64(x) (x)
+ #define be32_to_cpu(x) (x)
+ #define be64_to_cpu(x) (x)
+#endif
+
+// These tables are used by all codecs
+// for fallback plain encoding/decoding:
+extern const uint8_t neon32_base64_table_enc[];
+extern const uint8_t neon32_base64_table_dec[];
diff --git a/contrib/libs/base64/neon32/dec_head.c b/contrib/libs/base64/neon32/dec_head.c
index bd023118ff..2802093555 100644
--- a/contrib/libs/base64/neon32/dec_head.c
+++ b/contrib/libs/base64/neon32/dec_head.c
@@ -1,29 +1,29 @@
-int ret = 0;
-const uint8_t *c = (const uint8_t *)src;
-uint8_t *o = (uint8_t *)out;
-uint8_t q;
-
-// Use local temporaries to avoid cache thrashing:
-size_t outl = 0;
-struct neon32_base64_state st;
-st.eof = state->eof;
-st.bytes = state->bytes;
-st.carry = state->carry;
-
-// If we previously saw an EOF or an invalid character, bail out:
-if (st.eof) {
- *outlen = 0;
- return 0;
-}
-
-// Turn four 6-bit numbers into three bytes:
-// out[0] = 11111122
-// out[1] = 22223333
-// out[2] = 33444444
-
-// Duff's device again:
-switch (st.bytes)
-{
- for (;;)
- {
- case 0:
+int ret = 0;
+const uint8_t *c = (const uint8_t *)src;
+uint8_t *o = (uint8_t *)out;
+uint8_t q;
+
+// Use local temporaries to avoid cache thrashing:
+size_t outl = 0;
+struct neon32_base64_state st;
+st.eof = state->eof;
+st.bytes = state->bytes;
+st.carry = state->carry;
+
+// If we previously saw an EOF or an invalid character, bail out:
+if (st.eof) {
+ *outlen = 0;
+ return 0;
+}
+
+// Turn four 6-bit numbers into three bytes:
+// out[0] = 11111122
+// out[1] = 22223333
+// out[2] = 33444444
+
+// Duff's device again:
+switch (st.bytes)
+{
+ for (;;)
+ {
+ case 0:
diff --git a/contrib/libs/base64/neon32/dec_neon.c b/contrib/libs/base64/neon32/dec_neon.c
index 713d8ca9a4..30d846a916 100644
--- a/contrib/libs/base64/neon32/dec_neon.c
+++ b/contrib/libs/base64/neon32/dec_neon.c
@@ -1,77 +1,77 @@
-// If we have NEON support, pick off 64 bytes at a time for as long as we can.
-// Unlike the SSE codecs, we don't write trailing zero bytes to output, so we
-// don't need to check if we have enough remaining input to cover them:
-while (srclen >= 64)
-{
- uint8x16x4_t set1, set2, set3, set4, set5, set6, set7, delta;
- uint8x16x3_t dec;
-
- // Load 64 bytes and deinterleave:
- uint8x16x4_t str = vld4q_u8((uint8_t *)c);
-
- // The input consists of six character sets in the Base64 alphabet,
- // which we need to map back to the 6-bit values they represent.
- // There are three ranges, two singles, and then there's the rest.
- //
- // # From To Add Characters
- // 1 [43] [62] +19 +
- // 2 [47] [63] +16 /
- // 3 [48..57] [52..61] +4 0..9
- // 4 [65..90] [0..25] -65 A..Z
- // 5 [97..122] [26..51] -71 a..z
- // (6) Everything else => invalid input
-
- // Benchmarking on the Raspberry Pi 2B and Clang shows that looping
- // generates slightly faster code than explicit unrolling:
- for (int i = 0; i < 4; i++) {
- set1.val[i] = CMPEQ(str.val[i], '+');
- set2.val[i] = CMPEQ(str.val[i], '/');
- set3.val[i] = RANGE(str.val[i], '0', '9');
- set4.val[i] = RANGE(str.val[i], 'A', 'Z');
- set5.val[i] = RANGE(str.val[i], 'a', 'z');
- set6.val[i] = CMPEQ(str.val[i], '-');
- set7.val[i] = CMPEQ(str.val[i], '_');
-
- delta.val[i] = REPLACE(set1.val[i], 19);
- delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set2.val[i], 16));
- delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set3.val[i], 4));
- delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set4.val[i], -65));
- delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set5.val[i], -71));
- delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set6.val[i], 17));
- delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set7.val[i], -32));
- }
-
- // Check for invalid input: if any of the delta values are zero,
- // fall back on bytewise code to do error checking and reporting:
- uint8x16_t classified = CMPEQ(delta.val[0], 0);
- classified = vorrq_u8(classified, CMPEQ(delta.val[1], 0));
- classified = vorrq_u8(classified, CMPEQ(delta.val[2], 0));
- classified = vorrq_u8(classified, CMPEQ(delta.val[3], 0));
-
- // Extract both 32-bit halves; check that all bits are zero:
- if (vgetq_lane_u32((uint32x4_t)classified, 0) != 0
- || vgetq_lane_u32((uint32x4_t)classified, 1) != 0
- || vgetq_lane_u32((uint32x4_t)classified, 2) != 0
- || vgetq_lane_u32((uint32x4_t)classified, 3) != 0) {
- break;
- }
-
- // Now simply add the delta values to the input:
- str.val[0] = vaddq_u8(str.val[0], delta.val[0]);
- str.val[1] = vaddq_u8(str.val[1], delta.val[1]);
- str.val[2] = vaddq_u8(str.val[2], delta.val[2]);
- str.val[3] = vaddq_u8(str.val[3], delta.val[3]);
-
- // Compress four bytes into three:
- dec.val[0] = vshlq_n_u8(str.val[0], 2) | vshrq_n_u8(str.val[1], 4);
- dec.val[1] = vshlq_n_u8(str.val[1], 4) | vshrq_n_u8(str.val[2], 2);
- dec.val[2] = vshlq_n_u8(str.val[2], 6) | str.val[3];
-
- // Interleave and store decoded result:
- vst3q_u8((uint8_t *)o, dec);
-
- c += 64;
- o += 48;
- outl += 48;
- srclen -= 64;
-}
+// If we have NEON support, pick off 64 bytes at a time for as long as we can.
+// Unlike the SSE codecs, we don't write trailing zero bytes to output, so we
+// don't need to check if we have enough remaining input to cover them:
+while (srclen >= 64)
+{
+ uint8x16x4_t set1, set2, set3, set4, set5, set6, set7, delta;
+ uint8x16x3_t dec;
+
+ // Load 64 bytes and deinterleave:
+ uint8x16x4_t str = vld4q_u8((uint8_t *)c);
+
+ // Map each input character back to the 6-bit value it represents. Both
+ // the standard ('+', '/') and URL-safe ('-', '_') symbols are accepted:
+ // # From To Add Characters
+ // 1 [43] [62] +19 +
+ // 2 [47] [63] +16 /
+ // 3 [48..57] [52..61] +4 0..9
+ // 4 [65..90] [0..25] -65 A..Z
+ // 5 [97..122] [26..51] -71 a..z
+ // 6 [45] [62] +17 -
+ // 7 [95] [63] -32 _
+ // (8) Everything else => invalid input
+
+ // Benchmarking on the Raspberry Pi 2B and Clang shows that looping
+ // generates slightly faster code than explicit unrolling:
+ for (int i = 0; i < 4; i++) {
+ set1.val[i] = CMPEQ(str.val[i], '+');
+ set2.val[i] = CMPEQ(str.val[i], '/');
+ set3.val[i] = RANGE(str.val[i], '0', '9');
+ set4.val[i] = RANGE(str.val[i], 'A', 'Z');
+ set5.val[i] = RANGE(str.val[i], 'a', 'z');
+ set6.val[i] = CMPEQ(str.val[i], '-');
+ set7.val[i] = CMPEQ(str.val[i], '_');
+
+ delta.val[i] = REPLACE(set1.val[i], 19);
+ delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set2.val[i], 16));
+ delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set3.val[i], 4));
+ delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set4.val[i], -65));
+ delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set5.val[i], -71));
+ delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set6.val[i], 17));
+ delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set7.val[i], -32));
+ }
+
+ // Check for invalid input: if any of the delta values are zero,
+ // fall back on bytewise code to do error checking and reporting:
+ uint8x16_t classified = CMPEQ(delta.val[0], 0);
+ classified = vorrq_u8(classified, CMPEQ(delta.val[1], 0));
+ classified = vorrq_u8(classified, CMPEQ(delta.val[2], 0));
+ classified = vorrq_u8(classified, CMPEQ(delta.val[3], 0));
+
+ // Extract all four 32-bit lanes; check that all bits are zero:
+ if (vgetq_lane_u32((uint32x4_t)classified, 0) != 0
+ || vgetq_lane_u32((uint32x4_t)classified, 1) != 0
+ || vgetq_lane_u32((uint32x4_t)classified, 2) != 0
+ || vgetq_lane_u32((uint32x4_t)classified, 3) != 0) {
+ break;
+ }
+
+ // Now simply add the delta values to the input:
+ str.val[0] = vaddq_u8(str.val[0], delta.val[0]);
+ str.val[1] = vaddq_u8(str.val[1], delta.val[1]);
+ str.val[2] = vaddq_u8(str.val[2], delta.val[2]);
+ str.val[3] = vaddq_u8(str.val[3], delta.val[3]);
+
+ // Compress four bytes into three:
+ dec.val[0] = vshlq_n_u8(str.val[0], 2) | vshrq_n_u8(str.val[1], 4);
+ dec.val[1] = vshlq_n_u8(str.val[1], 4) | vshrq_n_u8(str.val[2], 2);
+ dec.val[2] = vshlq_n_u8(str.val[2], 6) | str.val[3];
+
+ // Interleave and store decoded result:
+ vst3q_u8((uint8_t *)o, dec);
+
+ c += 64;
+ o += 48;
+ outl += 48;
+ srclen -= 64;
+}
diff --git a/contrib/libs/base64/neon32/dec_tail.c b/contrib/libs/base64/neon32/dec_tail.c
index 4844677e6d..beb453a467 100644
--- a/contrib/libs/base64/neon32/dec_tail.c
+++ b/contrib/libs/base64/neon32/dec_tail.c
@@ -1,65 +1,65 @@
- if (srclen-- == 0) {
- ret = 1;
- break;
- }
- if ((q = neon32_base64_table_dec[*c++]) >= 254) {
- st.eof = 1;
- // Treat character '=' as invalid for byte 0:
- break;
- }
- st.carry = q << 2;
- st.bytes++;
-
- case 1: if (srclen-- == 0) {
- ret = 1;
- break;
- }
- if ((q = neon32_base64_table_dec[*c++]) >= 254) {
- st.eof = 1;
- // Treat character '=' as invalid for byte 1:
- break;
- }
- *o++ = st.carry | (q >> 4);
- st.carry = q << 4;
- st.bytes++;
- outl++;
-
- case 2: if (srclen-- == 0) {
- ret = 1;
- break;
- }
- if ((q = neon32_base64_table_dec[*c++]) >= 254) {
- st.eof = 1;
- // When q == 254, the input char is '='. Return 1 and EOF.
- // Technically, should check if next byte is also '=', but never mind.
- // When q == 255, the input char is invalid. Return 0 and EOF.
- ret = (q == 254) ? 1 : 0;
- break;
- }
- *o++ = st.carry | (q >> 2);
- st.carry = q << 6;
- st.bytes++;
- outl++;
-
- case 3: if (srclen-- == 0) {
- ret = 1;
- break;
- }
- if ((q = neon32_base64_table_dec[*c++]) >= 254) {
- st.eof = 1;
- // When q == 254, the input char is '='. Return 1 and EOF.
- // When q == 255, the input char is invalid. Return 0 and EOF.
- ret = (q == 254) ? 1 : 0;
- break;
- }
- *o++ = st.carry | q;
- st.carry = 0;
- st.bytes = 0;
- outl++;
- }
-}
-state->eof = st.eof;
-state->bytes = st.bytes;
-state->carry = st.carry;
-*outlen = outl;
-return ret;
+ if (srclen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = neon32_base64_table_dec[*c++]) >= 254) {
+ st.eof = 1;
+ // Treat character '=' as invalid for byte 0:
+ break;
+ }
+ st.carry = q << 2;
+ st.bytes++;
+
+ case 1: if (srclen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = neon32_base64_table_dec[*c++]) >= 254) {
+ st.eof = 1;
+ // Treat character '=' as invalid for byte 1:
+ break;
+ }
+ *o++ = st.carry | (q >> 4);
+ st.carry = q << 4;
+ st.bytes++;
+ outl++;
+
+ case 2: if (srclen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = neon32_base64_table_dec[*c++]) >= 254) {
+ st.eof = 1;
+ // When q == 254, the input char is '='. Return 1 and EOF.
+ // Technically, should check if next byte is also '=', but never mind.
+ // When q == 255, the input char is invalid. Return 0 and EOF.
+ ret = (q == 254) ? 1 : 0;
+ break;
+ }
+ *o++ = st.carry | (q >> 2);
+ st.carry = q << 6;
+ st.bytes++;
+ outl++;
+
+ case 3: if (srclen-- == 0) {
+ ret = 1;
+ break;
+ }
+ if ((q = neon32_base64_table_dec[*c++]) >= 254) {
+ st.eof = 1;
+ // When q == 254, the input char is '='. Return 1 and EOF.
+ // When q == 255, the input char is invalid. Return 0 and EOF.
+ ret = (q == 254) ? 1 : 0;
+ break;
+ }
+ *o++ = st.carry | q;
+ st.carry = 0;
+ st.bytes = 0;
+ outl++;
+ }
+}
+state->eof = st.eof;
+state->bytes = st.bytes;
+state->carry = st.carry;
+*outlen = outl;
+return ret;
diff --git a/contrib/libs/base64/neon32/dec_uint32.c b/contrib/libs/base64/neon32/dec_uint32.c
index 5856446861..052284c7e6 100644
--- a/contrib/libs/base64/neon32/dec_uint32.c
+++ b/contrib/libs/base64/neon32/dec_uint32.c
@@ -1,48 +1,48 @@
-// If we have native uint32's, pick off 4 bytes at a time for as long as we
-// can, but make sure that we quit before seeing any == markers at the end of
-// the string. Also, because we write a zero at the end of the output, ensure
-// that there are at least 2 valid bytes of input data remaining to close the
-// gap. 4 + 2 + 2 = 8 bytes:
-while (srclen >= 8)
-{
- uint32_t str, res, dec;
-
- // Load string:
- str = *(uint32_t *)c;
-
- // Shuffle bytes to 32-bit bigendian:
- str = cpu_to_be32(str);
-
- // Lookup each byte in the decoding table; if we encounter any
- // "invalid" values, fall back on the bytewise code:
- if ((dec = neon32_base64_table_dec[str >> 24]) > 63) {
- break;
- }
- res = dec << 26;
-
- if ((dec = neon32_base64_table_dec[(str >> 16) & 0xFF]) > 63) {
- break;
- }
- res |= dec << 20;
-
- if ((dec = neon32_base64_table_dec[(str >> 8) & 0xFF]) > 63) {
- break;
- }
- res |= dec << 14;
-
- if ((dec = neon32_base64_table_dec[str & 0xFF]) > 63) {
- break;
- }
- res |= dec << 8;
-
- // Reshuffle and repack into 3-byte output format:
- res = be32_to_cpu(res);
-
- // Store back:
- *(uint32_t *)o = res;
-
- c += 4;
- o += 3;
- outl += 3;
- srclen -= 4;
-}
+// If we have native uint32's, pick off 4 bytes at a time for as long as we
+// can, but make sure that we quit before seeing any == markers at the end of
+// the string. Also, because we write a zero at the end of the output, ensure
+// that there are at least 2 valid bytes of input data remaining to close the
+// gap. 4 + 2 + 2 = 8 bytes:
+while (srclen >= 8)
+{
+ uint32_t str, res, dec;
+
+ // Load string:
+ str = *(uint32_t *)c;
+
+ // Shuffle bytes to 32-bit bigendian:
+ str = cpu_to_be32(str);
+
+ // Lookup each byte in the decoding table; if we encounter any
+ // "invalid" values, fall back on the bytewise code:
+ if ((dec = neon32_base64_table_dec[str >> 24]) > 63) {
+ break;
+ }
+ res = dec << 26;
+
+ if ((dec = neon32_base64_table_dec[(str >> 16) & 0xFF]) > 63) {
+ break;
+ }
+ res |= dec << 20;
+
+ if ((dec = neon32_base64_table_dec[(str >> 8) & 0xFF]) > 63) {
+ break;
+ }
+ res |= dec << 14;
+
+ if ((dec = neon32_base64_table_dec[str & 0xFF]) > 63) {
+ break;
+ }
+ res |= dec << 8;
+
+ // Reshuffle and repack into 3-byte output format:
+ res = be32_to_cpu(res);
+
+ // Store back:
+ *(uint32_t *)o = res;
+
+ c += 4;
+ o += 3;
+ outl += 3;
+ srclen -= 4;
+}
diff --git a/contrib/libs/base64/neon32/enc_head.c b/contrib/libs/base64/neon32/enc_head.c
index 2b8b88eba3..122ad246b1 100644
--- a/contrib/libs/base64/neon32/enc_head.c
+++ b/contrib/libs/base64/neon32/enc_head.c
@@ -1,23 +1,23 @@
-// Assume that *out is large enough to contain the output.
-// Theoretically it should be 4/3 the length of src.
-const uint8_t *c = (const uint8_t *)src;
-uint8_t *o = (uint8_t *)out;
-
-// Use local temporaries to avoid cache thrashing:
-size_t outl = 0;
-struct neon32_base64_state st;
-st.bytes = state->bytes;
-st.carry = state->carry;
-
-// Turn three bytes into four 6-bit numbers:
-// in[0] = 00111111
-// in[1] = 00112222
-// in[2] = 00222233
-// in[3] = 00333333
-
-// Duff's device, a for() loop inside a switch() statement. Legal!
-switch (st.bytes)
-{
- for (;;)
- {
- case 0:
+// Assume that *out is large enough to contain the output.
+// Theoretically it should be 4/3 the length of src.
+const uint8_t *c = (const uint8_t *)src;
+uint8_t *o = (uint8_t *)out;
+
+// Use local temporaries to avoid cache thrashing:
+size_t outl = 0;
+struct neon32_base64_state st;
+st.bytes = state->bytes;
+st.carry = state->carry;
+
+// Turn three bytes into four 6-bit numbers:
+// in[0] = 00111111
+// in[1] = 00112222
+// in[2] = 00222233
+// in[3] = 00333333
+
+// Duff's device, a for() loop inside a switch() statement. Legal!
+switch (st.bytes)
+{
+ for (;;)
+ {
+ case 0:
diff --git a/contrib/libs/base64/neon32/enc_neon.c b/contrib/libs/base64/neon32/enc_neon.c
index 05d7eb41b5..effb7f9e07 100644
--- a/contrib/libs/base64/neon32/enc_neon.c
+++ b/contrib/libs/base64/neon32/enc_neon.c
@@ -1,23 +1,23 @@
-// If we have ARM NEON support, pick off 48 bytes at a time:
-while (srclen >= 48)
-{
- uint8x16x3_t str;
- uint8x16x4_t res;
-
- // Load 48 bytes and deinterleave:
- str = vld3q_u8((uint8_t *)c);
-
- // Reshuffle:
- res = enc_reshuffle(str);
-
- // Translate reshuffled bytes to the Base64 alphabet:
- res = enc_translate(res);
-
- // Interleave and store result:
- vst4q_u8((uint8_t *)o, res);
-
- c += 48; // 3 * 16 bytes of input
- o += 64; // 4 * 16 bytes of output
- outl += 64;
- srclen -= 48;
-}
+// If we have ARM NEON support, pick off 48 bytes at a time:
+while (srclen >= 48)
+{
+ uint8x16x3_t str;
+ uint8x16x4_t res;
+
+ // Load 48 bytes and deinterleave:
+ str = vld3q_u8((uint8_t *)c);
+
+ // Reshuffle:
+ res = enc_reshuffle(str);
+
+ // Translate reshuffled bytes to the Base64 alphabet:
+ res = enc_translate(res);
+
+ // Interleave and store result:
+ vst4q_u8((uint8_t *)o, res);
+
+ c += 48; // 3 * 16 bytes of input
+ o += 64; // 4 * 16 bytes of output
+ outl += 64;
+ srclen -= 48;
+}
diff --git a/contrib/libs/base64/neon32/enc_tail.c b/contrib/libs/base64/neon32/enc_tail.c
index f19ae5f736..83a5d897e2 100644
--- a/contrib/libs/base64/neon32/enc_tail.c
+++ b/contrib/libs/base64/neon32/enc_tail.c
@@ -1,28 +1,28 @@
- if (srclen-- == 0) {
- break;
- }
- *o++ = neon32_base64_table_enc[*c >> 2];
- st.carry = (*c++ << 4) & 0x30;
- st.bytes++;
- outl += 1;
-
- case 1: if (srclen-- == 0) {
- break;
- }
- *o++ = neon32_base64_table_enc[st.carry | (*c >> 4)];
- st.carry = (*c++ << 2) & 0x3C;
- st.bytes++;
- outl += 1;
-
- case 2: if (srclen-- == 0) {
- break;
- }
- *o++ = neon32_base64_table_enc[st.carry | (*c >> 6)];
- *o++ = neon32_base64_table_enc[*c++ & 0x3F];
- st.bytes = 0;
- outl += 2;
- }
-}
-state->bytes = st.bytes;
-state->carry = st.carry;
-*outlen = outl;
+ if (srclen-- == 0) {
+ break;
+ }
+ *o++ = neon32_base64_table_enc[*c >> 2];
+ st.carry = (*c++ << 4) & 0x30;
+ st.bytes++;
+ outl += 1;
+
+ case 1: if (srclen-- == 0) {
+ break;
+ }
+ *o++ = neon32_base64_table_enc[st.carry | (*c >> 4)];
+ st.carry = (*c++ << 2) & 0x3C;
+ st.bytes++;
+ outl += 1;
+
+ case 2: if (srclen-- == 0) {
+ break;
+ }
+ *o++ = neon32_base64_table_enc[st.carry | (*c >> 6)];
+ *o++ = neon32_base64_table_enc[*c++ & 0x3F];
+ st.bytes = 0;
+ outl += 2;
+ }
+}
+state->bytes = st.bytes;
+state->carry = st.carry;
+*outlen = outl;
diff --git a/contrib/libs/base64/neon32/enc_uint32.c b/contrib/libs/base64/neon32/enc_uint32.c
index c7bc3cd234..a9f49375bd 100644
--- a/contrib/libs/base64/neon32/enc_uint32.c
+++ b/contrib/libs/base64/neon32/enc_uint32.c
@@ -1,24 +1,24 @@
-// If we have 32-bit ints, pick off 3 bytes at a time for as long as we can,
-// but ensure that there are at least 4 bytes available to avoid segfaulting:
-while (srclen >= 4)
-{
- // Load string:
- uint32_t str = *(uint32_t *)c;
-
- // Reorder to 32-bit big-endian, if not already in that format. The
- // workset must be in big-endian, otherwise the shifted bits do not
- // carry over properly among adjacent bytes:
- str = cpu_to_be32(str);
-
- // Shift input by 6 bytes each round and mask in only the lower 6 bits;
- // look up the character in the Base64 encoding table and write it to
- // the output location:
- *o++ = neon32_base64_table_enc[(str >> 26) & 0x3F];
- *o++ = neon32_base64_table_enc[(str >> 20) & 0x3F];
- *o++ = neon32_base64_table_enc[(str >> 14) & 0x3F];
- *o++ = neon32_base64_table_enc[(str >> 8) & 0x3F];
-
- c += 3; // 3 bytes of input
- outl += 4; // 4 bytes of output
- srclen -= 3;
-}
+// If we have 32-bit ints, pick off 3 bytes at a time for as long as we can,
+// but ensure that there are at least 4 bytes available to avoid segfaulting:
+while (srclen >= 4)
+{
+ // Load string:
+ uint32_t str = *(uint32_t *)c;
+
+ // Reorder to 32-bit big-endian, if not already in that format. The
+ // workset must be in big-endian, otherwise the shifted bits do not
+ // carry over properly among adjacent bytes:
+ str = cpu_to_be32(str);
+
+ // Shift input by 6 bits each round and mask in only the lower 6 bits;
+ // look up the character in the Base64 encoding table and write it to
+ // the output location:
+ *o++ = neon32_base64_table_enc[(str >> 26) & 0x3F];
+ *o++ = neon32_base64_table_enc[(str >> 20) & 0x3F];
+ *o++ = neon32_base64_table_enc[(str >> 14) & 0x3F];
+ *o++ = neon32_base64_table_enc[(str >> 8) & 0x3F];
+
+ c += 3; // 3 bytes of input
+ outl += 4; // 4 bytes of output
+ srclen -= 3;
+}
diff --git a/contrib/libs/base64/neon32/lib.c b/contrib/libs/base64/neon32/lib.c
index 52271e925a..10f92c5032 100644
--- a/contrib/libs/base64/neon32/lib.c
+++ b/contrib/libs/base64/neon32/lib.c
@@ -1,121 +1,121 @@
-#include <stdint.h>
-#include <stddef.h>
-
-#include "libbase64.h"
-#include "codecs.h"
-
-const uint8_t
-neon32_base64_table_enc[] =
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz"
- "0123456789+/";
-
-// In the lookup table below, note that the value for '=' (character 61) is
-// 254, not 255. This character is used for in-band signaling of the end of
-// the datastream, and we will use that later. The characters A-Z, a-z, 0-9
-// and + / are mapped to their "decoded" values. The other bytes all map to
-// the value 255, which flags them as "invalid input".
-
-const uint8_t
-neon32_base64_table_dec[] =
-{
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47
- 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63
- 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79
- 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95
- 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111
- 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
- 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
-};
-
-void
-neon32_base64_stream_encode_init (struct neon32_base64_state *state)
-{
- state->eof = 0;
- state->bytes = 0;
- state->carry = 0;
-}
-
-void
-neon32_base64_stream_encode_final
- ( struct neon32_base64_state *state
- , char *out
- , size_t *outlen
- )
-{
- uint8_t *o = (uint8_t *)out;
-
- if (state->bytes == 1) {
- *o++ = neon32_base64_table_enc[state->carry];
- *o++ = '=';
- *o++ = '=';
- *outlen = 3;
- return;
- }
- if (state->bytes == 2) {
- *o++ = neon32_base64_table_enc[state->carry];
- *o++ = '=';
- *outlen = 2;
- return;
- }
- *outlen = 0;
-}
-
-void
-neon32_base64_stream_decode_init (struct neon32_base64_state *state)
-{
- state->eof = 0;
- state->bytes = 0;
- state->carry = 0;
-}
-
-void
-neon32_base64_encode
- ( const char *src
- , size_t srclen
- , char *out
- , size_t *outlen
- )
-{
- size_t s;
- size_t t;
- struct neon32_base64_state state;
-
- // Init the stream reader:
- neon32_base64_stream_encode_init(&state);
-
- // Feed the whole string to the stream reader:
- neon32_base64_stream_encode(&state, src, srclen, out, &s);
-
- // Finalize the stream by writing trailer if any:
- neon32_base64_stream_encode_final(&state, out + s, &t);
-
- // Final output length is stream length plus tail:
- *outlen = s + t;
-}
-
-int
-neon32_base64_decode
- ( const char *src
- , size_t srclen
- , char *out
- , size_t *outlen
- )
-{
- struct neon32_base64_state state;
-
- // Init the stream reader:
- neon32_base64_stream_decode_init(&state);
-
- // Feed the whole string to the stream reader:
- return neon32_base64_stream_decode(&state, src, srclen, out, outlen);
-}
+#include <stdint.h>
+#include <stddef.h>
+
+#include "libbase64.h"
+#include "codecs.h"
+
+const uint8_t
+neon32_base64_table_enc[] =
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
+ "abcdefghijklmnopqrstuvwxyz"
+ "0123456789+/";
+
+// In the lookup table below, note that the value for '=' (character 61) is
+// 254, not 255. This character is used for in-band signaling of the end of
+// the datastream, and we will use that later. The characters A-Z, a-z, 0-9
+// and + / are mapped to their "decoded" values. The other bytes all map to
+// the value 255, which flags them as "invalid input".
+
+const uint8_t
+neon32_base64_table_dec[] =
+{
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47
+ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63
+ 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79
+ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95
+ 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111
+ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+ 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
+};
+
+void
+neon32_base64_stream_encode_init (struct neon32_base64_state *state)
+{
+ state->eof = 0;
+ state->bytes = 0;
+ state->carry = 0;
+}
+
+void
+neon32_base64_stream_encode_final
+ ( struct neon32_base64_state *state
+ , char *out
+ , size_t *outlen
+ )
+{
+ uint8_t *o = (uint8_t *)out;
+
+ if (state->bytes == 1) {
+ *o++ = neon32_base64_table_enc[state->carry];
+ *o++ = '=';
+ *o++ = '=';
+ *outlen = 3;
+ return;
+ }
+ if (state->bytes == 2) {
+ *o++ = neon32_base64_table_enc[state->carry];
+ *o++ = '=';
+ *outlen = 2;
+ return;
+ }
+ *outlen = 0;
+}
+
+void
+neon32_base64_stream_decode_init (struct neon32_base64_state *state)
+{
+ state->eof = 0;
+ state->bytes = 0;
+ state->carry = 0;
+}
+
+void
+neon32_base64_encode
+ ( const char *src
+ , size_t srclen
+ , char *out
+ , size_t *outlen
+ )
+{
+ size_t s;
+ size_t t;
+ struct neon32_base64_state state;
+
+ // Init the stream reader:
+ neon32_base64_stream_encode_init(&state);
+
+ // Feed the whole string to the stream reader:
+ neon32_base64_stream_encode(&state, src, srclen, out, &s);
+
+ // Finalize the stream by writing trailer if any:
+ neon32_base64_stream_encode_final(&state, out + s, &t);
+
+ // Final output length is stream length plus tail:
+ *outlen = s + t;
+}
+
+int
+neon32_base64_decode
+ ( const char *src
+ , size_t srclen
+ , char *out
+ , size_t *outlen
+ )
+{
+ struct neon32_base64_state state;
+
+ // Init the stream reader:
+ neon32_base64_stream_decode_init(&state);
+
+ // Feed the whole string to the stream reader:
+ return neon32_base64_stream_decode(&state, src, srclen, out, outlen);
+}
diff --git a/contrib/libs/base64/neon32/libbase64.h b/contrib/libs/base64/neon32/libbase64.h
index fa975550d8..b78dcc4a7e 100644
--- a/contrib/libs/base64/neon32/libbase64.h
+++ b/contrib/libs/base64/neon32/libbase64.h
@@ -1,89 +1,89 @@
-#pragma once
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct neon32_base64_state {
- int eof;
- int bytes;
- unsigned char carry;
-};
-
-/* Wrapper function to encode a plain string of given length. Output is written
- * to *out without trailing zero. Output length in bytes is written to *outlen.
- * The buffer in `out` has been allocated by the caller and is at least 4/3 the
- * size of the input. See above for `flags`; set to 0 for default operation: */
-void neon32_base64_encode
- ( const char *src
- , size_t srclen
- , char *out
- , size_t *outlen
- ) ;
-
-/* Call this before calling base64_stream_encode() to init the state. See above
- * for `flags`; set to 0 for default operation: */
-void neon32_base64_stream_encode_init
- ( struct neon32_base64_state *state
- ) ;
-
-/* Encodes the block of data of given length at `src`, into the buffer at
- * `out`. Caller is responsible for allocating a large enough out-buffer; it
- * must be at least 4/3 the size of the in-buffer, but take some margin. Places
- * the number of new bytes written into `outlen` (which is set to zero when the
- * function starts). Does not zero-terminate or finalize the output. */
-void neon32_base64_stream_encode
- ( struct neon32_base64_state *state
- , const char *src
- , size_t srclen
- , char *out
- , size_t *outlen
- ) ;
-
-/* Finalizes the output begun by previous calls to `base64_stream_encode()`.
- * Adds the required end-of-stream markers if appropriate. `outlen` is modified
- * and will contain the number of new bytes written at `out` (which will quite
- * often be zero). */
-void neon32_base64_stream_encode_final
- ( struct neon32_base64_state *state
- , char *out
- , size_t *outlen
- ) ;
-
-/* Wrapper function to decode a plain string of given length. Output is written
- * to *out without trailing zero. Output length in bytes is written to *outlen.
- * The buffer in `out` has been allocated by the caller and is at least 3/4 the
- * size of the input. See above for `flags`, set to 0 for default operation: */
-int neon32_base64_decode
- ( const char *src
- , size_t srclen
- , char *out
- , size_t *outlen
- ) ;
-
-/* Call this before calling base64_stream_decode() to init the state. See above
- * for `flags`; set to 0 for default operation: */
-void neon32_base64_stream_decode_init
- ( struct neon32_base64_state *state
- ) ;
-
-/* Decodes the block of data of given length at `src`, into the buffer at
- * `out`. Caller is responsible for allocating a large enough out-buffer; it
- * must be at least 3/4 the size of the in-buffer, but take some margin. Places
- * the number of new bytes written into `outlen` (which is set to zero when the
- * function starts). Does not zero-terminate the output. Returns 1 if all is
- * well, and 0 if a decoding error was found, such as an invalid character.
- * Returns -1 if the chosen codec is not included in the current build. Used by
- * the test harness to check whether a codec is available for testing. */
-int neon32_base64_stream_decode
- ( struct neon32_base64_state *state
- , const char *src
- , size_t srclen
- , char *out
- , size_t *outlen
- ) ;
-
-#ifdef __cplusplus
-}
-#endif
-
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct neon32_base64_state {
+ int eof;
+ int bytes;
+ unsigned char carry;
+};
+
+/* Wrapper function to encode a plain string of given length. Output is written
+ * to *out without trailing zero. Output length in bytes is written to *outlen.
+ * The buffer in `out` has been allocated by the caller and is at least 4/3 the
+ * size of the input. See above for `flags`; set to 0 for default operation: */
+void neon32_base64_encode
+ ( const char *src
+ , size_t srclen
+ , char *out
+ , size_t *outlen
+ ) ;
+
+/* Call this before calling base64_stream_encode() to init the state. See above
+ * for `flags`; set to 0 for default operation: */
+void neon32_base64_stream_encode_init
+ ( struct neon32_base64_state *state
+ ) ;
+
+/* Encodes the block of data of given length at `src`, into the buffer at
+ * `out`. Caller is responsible for allocating a large enough out-buffer; it
+ * must be at least 4/3 the size of the in-buffer, but take some margin. Places
+ * the number of new bytes written into `outlen` (which is set to zero when the
+ * function starts). Does not zero-terminate or finalize the output. */
+void neon32_base64_stream_encode
+ ( struct neon32_base64_state *state
+ , const char *src
+ , size_t srclen
+ , char *out
+ , size_t *outlen
+ ) ;
+
+/* Finalizes the output begun by previous calls to `base64_stream_encode()`.
+ * Adds the required end-of-stream markers if appropriate. `outlen` is modified
+ * and will contain the number of new bytes written at `out` (which will quite
+ * often be zero). */
+void neon32_base64_stream_encode_final
+ ( struct neon32_base64_state *state
+ , char *out
+ , size_t *outlen
+ ) ;
+
+/* Wrapper function to decode a plain string of given length. Output is written
+ * to *out without trailing zero. Output length in bytes is written to *outlen.
+ * The buffer in `out` has been allocated by the caller and is at least 3/4 the
+ * size of the input. See above for `flags`, set to 0 for default operation: */
+int neon32_base64_decode
+ ( const char *src
+ , size_t srclen
+ , char *out
+ , size_t *outlen
+ ) ;
+
+/* Call this before calling base64_stream_decode() to init the state. See above
+ * for `flags`; set to 0 for default operation: */
+void neon32_base64_stream_decode_init
+ ( struct neon32_base64_state *state
+ ) ;
+
+/* Decodes the block of data of given length at `src`, into the buffer at
+ * `out`. Caller is responsible for allocating a large enough out-buffer; it
+ * must be at least 3/4 the size of the in-buffer, but take some margin. Places
+ * the number of new bytes written into `outlen` (which is set to zero when the
+ * function starts). Does not zero-terminate the output. Returns 1 if all is
+ * well, and 0 if a decoding error was found, such as an invalid character.
+ * Returns -1 if the chosen codec is not included in the current build. Used by
+ * the test harness to check whether a codec is available for testing. */
+int neon32_base64_stream_decode
+ ( struct neon32_base64_state *state
+ , const char *src
+ , size_t srclen
+ , char *out
+ , size_t *outlen
+ ) ;
+
+#ifdef __cplusplus
+}
+#endif
+
diff --git a/contrib/libs/base64/neon32/ya.make b/contrib/libs/base64/neon32/ya.make
index 435b1127bb..d17e5a0e70 100644
--- a/contrib/libs/base64/neon32/ya.make
+++ b/contrib/libs/base64/neon32/ya.make
@@ -1,11 +1,11 @@
-OWNER(
- yazevnul
+OWNER(
+ yazevnul
g:contrib
g:cpp-contrib
-)
-
-LIBRARY()
-
+)
+
+LIBRARY()
+
LICENSE(
BSD-2-Clause AND
MIT
@@ -13,15 +13,15 @@ LICENSE(
LICENSE_TEXTS(.yandex_meta/licenses.list.txt)
-NO_UTIL()
-
-SRCS(
- codec_neon32.c
- lib.c
-)
-
+NO_UTIL()
+
+SRCS(
+ codec_neon32.c
+ lib.c
+)
+
IF (OS_LINUX OR OS_DARWIN OR OS_ANDROID)
CONLYFLAGS(-std=c11)
-ENDIF()
-
-END()
+ENDIF()
+
+END()