diff options
author | yazevnul <yazevnul@yandex-team.ru> | 2022-02-10 16:46:46 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:46 +0300 |
commit | 8cbc307de0221f84c80c42dcbe07d40727537e2c (patch) | |
tree | 625d5a673015d1df891e051033e9fcde5c7be4e5 /contrib/libs/base64/neon32 | |
parent | 30d1ef3941e0dc835be7609de5ebee66958f215a (diff) | |
download | ydb-8cbc307de0221f84c80c42dcbe07d40727537e2c.tar.gz |
Restoring authorship annotation for <yazevnul@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/base64/neon32')
-rw-r--r-- | contrib/libs/base64/neon32/codec_neon32.c | 320 | ||||
-rw-r--r-- | contrib/libs/base64/neon32/codecs.h | 70 | ||||
-rw-r--r-- | contrib/libs/base64/neon32/dec_head.c | 58 | ||||
-rw-r--r-- | contrib/libs/base64/neon32/dec_neon.c | 154 | ||||
-rw-r--r-- | contrib/libs/base64/neon32/dec_tail.c | 130 | ||||
-rw-r--r-- | contrib/libs/base64/neon32/dec_uint32.c | 96 | ||||
-rw-r--r-- | contrib/libs/base64/neon32/enc_head.c | 46 | ||||
-rw-r--r-- | contrib/libs/base64/neon32/enc_neon.c | 46 | ||||
-rw-r--r-- | contrib/libs/base64/neon32/enc_tail.c | 56 | ||||
-rw-r--r-- | contrib/libs/base64/neon32/enc_uint32.c | 48 | ||||
-rw-r--r-- | contrib/libs/base64/neon32/lib.c | 242 | ||||
-rw-r--r-- | contrib/libs/base64/neon32/libbase64.h | 178 | ||||
-rw-r--r-- | contrib/libs/base64/neon32/ya.make | 32 |
13 files changed, 738 insertions, 738 deletions
diff --git a/contrib/libs/base64/neon32/codec_neon32.c b/contrib/libs/base64/neon32/codec_neon32.c index 2c9ae02f75..05fcfc3e63 100644 --- a/contrib/libs/base64/neon32/codec_neon32.c +++ b/contrib/libs/base64/neon32/codec_neon32.c @@ -1,160 +1,160 @@ -#if (defined(__ARM_NEON) && !defined(__ARM_NEON__)) -#define __ARM_NEON__ -#endif - -#include <stdint.h> -#include <stddef.h> -#include <stdlib.h> -#ifdef __ARM_NEON__ -#include <arm_neon.h> -#endif - -#include "libbase64.h" -#include "codecs.h" - -#if (defined(__arm__) && defined(__ARM_NEON__)) - -#define CMPGT(s,n) vcgtq_u8((s), vdupq_n_u8(n)) -#define CMPEQ(s,n) vceqq_u8((s), vdupq_n_u8(n)) -#define REPLACE(s,n) vandq_u8((s), vdupq_n_u8(n)) -#define RANGE(s,a,b) vandq_u8(vcgeq_u8((s), vdupq_n_u8(a)), vcleq_u8((s), vdupq_n_u8(b))) - -static inline uint8x16x4_t -enc_reshuffle (uint8x16x3_t in) -{ - uint8x16x4_t out; - - // Divide bits of three input bytes over four output bytes: - out.val[0] = vshrq_n_u8(in.val[0], 2); - out.val[1] = vorrq_u8(vshrq_n_u8(in.val[1], 4), vshlq_n_u8(in.val[0], 4)); - out.val[2] = vorrq_u8(vshrq_n_u8(in.val[2], 6), vshlq_n_u8(in.val[1], 2)); - out.val[3] = in.val[2]; - - // Clear top two bits: - out.val[0] = vandq_u8(out.val[0], vdupq_n_u8(0x3F)); - out.val[1] = vandq_u8(out.val[1], vdupq_n_u8(0x3F)); - out.val[2] = vandq_u8(out.val[2], vdupq_n_u8(0x3F)); - out.val[3] = vandq_u8(out.val[3], vdupq_n_u8(0x3F)); - - return out; -} - -static inline uint8x16x4_t -enc_translate (uint8x16x4_t in) -{ - uint8x16x4_t mask1, mask2, mask3, mask4, out; - - // Translate values 0..63 to the Base64 alphabet. There are five sets: - // # From To Abs Delta Characters - // 0 [0..25] [65..90] +65 +65 ABCDEFGHIJKLMNOPQRSTUVWXYZ - // 1 [26..51] [97..122] +71 +6 abcdefghijklmnopqrstuvwxyz - // 2 [52..61] [48..57] -4 -75 0123456789 - // 3 [62] [43] -19 -15 + - // 4 [63] [47] -16 +3 / - - // Create cumulative masks for characters in sets [1,2,3,4], [2,3,4], - // [3,4], and [4]: - mask1.val[0] = CMPGT(in.val[0], 25); - mask1.val[1] = CMPGT(in.val[1], 25); - mask1.val[2] = CMPGT(in.val[2], 25); - mask1.val[3] = CMPGT(in.val[3], 25); - - mask2.val[0] = CMPGT(in.val[0], 51); - mask2.val[1] = CMPGT(in.val[1], 51); - mask2.val[2] = CMPGT(in.val[2], 51); - mask2.val[3] = CMPGT(in.val[3], 51); - - mask3.val[0] = CMPGT(in.val[0], 61); - mask3.val[1] = CMPGT(in.val[1], 61); - mask3.val[2] = CMPGT(in.val[2], 61); - mask3.val[3] = CMPGT(in.val[3], 61); - - mask4.val[0] = CMPEQ(in.val[0], 63); - mask4.val[1] = CMPEQ(in.val[1], 63); - mask4.val[2] = CMPEQ(in.val[2], 63); - mask4.val[3] = CMPEQ(in.val[3], 63); - - // All characters are at least in cumulative set 0, so add 'A': - out.val[0] = vaddq_u8(in.val[0], vdupq_n_u8(65)); - out.val[1] = vaddq_u8(in.val[1], vdupq_n_u8(65)); - out.val[2] = vaddq_u8(in.val[2], vdupq_n_u8(65)); - out.val[3] = vaddq_u8(in.val[3], vdupq_n_u8(65)); - - // For inputs which are also in any of the other cumulative sets, - // add delta values against the previous set(s) to correct the shift: - out.val[0] = vaddq_u8(out.val[0], REPLACE(mask1.val[0], 6)); - out.val[1] = vaddq_u8(out.val[1], REPLACE(mask1.val[1], 6)); - out.val[2] = vaddq_u8(out.val[2], REPLACE(mask1.val[2], 6)); - out.val[3] = vaddq_u8(out.val[3], REPLACE(mask1.val[3], 6)); - - out.val[0] = vsubq_u8(out.val[0], REPLACE(mask2.val[0], 75)); - out.val[1] = vsubq_u8(out.val[1], REPLACE(mask2.val[1], 75)); - out.val[2] = vsubq_u8(out.val[2], REPLACE(mask2.val[2], 75)); - out.val[3] = vsubq_u8(out.val[3], REPLACE(mask2.val[3], 75)); - - out.val[0] = vsubq_u8(out.val[0], REPLACE(mask3.val[0], 15)); - out.val[1] = vsubq_u8(out.val[1], REPLACE(mask3.val[1], 15)); - out.val[2] = vsubq_u8(out.val[2], REPLACE(mask3.val[2], 15)); - out.val[3] = vsubq_u8(out.val[3], REPLACE(mask3.val[3], 15)); - - out.val[0] = vaddq_u8(out.val[0], REPLACE(mask4.val[0], 3)); - out.val[1] = vaddq_u8(out.val[1], REPLACE(mask4.val[1], 3)); - out.val[2] = vaddq_u8(out.val[2], REPLACE(mask4.val[2], 3)); - out.val[3] = vaddq_u8(out.val[3], REPLACE(mask4.val[3], 3)); - - return out; -} - -#endif - -// Stride size is so large on these NEON 32-bit functions -// (48 bytes encode, 32 bytes decode) that we inline the -// uint32 codec to stay performant on smaller inputs. - -void -neon32_base64_stream_encode - ( struct neon32_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ -#if (defined(__arm__) && defined(__ARM_NEON__)) - #include "enc_head.c" - #include "enc_neon.c" - #include "enc_uint32.c" - #include "enc_tail.c" -#else - (void)state; - (void)src; - (void)srclen; - (void)out; - (void)outlen; - abort(); -#endif -} - -int -neon32_base64_stream_decode - ( struct neon32_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ -#if (defined(__arm__) && defined(__ARM_NEON__)) - #include "dec_head.c" - #include "dec_neon.c" - #include "dec_uint32.c" - #include "dec_tail.c" -#else - (void)state; - (void)src; - (void)srclen; - (void)out; - (void)outlen; - abort(); -#endif -} +#if (defined(__ARM_NEON) && !defined(__ARM_NEON__)) +#define __ARM_NEON__ +#endif + +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> +#ifdef __ARM_NEON__ +#include <arm_neon.h> +#endif + +#include "libbase64.h" +#include "codecs.h" + +#if (defined(__arm__) && defined(__ARM_NEON__)) + +#define CMPGT(s,n) vcgtq_u8((s), vdupq_n_u8(n)) +#define CMPEQ(s,n) vceqq_u8((s), vdupq_n_u8(n)) +#define REPLACE(s,n) vandq_u8((s), vdupq_n_u8(n)) +#define RANGE(s,a,b) vandq_u8(vcgeq_u8((s), vdupq_n_u8(a)), vcleq_u8((s), vdupq_n_u8(b))) + +static inline uint8x16x4_t +enc_reshuffle (uint8x16x3_t in) +{ + uint8x16x4_t out; + + // Divide bits of three input bytes over four output bytes: + out.val[0] = vshrq_n_u8(in.val[0], 2); + out.val[1] = vorrq_u8(vshrq_n_u8(in.val[1], 4), vshlq_n_u8(in.val[0], 4)); + out.val[2] = vorrq_u8(vshrq_n_u8(in.val[2], 6), vshlq_n_u8(in.val[1], 2)); + out.val[3] = in.val[2]; + + // Clear top two bits: + out.val[0] = vandq_u8(out.val[0], vdupq_n_u8(0x3F)); + out.val[1] = vandq_u8(out.val[1], vdupq_n_u8(0x3F)); + out.val[2] = vandq_u8(out.val[2], vdupq_n_u8(0x3F)); + out.val[3] = vandq_u8(out.val[3], vdupq_n_u8(0x3F)); + + return out; +} + +static inline uint8x16x4_t +enc_translate (uint8x16x4_t in) +{ + uint8x16x4_t mask1, mask2, mask3, mask4, out; + + // Translate values 0..63 to the Base64 alphabet. There are five sets: + // # From To Abs Delta Characters + // 0 [0..25] [65..90] +65 +65 ABCDEFGHIJKLMNOPQRSTUVWXYZ + // 1 [26..51] [97..122] +71 +6 abcdefghijklmnopqrstuvwxyz + // 2 [52..61] [48..57] -4 -75 0123456789 + // 3 [62] [43] -19 -15 + + // 4 [63] [47] -16 +3 / + + // Create cumulative masks for characters in sets [1,2,3,4], [2,3,4], + // [3,4], and [4]: + mask1.val[0] = CMPGT(in.val[0], 25); + mask1.val[1] = CMPGT(in.val[1], 25); + mask1.val[2] = CMPGT(in.val[2], 25); + mask1.val[3] = CMPGT(in.val[3], 25); + + mask2.val[0] = CMPGT(in.val[0], 51); + mask2.val[1] = CMPGT(in.val[1], 51); + mask2.val[2] = CMPGT(in.val[2], 51); + mask2.val[3] = CMPGT(in.val[3], 51); + + mask3.val[0] = CMPGT(in.val[0], 61); + mask3.val[1] = CMPGT(in.val[1], 61); + mask3.val[2] = CMPGT(in.val[2], 61); + mask3.val[3] = CMPGT(in.val[3], 61); + + mask4.val[0] = CMPEQ(in.val[0], 63); + mask4.val[1] = CMPEQ(in.val[1], 63); + mask4.val[2] = CMPEQ(in.val[2], 63); + mask4.val[3] = CMPEQ(in.val[3], 63); + + // All characters are at least in cumulative set 0, so add 'A': + out.val[0] = vaddq_u8(in.val[0], vdupq_n_u8(65)); + out.val[1] = vaddq_u8(in.val[1], vdupq_n_u8(65)); + out.val[2] = vaddq_u8(in.val[2], vdupq_n_u8(65)); + out.val[3] = vaddq_u8(in.val[3], vdupq_n_u8(65)); + + // For inputs which are also in any of the other cumulative sets, + // add delta values against the previous set(s) to correct the shift: + out.val[0] = vaddq_u8(out.val[0], REPLACE(mask1.val[0], 6)); + out.val[1] = vaddq_u8(out.val[1], REPLACE(mask1.val[1], 6)); + out.val[2] = vaddq_u8(out.val[2], REPLACE(mask1.val[2], 6)); + out.val[3] = vaddq_u8(out.val[3], REPLACE(mask1.val[3], 6)); + + out.val[0] = vsubq_u8(out.val[0], REPLACE(mask2.val[0], 75)); + out.val[1] = vsubq_u8(out.val[1], REPLACE(mask2.val[1], 75)); + out.val[2] = vsubq_u8(out.val[2], REPLACE(mask2.val[2], 75)); + out.val[3] = vsubq_u8(out.val[3], REPLACE(mask2.val[3], 75)); + + out.val[0] = vsubq_u8(out.val[0], REPLACE(mask3.val[0], 15)); + out.val[1] = vsubq_u8(out.val[1], REPLACE(mask3.val[1], 15)); + out.val[2] = vsubq_u8(out.val[2], REPLACE(mask3.val[2], 15)); + out.val[3] = vsubq_u8(out.val[3], REPLACE(mask3.val[3], 15)); + + out.val[0] = vaddq_u8(out.val[0], REPLACE(mask4.val[0], 3)); + out.val[1] = vaddq_u8(out.val[1], REPLACE(mask4.val[1], 3)); + out.val[2] = vaddq_u8(out.val[2], REPLACE(mask4.val[2], 3)); + out.val[3] = vaddq_u8(out.val[3], REPLACE(mask4.val[3], 3)); + + return out; +} + +#endif + +// Stride size is so large on these NEON 32-bit functions +// (48 bytes encode, 32 bytes decode) that we inline the +// uint32 codec to stay performant on smaller inputs. + +void +neon32_base64_stream_encode + ( struct neon32_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#if (defined(__arm__) && defined(__ARM_NEON__)) + #include "enc_head.c" + #include "enc_neon.c" + #include "enc_uint32.c" + #include "enc_tail.c" +#else + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} + +int +neon32_base64_stream_decode + ( struct neon32_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#if (defined(__arm__) && defined(__ARM_NEON__)) + #include "dec_head.c" + #include "dec_neon.c" + #include "dec_uint32.c" + #include "dec_tail.c" +#else + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} diff --git a/contrib/libs/base64/neon32/codecs.h b/contrib/libs/base64/neon32/codecs.h index 5c9ec309c2..23cca82c6f 100644 --- a/contrib/libs/base64/neon32/codecs.h +++ b/contrib/libs/base64/neon32/codecs.h @@ -1,35 +1,35 @@ -#pragma once - -// Define machine endianness. This is for GCC: -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) - #define BASE64_NEON32_LITTLE_ENDIAN 1 -#else - #define BASE64_NEON32_LITTLE_ENDIAN 0 -#endif - -// This is for Clang: -#ifdef __LITTLE_ENDIAN__ - #define BASE64_NEON32_LITTLE_ENDIAN 1 -#endif - -#ifdef __BIG_ENDIAN__ - #define BASE64_NEON32_LITTLE_ENDIAN 0 -#endif - -// Endian conversion functions -#if BASE64_NEON32_LITTLE_ENDIAN - #define cpu_to_be32(x) __builtin_bswap32(x) - #define cpu_to_be64(x) __builtin_bswap64(x) - #define be32_to_cpu(x) __builtin_bswap32(x) - #define be64_to_cpu(x) __builtin_bswap64(x) -#else - #define cpu_to_be32(x) (x) - #define cpu_to_be64(x) (x) - #define be32_to_cpu(x) (x) - #define be64_to_cpu(x) (x) -#endif - -// These tables are used by all codecs -// for fallback plain encoding/decoding: -extern const uint8_t neon32_base64_table_enc[]; -extern const uint8_t neon32_base64_table_dec[]; +#pragma once + +// Define machine endianness. This is for GCC: +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + #define BASE64_NEON32_LITTLE_ENDIAN 1 +#else + #define BASE64_NEON32_LITTLE_ENDIAN 0 +#endif + +// This is for Clang: +#ifdef __LITTLE_ENDIAN__ + #define BASE64_NEON32_LITTLE_ENDIAN 1 +#endif + +#ifdef __BIG_ENDIAN__ + #define BASE64_NEON32_LITTLE_ENDIAN 0 +#endif + +// Endian conversion functions +#if BASE64_NEON32_LITTLE_ENDIAN + #define cpu_to_be32(x) __builtin_bswap32(x) + #define cpu_to_be64(x) __builtin_bswap64(x) + #define be32_to_cpu(x) __builtin_bswap32(x) + #define be64_to_cpu(x) __builtin_bswap64(x) +#else + #define cpu_to_be32(x) (x) + #define cpu_to_be64(x) (x) + #define be32_to_cpu(x) (x) + #define be64_to_cpu(x) (x) +#endif + +// These tables are used by all codecs +// for fallback plain encoding/decoding: +extern const uint8_t neon32_base64_table_enc[]; +extern const uint8_t neon32_base64_table_dec[]; diff --git a/contrib/libs/base64/neon32/dec_head.c b/contrib/libs/base64/neon32/dec_head.c index bd023118ff..2802093555 100644 --- a/contrib/libs/base64/neon32/dec_head.c +++ b/contrib/libs/base64/neon32/dec_head.c @@ -1,29 +1,29 @@ -int ret = 0; -const uint8_t *c = (const uint8_t *)src; -uint8_t *o = (uint8_t *)out; -uint8_t q; - -// Use local temporaries to avoid cache thrashing: -size_t outl = 0; -struct neon32_base64_state st; -st.eof = state->eof; -st.bytes = state->bytes; -st.carry = state->carry; - -// If we previously saw an EOF or an invalid character, bail out: -if (st.eof) { - *outlen = 0; - return 0; -} - -// Turn four 6-bit numbers into three bytes: -// out[0] = 11111122 -// out[1] = 22223333 -// out[2] = 33444444 - -// Duff's device again: -switch (st.bytes) -{ - for (;;) - { - case 0: +int ret = 0; +const uint8_t *c = (const uint8_t *)src; +uint8_t *o = (uint8_t *)out; +uint8_t q; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct neon32_base64_state st; +st.eof = state->eof; +st.bytes = state->bytes; +st.carry = state->carry; + +// If we previously saw an EOF or an invalid character, bail out: +if (st.eof) { + *outlen = 0; + return 0; +} + +// Turn four 6-bit numbers into three bytes: +// out[0] = 11111122 +// out[1] = 22223333 +// out[2] = 33444444 + +// Duff's device again: +switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/neon32/dec_neon.c b/contrib/libs/base64/neon32/dec_neon.c index 713d8ca9a4..30d846a916 100644 --- a/contrib/libs/base64/neon32/dec_neon.c +++ b/contrib/libs/base64/neon32/dec_neon.c @@ -1,77 +1,77 @@ -// If we have NEON support, pick off 64 bytes at a time for as long as we can. -// Unlike the SSE codecs, we don't write trailing zero bytes to output, so we -// don't need to check if we have enough remaining input to cover them: -while (srclen >= 64) -{ - uint8x16x4_t set1, set2, set3, set4, set5, set6, set7, delta; - uint8x16x3_t dec; - - // Load 64 bytes and deinterleave: - uint8x16x4_t str = vld4q_u8((uint8_t *)c); - - // The input consists of six character sets in the Base64 alphabet, - // which we need to map back to the 6-bit values they represent. - // There are three ranges, two singles, and then there's the rest. - // - // # From To Add Characters - // 1 [43] [62] +19 + - // 2 [47] [63] +16 / - // 3 [48..57] [52..61] +4 0..9 - // 4 [65..90] [0..25] -65 A..Z - // 5 [97..122] [26..51] -71 a..z - // (6) Everything else => invalid input - - // Benchmarking on the Raspberry Pi 2B and Clang shows that looping - // generates slightly faster code than explicit unrolling: - for (int i = 0; i < 4; i++) { - set1.val[i] = CMPEQ(str.val[i], '+'); - set2.val[i] = CMPEQ(str.val[i], '/'); - set3.val[i] = RANGE(str.val[i], '0', '9'); - set4.val[i] = RANGE(str.val[i], 'A', 'Z'); - set5.val[i] = RANGE(str.val[i], 'a', 'z'); - set6.val[i] = CMPEQ(str.val[i], '-'); - set7.val[i] = CMPEQ(str.val[i], '_'); - - delta.val[i] = REPLACE(set1.val[i], 19); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set2.val[i], 16)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set3.val[i], 4)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set4.val[i], -65)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set5.val[i], -71)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set6.val[i], 17)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set7.val[i], -32)); - } - - // Check for invalid input: if any of the delta values are zero, - // fall back on bytewise code to do error checking and reporting: - uint8x16_t classified = CMPEQ(delta.val[0], 0); - classified = vorrq_u8(classified, CMPEQ(delta.val[1], 0)); - classified = vorrq_u8(classified, CMPEQ(delta.val[2], 0)); - classified = vorrq_u8(classified, CMPEQ(delta.val[3], 0)); - - // Extract both 32-bit halves; check that all bits are zero: - if (vgetq_lane_u32((uint32x4_t)classified, 0) != 0 - || vgetq_lane_u32((uint32x4_t)classified, 1) != 0 - || vgetq_lane_u32((uint32x4_t)classified, 2) != 0 - || vgetq_lane_u32((uint32x4_t)classified, 3) != 0) { - break; - } - - // Now simply add the delta values to the input: - str.val[0] = vaddq_u8(str.val[0], delta.val[0]); - str.val[1] = vaddq_u8(str.val[1], delta.val[1]); - str.val[2] = vaddq_u8(str.val[2], delta.val[2]); - str.val[3] = vaddq_u8(str.val[3], delta.val[3]); - - // Compress four bytes into three: - dec.val[0] = vshlq_n_u8(str.val[0], 2) | vshrq_n_u8(str.val[1], 4); - dec.val[1] = vshlq_n_u8(str.val[1], 4) | vshrq_n_u8(str.val[2], 2); - dec.val[2] = vshlq_n_u8(str.val[2], 6) | str.val[3]; - - // Interleave and store decoded result: - vst3q_u8((uint8_t *)o, dec); - - c += 64; - o += 48; - outl += 48; - srclen -= 64; -} +// If we have NEON support, pick off 64 bytes at a time for as long as we can. +// Unlike the SSE codecs, we don't write trailing zero bytes to output, so we +// don't need to check if we have enough remaining input to cover them: +while (srclen >= 64) +{ + uint8x16x4_t set1, set2, set3, set4, set5, set6, set7, delta; + uint8x16x3_t dec; + + // Load 64 bytes and deinterleave: + uint8x16x4_t str = vld4q_u8((uint8_t *)c); + + // The input consists of six character sets in the Base64 alphabet, + // which we need to map back to the 6-bit values they represent. + // There are three ranges, two singles, and then there's the rest. + // + // # From To Add Characters + // 1 [43] [62] +19 + + // 2 [47] [63] +16 / + // 3 [48..57] [52..61] +4 0..9 + // 4 [65..90] [0..25] -65 A..Z + // 5 [97..122] [26..51] -71 a..z + // (6) Everything else => invalid input + + // Benchmarking on the Raspberry Pi 2B and Clang shows that looping + // generates slightly faster code than explicit unrolling: + for (int i = 0; i < 4; i++) { + set1.val[i] = CMPEQ(str.val[i], '+'); + set2.val[i] = CMPEQ(str.val[i], '/'); + set3.val[i] = RANGE(str.val[i], '0', '9'); + set4.val[i] = RANGE(str.val[i], 'A', 'Z'); + set5.val[i] = RANGE(str.val[i], 'a', 'z'); + set6.val[i] = CMPEQ(str.val[i], '-'); + set7.val[i] = CMPEQ(str.val[i], '_'); + + delta.val[i] = REPLACE(set1.val[i], 19); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set2.val[i], 16)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set3.val[i], 4)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set4.val[i], -65)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set5.val[i], -71)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set6.val[i], 17)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set7.val[i], -32)); + } + + // Check for invalid input: if any of the delta values are zero, + // fall back on bytewise code to do error checking and reporting: + uint8x16_t classified = CMPEQ(delta.val[0], 0); + classified = vorrq_u8(classified, CMPEQ(delta.val[1], 0)); + classified = vorrq_u8(classified, CMPEQ(delta.val[2], 0)); + classified = vorrq_u8(classified, CMPEQ(delta.val[3], 0)); + + // Extract both 32-bit halves; check that all bits are zero: + if (vgetq_lane_u32((uint32x4_t)classified, 0) != 0 + || vgetq_lane_u32((uint32x4_t)classified, 1) != 0 + || vgetq_lane_u32((uint32x4_t)classified, 2) != 0 + || vgetq_lane_u32((uint32x4_t)classified, 3) != 0) { + break; + } + + // Now simply add the delta values to the input: + str.val[0] = vaddq_u8(str.val[0], delta.val[0]); + str.val[1] = vaddq_u8(str.val[1], delta.val[1]); + str.val[2] = vaddq_u8(str.val[2], delta.val[2]); + str.val[3] = vaddq_u8(str.val[3], delta.val[3]); + + // Compress four bytes into three: + dec.val[0] = vshlq_n_u8(str.val[0], 2) | vshrq_n_u8(str.val[1], 4); + dec.val[1] = vshlq_n_u8(str.val[1], 4) | vshrq_n_u8(str.val[2], 2); + dec.val[2] = vshlq_n_u8(str.val[2], 6) | str.val[3]; + + // Interleave and store decoded result: + vst3q_u8((uint8_t *)o, dec); + + c += 64; + o += 48; + outl += 48; + srclen -= 64; +} diff --git a/contrib/libs/base64/neon32/dec_tail.c b/contrib/libs/base64/neon32/dec_tail.c index 4844677e6d..beb453a467 100644 --- a/contrib/libs/base64/neon32/dec_tail.c +++ b/contrib/libs/base64/neon32/dec_tail.c @@ -1,65 +1,65 @@ - if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = neon32_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // Treat character '=' as invalid for byte 0: - break; - } - st.carry = q << 2; - st.bytes++; - - case 1: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = neon32_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // Treat character '=' as invalid for byte 1: - break; - } - *o++ = st.carry | (q >> 4); - st.carry = q << 4; - st.bytes++; - outl++; - - case 2: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = neon32_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // When q == 254, the input char is '='. Return 1 and EOF. - // Technically, should check if next byte is also '=', but never mind. - // When q == 255, the input char is invalid. Return 0 and EOF. - ret = (q == 254) ? 1 : 0; - break; - } - *o++ = st.carry | (q >> 2); - st.carry = q << 6; - st.bytes++; - outl++; - - case 3: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = neon32_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // When q == 254, the input char is '='. Return 1 and EOF. - // When q == 255, the input char is invalid. Return 0 and EOF. - ret = (q == 254) ? 1 : 0; - break; - } - *o++ = st.carry | q; - st.carry = 0; - st.bytes = 0; - outl++; - } -} -state->eof = st.eof; -state->bytes = st.bytes; -state->carry = st.carry; -*outlen = outl; -return ret; + if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = neon32_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 0: + break; + } + st.carry = q << 2; + st.bytes++; + + case 1: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = neon32_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 1: + break; + } + *o++ = st.carry | (q >> 4); + st.carry = q << 4; + st.bytes++; + outl++; + + case 2: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = neon32_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // Technically, should check if next byte is also '=', but never mind. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | (q >> 2); + st.carry = q << 6; + st.bytes++; + outl++; + + case 3: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = neon32_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | q; + st.carry = 0; + st.bytes = 0; + outl++; + } +} +state->eof = st.eof; +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; +return ret; diff --git a/contrib/libs/base64/neon32/dec_uint32.c b/contrib/libs/base64/neon32/dec_uint32.c index 5856446861..052284c7e6 100644 --- a/contrib/libs/base64/neon32/dec_uint32.c +++ b/contrib/libs/base64/neon32/dec_uint32.c @@ -1,48 +1,48 @@ -// If we have native uint32's, pick off 4 bytes at a time for as long as we -// can, but make sure that we quit before seeing any == markers at the end of -// the string. Also, because we write a zero at the end of the output, ensure -// that there are at least 2 valid bytes of input data remaining to close the -// gap. 4 + 2 + 2 = 8 bytes: -while (srclen >= 8) -{ - uint32_t str, res, dec; - - // Load string: - str = *(uint32_t *)c; - - // Shuffle bytes to 32-bit bigendian: - str = cpu_to_be32(str); - - // Lookup each byte in the decoding table; if we encounter any - // "invalid" values, fall back on the bytewise code: - if ((dec = neon32_base64_table_dec[str >> 24]) > 63) { - break; - } - res = dec << 26; - - if ((dec = neon32_base64_table_dec[(str >> 16) & 0xFF]) > 63) { - break; - } - res |= dec << 20; - - if ((dec = neon32_base64_table_dec[(str >> 8) & 0xFF]) > 63) { - break; - } - res |= dec << 14; - - if ((dec = neon32_base64_table_dec[str & 0xFF]) > 63) { - break; - } - res |= dec << 8; - - // Reshuffle and repack into 3-byte output format: - res = be32_to_cpu(res); - - // Store back: - *(uint32_t *)o = res; - - c += 4; - o += 3; - outl += 3; - srclen -= 4; -} +// If we have native uint32's, pick off 4 bytes at a time for as long as we +// can, but make sure that we quit before seeing any == markers at the end of +// the string. Also, because we write a zero at the end of the output, ensure +// that there are at least 2 valid bytes of input data remaining to close the +// gap. 4 + 2 + 2 = 8 bytes: +while (srclen >= 8) +{ + uint32_t str, res, dec; + + // Load string: + str = *(uint32_t *)c; + + // Shuffle bytes to 32-bit bigendian: + str = cpu_to_be32(str); + + // Lookup each byte in the decoding table; if we encounter any + // "invalid" values, fall back on the bytewise code: + if ((dec = neon32_base64_table_dec[str >> 24]) > 63) { + break; + } + res = dec << 26; + + if ((dec = neon32_base64_table_dec[(str >> 16) & 0xFF]) > 63) { + break; + } + res |= dec << 20; + + if ((dec = neon32_base64_table_dec[(str >> 8) & 0xFF]) > 63) { + break; + } + res |= dec << 14; + + if ((dec = neon32_base64_table_dec[str & 0xFF]) > 63) { + break; + } + res |= dec << 8; + + // Reshuffle and repack into 3-byte output format: + res = be32_to_cpu(res); + + // Store back: + *(uint32_t *)o = res; + + c += 4; + o += 3; + outl += 3; + srclen -= 4; +} diff --git a/contrib/libs/base64/neon32/enc_head.c b/contrib/libs/base64/neon32/enc_head.c index 2b8b88eba3..122ad246b1 100644 --- a/contrib/libs/base64/neon32/enc_head.c +++ b/contrib/libs/base64/neon32/enc_head.c @@ -1,23 +1,23 @@ -// Assume that *out is large enough to contain the output. -// Theoretically it should be 4/3 the length of src. -const uint8_t *c = (const uint8_t *)src; -uint8_t *o = (uint8_t *)out; - -// Use local temporaries to avoid cache thrashing: -size_t outl = 0; -struct neon32_base64_state st; -st.bytes = state->bytes; -st.carry = state->carry; - -// Turn three bytes into four 6-bit numbers: -// in[0] = 00111111 -// in[1] = 00112222 -// in[2] = 00222233 -// in[3] = 00333333 - -// Duff's device, a for() loop inside a switch() statement. Legal! -switch (st.bytes) -{ - for (;;) - { - case 0: +// Assume that *out is large enough to contain the output. +// Theoretically it should be 4/3 the length of src. +const uint8_t *c = (const uint8_t *)src; +uint8_t *o = (uint8_t *)out; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct neon32_base64_state st; +st.bytes = state->bytes; +st.carry = state->carry; + +// Turn three bytes into four 6-bit numbers: +// in[0] = 00111111 +// in[1] = 00112222 +// in[2] = 00222233 +// in[3] = 00333333 + +// Duff's device, a for() loop inside a switch() statement. Legal! +switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/neon32/enc_neon.c b/contrib/libs/base64/neon32/enc_neon.c index 05d7eb41b5..effb7f9e07 100644 --- a/contrib/libs/base64/neon32/enc_neon.c +++ b/contrib/libs/base64/neon32/enc_neon.c @@ -1,23 +1,23 @@ -// If we have ARM NEON support, pick off 48 bytes at a time: -while (srclen >= 48) -{ - uint8x16x3_t str; - uint8x16x4_t res; - - // Load 48 bytes and deinterleave: - str = vld3q_u8((uint8_t *)c); - - // Reshuffle: - res = enc_reshuffle(str); - - // Translate reshuffled bytes to the Base64 alphabet: - res = enc_translate(res); - - // Interleave and store result: - vst4q_u8((uint8_t *)o, res); - - c += 48; // 3 * 16 bytes of input - o += 64; // 4 * 16 bytes of output - outl += 64; - srclen -= 48; -} +// If we have ARM NEON support, pick off 48 bytes at a time: +while (srclen >= 48) +{ + uint8x16x3_t str; + uint8x16x4_t res; + + // Load 48 bytes and deinterleave: + str = vld3q_u8((uint8_t *)c); + + // Reshuffle: + res = enc_reshuffle(str); + + // Translate reshuffled bytes to the Base64 alphabet: + res = enc_translate(res); + + // Interleave and store result: + vst4q_u8((uint8_t *)o, res); + + c += 48; // 3 * 16 bytes of input + o += 64; // 4 * 16 bytes of output + outl += 64; + srclen -= 48; +} diff --git a/contrib/libs/base64/neon32/enc_tail.c b/contrib/libs/base64/neon32/enc_tail.c index f19ae5f736..83a5d897e2 100644 --- a/contrib/libs/base64/neon32/enc_tail.c +++ b/contrib/libs/base64/neon32/enc_tail.c @@ -1,28 +1,28 @@ - if (srclen-- == 0) { - break; - } - *o++ = neon32_base64_table_enc[*c >> 2]; - st.carry = (*c++ << 4) & 0x30; - st.bytes++; - outl += 1; - - case 1: if (srclen-- == 0) { - break; - } - *o++ = neon32_base64_table_enc[st.carry | (*c >> 4)]; - st.carry = (*c++ << 2) & 0x3C; - st.bytes++; - outl += 1; - - case 2: if (srclen-- == 0) { - break; - } - *o++ = neon32_base64_table_enc[st.carry | (*c >> 6)]; - *o++ = neon32_base64_table_enc[*c++ & 0x3F]; - st.bytes = 0; - outl += 2; - } -} -state->bytes = st.bytes; -state->carry = st.carry; -*outlen = outl; + if (srclen-- == 0) { + break; + } + *o++ = neon32_base64_table_enc[*c >> 2]; + st.carry = (*c++ << 4) & 0x30; + st.bytes++; + outl += 1; + + case 1: if (srclen-- == 0) { + break; + } + *o++ = neon32_base64_table_enc[st.carry | (*c >> 4)]; + st.carry = (*c++ << 2) & 0x3C; + st.bytes++; + outl += 1; + + case 2: if (srclen-- == 0) { + break; + } + *o++ = neon32_base64_table_enc[st.carry | (*c >> 6)]; + *o++ = neon32_base64_table_enc[*c++ & 0x3F]; + st.bytes = 0; + outl += 2; + } +} +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; diff --git a/contrib/libs/base64/neon32/enc_uint32.c b/contrib/libs/base64/neon32/enc_uint32.c index c7bc3cd234..a9f49375bd 100644 --- a/contrib/libs/base64/neon32/enc_uint32.c +++ b/contrib/libs/base64/neon32/enc_uint32.c @@ -1,24 +1,24 @@ -// If we have 32-bit ints, pick off 3 bytes at a time for as long as we can, -// but ensure that there are at least 4 bytes available to avoid segfaulting: -while (srclen >= 4) -{ - // Load string: - uint32_t str = *(uint32_t *)c; - - // Reorder to 32-bit big-endian, if not already in that format. The - // workset must be in big-endian, otherwise the shifted bits do not - // carry over properly among adjacent bytes: - str = cpu_to_be32(str); - - // Shift input by 6 bytes each round and mask in only the lower 6 bits; - // look up the character in the Base64 encoding table and write it to - // the output location: - *o++ = neon32_base64_table_enc[(str >> 26) & 0x3F]; - *o++ = neon32_base64_table_enc[(str >> 20) & 0x3F]; - *o++ = neon32_base64_table_enc[(str >> 14) & 0x3F]; - *o++ = neon32_base64_table_enc[(str >> 8) & 0x3F]; - - c += 3; // 3 bytes of input - outl += 4; // 4 bytes of output - srclen -= 3; -} +// If we have 32-bit ints, pick off 3 bytes at a time for as long as we can, +// but ensure that there are at least 4 bytes available to avoid segfaulting: +while (srclen >= 4) +{ + // Load string: + uint32_t str = *(uint32_t *)c; + + // Reorder to 32-bit big-endian, if not already in that format. The + // workset must be in big-endian, otherwise the shifted bits do not + // carry over properly among adjacent bytes: + str = cpu_to_be32(str); + + // Shift input by 6 bytes each round and mask in only the lower 6 bits; + // look up the character in the Base64 encoding table and write it to + // the output location: + *o++ = neon32_base64_table_enc[(str >> 26) & 0x3F]; + *o++ = neon32_base64_table_enc[(str >> 20) & 0x3F]; + *o++ = neon32_base64_table_enc[(str >> 14) & 0x3F]; + *o++ = neon32_base64_table_enc[(str >> 8) & 0x3F]; + + c += 3; // 3 bytes of input + outl += 4; // 4 bytes of output + srclen -= 3; +} diff --git a/contrib/libs/base64/neon32/lib.c b/contrib/libs/base64/neon32/lib.c index 52271e925a..10f92c5032 100644 --- a/contrib/libs/base64/neon32/lib.c +++ b/contrib/libs/base64/neon32/lib.c @@ -1,121 +1,121 @@ -#include <stdint.h> -#include <stddef.h> - -#include "libbase64.h" -#include "codecs.h" - -const uint8_t -neon32_base64_table_enc[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - -// In the lookup table below, note that the value for '=' (character 61) is -// 254, not 255. This character is used for in-band signaling of the end of -// the datastream, and we will use that later. The characters A-Z, a-z, 0-9 -// and + / are mapped to their "decoded" values. The other bytes all map to -// the value 255, which flags them as "invalid input". - -const uint8_t -neon32_base64_table_dec[] = -{ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 - 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 - 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, -}; - -void -neon32_base64_stream_encode_init (struct neon32_base64_state *state) -{ - state->eof = 0; - state->bytes = 0; - state->carry = 0; -} - -void -neon32_base64_stream_encode_final - ( struct neon32_base64_state *state - , char *out - , size_t *outlen - ) -{ - uint8_t *o = (uint8_t *)out; - - if (state->bytes == 1) { - *o++ = neon32_base64_table_enc[state->carry]; - *o++ = '='; - *o++ = '='; - *outlen = 3; - return; - } - if (state->bytes == 2) { - *o++ = neon32_base64_table_enc[state->carry]; - *o++ = '='; - *outlen = 2; - return; - } - *outlen = 0; -} - -void -neon32_base64_stream_decode_init (struct neon32_base64_state *state) -{ - state->eof = 0; - state->bytes = 0; - state->carry = 0; -} - -void -neon32_base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - size_t s; - size_t t; - struct neon32_base64_state state; - - // Init the stream reader: - neon32_base64_stream_encode_init(&state); - - // Feed the whole string to the stream reader: - neon32_base64_stream_encode(&state, src, srclen, out, &s); - - // Finalize the stream by writing trailer if any: - neon32_base64_stream_encode_final(&state, out + s, &t); - - // Final output length is stream length plus tail: - *outlen = s + t; -} - -int -neon32_base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - struct neon32_base64_state state; - - // Init the stream reader: - neon32_base64_stream_decode_init(&state); - - // Feed the whole string to the stream reader: - return neon32_base64_stream_decode(&state, src, srclen, out, outlen); -} +#include <stdint.h> +#include <stddef.h> + +#include "libbase64.h" +#include "codecs.h" + +const uint8_t +neon32_base64_table_enc[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +// In the lookup table below, note that the value for '=' (character 61) is +// 254, not 255. This character is used for in-band signaling of the end of +// the datastream, and we will use that later. The characters A-Z, a-z, 0-9 +// and + / are mapped to their "decoded" values. The other bytes all map to +// the value 255, which flags them as "invalid input". + +const uint8_t +neon32_base64_table_dec[] = +{ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +}; + +void +neon32_base64_stream_encode_init (struct neon32_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +neon32_base64_stream_encode_final + ( struct neon32_base64_state *state + , char *out + , size_t *outlen + ) +{ + uint8_t *o = (uint8_t *)out; + + if (state->bytes == 1) { + *o++ = neon32_base64_table_enc[state->carry]; + *o++ = '='; + *o++ = '='; + *outlen = 3; + return; + } + if (state->bytes == 2) { + *o++ = neon32_base64_table_enc[state->carry]; + *o++ = '='; + *outlen = 2; + return; + } + *outlen = 0; +} + +void +neon32_base64_stream_decode_init (struct neon32_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +neon32_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + size_t s; + size_t t; + struct neon32_base64_state state; + + // Init the stream reader: + neon32_base64_stream_encode_init(&state); + + // Feed the whole string to the stream reader: + neon32_base64_stream_encode(&state, src, srclen, out, &s); + + // Finalize the stream by writing trailer if any: + neon32_base64_stream_encode_final(&state, out + s, &t); + + // Final output length is stream length plus tail: + *outlen = s + t; +} + +int +neon32_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + struct neon32_base64_state state; + + // Init the stream reader: + neon32_base64_stream_decode_init(&state); + + // Feed the whole string to the stream reader: + return neon32_base64_stream_decode(&state, src, srclen, out, outlen); +} diff --git a/contrib/libs/base64/neon32/libbase64.h b/contrib/libs/base64/neon32/libbase64.h index fa975550d8..b78dcc4a7e 100644 --- a/contrib/libs/base64/neon32/libbase64.h +++ b/contrib/libs/base64/neon32/libbase64.h @@ -1,89 +1,89 @@ -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -struct neon32_base64_state { - int eof; - int bytes; - unsigned char carry; -}; - -/* Wrapper function to encode a plain string of given length. Output is written - * to *out without trailing zero. Output length in bytes is written to *outlen. - * The buffer in `out` has been allocated by the caller and is at least 4/3 the - * size of the input. See above for `flags`; set to 0 for default operation: */ -void neon32_base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Call this before calling base64_stream_encode() to init the state. See above - * for `flags`; set to 0 for default operation: */ -void neon32_base64_stream_encode_init - ( struct neon32_base64_state *state - ) ; - -/* Encodes the block of data of given length at `src`, into the buffer at - * `out`. Caller is responsible for allocating a large enough out-buffer; it - * must be at least 4/3 the size of the in-buffer, but take some margin. Places - * the number of new bytes written into `outlen` (which is set to zero when the - * function starts). Does not zero-terminate or finalize the output. */ -void neon32_base64_stream_encode - ( struct neon32_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Finalizes the output begun by previous calls to `base64_stream_encode()`. - * Adds the required end-of-stream markers if appropriate. `outlen` is modified - * and will contain the number of new bytes written at `out` (which will quite - * often be zero). */ -void neon32_base64_stream_encode_final - ( struct neon32_base64_state *state - , char *out - , size_t *outlen - ) ; - -/* Wrapper function to decode a plain string of given length. Output is written - * to *out without trailing zero. Output length in bytes is written to *outlen. - * The buffer in `out` has been allocated by the caller and is at least 3/4 the - * size of the input. See above for `flags`, set to 0 for default operation: */ -int neon32_base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Call this before calling base64_stream_decode() to init the state. See above - * for `flags`; set to 0 for default operation: */ -void neon32_base64_stream_decode_init - ( struct neon32_base64_state *state - ) ; - -/* Decodes the block of data of given length at `src`, into the buffer at - * `out`. Caller is responsible for allocating a large enough out-buffer; it - * must be at least 3/4 the size of the in-buffer, but take some margin. Places - * the number of new bytes written into `outlen` (which is set to zero when the - * function starts). Does not zero-terminate the output. Returns 1 if all is - * well, and 0 if a decoding error was found, such as an invalid character. - * Returns -1 if the chosen codec is not included in the current build. Used by - * the test harness to check whether a codec is available for testing. */ -int neon32_base64_stream_decode - ( struct neon32_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -#ifdef __cplusplus -} -#endif - +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct neon32_base64_state { + int eof; + int bytes; + unsigned char carry; +}; + +/* Wrapper function to encode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 4/3 the + * size of the input. See above for `flags`; set to 0 for default operation: */ +void neon32_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling base64_stream_encode() to init the state. See above + * for `flags`; set to 0 for default operation: */ +void neon32_base64_stream_encode_init + ( struct neon32_base64_state *state + ) ; + +/* Encodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 4/3 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate or finalize the output. */ +void neon32_base64_stream_encode + ( struct neon32_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Finalizes the output begun by previous calls to `base64_stream_encode()`. + * Adds the required end-of-stream markers if appropriate. `outlen` is modified + * and will contain the number of new bytes written at `out` (which will quite + * often be zero). */ +void neon32_base64_stream_encode_final + ( struct neon32_base64_state *state + , char *out + , size_t *outlen + ) ; + +/* Wrapper function to decode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 3/4 the + * size of the input. See above for `flags`, set to 0 for default operation: */ +int neon32_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling base64_stream_decode() to init the state. See above + * for `flags`; set to 0 for default operation: */ +void neon32_base64_stream_decode_init + ( struct neon32_base64_state *state + ) ; + +/* Decodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 3/4 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate the output. Returns 1 if all is + * well, and 0 if a decoding error was found, such as an invalid character. + * Returns -1 if the chosen codec is not included in the current build. Used by + * the test harness to check whether a codec is available for testing. */ +int neon32_base64_stream_decode + ( struct neon32_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +#ifdef __cplusplus +} +#endif + diff --git a/contrib/libs/base64/neon32/ya.make b/contrib/libs/base64/neon32/ya.make index 435b1127bb..d17e5a0e70 100644 --- a/contrib/libs/base64/neon32/ya.make +++ b/contrib/libs/base64/neon32/ya.make @@ -1,11 +1,11 @@ -OWNER( - yazevnul +OWNER( + yazevnul g:contrib g:cpp-contrib -) - -LIBRARY() - +) + +LIBRARY() + LICENSE( BSD-2-Clause AND MIT @@ -13,15 +13,15 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -NO_UTIL() - -SRCS( - codec_neon32.c - lib.c -) - +NO_UTIL() + +SRCS( + codec_neon32.c + lib.c +) + IF (OS_LINUX OR OS_DARWIN OR OS_ANDROID) CONLYFLAGS(-std=c11) -ENDIF() - -END() +ENDIF() + +END() |