diff options
author | yazevnul <yazevnul@yandex-team.ru> | 2022-02-10 16:46:48 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:48 +0300 |
commit | 9abfb1a53b7f7b791444d1378e645d8fad9b06ed (patch) | |
tree | 49e222ea1c5804306084bb3ae065bb702625360f /contrib/libs/base64/neon64/codec_neon64.c | |
parent | 8cbc307de0221f84c80c42dcbe07d40727537e2c (diff) | |
download | ydb-9abfb1a53b7f7b791444d1378e645d8fad9b06ed.tar.gz |
Restoring authorship annotation for <yazevnul@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/base64/neon64/codec_neon64.c')
-rw-r--r-- | contrib/libs/base64/neon64/codec_neon64.c | 196 |
1 files changed, 98 insertions, 98 deletions
diff --git a/contrib/libs/base64/neon64/codec_neon64.c b/contrib/libs/base64/neon64/codec_neon64.c index 0a954a6712..7a352c3adf 100644 --- a/contrib/libs/base64/neon64/codec_neon64.c +++ b/contrib/libs/base64/neon64/codec_neon64.c @@ -1,98 +1,98 @@ -#if (defined(__ARM_NEON) && !defined(__ARM_NEON__)) -#define __ARM_NEON__ -#endif - -#include <stdint.h> -#include <stddef.h> -#include <stdlib.h> -#ifdef __ARM_NEON__ -#include <arm_neon.h> -#endif - -#include "libbase64.h" -#include "codecs.h" - -#if (defined(__aarch64__) && defined(__ARM_NEON__)) - -#define CMPGT(s,n) vcgtq_u8((s), vdupq_n_u8(n)) -#define CMPEQ(s,n) vceqq_u8((s), vdupq_n_u8(n)) -#define REPLACE(s,n) vandq_u8((s), vdupq_n_u8(n)) -#define RANGE(s,a,b) vandq_u8(vcgeq_u8((s), vdupq_n_u8(a)), vcleq_u8((s), vdupq_n_u8(b))) - -// With this transposed encoding table, we can use -// a 64-byte lookup to do the encoding. -// Read the table top to bottom, left to right. -static const char *neon64_base64_table_enc_transposed = -{ - "AQgw" - "BRhx" - "CSiy" - "DTjz" - "EUk0" - "FVl1" - "GWm2" - "HXn3" - "IYo4" - "JZp5" - "Kaq6" - "Lbr7" - "Mcs8" - "Ndt9" - "Oeu+" - "Pfv/" -}; -#endif - -// Stride size is so large on these NEON 64-bit functions -// (48 bytes encode, 64 bytes decode) that we inline the -// uint64 codec to stay performant on smaller inputs. - -void -neon64_base64_stream_encode - ( struct neon64_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ -#if (defined(__aarch64__) && defined(__ARM_NEON__)) - uint8x16x4_t tbl_enc = vld4q_u8((uint8_t const*)neon64_base64_table_enc_transposed); - - #include "enc_head.c" - #include "enc_neon.c" - #include "enc_uint64.c" - #include "enc_tail.c" -#else - (void)state; - (void)src; - (void)srclen; - (void)out; - (void)outlen; - abort(); -#endif -} - -int -neon64_base64_stream_decode - ( struct neon64_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ -#if (defined(__aarch64__) && defined(__ARM_NEON__)) - #include "dec_head.c" - #include "dec_neon.c" - #include "dec_uint64.c" - #include "dec_tail.c" -#else - (void)state; - (void)src; - (void)srclen; - (void)out; - (void)outlen; - abort(); -#endif -} +#if (defined(__ARM_NEON) && !defined(__ARM_NEON__)) +#define __ARM_NEON__ +#endif + +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> +#ifdef __ARM_NEON__ +#include <arm_neon.h> +#endif + +#include "libbase64.h" +#include "codecs.h" + +#if (defined(__aarch64__) && defined(__ARM_NEON__)) + +#define CMPGT(s,n) vcgtq_u8((s), vdupq_n_u8(n)) +#define CMPEQ(s,n) vceqq_u8((s), vdupq_n_u8(n)) +#define REPLACE(s,n) vandq_u8((s), vdupq_n_u8(n)) +#define RANGE(s,a,b) vandq_u8(vcgeq_u8((s), vdupq_n_u8(a)), vcleq_u8((s), vdupq_n_u8(b))) + +// With this transposed encoding table, we can use +// a 64-byte lookup to do the encoding. +// Read the table top to bottom, left to right. +static const char *neon64_base64_table_enc_transposed = +{ + "AQgw" + "BRhx" + "CSiy" + "DTjz" + "EUk0" + "FVl1" + "GWm2" + "HXn3" + "IYo4" + "JZp5" + "Kaq6" + "Lbr7" + "Mcs8" + "Ndt9" + "Oeu+" + "Pfv/" +}; +#endif + +// Stride size is so large on these NEON 64-bit functions +// (48 bytes encode, 64 bytes decode) that we inline the +// uint64 codec to stay performant on smaller inputs. + +void +neon64_base64_stream_encode + ( struct neon64_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#if (defined(__aarch64__) && defined(__ARM_NEON__)) + uint8x16x4_t tbl_enc = vld4q_u8((uint8_t const*)neon64_base64_table_enc_transposed); + + #include "enc_head.c" + #include "enc_neon.c" + #include "enc_uint64.c" + #include "enc_tail.c" +#else + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} + +int +neon64_base64_stream_decode + ( struct neon64_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#if (defined(__aarch64__) && defined(__ARM_NEON__)) + #include "dec_head.c" + #include "dec_neon.c" + #include "dec_uint64.c" + #include "dec_tail.c" +#else + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} |