about | summary | refs | log | tree | commit | diff | stats
path: root/contrib/libs/base64/neon64/enc_neon.c
diff options
context:
space:
mode:
author: yazevnul <yazevnul@yandex-team.ru> 2022-02-10 16:46:46 +0300
committer: Daniil Cherednik <dcherednik@yandex-team.ru> 2022-02-10 16:46:46 +0300
commit: 8cbc307de0221f84c80c42dcbe07d40727537e2c (patch)
tree: 625d5a673015d1df891e051033e9fcde5c7be4e5 /contrib/libs/base64/neon64/enc_neon.c
parent: 30d1ef3941e0dc835be7609de5ebee66958f215a (diff)
download: ydb-8cbc307de0221f84c80c42dcbe07d40727537e2c.tar.gz
Restoring authorship annotation for <yazevnul@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/libs/base64/neon64/enc_neon.c')
-rw-r--r-- contrib/libs/base64/neon64/enc_neon.c 74
1 files changed, 37 insertions, 37 deletions
diff --git a/contrib/libs/base64/neon64/enc_neon.c b/contrib/libs/base64/neon64/enc_neon.c
index 9cf28a11f8..2ba5a561e9 100644
--- a/contrib/libs/base64/neon64/enc_neon.c
+++ b/contrib/libs/base64/neon64/enc_neon.c
@@ -1,37 +1,37 @@
-// If we have ARM NEON support, pick off 48 bytes at a time:
-while (srclen >= 48)
-{
- uint8x16x3_t str;
- uint8x16x4_t res;
-
- // Load 48 bytes and deinterleave:
- str = vld3q_u8((uint8_t *)c);
-
- // Divide bits of three input bytes over four output bytes:
- res.val[0] = vshrq_n_u8(str.val[0], 2);
- res.val[1] = vshrq_n_u8(str.val[1], 4) | vshlq_n_u8(str.val[0], 4);
- res.val[2] = vshrq_n_u8(str.val[2], 6) | vshlq_n_u8(str.val[1], 2);
- res.val[3] = str.val[2];
-
- // Clear top two bits:
- res.val[0] &= vdupq_n_u8(0x3F);
- res.val[1] &= vdupq_n_u8(0x3F);
- res.val[2] &= vdupq_n_u8(0x3F);
- res.val[3] &= vdupq_n_u8(0x3F);
-
- // The bits have now been shifted to the right locations;
- // translate their values 0..63 to the Base64 alphabet.
- // Use a 64-byte table lookup:
- res.val[0] = vqtbl4q_u8(tbl_enc, res.val[0]);
- res.val[1] = vqtbl4q_u8(tbl_enc, res.val[1]);
- res.val[2] = vqtbl4q_u8(tbl_enc, res.val[2]);
- res.val[3] = vqtbl4q_u8(tbl_enc, res.val[3]);
-
- // Interleave and store result:
- vst4q_u8((uint8_t *)o, res);
-
- c += 48; // 3 * 16 bytes of input
- o += 64; // 4 * 16 bytes of output
- outl += 64;
- srclen -= 48;
-}
+// If we have ARM NEON support, pick off 48 bytes at a time (48 input bytes become 64 output bytes per pass; fewer than 48 remaining bytes are left in srclen):
+while (srclen >= 48)
+{
+ uint8x16x3_t str; // three 16-lane vectors of input bytes
+ uint8x16x4_t res; // four 16-lane vectors of output bytes
+
+ // Load 48 bytes and deinterleave, so val[0], val[1], val[2] hold the 1st, 2nd and 3rd byte of each of 16 three-byte groups:
+ str = vld3q_u8((uint8_t *)c);
+
+ // Divide bits of three input bytes over four output bytes (24 bits become four 6-bit values):
+ res.val[0] = vshrq_n_u8(str.val[0], 2); // top 6 bits of byte 0
+ res.val[1] = vshrq_n_u8(str.val[1], 4) | vshlq_n_u8(str.val[0], 4); // low 2 bits of byte 0 + top 4 bits of byte 1 (once masked below)
+ res.val[2] = vshrq_n_u8(str.val[2], 6) | vshlq_n_u8(str.val[1], 2); // low 4 bits of byte 1 + top 2 bits of byte 2 (once masked below)
+ res.val[3] = str.val[2]; // low 6 bits of byte 2 (once masked below)
+
+ // Clear top two bits, dropping the stray bits the shifts left behind so every lane is in 0..63:
+ res.val[0] &= vdupq_n_u8(0x3F); // already below 64 after the >> 2, so this one changes nothing
+ res.val[1] &= vdupq_n_u8(0x3F);
+ res.val[2] &= vdupq_n_u8(0x3F);
+ res.val[3] &= vdupq_n_u8(0x3F);
+
+ // The bits have now been shifted to the right locations;
+ // translate their values 0..63 to the Base64 alphabet.
+ // Use a 64-byte table lookup (tbl_enc is defined outside this fragment; presumably it holds the 64-character alphabet -- confirm at its definition):
+ res.val[0] = vqtbl4q_u8(tbl_enc, res.val[0]);
+ res.val[1] = vqtbl4q_u8(tbl_enc, res.val[1]);
+ res.val[2] = vqtbl4q_u8(tbl_enc, res.val[2]);
+ res.val[3] = vqtbl4q_u8(tbl_enc, res.val[3]);
+
+ // Interleave the four result vectors back into output order and store 64 bytes:
+ vst4q_u8((uint8_t *)o, res);
+
+ c += 48; // 3 * 16 bytes of input
+ o += 64; // 4 * 16 bytes of output
+ outl += 64; // presumably the running output length; declared outside this fragment
+ srclen -= 48; // input bytes still to be encoded
+}