diff options
author | yazevnul <yazevnul@yandex-team.ru> | 2022-02-10 16:46:48 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:48 +0300 |
commit | 9abfb1a53b7f7b791444d1378e645d8fad9b06ed (patch) | |
tree | 49e222ea1c5804306084bb3ae065bb702625360f /contrib/libs | |
parent | 8cbc307de0221f84c80c42dcbe07d40727537e2c (diff) | |
download | ydb-9abfb1a53b7f7b791444d1378e645d8fad9b06ed.tar.gz |
Restoring authorship annotation for <yazevnul@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs')
96 files changed, 4443 insertions, 4443 deletions
diff --git a/contrib/libs/base64/LICENSE b/contrib/libs/base64/LICENSE index f55fd99c26..4f784e0676 100644 --- a/contrib/libs/base64/LICENSE +++ b/contrib/libs/base64/LICENSE @@ -1,25 +1,25 @@ -Copyright (c) 2013-2015, Alfred Klomp -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are -met: - -- Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -- Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED -TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +Copyright (c) 2013-2015, Alfred Klomp +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/contrib/libs/base64/README.md b/contrib/libs/base64/README.md index 57c269ea38..b9ceb86ceb 100644 --- a/contrib/libs/base64/README.md +++ b/contrib/libs/base64/README.md @@ -1,447 +1,447 @@ -# Fast Base64 stream encoder/decoder - -This is an implementation of a base64 stream encoding/decoding library in C99 -with SIMD (AVX2, NEON, AArch64/NEON, SSSE3) and [OpenMP](http://www.openmp.org) -acceleration. It also contains wrapper functions to encode/decode simple -length-delimited strings. This library aims to be: - -- FAST; -- easy to use; -- elegant. - -On x86, the library does runtime feature detection. The first time it's called, -the library will determine the appropriate encoding/decoding routines for the -machine. It then remembers them for the lifetime of the program. 
If your -processor supports AVX2 or SSSE3 instructions, the library will pick an -optimized codec that lets it encode/decode 12 or 24 bytes at a time, which -gives a speedup of four or more times compared to the "plain" bytewise codec. - -NEON support is hardcoded to on or off at compile time, because portable -runtime feature detection is unavailable on ARM. - -Even if your processor does not support SIMD instructions, this is a very fast -library. The fallback routine can process 32 or 64 bits of input in one round, -depending on your processor's word width, which still makes it significantly -faster than naive bytewise implementations. On some 64-bit machines, the 64-bit -routines even outperform the SSSE3 ones. - -To the author's knowledge, at the time of original release, this was the only -Base64 library to offer SIMD acceleration. The author wrote -[an article](http://www.alfredklomp.com/programming/sse-base64) explaining one -possible SIMD approach to encoding/decoding Base64. The article can help figure -out what the code is doing, and why. - -Notable features: - -- Really fast on x86 and ARM systems by using SIMD vector processing; -- Can use [OpenMP](http://www.openmp.org) for even more parallel speedups; -- Really fast on other 32 or 64-bit platforms through optimized routines; -- Reads/writes blocks of streaming data; -- Does not dynamically allocate memory; -- Valid C99 that compiles with pedantic options on; -- Re-entrant and threadsafe; -- Unit tested; -- Uses Duff's Device. - -## Acknowledgements - -The original AVX2, NEON and Aarch64/NEON codecs were generously contributed by -[Inkymail](https://github.com/inkymail/base64), who, in their fork, also -implemented some additional features. Their work is slowly being backported -into this project. 
- -The SSSE3 and AVX2 codecs were substantially improved by using some very clever -optimizations described by Wojciech Muła in a -[series](http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html) of -[articles](http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html). -His own code is [here](https://github.com/WojciechMula/toys/tree/master/base64). - -The OpenMP implementation was added by Ferry Toth (@htot) from [Exalon Delft](http://www.exalondelft.nl). - -## Building - -The `lib` directory contains the code for the actual library. -Typing `make` in the toplevel directory will build `lib/libbase64.o` and `bin/base64`. -The first is a single, self-contained object file that you can link into your own project. -The second is a standalone test binary that works similarly to the `base64` system utility. - -The matching header file needed to use this library is in `include/libbase64.h`. - -To compile just the "plain" library without SIMD codecs, type: - -```sh -make lib/libbase64.o -``` - -Optional SIMD codecs can be included by specifying the `AVX2_CFLAGS`, `NEON32_CFLAGS`, `NEON64_CFLAGS` and/or `SSSE3_CFLAGS` environment variables. -A typical build invocation on x86 looks like this: - -```sh -AVX2_CFLAGS=-mavx2 SSSE3_CFLAGS=-mssse3 make lib/libbase64.o -``` - -### AVX2 - -To build and include the AVX2 codec, set the `AVX2_CFLAGS` environment variable to a value that will turn on AVX2 support in your compiler, typically `-mavx2`. -Example: - -```sh -AVX2_CFLAGS=-mavx2 make -``` - -The codec will only be used if runtime feature detection shows that the target machine supports AVX2. - -### SSSE3 - -To build and include the SSSE3 codec, set the `SSSE3_CFLAGS` environment variable to a value that will turn on SSSE3 support in your compiler, typically `-mssse3`. -Example: - -```sh -SSSE3_CFLAGS=-mssse3 make -``` - -The codec will only be used if runtime feature detection shows that the target machine supports SSSE3. 
- -### NEON - -This library includes two NEON codecs: one for regular 32-bit ARM and one for the 64-bit AArch64 with NEON, which has double the amount of SIMD registers and can do full 64-byte table lookups. -These codecs encode in 48-byte chunks and decode in massive 64-byte chunks, so they had to be augmented with an uint32/64 codec to stay fast on smaller inputs! - -Use LLVM/Clang for compiling the NEON codecs. -The code generation of at least GCC 4.6 (the version shipped with Raspbian and used for testing) contains a bug when compiling `vstq4_u8()`, and the generated assembly code is of low quality. -NEON intrinsics are a known weak area of GCC. -Clang does a better job. - -NEON support can unfortunately not be portably detected at runtime from userland (the `mrc` instruction is privileged), so the default value for using the NEON codec is determined at compile-time. -But you can do your own runtime detection. -You can include the NEON codec and make it the default, then do a runtime check if the CPU has NEON support, and if not, force a downgrade to non-NEON with `BASE64_FORCE_PLAIN`. - -These are your options: - -1. Don't include NEON support; -2. build NEON support and make it the default, but build all other code without NEON flags so that you can override the default at runtime with `BASE64_FORCE_PLAIN`; -3. build everything with NEON support and make it the default; -4. build everything with NEON support, but don't make it the default (which makes no sense). - -For option 1, simply don't specify any NEON-specific compiler flags at all, like so: - -```sh -CC=clang CFLAGS="-march=armv6" make -``` - -For option 2, keep your `CFLAGS` plain, but set the `NEON32_CFLAGS` environment variable to a value that will build NEON support. -The line below, for instance, will build all the code at ARMv6 level, except for the NEON codec, which is built at ARMv7. -It will also make the NEON codec the default. 
-For ARMv6 platforms, override that default at runtime with the `BASE64_FORCE_PLAIN` flag. -No ARMv7/NEON code will then be touched. - -```sh -CC=clang CFLAGS="-march=armv6" NEON32_CFLAGS="-march=armv7 -mfpu=neon" make -``` - -For option 3, put everything in your `CFLAGS` and use a stub, but non-empty, `NEON32_CFLAGS`. -This example works for the Raspberry Pi 2B V1.1, which has NEON support: - -```sh -CC=clang CFLAGS="-march=armv7 -mtune=cortex-a7" NEON32_CFLAGS="-mfpu=neon" make -``` - -To build and include the NEON64 codec, use `CFLAGS` as usual to define the platform and set `NEON64_CFLAGS` to a nonempty stub. -(The AArch64 target has mandatory NEON64 support.) -Example: - -```sh -CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a" NEON64_CFLAGS=" " make -``` - -### OpenMP - -To enable OpenMP on GCC you need to build with `-fopenmp`. This can be by setting the the `OPENMP` environment variable to `1`. - -Example: - -```sh -OPENMP=1 make -``` - -This will let the compiler define `_OPENMP`, which in turn will include the OpenMP optimized `lib_openmp.c` into `lib.c`. - -By default the number of parallel threads will be equal to the number of cores of the processor. -On a quad core with hyperthreading eight cores will be detected, but hyperthreading will not increase the performance. - -To get verbose information about OpenMP start the program with `OMP_DISPLAY_ENV=VERBOSE`, for instance - -```sh -OMP_DISPLAY_ENV=VERBOSE test/benchmark -``` - -To put a limit on the number of threads, start the program with `OMP_THREAD_LIMIT=n`, for instance - -```sh -OMP_THREAD_LIMIT=2 test/benchmark -``` - -An example of running a benchmark with OpenMP, SSSE3 and AVX2 enabled: - -```sh -make clean && OPENMP=1 SSSE3_CFLAGS=-mssse3 AVX2_CFLAGS=-mavx2 make && OPENMP=1 make -C test -``` - -## API reference - -Strings are represented as a pointer and a length; they are not -zero-terminated. This was a conscious design decision. 
In the decoding step, -relying on zero-termination would make no sense since the output could contain -legitimate zero bytes. In the encoding step, returning the length saves the -overhead of calling `strlen()` on the output. If you insist on the trailing -zero, you can easily add it yourself at the given offset. - -### Flags - -Some API calls take a `flags` argument. -That argument can be used to force the use of a specific codec, even if that codec is a no-op in the current build. -Mainly there for testing purposes, this is also useful on ARM where the only way to do runtime NEON detection is to ask the OS if it's available. -The following constants can be used: - -- `BASE64_FORCE_AVX2` -- `BASE64_FORCE_NEON32` -- `BASE64_FORCE_NEON64` -- `BASE64_FORCE_PLAIN` -- `BASE64_FORCE_SSSE3` - -Set `flags` to `0` for the default behavior, which is runtime feature detection on x86, a compile-time fixed codec on ARM, and the plain codec on other platforms. - -### Encoding - -#### base64_encode - -```c -void base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - , int flags - ) ; -``` - -Wrapper function to encode a plain string of given length. -Output is written to `out` without trailing zero. -Output length in bytes is written to `outlen`. -The buffer in `out` has been allocated by the caller and is at least 4/3 the size of the input. - -#### base64_stream_encode_init - -```c -void base64_stream_encode_init - ( struct base64_state *state - , int flags - ) ; -``` - -Call this before calling `base64_stream_encode()` to init the state. - -#### base64_stream_encode - -```c -void base64_stream_encode - ( struct base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; -``` - -Encodes the block of data of given length at `src`, into the buffer at `out`. -Caller is responsible for allocating a large enough out-buffer; it must be at least 4/3 the size of the in-buffer, but take some margin. 
-Places the number of new bytes written into `outlen` (which is set to zero when the function starts). -Does not zero-terminate or finalize the output. - -#### base64_stream_encode_final - -```c -void base64_stream_encode_final - ( struct base64_state *state - , char *out - , size_t *outlen - ) ; -``` - -Finalizes the output begun by previous calls to `base64_stream_encode()`. -Adds the required end-of-stream markers if appropriate. -`outlen` is modified and will contain the number of new bytes written at `out` (which will quite often be zero). - -### Decoding - -#### base64_decode - -```c -int base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - , int flags - ) ; -``` - -Wrapper function to decode a plain string of given length. -Output is written to `out` without trailing zero. Output length in bytes is written to `outlen`. -The buffer in `out` has been allocated by the caller and is at least 3/4 the size of the input. -Returns `1` for success, and `0` when a decode error has occured due to invalid input. -Returns `-1` if the chosen codec is not included in the current build. - -#### base64_stream_decode_init - -```c -void base64_stream_decode_init - ( struct base64_state *state - , int flags - ) ; -``` - -Call this before calling `base64_stream_decode()` to init the state. - -#### base64_stream_decode - -```c -int base64_stream_decode - ( struct base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; -``` - -Decodes the block of data of given length at `src`, into the buffer at `out`. -Caller is responsible for allocating a large enough out-buffer; it must be at least 3/4 the size of the in-buffer, but take some margin. -Places the number of new bytes written into `outlen` (which is set to zero when the function starts). -Does not zero-terminate the output. -Returns 1 if all is well, and 0 if a decoding error was found, such as an invalid character. 
-Returns -1 if the chosen codec is not included in the current build. -Used by the test harness to check whether a codec is available for testing. - -## Examples - -A simple example of encoding a static string to base64 and printing the output -to stdout: - -```c -#include <stdio.h> /* fwrite */ -#include "libbase64.h" - -int main () -{ - char src[] = "hello world"; - char out[20]; - size_t srclen = sizeof(src) - 1; - size_t outlen; - - base64_encode(src, srclen, out, &outlen, 0); - - fwrite(out, outlen, 1, stdout); - - return 0; -} -``` - -A simple example (no error checking, etc) of stream encoding standard input to -standard output: - -```c -#include <stdio.h> -#include "libbase64.h" - -int main () -{ - size_t nread, nout; - char buf[12000], out[16000]; - struct base64_state state; - - // Initialize stream encoder: - base64_stream_encode_init(&state, 0); - - // Read contents of stdin into buffer: - while ((nread = fread(buf, 1, sizeof(buf), stdin)) > 0) { - - // Encode buffer: - base64_stream_encode(&state, buf, nread, out, &nout); - - // If there's output, print it to stdout: - if (nout) { - fwrite(out, nout, 1, stdout); - } - - // If an error occurred, exit the loop: - if (feof(stdin)) { - break; - } - } - - // Finalize encoding: - base64_stream_encode_final(&state, out, &nout); - - // If the finalizing resulted in extra output bytes, print them: - if (nout) { - fwrite(out, nout, 1, stdout); - } - - return 0; -} -``` - -Also see `bin/base64.c` for a simple re-implementation of the `base64` utility. -A file or standard input is fed through the encoder/decoder, and the output is -written to standard output. - -## Tests - -See `tests/` for a small test suite. 
Testing is automated with [Travis CI](https://travis-ci.org/aklomp/base64): - -[//]: # "[![Build Status](https://travis-ci.org/aklomp/base64.png?branch=master)](https://travis-ci.org/aklomp/base64)" - -## Benchmarks - -Benchmarks can be run with the built-in benchmark program as follows: - -```sh -make -C test benchmark <buildflags> && test/benchmark -``` - -It will run an encoding and decoding benchmark for all of the compiled-in codecs. - -The table below contains some results on random machines. All numbers measured with a 10MB buffer in MB/sec, rounded to the nearest integer. - -| Processor | Plain enc | Plain dec | SSSE3 enc | SSSE3 dec | AVX2 enc | AVX2 dec | NEON32 enc | NEON32 dec | -|-------------------------------------------|----------:|----------:|----------:|----------:|---------:|---------:|-----------:|-----------:| -| i7-4771 @ 3.5 GHz | 833 | 1111 | 3333 | 4444 | 4999 | 6666 | - | - | -| i7-4770 @ 3.4 GHz DDR1600 | 1831 | 1748 | 3570 | 3695 | 6539 | 6512 | - | - | -| i7-4770 @ 3.4 GHz DDR1600 OPENMP 1 thread | 1779 | 1727 | 3419 | 3788 | 4589 | 5871 | - | - | -| i7-4770 @ 3.4 GHz DDR1600 OPENMP 2 thread | 3367 | 3374 | 4784 | 6672 | 5120 | 7721 | - | - | -| i7-4770 @ 3.4 GHz DDR1600 OPENMP 4 thread | 4834 | 6075 | 4906 | 8154 | 4839 | 6911 | - | - | -| i7-4770 @ 3.4 GHz DDR1600 OPENMP 8 thread | 4696 | 6361 | 5227 | 7737 | 4813 | 7189 | - | - | -| i5-4590S @ 3.0 GHz | 1721 | 1643 | 3255 | 3404 | 4124 | 5403 | - | - | -| Xeon X5570 @ 2.93 GHz | 1097 | 1048 | 2077 | 2215 | - | - | - | - | -| Pentium4 @ 3.4 GHz | 528 | 448 | - | - | - | - | - | - | -| Atom N270 | 112 | 125 | 331 | 368 | - | - | - | - | -| AMD E-450 | 370 | 332 | 405 | 366 | - | - | - | - | -| PowerPC E6500 @ 1.8GHz | 270 | 265 | - | - | - | - | - | - | -| Raspberry PI B+ V1.2 | 46 | 40 | - | - | - | - | - | - | -| Raspberry PI 2 B V1.1 | 104 | 88 | - | - | - | - | 158 | 116 | -| Intel Edison @ 500 MHz | 79 | 92 | 152 | 172 | - | - | - | - | -| Intel Edison @ 500 MHz OPENMP 2 thread | 
158 | 184 | 300 | 343 | - | - | - | - | - -Benchmarks on i7-4770 @ 3.4 GHz DDR1600 with varrying buffer sizes: -![Benchmarks](https://jing.yandex-team.ru/files/yazevnul/base64-benchmarks.png) - -Note: optimal buffer size to take advantage of the cache is in the range of 100 kB to 1 MB, leading to 12x faster AVX encoding/decoding compared to Plain, or a throughput of 24/27GB/sec. -Also note the performance degradation when the buffer size is less than 10 kB due to thread creation overhead. -To prevent this from happening `lib_openmp.c` defines `OMP_THRESHOLD 20000`, requiring at least a 20000 byte buffer to enable multithreading. - -## License - -This repository is licensed under the -[BSD 2-clause License](http://opensource.org/licenses/BSD-2-Clause). See the -LICENSE file. +# Fast Base64 stream encoder/decoder + +This is an implementation of a base64 stream encoding/decoding library in C99 +with SIMD (AVX2, NEON, AArch64/NEON, SSSE3) and [OpenMP](http://www.openmp.org) +acceleration. It also contains wrapper functions to encode/decode simple +length-delimited strings. This library aims to be: + +- FAST; +- easy to use; +- elegant. + +On x86, the library does runtime feature detection. The first time it's called, +the library will determine the appropriate encoding/decoding routines for the +machine. It then remembers them for the lifetime of the program. If your +processor supports AVX2 or SSSE3 instructions, the library will pick an +optimized codec that lets it encode/decode 12 or 24 bytes at a time, which +gives a speedup of four or more times compared to the "plain" bytewise codec. + +NEON support is hardcoded to on or off at compile time, because portable +runtime feature detection is unavailable on ARM. + +Even if your processor does not support SIMD instructions, this is a very fast +library. 
The fallback routine can process 32 or 64 bits of input in one round, +depending on your processor's word width, which still makes it significantly +faster than naive bytewise implementations. On some 64-bit machines, the 64-bit +routines even outperform the SSSE3 ones. + +To the author's knowledge, at the time of original release, this was the only +Base64 library to offer SIMD acceleration. The author wrote +[an article](http://www.alfredklomp.com/programming/sse-base64) explaining one +possible SIMD approach to encoding/decoding Base64. The article can help figure +out what the code is doing, and why. + +Notable features: + +- Really fast on x86 and ARM systems by using SIMD vector processing; +- Can use [OpenMP](http://www.openmp.org) for even more parallel speedups; +- Really fast on other 32 or 64-bit platforms through optimized routines; +- Reads/writes blocks of streaming data; +- Does not dynamically allocate memory; +- Valid C99 that compiles with pedantic options on; +- Re-entrant and threadsafe; +- Unit tested; +- Uses Duff's Device. + +## Acknowledgements + +The original AVX2, NEON and Aarch64/NEON codecs were generously contributed by +[Inkymail](https://github.com/inkymail/base64), who, in their fork, also +implemented some additional features. Their work is slowly being backported +into this project. + +The SSSE3 and AVX2 codecs were substantially improved by using some very clever +optimizations described by Wojciech Muła in a +[series](http://0x80.pl/notesen/2016-01-12-sse-base64-encoding.html) of +[articles](http://0x80.pl/notesen/2016-01-17-sse-base64-decoding.html). +His own code is [here](https://github.com/WojciechMula/toys/tree/master/base64). + +The OpenMP implementation was added by Ferry Toth (@htot) from [Exalon Delft](http://www.exalondelft.nl). + +## Building + +The `lib` directory contains the code for the actual library. +Typing `make` in the toplevel directory will build `lib/libbase64.o` and `bin/base64`. 
+The first is a single, self-contained object file that you can link into your own project. +The second is a standalone test binary that works similarly to the `base64` system utility. + +The matching header file needed to use this library is in `include/libbase64.h`. + +To compile just the "plain" library without SIMD codecs, type: + +```sh +make lib/libbase64.o +``` + +Optional SIMD codecs can be included by specifying the `AVX2_CFLAGS`, `NEON32_CFLAGS`, `NEON64_CFLAGS` and/or `SSSE3_CFLAGS` environment variables. +A typical build invocation on x86 looks like this: + +```sh +AVX2_CFLAGS=-mavx2 SSSE3_CFLAGS=-mssse3 make lib/libbase64.o +``` + +### AVX2 + +To build and include the AVX2 codec, set the `AVX2_CFLAGS` environment variable to a value that will turn on AVX2 support in your compiler, typically `-mavx2`. +Example: + +```sh +AVX2_CFLAGS=-mavx2 make +``` + +The codec will only be used if runtime feature detection shows that the target machine supports AVX2. + +### SSSE3 + +To build and include the SSSE3 codec, set the `SSSE3_CFLAGS` environment variable to a value that will turn on SSSE3 support in your compiler, typically `-mssse3`. +Example: + +```sh +SSSE3_CFLAGS=-mssse3 make +``` + +The codec will only be used if runtime feature detection shows that the target machine supports SSSE3. + +### NEON + +This library includes two NEON codecs: one for regular 32-bit ARM and one for the 64-bit AArch64 with NEON, which has double the amount of SIMD registers and can do full 64-byte table lookups. +These codecs encode in 48-byte chunks and decode in massive 64-byte chunks, so they had to be augmented with an uint32/64 codec to stay fast on smaller inputs! + +Use LLVM/Clang for compiling the NEON codecs. +The code generation of at least GCC 4.6 (the version shipped with Raspbian and used for testing) contains a bug when compiling `vstq4_u8()`, and the generated assembly code is of low quality. +NEON intrinsics are a known weak area of GCC. +Clang does a better job. 
+ +NEON support can unfortunately not be portably detected at runtime from userland (the `mrc` instruction is privileged), so the default value for using the NEON codec is determined at compile-time. +But you can do your own runtime detection. +You can include the NEON codec and make it the default, then do a runtime check if the CPU has NEON support, and if not, force a downgrade to non-NEON with `BASE64_FORCE_PLAIN`. + +These are your options: + +1. Don't include NEON support; +2. build NEON support and make it the default, but build all other code without NEON flags so that you can override the default at runtime with `BASE64_FORCE_PLAIN`; +3. build everything with NEON support and make it the default; +4. build everything with NEON support, but don't make it the default (which makes no sense). + +For option 1, simply don't specify any NEON-specific compiler flags at all, like so: + +```sh +CC=clang CFLAGS="-march=armv6" make +``` + +For option 2, keep your `CFLAGS` plain, but set the `NEON32_CFLAGS` environment variable to a value that will build NEON support. +The line below, for instance, will build all the code at ARMv6 level, except for the NEON codec, which is built at ARMv7. +It will also make the NEON codec the default. +For ARMv6 platforms, override that default at runtime with the `BASE64_FORCE_PLAIN` flag. +No ARMv7/NEON code will then be touched. + +```sh +CC=clang CFLAGS="-march=armv6" NEON32_CFLAGS="-march=armv7 -mfpu=neon" make +``` + +For option 3, put everything in your `CFLAGS` and use a stub, but non-empty, `NEON32_CFLAGS`. +This example works for the Raspberry Pi 2B V1.1, which has NEON support: + +```sh +CC=clang CFLAGS="-march=armv7 -mtune=cortex-a7" NEON32_CFLAGS="-mfpu=neon" make +``` + +To build and include the NEON64 codec, use `CFLAGS` as usual to define the platform and set `NEON64_CFLAGS` to a nonempty stub. +(The AArch64 target has mandatory NEON64 support.) 
+Example: + +```sh +CC=clang CFLAGS="--target=aarch64-linux-gnu -march=armv8-a" NEON64_CFLAGS=" " make +``` + +### OpenMP + +To enable OpenMP on GCC you need to build with `-fopenmp`. This can be done by setting the `OPENMP` environment variable to `1`. + +Example: + +```sh +OPENMP=1 make +``` + +This will let the compiler define `_OPENMP`, which in turn will include the OpenMP optimized `lib_openmp.c` into `lib.c`. + +By default the number of parallel threads will be equal to the number of cores of the processor. +On a quad core with hyperthreading eight cores will be detected, but hyperthreading will not increase the performance. + +To get verbose information about OpenMP start the program with `OMP_DISPLAY_ENV=VERBOSE`, for instance + +```sh +OMP_DISPLAY_ENV=VERBOSE test/benchmark +``` + +To put a limit on the number of threads, start the program with `OMP_THREAD_LIMIT=n`, for instance + +```sh +OMP_THREAD_LIMIT=2 test/benchmark +``` + +An example of running a benchmark with OpenMP, SSSE3 and AVX2 enabled: + +```sh +make clean && OPENMP=1 SSSE3_CFLAGS=-mssse3 AVX2_CFLAGS=-mavx2 make && OPENMP=1 make -C test +``` + +## API reference + +Strings are represented as a pointer and a length; they are not +zero-terminated. This was a conscious design decision. In the decoding step, +relying on zero-termination would make no sense since the output could contain +legitimate zero bytes. In the encoding step, returning the length saves the +overhead of calling `strlen()` on the output. If you insist on the trailing +zero, you can easily add it yourself at the given offset. + +### Flags + +Some API calls take a `flags` argument. +That argument can be used to force the use of a specific codec, even if that codec is a no-op in the current build. +Mainly there for testing purposes, this is also useful on ARM where the only way to do runtime NEON detection is to ask the OS if it's available. 
+The following constants can be used: + +- `BASE64_FORCE_AVX2` +- `BASE64_FORCE_NEON32` +- `BASE64_FORCE_NEON64` +- `BASE64_FORCE_PLAIN` +- `BASE64_FORCE_SSSE3` + +Set `flags` to `0` for the default behavior, which is runtime feature detection on x86, a compile-time fixed codec on ARM, and the plain codec on other platforms. + +### Encoding + +#### base64_encode + +```c +void base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + , int flags + ) ; +``` + +Wrapper function to encode a plain string of given length. +Output is written to `out` without trailing zero. +Output length in bytes is written to `outlen`. +The buffer in `out` has been allocated by the caller and is at least 4/3 the size of the input. + +#### base64_stream_encode_init + +```c +void base64_stream_encode_init + ( struct base64_state *state + , int flags + ) ; +``` + +Call this before calling `base64_stream_encode()` to init the state. + +#### base64_stream_encode + +```c +void base64_stream_encode + ( struct base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; +``` + +Encodes the block of data of given length at `src`, into the buffer at `out`. +Caller is responsible for allocating a large enough out-buffer; it must be at least 4/3 the size of the in-buffer, but take some margin. +Places the number of new bytes written into `outlen` (which is set to zero when the function starts). +Does not zero-terminate or finalize the output. + +#### base64_stream_encode_final + +```c +void base64_stream_encode_final + ( struct base64_state *state + , char *out + , size_t *outlen + ) ; +``` + +Finalizes the output begun by previous calls to `base64_stream_encode()`. +Adds the required end-of-stream markers if appropriate. +`outlen` is modified and will contain the number of new bytes written at `out` (which will quite often be zero). 
+ +### Decoding + +#### base64_decode + +```c +int base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + , int flags + ) ; +``` + +Wrapper function to decode a plain string of given length. +Output is written to `out` without trailing zero. Output length in bytes is written to `outlen`. +The buffer in `out` has been allocated by the caller and is at least 3/4 the size of the input. +Returns `1` for success, and `0` when a decode error has occurred due to invalid input. +Returns `-1` if the chosen codec is not included in the current build. + +#### base64_stream_decode_init + +```c +void base64_stream_decode_init + ( struct base64_state *state + , int flags + ) ; +``` + +Call this before calling `base64_stream_decode()` to init the state. + +#### base64_stream_decode + +```c +int base64_stream_decode + ( struct base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; +``` + +Decodes the block of data of given length at `src`, into the buffer at `out`. +Caller is responsible for allocating a large enough out-buffer; it must be at least 3/4 the size of the in-buffer, but take some margin. +Places the number of new bytes written into `outlen` (which is set to zero when the function starts). +Does not zero-terminate the output. +Returns 1 if all is well, and 0 if a decoding error was found, such as an invalid character. +Returns -1 if the chosen codec is not included in the current build. +Used by the test harness to check whether a codec is available for testing. 
+ +## Examples + +A simple example of encoding a static string to base64 and printing the output +to stdout: + +```c +#include <stdio.h> /* fwrite */ +#include "libbase64.h" + +int main () +{ + char src[] = "hello world"; + char out[20]; + size_t srclen = sizeof(src) - 1; + size_t outlen; + + base64_encode(src, srclen, out, &outlen, 0); + + fwrite(out, outlen, 1, stdout); + + return 0; +} +``` + +A simple example (no error checking, etc) of stream encoding standard input to +standard output: + +```c +#include <stdio.h> +#include "libbase64.h" + +int main () +{ + size_t nread, nout; + char buf[12000], out[16000]; + struct base64_state state; + + // Initialize stream encoder: + base64_stream_encode_init(&state, 0); + + // Read contents of stdin into buffer: + while ((nread = fread(buf, 1, sizeof(buf), stdin)) > 0) { + + // Encode buffer: + base64_stream_encode(&state, buf, nread, out, &nout); + + // If there's output, print it to stdout: + if (nout) { + fwrite(out, nout, 1, stdout); + } + + // If an error occurred, exit the loop: + if (feof(stdin)) { + break; + } + } + + // Finalize encoding: + base64_stream_encode_final(&state, out, &nout); + + // If the finalizing resulted in extra output bytes, print them: + if (nout) { + fwrite(out, nout, 1, stdout); + } + + return 0; +} +``` + +Also see `bin/base64.c` for a simple re-implementation of the `base64` utility. +A file or standard input is fed through the encoder/decoder, and the output is +written to standard output. + +## Tests + +See `tests/` for a small test suite. Testing is automated with [Travis CI](https://travis-ci.org/aklomp/base64): + +[//]: # "[![Build Status](https://travis-ci.org/aklomp/base64.png?branch=master)](https://travis-ci.org/aklomp/base64)" + +## Benchmarks + +Benchmarks can be run with the built-in benchmark program as follows: + +```sh +make -C test benchmark <buildflags> && test/benchmark +``` + +It will run an encoding and decoding benchmark for all of the compiled-in codecs. 
+ +The table below contains some results on random machines. All numbers measured with a 10MB buffer in MB/sec, rounded to the nearest integer. + +| Processor | Plain enc | Plain dec | SSSE3 enc | SSSE3 dec | AVX2 enc | AVX2 dec | NEON32 enc | NEON32 dec | +|-------------------------------------------|----------:|----------:|----------:|----------:|---------:|---------:|-----------:|-----------:| +| i7-4771 @ 3.5 GHz | 833 | 1111 | 3333 | 4444 | 4999 | 6666 | - | - | +| i7-4770 @ 3.4 GHz DDR1600 | 1831 | 1748 | 3570 | 3695 | 6539 | 6512 | - | - | +| i7-4770 @ 3.4 GHz DDR1600 OPENMP 1 thread | 1779 | 1727 | 3419 | 3788 | 4589 | 5871 | - | - | +| i7-4770 @ 3.4 GHz DDR1600 OPENMP 2 thread | 3367 | 3374 | 4784 | 6672 | 5120 | 7721 | - | - | +| i7-4770 @ 3.4 GHz DDR1600 OPENMP 4 thread | 4834 | 6075 | 4906 | 8154 | 4839 | 6911 | - | - | +| i7-4770 @ 3.4 GHz DDR1600 OPENMP 8 thread | 4696 | 6361 | 5227 | 7737 | 4813 | 7189 | - | - | +| i5-4590S @ 3.0 GHz | 1721 | 1643 | 3255 | 3404 | 4124 | 5403 | - | - | +| Xeon X5570 @ 2.93 GHz | 1097 | 1048 | 2077 | 2215 | - | - | - | - | +| Pentium4 @ 3.4 GHz | 528 | 448 | - | - | - | - | - | - | +| Atom N270 | 112 | 125 | 331 | 368 | - | - | - | - | +| AMD E-450 | 370 | 332 | 405 | 366 | - | - | - | - | +| PowerPC E6500 @ 1.8GHz | 270 | 265 | - | - | - | - | - | - | +| Raspberry PI B+ V1.2 | 46 | 40 | - | - | - | - | - | - | +| Raspberry PI 2 B V1.1 | 104 | 88 | - | - | - | - | 158 | 116 | +| Intel Edison @ 500 MHz | 79 | 92 | 152 | 172 | - | - | - | - | +| Intel Edison @ 500 MHz OPENMP 2 thread | 158 | 184 | 300 | 343 | - | - | - | - | + +Benchmarks on i7-4770 @ 3.4 GHz DDR1600 with varying buffer sizes: +![Benchmarks](https://jing.yandex-team.ru/files/yazevnul/base64-benchmarks.png) + +Note: optimal buffer size to take advantage of the cache is in the range of 100 kB to 1 MB, leading to 12x faster AVX encoding/decoding compared to Plain, or a throughput of 24/27GB/sec.
+Also note the performance degradation when the buffer size is less than 10 kB due to thread creation overhead. +To prevent this from happening `lib_openmp.c` defines `OMP_THRESHOLD 20000`, requiring at least a 20000 byte buffer to enable multithreading. + +## License + +This repository is licensed under the +[BSD 2-clause License](http://opensource.org/licenses/BSD-2-Clause). See the +LICENSE file. diff --git a/contrib/libs/base64/REVISION b/contrib/libs/base64/REVISION index c426a596cd..587536c2a0 100644 --- a/contrib/libs/base64/REVISION +++ b/contrib/libs/base64/REVISION @@ -1 +1 @@ -https://github.com/aklomp/base64/tree/e82ac0ff813fc1f318d215879783683fe7071cc3 +https://github.com/aklomp/base64/tree/e82ac0ff813fc1f318d215879783683fe7071cc3 diff --git a/contrib/libs/base64/avx2/codec_avx2.c b/contrib/libs/base64/avx2/codec_avx2.c index 7e0dc739d6..46c351c539 100644 --- a/contrib/libs/base64/avx2/codec_avx2.c +++ b/contrib/libs/base64/avx2/codec_avx2.c @@ -1,191 +1,191 @@ -#include <stdint.h> -#include <stddef.h> -#include <stdlib.h> - -#include "libbase64.h" -#include "codecs.h" - -#ifdef __AVX2__ -#include <immintrin.h> - -#define CMPGT(s,n) _mm256_cmpgt_epi8((s), _mm256_set1_epi8(n)) -#define CMPEQ(s,n) _mm256_cmpeq_epi8((s), _mm256_set1_epi8(n)) -#define REPLACE(s,n) _mm256_and_si256((s), _mm256_set1_epi8(n)) -#define RANGE(s,a,b) _mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a) - 1)) - -static inline __m256i -_mm256_bswap_epi32 (const __m256i in) -{ - // _mm256_shuffle_epi8() works on two 128-bit lanes separately: - return _mm256_shuffle_epi8(in, _mm256_setr_epi8( - 3, 2, 1, 0, - 7, 6, 5, 4, - 11, 10, 9, 8, - 15, 14, 13, 12, - 3, 2, 1, 0, - 7, 6, 5, 4, - 11, 10, 9, 8, - 15, 14, 13, 12)); -} - -static inline __m256i -enc_reshuffle (__m256i in) -{ - // Spread out 32-bit words over both halves of the input register: - in = _mm256_permutevar8x32_epi32(in, _mm256_setr_epi32( - 0, 1, 2, -1, - 3, 4, 5, -1)); - - // Slice into 32-bit chunks and operate on all 
chunks in parallel. - // All processing is done within the 32-bit chunk. First, shuffle: - // before: [eeeeeeff|ccdddddd|bbbbcccc|aaaaaabb] - // after: [00000000|aaaaaabb|bbbbcccc|ccdddddd] - in = _mm256_shuffle_epi8(in, _mm256_set_epi8( - -1, 9, 10, 11, - -1, 6, 7, 8, - -1, 3, 4, 5, - -1, 0, 1, 2, - -1, 9, 10, 11, - -1, 6, 7, 8, - -1, 3, 4, 5, - -1, 0, 1, 2)); - - // cd = [00000000|00000000|0000cccc|ccdddddd] - const __m256i cd = _mm256_and_si256(in, _mm256_set1_epi32(0x00000FFF)); - - // ab = [0000aaaa|aabbbbbb|00000000|00000000] - const __m256i ab = _mm256_and_si256(_mm256_slli_epi32(in, 4), _mm256_set1_epi32(0x0FFF0000)); - - // merged = [0000aaaa|aabbbbbb|0000cccc|ccdddddd] - const __m256i merged = _mm256_or_si256(ab, cd); - - // bd = [00000000|00bbbbbb|00000000|00dddddd] - const __m256i bd = _mm256_and_si256(merged, _mm256_set1_epi32(0x003F003F)); - - // ac = [00aaaaaa|00000000|00cccccc|00000000] - const __m256i ac = _mm256_and_si256(_mm256_slli_epi32(merged, 2), _mm256_set1_epi32(0x3F003F00)); - - // indices = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] - const __m256i indices = _mm256_or_si256(ac, bd); - - // return = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] - return _mm256_bswap_epi32(indices); -} - -static inline __m256i -enc_translate (const __m256i in) -{ - // Translate values 0..63 to the Base64 alphabet. 
There are five sets: - // # From To Abs Delta Characters - // 0 [0..25] [65..90] +65 +65 ABCDEFGHIJKLMNOPQRSTUVWXYZ - // 1 [26..51] [97..122] +71 +6 abcdefghijklmnopqrstuvwxyz - // 2 [52..61] [48..57] -4 -75 0123456789 - // 3 [62] [43] -19 -15 + - // 4 [63] [47] -16 +3 / - - // Create cumulative masks for characters in sets [1,2,3,4], [2,3,4], - // [3,4], and [4]: - const __m256i mask1 = CMPGT(in, 25); - const __m256i mask2 = CMPGT(in, 51); - const __m256i mask3 = CMPGT(in, 61); - const __m256i mask4 = CMPEQ(in, 63); - - // All characters are at least in cumulative set 0, so add 'A': - __m256i out = _mm256_add_epi8(in, _mm256_set1_epi8(65)); - - // For inputs which are also in any of the other cumulative sets, - // add delta values against the previous set(s) to correct the shift: - out = _mm256_add_epi8(out, REPLACE(mask1, 6)); - out = _mm256_sub_epi8(out, REPLACE(mask2, 75)); - out = _mm256_sub_epi8(out, REPLACE(mask3, 15)); - out = _mm256_add_epi8(out, REPLACE(mask4, 3)); - - return out; -} - -static inline __m256i -dec_reshuffle (__m256i in) -{ - // Shuffle bytes to 32-bit bigendian: - in = _mm256_bswap_epi32(in); - - // Mask in a single byte per shift: - __m256i mask = _mm256_set1_epi32(0x3F000000); - - // Pack bytes together: - __m256i out = _mm256_slli_epi32(_mm256_and_si256(in, mask), 2); - mask = _mm256_srli_epi32(mask, 8); - - out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 4)); - mask = _mm256_srli_epi32(mask, 8); - - out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 6)); - mask = _mm256_srli_epi32(mask, 8); - - out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 8)); - - // Pack bytes together within 32-bit words, discarding words 3 and 7: - out = _mm256_shuffle_epi8(out, _mm256_setr_epi8( - 3, 2, 1, - 7, 6, 5, - 11, 10, 9, - 15, 14, 13, - -1, -1, -1, -1, - 3, 2, 1, - 7, 6, 5, - 11, 10, 9, - 15, 14, 13, - -1, -1, -1, -1)); - - // Pack 32-bit words together, squashing empty words 3 and 
7: - return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32( - 0, 1, 2, 4, 5, 6, -1, -1)); -} - -#endif // __AVX2__ - -void -avx2_base64_stream_encode - ( struct avx2_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ -#if defined(__AVX2__) - #include "enc_head.c" - #include "enc_avx2.c" - #include "enc_tail.c" -#else - (void)state; - (void)src; - (void)srclen; - (void)out; - (void)outlen; - abort(); -#endif -} - -int -avx2_base64_stream_decode - ( struct avx2_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ -#if defined(__AVX2__) - #include "dec_head.c" - #include "dec_avx2.c" - #include "dec_tail.c" -#else - (void)state; - (void)src; - (void)srclen; - (void)out; - (void)outlen; - abort(); -#endif -} +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> + +#include "libbase64.h" +#include "codecs.h" + +#ifdef __AVX2__ +#include <immintrin.h> + +#define CMPGT(s,n) _mm256_cmpgt_epi8((s), _mm256_set1_epi8(n)) +#define CMPEQ(s,n) _mm256_cmpeq_epi8((s), _mm256_set1_epi8(n)) +#define REPLACE(s,n) _mm256_and_si256((s), _mm256_set1_epi8(n)) +#define RANGE(s,a,b) _mm256_andnot_si256(CMPGT((s), (b)), CMPGT((s), (a) - 1)) + +static inline __m256i +_mm256_bswap_epi32 (const __m256i in) +{ + // _mm256_shuffle_epi8() works on two 128-bit lanes separately: + return _mm256_shuffle_epi8(in, _mm256_setr_epi8( + 3, 2, 1, 0, + 7, 6, 5, 4, + 11, 10, 9, 8, + 15, 14, 13, 12, + 3, 2, 1, 0, + 7, 6, 5, 4, + 11, 10, 9, 8, + 15, 14, 13, 12)); +} + +static inline __m256i +enc_reshuffle (__m256i in) +{ + // Spread out 32-bit words over both halves of the input register: + in = _mm256_permutevar8x32_epi32(in, _mm256_setr_epi32( + 0, 1, 2, -1, + 3, 4, 5, -1)); + + // Slice into 32-bit chunks and operate on all chunks in parallel. + // All processing is done within the 32-bit chunk. 
First, shuffle: + // before: [eeeeeeff|ccdddddd|bbbbcccc|aaaaaabb] + // after: [00000000|aaaaaabb|bbbbcccc|ccdddddd] + in = _mm256_shuffle_epi8(in, _mm256_set_epi8( + -1, 9, 10, 11, + -1, 6, 7, 8, + -1, 3, 4, 5, + -1, 0, 1, 2, + -1, 9, 10, 11, + -1, 6, 7, 8, + -1, 3, 4, 5, + -1, 0, 1, 2)); + + // cd = [00000000|00000000|0000cccc|ccdddddd] + const __m256i cd = _mm256_and_si256(in, _mm256_set1_epi32(0x00000FFF)); + + // ab = [0000aaaa|aabbbbbb|00000000|00000000] + const __m256i ab = _mm256_and_si256(_mm256_slli_epi32(in, 4), _mm256_set1_epi32(0x0FFF0000)); + + // merged = [0000aaaa|aabbbbbb|0000cccc|ccdddddd] + const __m256i merged = _mm256_or_si256(ab, cd); + + // bd = [00000000|00bbbbbb|00000000|00dddddd] + const __m256i bd = _mm256_and_si256(merged, _mm256_set1_epi32(0x003F003F)); + + // ac = [00aaaaaa|00000000|00cccccc|00000000] + const __m256i ac = _mm256_and_si256(_mm256_slli_epi32(merged, 2), _mm256_set1_epi32(0x3F003F00)); + + // indices = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] + const __m256i indices = _mm256_or_si256(ac, bd); + + // return = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] + return _mm256_bswap_epi32(indices); +} + +static inline __m256i +enc_translate (const __m256i in) +{ + // Translate values 0..63 to the Base64 alphabet. 
There are five sets: + // # From To Abs Delta Characters + // 0 [0..25] [65..90] +65 +65 ABCDEFGHIJKLMNOPQRSTUVWXYZ + // 1 [26..51] [97..122] +71 +6 abcdefghijklmnopqrstuvwxyz + // 2 [52..61] [48..57] -4 -75 0123456789 + // 3 [62] [43] -19 -15 + + // 4 [63] [47] -16 +3 / + + // Create cumulative masks for characters in sets [1,2,3,4], [2,3,4], + // [3,4], and [4]: + const __m256i mask1 = CMPGT(in, 25); + const __m256i mask2 = CMPGT(in, 51); + const __m256i mask3 = CMPGT(in, 61); + const __m256i mask4 = CMPEQ(in, 63); + + // All characters are at least in cumulative set 0, so add 'A': + __m256i out = _mm256_add_epi8(in, _mm256_set1_epi8(65)); + + // For inputs which are also in any of the other cumulative sets, + // add delta values against the previous set(s) to correct the shift: + out = _mm256_add_epi8(out, REPLACE(mask1, 6)); + out = _mm256_sub_epi8(out, REPLACE(mask2, 75)); + out = _mm256_sub_epi8(out, REPLACE(mask3, 15)); + out = _mm256_add_epi8(out, REPLACE(mask4, 3)); + + return out; +} + +static inline __m256i +dec_reshuffle (__m256i in) +{ + // Shuffle bytes to 32-bit bigendian: + in = _mm256_bswap_epi32(in); + + // Mask in a single byte per shift: + __m256i mask = _mm256_set1_epi32(0x3F000000); + + // Pack bytes together: + __m256i out = _mm256_slli_epi32(_mm256_and_si256(in, mask), 2); + mask = _mm256_srli_epi32(mask, 8); + + out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 4)); + mask = _mm256_srli_epi32(mask, 8); + + out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 6)); + mask = _mm256_srli_epi32(mask, 8); + + out = _mm256_or_si256(out, _mm256_slli_epi32(_mm256_and_si256(in, mask), 8)); + + // Pack bytes together within 32-bit words, discarding words 3 and 7: + out = _mm256_shuffle_epi8(out, _mm256_setr_epi8( + 3, 2, 1, + 7, 6, 5, + 11, 10, 9, + 15, 14, 13, + -1, -1, -1, -1, + 3, 2, 1, + 7, 6, 5, + 11, 10, 9, + 15, 14, 13, + -1, -1, -1, -1)); + + // Pack 32-bit words together, squashing empty words 3 and 
7: + return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32( + 0, 1, 2, 4, 5, 6, -1, -1)); +} + +#endif // __AVX2__ + +void +avx2_base64_stream_encode + ( struct avx2_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#if defined(__AVX2__) + #include "enc_head.c" + #include "enc_avx2.c" + #include "enc_tail.c" +#else + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} + +int +avx2_base64_stream_decode + ( struct avx2_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#if defined(__AVX2__) + #include "dec_head.c" + #include "dec_avx2.c" + #include "dec_tail.c" +#else + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} diff --git a/contrib/libs/base64/avx2/codecs.h b/contrib/libs/base64/avx2/codecs.h index 34857fa8e5..16a285ac0b 100644 --- a/contrib/libs/base64/avx2/codecs.h +++ b/contrib/libs/base64/avx2/codecs.h @@ -1,35 +1,35 @@ -#pragma once - -// Define machine endianness. 
This is for GCC: -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) - #define BASE64_AVX2_LITTLE_ENDIAN 1 -#else - #define BASE64_AVX2_LITTLE_ENDIAN 0 -#endif - -// This is for Clang: -#ifdef __LITTLE_ENDIAN__ - #define BASE64_AVX2_LITTLE_ENDIAN 1 -#endif - -#ifdef __BIG_ENDIAN__ - #define BASE64_AVX2_LITTLE_ENDIAN 0 -#endif - -// Endian conversion functions -#if BASE64_AVX2_LITTLE_ENDIAN - #define cpu_to_be32(x) __builtin_bswap32(x) - #define cpu_to_be64(x) __builtin_bswap64(x) - #define be32_to_cpu(x) __builtin_bswap32(x) - #define be64_to_cpu(x) __builtin_bswap64(x) -#else - #define cpu_to_be32(x) (x) - #define cpu_to_be64(x) (x) - #define be32_to_cpu(x) (x) - #define be64_to_cpu(x) (x) -#endif - -// These tables are used by all codecs -// for fallback plain encoding/decoding: -extern const uint8_t avx2_base64_table_enc[]; -extern const uint8_t avx2_base64_table_dec[]; +#pragma once + +// Define machine endianness. This is for GCC: +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + #define BASE64_AVX2_LITTLE_ENDIAN 1 +#else + #define BASE64_AVX2_LITTLE_ENDIAN 0 +#endif + +// This is for Clang: +#ifdef __LITTLE_ENDIAN__ + #define BASE64_AVX2_LITTLE_ENDIAN 1 +#endif + +#ifdef __BIG_ENDIAN__ + #define BASE64_AVX2_LITTLE_ENDIAN 0 +#endif + +// Endian conversion functions +#if BASE64_AVX2_LITTLE_ENDIAN + #define cpu_to_be32(x) __builtin_bswap32(x) + #define cpu_to_be64(x) __builtin_bswap64(x) + #define be32_to_cpu(x) __builtin_bswap32(x) + #define be64_to_cpu(x) __builtin_bswap64(x) +#else + #define cpu_to_be32(x) (x) + #define cpu_to_be64(x) (x) + #define be32_to_cpu(x) (x) + #define be64_to_cpu(x) (x) +#endif + +// These tables are used by all codecs +// for fallback plain encoding/decoding: +extern const uint8_t avx2_base64_table_enc[]; +extern const uint8_t avx2_base64_table_dec[]; diff --git a/contrib/libs/base64/avx2/dec_avx2.c b/contrib/libs/base64/avx2/dec_avx2.c index 90dde140fb..cd3cafd30a 100644 --- a/contrib/libs/base64/avx2/dec_avx2.c +++ 
b/contrib/libs/base64/avx2/dec_avx2.c @@ -1,43 +1,43 @@ -// If we have AVX2 support, pick off 32 bytes at a time for as long as we can, -// but make sure that we quit before seeing any == markers at the end of the -// string. Also, because we write 8 zeroes at the end of the output, ensure -// that there are at least 11 valid bytes of input data remaining to close the -// gap. 32 + 2 + 11 = 45 bytes: -while (srclen >= 45) -{ - // Load string: - __m256i str = _mm256_loadu_si256((__m256i *)c); - - // The input consists of six character sets in the Base64 alphabet, - // which we need to map back to the 6-bit values they represent. - // There are three ranges, two singles, and then there's the rest. - // - // # From To Add Characters - // 1 [43] [62] +19 + - // 2 [47] [63] +16 / - // 3 [48..57] [52..61] +4 0..9 - // 4 [65..90] [0..25] -65 A..Z - // 5 [97..122] [26..51] -71 a..z - // (6) Everything else => invalid input - - const __m256i set1 = CMPEQ(str, '+'); - const __m256i set2 = CMPEQ(str, '/'); - const __m256i set3 = RANGE(str, '0', '9'); - const __m256i set4 = RANGE(str, 'A', 'Z'); - const __m256i set5 = RANGE(str, 'a', 'z'); - const __m256i set6 = CMPEQ(str, '-'); - const __m256i set7 = CMPEQ(str, '_'); - - __m256i delta = REPLACE(set1, 19); - delta = _mm256_or_si256(delta, REPLACE(set2, 16)); - delta = _mm256_or_si256(delta, REPLACE(set3, 4)); - delta = _mm256_or_si256(delta, REPLACE(set4, -65)); - delta = _mm256_or_si256(delta, REPLACE(set5, -71)); - delta = _mm256_or_si256(delta, REPLACE(set6, 17)); - delta = _mm256_or_si256(delta, REPLACE(set7, -32)); - - // Check for invalid input: if any of the delta values are zero, - // fall back on bytewise code to do error checking and reporting: +// If we have AVX2 support, pick off 32 bytes at a time for as long as we can, +// but make sure that we quit before seeing any == markers at the end of the +// string. 
Also, because we write 8 zeroes at the end of the output, ensure +// that there are at least 11 valid bytes of input data remaining to close the +// gap. 32 + 2 + 11 = 45 bytes: +while (srclen >= 45) +{ + // Load string: + __m256i str = _mm256_loadu_si256((__m256i *)c); + + // The input consists of six character sets in the Base64 alphabet, + // which we need to map back to the 6-bit values they represent. + // There are three ranges, two singles, and then there's the rest. + // + // # From To Add Characters + // 1 [43] [62] +19 + + // 2 [47] [63] +16 / + // 3 [48..57] [52..61] +4 0..9 + // 4 [65..90] [0..25] -65 A..Z + // 5 [97..122] [26..51] -71 a..z + // (6) Everything else => invalid input + + const __m256i set1 = CMPEQ(str, '+'); + const __m256i set2 = CMPEQ(str, '/'); + const __m256i set3 = RANGE(str, '0', '9'); + const __m256i set4 = RANGE(str, 'A', 'Z'); + const __m256i set5 = RANGE(str, 'a', 'z'); + const __m256i set6 = CMPEQ(str, '-'); + const __m256i set7 = CMPEQ(str, '_'); + + __m256i delta = REPLACE(set1, 19); + delta = _mm256_or_si256(delta, REPLACE(set2, 16)); + delta = _mm256_or_si256(delta, REPLACE(set3, 4)); + delta = _mm256_or_si256(delta, REPLACE(set4, -65)); + delta = _mm256_or_si256(delta, REPLACE(set5, -71)); + delta = _mm256_or_si256(delta, REPLACE(set6, 17)); + delta = _mm256_or_si256(delta, REPLACE(set7, -32)); + + // Check for invalid input: if any of the delta values are zero, + // fall back on bytewise code to do error checking and reporting: #ifdef _MSC_VER // Hack for MSVC miscompilation - it inserts vzeroupper for the break // (we need to clear YMM registers before exiting the function) @@ -45,21 +45,21 @@ while (srclen >= 45) // Save delta/str in memory manually. 
_mm256_zeroupper(); #endif - if (_mm256_movemask_epi8(CMPEQ(delta, 0))) { - break; - } - - // Now simply add the delta values to the input: - str = _mm256_add_epi8(str, delta); - - // Reshuffle the input to packed 12-byte output format: - str = dec_reshuffle(str); - - // Store back: - _mm256_storeu_si256((__m256i *)o, str); - - c += 32; - o += 24; - outl += 24; - srclen -= 32; -} + if (_mm256_movemask_epi8(CMPEQ(delta, 0))) { + break; + } + + // Now simply add the delta values to the input: + str = _mm256_add_epi8(str, delta); + + // Reshuffle the input to packed 12-byte output format: + str = dec_reshuffle(str); + + // Store back: + _mm256_storeu_si256((__m256i *)o, str); + + c += 32; + o += 24; + outl += 24; + srclen -= 32; +} diff --git a/contrib/libs/base64/avx2/dec_head.c b/contrib/libs/base64/avx2/dec_head.c index 6f10a3ff7f..8bbd46ebc4 100644 --- a/contrib/libs/base64/avx2/dec_head.c +++ b/contrib/libs/base64/avx2/dec_head.c @@ -1,29 +1,29 @@ -int ret = 0; -const uint8_t *c = (const uint8_t *)src; -uint8_t *o = (uint8_t *)out; -uint8_t q; - -// Use local temporaries to avoid cache thrashing: -size_t outl = 0; -struct avx2_base64_state st; -st.eof = state->eof; -st.bytes = state->bytes; -st.carry = state->carry; - -// If we previously saw an EOF or an invalid character, bail out: -if (st.eof) { - *outlen = 0; - return 0; -} - -// Turn four 6-bit numbers into three bytes: -// out[0] = 11111122 -// out[1] = 22223333 -// out[2] = 33444444 - -// Duff's device again: -switch (st.bytes) -{ - for (;;) - { - case 0: +int ret = 0; +const uint8_t *c = (const uint8_t *)src; +uint8_t *o = (uint8_t *)out; +uint8_t q; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct avx2_base64_state st; +st.eof = state->eof; +st.bytes = state->bytes; +st.carry = state->carry; + +// If we previously saw an EOF or an invalid character, bail out: +if (st.eof) { + *outlen = 0; + return 0; +} + +// Turn four 6-bit numbers into three bytes: +// out[0] = 11111122 
+// out[1] = 22223333 +// out[2] = 33444444 + +// Duff's device again: +switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/avx2/dec_tail.c b/contrib/libs/base64/avx2/dec_tail.c index 0226ba651e..b472f91b33 100644 --- a/contrib/libs/base64/avx2/dec_tail.c +++ b/contrib/libs/base64/avx2/dec_tail.c @@ -1,65 +1,65 @@ - if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = avx2_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // Treat character '=' as invalid for byte 0: - break; - } - st.carry = q << 2; - st.bytes++; - - case 1: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = avx2_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // Treat character '=' as invalid for byte 1: - break; - } - *o++ = st.carry | (q >> 4); - st.carry = q << 4; - st.bytes++; - outl++; - - case 2: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = avx2_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // When q == 254, the input char is '='. Return 1 and EOF. - // Technically, should check if next byte is also '=', but never mind. - // When q == 255, the input char is invalid. Return 0 and EOF. - ret = (q == 254) ? 1 : 0; - break; - } - *o++ = st.carry | (q >> 2); - st.carry = q << 6; - st.bytes++; - outl++; - - case 3: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = avx2_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // When q == 254, the input char is '='. Return 1 and EOF. - // When q == 255, the input char is invalid. Return 0 and EOF. - ret = (q == 254) ? 
1 : 0; - break; - } - *o++ = st.carry | q; - st.carry = 0; - st.bytes = 0; - outl++; - } -} -state->eof = st.eof; -state->bytes = st.bytes; -state->carry = st.carry; -*outlen = outl; -return ret; + if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = avx2_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 0: + break; + } + st.carry = q << 2; + st.bytes++; + + case 1: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = avx2_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 1: + break; + } + *o++ = st.carry | (q >> 4); + st.carry = q << 4; + st.bytes++; + outl++; + + case 2: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = avx2_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // Technically, should check if next byte is also '=', but never mind. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | (q >> 2); + st.carry = q << 6; + st.bytes++; + outl++; + + case 3: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = avx2_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | q; + st.carry = 0; + st.bytes = 0; + outl++; + } +} +state->eof = st.eof; +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; +return ret; diff --git a/contrib/libs/base64/avx2/enc_avx2.c b/contrib/libs/base64/avx2/enc_avx2.c index 5875684de4..25e36f40b4 100644 --- a/contrib/libs/base64/avx2/enc_avx2.c +++ b/contrib/libs/base64/avx2/enc_avx2.c @@ -1,22 +1,22 @@ -// If we have AVX2 support, pick off 24 bytes at a time for as long as we can. 
-// But because we read 32 bytes at a time, ensure we have enough room to do a -// full 32-byte read without segfaulting: -while (srclen >= 32) -{ - // Load string: - __m256i str = _mm256_loadu_si256((__m256i *)c); - - // Reshuffle: - str = enc_reshuffle(str); - - // Translate reshuffled bytes to the Base64 alphabet: - str = enc_translate(str); - - // Store: - _mm256_storeu_si256((__m256i *)o, str); - - c += 24; // 6 * 4 bytes of input - o += 32; // 8 * 4 bytes of output - outl += 32; - srclen -= 24; -} +// If we have AVX2 support, pick off 24 bytes at a time for as long as we can. +// But because we read 32 bytes at a time, ensure we have enough room to do a +// full 32-byte read without segfaulting: +while (srclen >= 32) +{ + // Load string: + __m256i str = _mm256_loadu_si256((__m256i *)c); + + // Reshuffle: + str = enc_reshuffle(str); + + // Translate reshuffled bytes to the Base64 alphabet: + str = enc_translate(str); + + // Store: + _mm256_storeu_si256((__m256i *)o, str); + + c += 24; // 6 * 4 bytes of input + o += 32; // 8 * 4 bytes of output + outl += 32; + srclen -= 24; +} diff --git a/contrib/libs/base64/avx2/enc_head.c b/contrib/libs/base64/avx2/enc_head.c index 5b03c7f71b..3d05b0bd04 100644 --- a/contrib/libs/base64/avx2/enc_head.c +++ b/contrib/libs/base64/avx2/enc_head.c @@ -1,23 +1,23 @@ -// Assume that *out is large enough to contain the output. -// Theoretically it should be 4/3 the length of src. -const uint8_t *c = (const uint8_t *)src; -uint8_t *o = (uint8_t *)out; - -// Use local temporaries to avoid cache thrashing: -size_t outl = 0; -struct avx2_base64_state st; -st.bytes = state->bytes; -st.carry = state->carry; - -// Turn three bytes into four 6-bit numbers: -// in[0] = 00111111 -// in[1] = 00112222 -// in[2] = 00222233 -// in[3] = 00333333 - -// Duff's device, a for() loop inside a switch() statement. Legal! -switch (st.bytes) -{ - for (;;) - { - case 0: +// Assume that *out is large enough to contain the output. 
+// Theoretically it should be 4/3 the length of src. +const uint8_t *c = (const uint8_t *)src; +uint8_t *o = (uint8_t *)out; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct avx2_base64_state st; +st.bytes = state->bytes; +st.carry = state->carry; + +// Turn three bytes into four 6-bit numbers: +// in[0] = 00111111 +// in[1] = 00112222 +// in[2] = 00222233 +// in[3] = 00333333 + +// Duff's device, a for() loop inside a switch() statement. Legal! +switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/avx2/enc_tail.c b/contrib/libs/base64/avx2/enc_tail.c index 8d6c247019..e4362db594 100644 --- a/contrib/libs/base64/avx2/enc_tail.c +++ b/contrib/libs/base64/avx2/enc_tail.c @@ -1,28 +1,28 @@ - if (srclen-- == 0) { - break; - } - *o++ = avx2_base64_table_enc[*c >> 2]; - st.carry = (*c++ << 4) & 0x30; - st.bytes++; - outl += 1; - - case 1: if (srclen-- == 0) { - break; - } - *o++ = avx2_base64_table_enc[st.carry | (*c >> 4)]; - st.carry = (*c++ << 2) & 0x3C; - st.bytes++; - outl += 1; - - case 2: if (srclen-- == 0) { - break; - } - *o++ = avx2_base64_table_enc[st.carry | (*c >> 6)]; - *o++ = avx2_base64_table_enc[*c++ & 0x3F]; - st.bytes = 0; - outl += 2; - } -} -state->bytes = st.bytes; -state->carry = st.carry; -*outlen = outl; + if (srclen-- == 0) { + break; + } + *o++ = avx2_base64_table_enc[*c >> 2]; + st.carry = (*c++ << 4) & 0x30; + st.bytes++; + outl += 1; + + case 1: if (srclen-- == 0) { + break; + } + *o++ = avx2_base64_table_enc[st.carry | (*c >> 4)]; + st.carry = (*c++ << 2) & 0x3C; + st.bytes++; + outl += 1; + + case 2: if (srclen-- == 0) { + break; + } + *o++ = avx2_base64_table_enc[st.carry | (*c >> 6)]; + *o++ = avx2_base64_table_enc[*c++ & 0x3F]; + st.bytes = 0; + outl += 2; + } +} +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; diff --git a/contrib/libs/base64/avx2/lib.c b/contrib/libs/base64/avx2/lib.c index 81ead08503..b0671c8cae 100644 --- a/contrib/libs/base64/avx2/lib.c 
+++ b/contrib/libs/base64/avx2/lib.c @@ -1,121 +1,121 @@ -#include <stdint.h> -#include <stddef.h> - -#include "libbase64.h" -#include "codecs.h" - -const uint8_t -avx2_base64_table_enc[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - -// In the lookup table below, note that the value for '=' (character 61) is -// 254, not 255. This character is used for in-band signaling of the end of -// the datastream, and we will use that later. The characters A-Z, a-z, 0-9 -// and + / are mapped to their "decoded" values. The other bytes all map to -// the value 255, which flags them as "invalid input". - -const uint8_t -avx2_base64_table_dec[] = -{ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 - 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 - 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 
255, 255, 255, -}; - -void -avx2_base64_stream_encode_init (struct avx2_base64_state *state) -{ - state->eof = 0; - state->bytes = 0; - state->carry = 0; -} - -void -avx2_base64_stream_encode_final - ( struct avx2_base64_state *state - , char *out - , size_t *outlen - ) -{ - uint8_t *o = (uint8_t *)out; - - if (state->bytes == 1) { - *o++ = avx2_base64_table_enc[state->carry]; - *o++ = '='; - *o++ = '='; - *outlen = 3; - return; - } - if (state->bytes == 2) { - *o++ = avx2_base64_table_enc[state->carry]; - *o++ = '='; - *outlen = 2; - return; - } - *outlen = 0; -} - -void -avx2_base64_stream_decode_init (struct avx2_base64_state *state) -{ - state->eof = 0; - state->bytes = 0; - state->carry = 0; -} - -void -avx2_base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - size_t s; - size_t t; - struct avx2_base64_state state; - - // Init the stream reader: - avx2_base64_stream_encode_init(&state); - - // Feed the whole string to the stream reader: - avx2_base64_stream_encode(&state, src, srclen, out, &s); - - // Finalize the stream by writing trailer if any: - avx2_base64_stream_encode_final(&state, out + s, &t); - - // Final output length is stream length plus tail: - *outlen = s + t; -} - -int -avx2_base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - struct avx2_base64_state state; - - // Init the stream reader: - avx2_base64_stream_decode_init(&state); - - // Feed the whole string to the stream reader: - return avx2_base64_stream_decode(&state, src, srclen, out, outlen); -} +#include <stdint.h> +#include <stddef.h> + +#include "libbase64.h" +#include "codecs.h" + +const uint8_t +avx2_base64_table_enc[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +// In the lookup table below, note that the value for '=' (character 61) is +// 254, not 255. This character is used for in-band signaling of the end of +// the datastream, and we will use that later. 
The characters A-Z, a-z, 0-9 +// and + / are mapped to their "decoded" values. The other bytes all map to +// the value 255, which flags them as "invalid input". + +const uint8_t +avx2_base64_table_dec[] = +{ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +}; + +void +avx2_base64_stream_encode_init (struct avx2_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +avx2_base64_stream_encode_final + ( struct avx2_base64_state *state + , char *out + , size_t *outlen + ) +{ + uint8_t *o = (uint8_t *)out; + + if (state->bytes == 1) { + *o++ = avx2_base64_table_enc[state->carry]; + *o++ = '='; + *o++ = '='; + *outlen = 3; + return; + } + if (state->bytes == 2) { + *o++ = 
avx2_base64_table_enc[state->carry]; + *o++ = '='; + *outlen = 2; + return; + } + *outlen = 0; +} + +void +avx2_base64_stream_decode_init (struct avx2_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +avx2_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + size_t s; + size_t t; + struct avx2_base64_state state; + + // Init the stream reader: + avx2_base64_stream_encode_init(&state); + + // Feed the whole string to the stream reader: + avx2_base64_stream_encode(&state, src, srclen, out, &s); + + // Finalize the stream by writing trailer if any: + avx2_base64_stream_encode_final(&state, out + s, &t); + + // Final output length is stream length plus tail: + *outlen = s + t; +} + +int +avx2_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + struct avx2_base64_state state; + + // Init the stream reader: + avx2_base64_stream_decode_init(&state); + + // Feed the whole string to the stream reader: + return avx2_base64_stream_decode(&state, src, srclen, out, outlen); +} diff --git a/contrib/libs/base64/avx2/libbase64.h b/contrib/libs/base64/avx2/libbase64.h index 2d63217c78..91a8ab1ed2 100644 --- a/contrib/libs/base64/avx2/libbase64.h +++ b/contrib/libs/base64/avx2/libbase64.h @@ -1,89 +1,89 @@ -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -struct avx2_base64_state { - int eof; - int bytes; - unsigned char carry; -}; - -/* Wrapper function to encode a plain string of given length. Output is written - * to *out without trailing zero. Output length in bytes is written to *outlen. - * The buffer in `out` has been allocated by the caller and is at least 4/3 the - * size of the input. See above for `flags`; set to 0 for default operation: */ -void avx2_base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Call this before calling base64_stream_encode() to init the state. 
See above - * for `flags`; set to 0 for default operation: */ -void avx2_base64_stream_encode_init - ( struct avx2_base64_state *state - ) ; - -/* Encodes the block of data of given length at `src`, into the buffer at - * `out`. Caller is responsible for allocating a large enough out-buffer; it - * must be at least 4/3 the size of the in-buffer, but take some margin. Places - * the number of new bytes written into `outlen` (which is set to zero when the - * function starts). Does not zero-terminate or finalize the output. */ -void avx2_base64_stream_encode - ( struct avx2_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Finalizes the output begun by previous calls to `base64_stream_encode()`. - * Adds the required end-of-stream markers if appropriate. `outlen` is modified - * and will contain the number of new bytes written at `out` (which will quite - * often be zero). */ -void avx2_base64_stream_encode_final - ( struct avx2_base64_state *state - , char *out - , size_t *outlen - ) ; - -/* Wrapper function to decode a plain string of given length. Output is written - * to *out without trailing zero. Output length in bytes is written to *outlen. - * The buffer in `out` has been allocated by the caller and is at least 3/4 the - * size of the input. See above for `flags`, set to 0 for default operation: */ -int avx2_base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Call this before calling base64_stream_decode() to init the state. See above - * for `flags`; set to 0 for default operation: */ -void avx2_base64_stream_decode_init - ( struct avx2_base64_state *state - ) ; - -/* Decodes the block of data of given length at `src`, into the buffer at - * `out`. Caller is responsible for allocating a large enough out-buffer; it - * must be at least 3/4 the size of the in-buffer, but take some margin. 
Places - * the number of new bytes written into `outlen` (which is set to zero when the - * function starts). Does not zero-terminate the output. Returns 1 if all is - * well, and 0 if a decoding error was found, such as an invalid character. - * Returns -1 if the chosen codec is not included in the current build. Used by - * the test harness to check whether a codec is available for testing. */ -int avx2_base64_stream_decode - ( struct avx2_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -#ifdef __cplusplus -} -#endif - +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct avx2_base64_state { + int eof; + int bytes; + unsigned char carry; +}; + +/* Wrapper function to encode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 4/3 the + * size of the input. See above for `flags`; set to 0 for default operation: */ +void avx2_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling base64_stream_encode() to init the state. See above + * for `flags`; set to 0 for default operation: */ +void avx2_base64_stream_encode_init + ( struct avx2_base64_state *state + ) ; + +/* Encodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 4/3 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate or finalize the output. */ +void avx2_base64_stream_encode + ( struct avx2_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Finalizes the output begun by previous calls to `base64_stream_encode()`. 
+ * Adds the required end-of-stream markers if appropriate. `outlen` is modified + * and will contain the number of new bytes written at `out` (which will quite + * often be zero). */ +void avx2_base64_stream_encode_final + ( struct avx2_base64_state *state + , char *out + , size_t *outlen + ) ; + +/* Wrapper function to decode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 3/4 the + * size of the input. See above for `flags`, set to 0 for default operation: */ +int avx2_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling base64_stream_decode() to init the state. See above + * for `flags`; set to 0 for default operation: */ +void avx2_base64_stream_decode_init + ( struct avx2_base64_state *state + ) ; + +/* Decodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 3/4 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate the output. Returns 1 if all is + * well, and 0 if a decoding error was found, such as an invalid character. + * Returns -1 if the chosen codec is not included in the current build. Used by + * the test harness to check whether a codec is available for testing. 
*/ +int avx2_base64_stream_decode + ( struct avx2_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +#ifdef __cplusplus +} +#endif + diff --git a/contrib/libs/base64/avx2/ya.make b/contrib/libs/base64/avx2/ya.make index fe719768d8..b0dc5ce772 100644 --- a/contrib/libs/base64/avx2/ya.make +++ b/contrib/libs/base64/avx2/ya.make @@ -1,11 +1,11 @@ -OWNER( - yazevnul +OWNER( + yazevnul g:contrib g:cpp-contrib -) - -LIBRARY() - +) + +LIBRARY() + LICENSE( BSD-2-Clause AND MIT @@ -13,14 +13,14 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -NO_UTIL() - -SRCS( - codec_avx2.c - lib.c -) - -IF (ARCH_X86_64 OR ARCH_I386) +NO_UTIL() + +SRCS( + codec_avx2.c + lib.c +) + +IF (ARCH_X86_64 OR ARCH_I386) IF (MSVC AND NOT CLANG_CL) CONLYFLAGS(/D__AVX2__=1) ELSEIF (CLANG_CL) @@ -30,7 +30,7 @@ IF (ARCH_X86_64 OR ARCH_I386) -mavx2 -std=c11 ) - ENDIF() -ENDIF() - -END() + ENDIF() +ENDIF() + +END() diff --git a/contrib/libs/base64/neon32/codec_neon32.c b/contrib/libs/base64/neon32/codec_neon32.c index 05fcfc3e63..2c9ae02f75 100644 --- a/contrib/libs/base64/neon32/codec_neon32.c +++ b/contrib/libs/base64/neon32/codec_neon32.c @@ -1,160 +1,160 @@ -#if (defined(__ARM_NEON) && !defined(__ARM_NEON__)) -#define __ARM_NEON__ -#endif - -#include <stdint.h> -#include <stddef.h> -#include <stdlib.h> -#ifdef __ARM_NEON__ -#include <arm_neon.h> -#endif - -#include "libbase64.h" -#include "codecs.h" - -#if (defined(__arm__) && defined(__ARM_NEON__)) - -#define CMPGT(s,n) vcgtq_u8((s), vdupq_n_u8(n)) -#define CMPEQ(s,n) vceqq_u8((s), vdupq_n_u8(n)) -#define REPLACE(s,n) vandq_u8((s), vdupq_n_u8(n)) -#define RANGE(s,a,b) vandq_u8(vcgeq_u8((s), vdupq_n_u8(a)), vcleq_u8((s), vdupq_n_u8(b))) - -static inline uint8x16x4_t -enc_reshuffle (uint8x16x3_t in) -{ - uint8x16x4_t out; - - // Divide bits of three input bytes over four output bytes: - out.val[0] = vshrq_n_u8(in.val[0], 2); - out.val[1] = vorrq_u8(vshrq_n_u8(in.val[1], 4), 
vshlq_n_u8(in.val[0], 4)); - out.val[2] = vorrq_u8(vshrq_n_u8(in.val[2], 6), vshlq_n_u8(in.val[1], 2)); - out.val[3] = in.val[2]; - - // Clear top two bits: - out.val[0] = vandq_u8(out.val[0], vdupq_n_u8(0x3F)); - out.val[1] = vandq_u8(out.val[1], vdupq_n_u8(0x3F)); - out.val[2] = vandq_u8(out.val[2], vdupq_n_u8(0x3F)); - out.val[3] = vandq_u8(out.val[3], vdupq_n_u8(0x3F)); - - return out; -} - -static inline uint8x16x4_t -enc_translate (uint8x16x4_t in) -{ - uint8x16x4_t mask1, mask2, mask3, mask4, out; - - // Translate values 0..63 to the Base64 alphabet. There are five sets: - // # From To Abs Delta Characters - // 0 [0..25] [65..90] +65 +65 ABCDEFGHIJKLMNOPQRSTUVWXYZ - // 1 [26..51] [97..122] +71 +6 abcdefghijklmnopqrstuvwxyz - // 2 [52..61] [48..57] -4 -75 0123456789 - // 3 [62] [43] -19 -15 + - // 4 [63] [47] -16 +3 / - - // Create cumulative masks for characters in sets [1,2,3,4], [2,3,4], - // [3,4], and [4]: - mask1.val[0] = CMPGT(in.val[0], 25); - mask1.val[1] = CMPGT(in.val[1], 25); - mask1.val[2] = CMPGT(in.val[2], 25); - mask1.val[3] = CMPGT(in.val[3], 25); - - mask2.val[0] = CMPGT(in.val[0], 51); - mask2.val[1] = CMPGT(in.val[1], 51); - mask2.val[2] = CMPGT(in.val[2], 51); - mask2.val[3] = CMPGT(in.val[3], 51); - - mask3.val[0] = CMPGT(in.val[0], 61); - mask3.val[1] = CMPGT(in.val[1], 61); - mask3.val[2] = CMPGT(in.val[2], 61); - mask3.val[3] = CMPGT(in.val[3], 61); - - mask4.val[0] = CMPEQ(in.val[0], 63); - mask4.val[1] = CMPEQ(in.val[1], 63); - mask4.val[2] = CMPEQ(in.val[2], 63); - mask4.val[3] = CMPEQ(in.val[3], 63); - - // All characters are at least in cumulative set 0, so add 'A': - out.val[0] = vaddq_u8(in.val[0], vdupq_n_u8(65)); - out.val[1] = vaddq_u8(in.val[1], vdupq_n_u8(65)); - out.val[2] = vaddq_u8(in.val[2], vdupq_n_u8(65)); - out.val[3] = vaddq_u8(in.val[3], vdupq_n_u8(65)); - - // For inputs which are also in any of the other cumulative sets, - // add delta values against the previous set(s) to correct the shift: - out.val[0] = 
vaddq_u8(out.val[0], REPLACE(mask1.val[0], 6)); - out.val[1] = vaddq_u8(out.val[1], REPLACE(mask1.val[1], 6)); - out.val[2] = vaddq_u8(out.val[2], REPLACE(mask1.val[2], 6)); - out.val[3] = vaddq_u8(out.val[3], REPLACE(mask1.val[3], 6)); - - out.val[0] = vsubq_u8(out.val[0], REPLACE(mask2.val[0], 75)); - out.val[1] = vsubq_u8(out.val[1], REPLACE(mask2.val[1], 75)); - out.val[2] = vsubq_u8(out.val[2], REPLACE(mask2.val[2], 75)); - out.val[3] = vsubq_u8(out.val[3], REPLACE(mask2.val[3], 75)); - - out.val[0] = vsubq_u8(out.val[0], REPLACE(mask3.val[0], 15)); - out.val[1] = vsubq_u8(out.val[1], REPLACE(mask3.val[1], 15)); - out.val[2] = vsubq_u8(out.val[2], REPLACE(mask3.val[2], 15)); - out.val[3] = vsubq_u8(out.val[3], REPLACE(mask3.val[3], 15)); - - out.val[0] = vaddq_u8(out.val[0], REPLACE(mask4.val[0], 3)); - out.val[1] = vaddq_u8(out.val[1], REPLACE(mask4.val[1], 3)); - out.val[2] = vaddq_u8(out.val[2], REPLACE(mask4.val[2], 3)); - out.val[3] = vaddq_u8(out.val[3], REPLACE(mask4.val[3], 3)); - - return out; -} - -#endif - -// Stride size is so large on these NEON 32-bit functions -// (48 bytes encode, 32 bytes decode) that we inline the -// uint32 codec to stay performant on smaller inputs. 
- -void -neon32_base64_stream_encode - ( struct neon32_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ -#if (defined(__arm__) && defined(__ARM_NEON__)) - #include "enc_head.c" - #include "enc_neon.c" - #include "enc_uint32.c" - #include "enc_tail.c" -#else - (void)state; - (void)src; - (void)srclen; - (void)out; - (void)outlen; - abort(); -#endif -} - -int -neon32_base64_stream_decode - ( struct neon32_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ -#if (defined(__arm__) && defined(__ARM_NEON__)) - #include "dec_head.c" - #include "dec_neon.c" - #include "dec_uint32.c" - #include "dec_tail.c" -#else - (void)state; - (void)src; - (void)srclen; - (void)out; - (void)outlen; - abort(); -#endif -} +#if (defined(__ARM_NEON) && !defined(__ARM_NEON__)) +#define __ARM_NEON__ +#endif + +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> +#ifdef __ARM_NEON__ +#include <arm_neon.h> +#endif + +#include "libbase64.h" +#include "codecs.h" + +#if (defined(__arm__) && defined(__ARM_NEON__)) + +#define CMPGT(s,n) vcgtq_u8((s), vdupq_n_u8(n)) +#define CMPEQ(s,n) vceqq_u8((s), vdupq_n_u8(n)) +#define REPLACE(s,n) vandq_u8((s), vdupq_n_u8(n)) +#define RANGE(s,a,b) vandq_u8(vcgeq_u8((s), vdupq_n_u8(a)), vcleq_u8((s), vdupq_n_u8(b))) + +static inline uint8x16x4_t +enc_reshuffle (uint8x16x3_t in) +{ + uint8x16x4_t out; + + // Divide bits of three input bytes over four output bytes: + out.val[0] = vshrq_n_u8(in.val[0], 2); + out.val[1] = vorrq_u8(vshrq_n_u8(in.val[1], 4), vshlq_n_u8(in.val[0], 4)); + out.val[2] = vorrq_u8(vshrq_n_u8(in.val[2], 6), vshlq_n_u8(in.val[1], 2)); + out.val[3] = in.val[2]; + + // Clear top two bits: + out.val[0] = vandq_u8(out.val[0], vdupq_n_u8(0x3F)); + out.val[1] = vandq_u8(out.val[1], vdupq_n_u8(0x3F)); + out.val[2] = vandq_u8(out.val[2], vdupq_n_u8(0x3F)); + out.val[3] = vandq_u8(out.val[3], vdupq_n_u8(0x3F)); + + return out; +} + +static inline 
uint8x16x4_t +enc_translate (uint8x16x4_t in) +{ + uint8x16x4_t mask1, mask2, mask3, mask4, out; + + // Translate values 0..63 to the Base64 alphabet. There are five sets: + // # From To Abs Delta Characters + // 0 [0..25] [65..90] +65 +65 ABCDEFGHIJKLMNOPQRSTUVWXYZ + // 1 [26..51] [97..122] +71 +6 abcdefghijklmnopqrstuvwxyz + // 2 [52..61] [48..57] -4 -75 0123456789 + // 3 [62] [43] -19 -15 + + // 4 [63] [47] -16 +3 / + + // Create cumulative masks for characters in sets [1,2,3,4], [2,3,4], + // [3,4], and [4]: + mask1.val[0] = CMPGT(in.val[0], 25); + mask1.val[1] = CMPGT(in.val[1], 25); + mask1.val[2] = CMPGT(in.val[2], 25); + mask1.val[3] = CMPGT(in.val[3], 25); + + mask2.val[0] = CMPGT(in.val[0], 51); + mask2.val[1] = CMPGT(in.val[1], 51); + mask2.val[2] = CMPGT(in.val[2], 51); + mask2.val[3] = CMPGT(in.val[3], 51); + + mask3.val[0] = CMPGT(in.val[0], 61); + mask3.val[1] = CMPGT(in.val[1], 61); + mask3.val[2] = CMPGT(in.val[2], 61); + mask3.val[3] = CMPGT(in.val[3], 61); + + mask4.val[0] = CMPEQ(in.val[0], 63); + mask4.val[1] = CMPEQ(in.val[1], 63); + mask4.val[2] = CMPEQ(in.val[2], 63); + mask4.val[3] = CMPEQ(in.val[3], 63); + + // All characters are at least in cumulative set 0, so add 'A': + out.val[0] = vaddq_u8(in.val[0], vdupq_n_u8(65)); + out.val[1] = vaddq_u8(in.val[1], vdupq_n_u8(65)); + out.val[2] = vaddq_u8(in.val[2], vdupq_n_u8(65)); + out.val[3] = vaddq_u8(in.val[3], vdupq_n_u8(65)); + + // For inputs which are also in any of the other cumulative sets, + // add delta values against the previous set(s) to correct the shift: + out.val[0] = vaddq_u8(out.val[0], REPLACE(mask1.val[0], 6)); + out.val[1] = vaddq_u8(out.val[1], REPLACE(mask1.val[1], 6)); + out.val[2] = vaddq_u8(out.val[2], REPLACE(mask1.val[2], 6)); + out.val[3] = vaddq_u8(out.val[3], REPLACE(mask1.val[3], 6)); + + out.val[0] = vsubq_u8(out.val[0], REPLACE(mask2.val[0], 75)); + out.val[1] = vsubq_u8(out.val[1], REPLACE(mask2.val[1], 75)); + out.val[2] = vsubq_u8(out.val[2], 
REPLACE(mask2.val[2], 75)); + out.val[3] = vsubq_u8(out.val[3], REPLACE(mask2.val[3], 75)); + + out.val[0] = vsubq_u8(out.val[0], REPLACE(mask3.val[0], 15)); + out.val[1] = vsubq_u8(out.val[1], REPLACE(mask3.val[1], 15)); + out.val[2] = vsubq_u8(out.val[2], REPLACE(mask3.val[2], 15)); + out.val[3] = vsubq_u8(out.val[3], REPLACE(mask3.val[3], 15)); + + out.val[0] = vaddq_u8(out.val[0], REPLACE(mask4.val[0], 3)); + out.val[1] = vaddq_u8(out.val[1], REPLACE(mask4.val[1], 3)); + out.val[2] = vaddq_u8(out.val[2], REPLACE(mask4.val[2], 3)); + out.val[3] = vaddq_u8(out.val[3], REPLACE(mask4.val[3], 3)); + + return out; +} + +#endif + +// Stride size is so large on these NEON 32-bit functions +// (48 bytes encode, 32 bytes decode) that we inline the +// uint32 codec to stay performant on smaller inputs. + +void +neon32_base64_stream_encode + ( struct neon32_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#if (defined(__arm__) && defined(__ARM_NEON__)) + #include "enc_head.c" + #include "enc_neon.c" + #include "enc_uint32.c" + #include "enc_tail.c" +#else + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} + +int +neon32_base64_stream_decode + ( struct neon32_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#if (defined(__arm__) && defined(__ARM_NEON__)) + #include "dec_head.c" + #include "dec_neon.c" + #include "dec_uint32.c" + #include "dec_tail.c" +#else + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} diff --git a/contrib/libs/base64/neon32/codecs.h b/contrib/libs/base64/neon32/codecs.h index 23cca82c6f..5c9ec309c2 100644 --- a/contrib/libs/base64/neon32/codecs.h +++ b/contrib/libs/base64/neon32/codecs.h @@ -1,35 +1,35 @@ -#pragma once - -// Define machine endianness. 
This is for GCC: -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) - #define BASE64_NEON32_LITTLE_ENDIAN 1 -#else - #define BASE64_NEON32_LITTLE_ENDIAN 0 -#endif - -// This is for Clang: -#ifdef __LITTLE_ENDIAN__ - #define BASE64_NEON32_LITTLE_ENDIAN 1 -#endif - -#ifdef __BIG_ENDIAN__ - #define BASE64_NEON32_LITTLE_ENDIAN 0 -#endif - -// Endian conversion functions -#if BASE64_NEON32_LITTLE_ENDIAN - #define cpu_to_be32(x) __builtin_bswap32(x) - #define cpu_to_be64(x) __builtin_bswap64(x) - #define be32_to_cpu(x) __builtin_bswap32(x) - #define be64_to_cpu(x) __builtin_bswap64(x) -#else - #define cpu_to_be32(x) (x) - #define cpu_to_be64(x) (x) - #define be32_to_cpu(x) (x) - #define be64_to_cpu(x) (x) -#endif - -// These tables are used by all codecs -// for fallback plain encoding/decoding: -extern const uint8_t neon32_base64_table_enc[]; -extern const uint8_t neon32_base64_table_dec[]; +#pragma once + +// Define machine endianness. This is for GCC: +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + #define BASE64_NEON32_LITTLE_ENDIAN 1 +#else + #define BASE64_NEON32_LITTLE_ENDIAN 0 +#endif + +// This is for Clang: +#ifdef __LITTLE_ENDIAN__ + #define BASE64_NEON32_LITTLE_ENDIAN 1 +#endif + +#ifdef __BIG_ENDIAN__ + #define BASE64_NEON32_LITTLE_ENDIAN 0 +#endif + +// Endian conversion functions +#if BASE64_NEON32_LITTLE_ENDIAN + #define cpu_to_be32(x) __builtin_bswap32(x) + #define cpu_to_be64(x) __builtin_bswap64(x) + #define be32_to_cpu(x) __builtin_bswap32(x) + #define be64_to_cpu(x) __builtin_bswap64(x) +#else + #define cpu_to_be32(x) (x) + #define cpu_to_be64(x) (x) + #define be32_to_cpu(x) (x) + #define be64_to_cpu(x) (x) +#endif + +// These tables are used by all codecs +// for fallback plain encoding/decoding: +extern const uint8_t neon32_base64_table_enc[]; +extern const uint8_t neon32_base64_table_dec[]; diff --git a/contrib/libs/base64/neon32/dec_head.c b/contrib/libs/base64/neon32/dec_head.c index 2802093555..bd023118ff 100644 --- 
a/contrib/libs/base64/neon32/dec_head.c +++ b/contrib/libs/base64/neon32/dec_head.c @@ -1,29 +1,29 @@ -int ret = 0; -const uint8_t *c = (const uint8_t *)src; -uint8_t *o = (uint8_t *)out; -uint8_t q; - -// Use local temporaries to avoid cache thrashing: -size_t outl = 0; -struct neon32_base64_state st; -st.eof = state->eof; -st.bytes = state->bytes; -st.carry = state->carry; - -// If we previously saw an EOF or an invalid character, bail out: -if (st.eof) { - *outlen = 0; - return 0; -} - -// Turn four 6-bit numbers into three bytes: -// out[0] = 11111122 -// out[1] = 22223333 -// out[2] = 33444444 - -// Duff's device again: -switch (st.bytes) -{ - for (;;) - { - case 0: +int ret = 0; +const uint8_t *c = (const uint8_t *)src; +uint8_t *o = (uint8_t *)out; +uint8_t q; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct neon32_base64_state st; +st.eof = state->eof; +st.bytes = state->bytes; +st.carry = state->carry; + +// If we previously saw an EOF or an invalid character, bail out: +if (st.eof) { + *outlen = 0; + return 0; +} + +// Turn four 6-bit numbers into three bytes: +// out[0] = 11111122 +// out[1] = 22223333 +// out[2] = 33444444 + +// Duff's device again: +switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/neon32/dec_neon.c b/contrib/libs/base64/neon32/dec_neon.c index 30d846a916..713d8ca9a4 100644 --- a/contrib/libs/base64/neon32/dec_neon.c +++ b/contrib/libs/base64/neon32/dec_neon.c @@ -1,77 +1,77 @@ -// If we have NEON support, pick off 64 bytes at a time for as long as we can. 
-// Unlike the SSE codecs, we don't write trailing zero bytes to output, so we -// don't need to check if we have enough remaining input to cover them: -while (srclen >= 64) -{ - uint8x16x4_t set1, set2, set3, set4, set5, set6, set7, delta; - uint8x16x3_t dec; - - // Load 64 bytes and deinterleave: - uint8x16x4_t str = vld4q_u8((uint8_t *)c); - - // The input consists of six character sets in the Base64 alphabet, - // which we need to map back to the 6-bit values they represent. - // There are three ranges, two singles, and then there's the rest. - // - // # From To Add Characters - // 1 [43] [62] +19 + - // 2 [47] [63] +16 / - // 3 [48..57] [52..61] +4 0..9 - // 4 [65..90] [0..25] -65 A..Z - // 5 [97..122] [26..51] -71 a..z - // (6) Everything else => invalid input - - // Benchmarking on the Raspberry Pi 2B and Clang shows that looping - // generates slightly faster code than explicit unrolling: - for (int i = 0; i < 4; i++) { - set1.val[i] = CMPEQ(str.val[i], '+'); - set2.val[i] = CMPEQ(str.val[i], '/'); - set3.val[i] = RANGE(str.val[i], '0', '9'); - set4.val[i] = RANGE(str.val[i], 'A', 'Z'); - set5.val[i] = RANGE(str.val[i], 'a', 'z'); - set6.val[i] = CMPEQ(str.val[i], '-'); - set7.val[i] = CMPEQ(str.val[i], '_'); - - delta.val[i] = REPLACE(set1.val[i], 19); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set2.val[i], 16)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set3.val[i], 4)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set4.val[i], -65)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set5.val[i], -71)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set6.val[i], 17)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set7.val[i], -32)); - } - - // Check for invalid input: if any of the delta values are zero, - // fall back on bytewise code to do error checking and reporting: - uint8x16_t classified = CMPEQ(delta.val[0], 0); - classified = vorrq_u8(classified, CMPEQ(delta.val[1], 0)); - classified = vorrq_u8(classified, CMPEQ(delta.val[2], 0)); 
- classified = vorrq_u8(classified, CMPEQ(delta.val[3], 0)); - - // Extract both 32-bit halves; check that all bits are zero: - if (vgetq_lane_u32((uint32x4_t)classified, 0) != 0 - || vgetq_lane_u32((uint32x4_t)classified, 1) != 0 - || vgetq_lane_u32((uint32x4_t)classified, 2) != 0 - || vgetq_lane_u32((uint32x4_t)classified, 3) != 0) { - break; - } - - // Now simply add the delta values to the input: - str.val[0] = vaddq_u8(str.val[0], delta.val[0]); - str.val[1] = vaddq_u8(str.val[1], delta.val[1]); - str.val[2] = vaddq_u8(str.val[2], delta.val[2]); - str.val[3] = vaddq_u8(str.val[3], delta.val[3]); - - // Compress four bytes into three: - dec.val[0] = vshlq_n_u8(str.val[0], 2) | vshrq_n_u8(str.val[1], 4); - dec.val[1] = vshlq_n_u8(str.val[1], 4) | vshrq_n_u8(str.val[2], 2); - dec.val[2] = vshlq_n_u8(str.val[2], 6) | str.val[3]; - - // Interleave and store decoded result: - vst3q_u8((uint8_t *)o, dec); - - c += 64; - o += 48; - outl += 48; - srclen -= 64; -} +// If we have NEON support, pick off 64 bytes at a time for as long as we can. +// Unlike the SSE codecs, we don't write trailing zero bytes to output, so we +// don't need to check if we have enough remaining input to cover them: +while (srclen >= 64) +{ + uint8x16x4_t set1, set2, set3, set4, set5, set6, set7, delta; + uint8x16x3_t dec; + + // Load 64 bytes and deinterleave: + uint8x16x4_t str = vld4q_u8((uint8_t *)c); + + // The input consists of six character sets in the Base64 alphabet, + // which we need to map back to the 6-bit values they represent. + // There are three ranges, two singles, and then there's the rest. 
+ // + // # From To Add Characters + // 1 [43] [62] +19 + + // 2 [47] [63] +16 / + // 3 [48..57] [52..61] +4 0..9 + // 4 [65..90] [0..25] -65 A..Z + // 5 [97..122] [26..51] -71 a..z + // (6) Everything else => invalid input + + // Benchmarking on the Raspberry Pi 2B and Clang shows that looping + // generates slightly faster code than explicit unrolling: + for (int i = 0; i < 4; i++) { + set1.val[i] = CMPEQ(str.val[i], '+'); + set2.val[i] = CMPEQ(str.val[i], '/'); + set3.val[i] = RANGE(str.val[i], '0', '9'); + set4.val[i] = RANGE(str.val[i], 'A', 'Z'); + set5.val[i] = RANGE(str.val[i], 'a', 'z'); + set6.val[i] = CMPEQ(str.val[i], '-'); + set7.val[i] = CMPEQ(str.val[i], '_'); + + delta.val[i] = REPLACE(set1.val[i], 19); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set2.val[i], 16)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set3.val[i], 4)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set4.val[i], -65)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set5.val[i], -71)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set6.val[i], 17)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set7.val[i], -32)); + } + + // Check for invalid input: if any of the delta values are zero, + // fall back on bytewise code to do error checking and reporting: + uint8x16_t classified = CMPEQ(delta.val[0], 0); + classified = vorrq_u8(classified, CMPEQ(delta.val[1], 0)); + classified = vorrq_u8(classified, CMPEQ(delta.val[2], 0)); + classified = vorrq_u8(classified, CMPEQ(delta.val[3], 0)); + + // Extract both 32-bit halves; check that all bits are zero: + if (vgetq_lane_u32((uint32x4_t)classified, 0) != 0 + || vgetq_lane_u32((uint32x4_t)classified, 1) != 0 + || vgetq_lane_u32((uint32x4_t)classified, 2) != 0 + || vgetq_lane_u32((uint32x4_t)classified, 3) != 0) { + break; + } + + // Now simply add the delta values to the input: + str.val[0] = vaddq_u8(str.val[0], delta.val[0]); + str.val[1] = vaddq_u8(str.val[1], delta.val[1]); + str.val[2] = vaddq_u8(str.val[2], 
delta.val[2]); + str.val[3] = vaddq_u8(str.val[3], delta.val[3]); + + // Compress four bytes into three: + dec.val[0] = vshlq_n_u8(str.val[0], 2) | vshrq_n_u8(str.val[1], 4); + dec.val[1] = vshlq_n_u8(str.val[1], 4) | vshrq_n_u8(str.val[2], 2); + dec.val[2] = vshlq_n_u8(str.val[2], 6) | str.val[3]; + + // Interleave and store decoded result: + vst3q_u8((uint8_t *)o, dec); + + c += 64; + o += 48; + outl += 48; + srclen -= 64; +} diff --git a/contrib/libs/base64/neon32/dec_tail.c b/contrib/libs/base64/neon32/dec_tail.c index beb453a467..4844677e6d 100644 --- a/contrib/libs/base64/neon32/dec_tail.c +++ b/contrib/libs/base64/neon32/dec_tail.c @@ -1,65 +1,65 @@ - if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = neon32_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // Treat character '=' as invalid for byte 0: - break; - } - st.carry = q << 2; - st.bytes++; - - case 1: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = neon32_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // Treat character '=' as invalid for byte 1: - break; - } - *o++ = st.carry | (q >> 4); - st.carry = q << 4; - st.bytes++; - outl++; - - case 2: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = neon32_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // When q == 254, the input char is '='. Return 1 and EOF. - // Technically, should check if next byte is also '=', but never mind. - // When q == 255, the input char is invalid. Return 0 and EOF. - ret = (q == 254) ? 1 : 0; - break; - } - *o++ = st.carry | (q >> 2); - st.carry = q << 6; - st.bytes++; - outl++; - - case 3: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = neon32_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // When q == 254, the input char is '='. Return 1 and EOF. - // When q == 255, the input char is invalid. Return 0 and EOF. - ret = (q == 254) ? 
1 : 0; - break; - } - *o++ = st.carry | q; - st.carry = 0; - st.bytes = 0; - outl++; - } -} -state->eof = st.eof; -state->bytes = st.bytes; -state->carry = st.carry; -*outlen = outl; -return ret; + if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = neon32_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 0: + break; + } + st.carry = q << 2; + st.bytes++; + + case 1: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = neon32_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 1: + break; + } + *o++ = st.carry | (q >> 4); + st.carry = q << 4; + st.bytes++; + outl++; + + case 2: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = neon32_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // Technically, should check if next byte is also '=', but never mind. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | (q >> 2); + st.carry = q << 6; + st.bytes++; + outl++; + + case 3: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = neon32_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | q; + st.carry = 0; + st.bytes = 0; + outl++; + } +} +state->eof = st.eof; +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; +return ret; diff --git a/contrib/libs/base64/neon32/dec_uint32.c b/contrib/libs/base64/neon32/dec_uint32.c index 052284c7e6..5856446861 100644 --- a/contrib/libs/base64/neon32/dec_uint32.c +++ b/contrib/libs/base64/neon32/dec_uint32.c @@ -1,48 +1,48 @@ -// If we have native uint32's, pick off 4 bytes at a time for as long as we -// can, but make sure that we quit before seeing any == markers at the end of -// the string. 
Also, because we write a zero at the end of the output, ensure -// that there are at least 2 valid bytes of input data remaining to close the -// gap. 4 + 2 + 2 = 8 bytes: -while (srclen >= 8) -{ - uint32_t str, res, dec; - - // Load string: - str = *(uint32_t *)c; - - // Shuffle bytes to 32-bit bigendian: - str = cpu_to_be32(str); - - // Lookup each byte in the decoding table; if we encounter any - // "invalid" values, fall back on the bytewise code: - if ((dec = neon32_base64_table_dec[str >> 24]) > 63) { - break; - } - res = dec << 26; - - if ((dec = neon32_base64_table_dec[(str >> 16) & 0xFF]) > 63) { - break; - } - res |= dec << 20; - - if ((dec = neon32_base64_table_dec[(str >> 8) & 0xFF]) > 63) { - break; - } - res |= dec << 14; - - if ((dec = neon32_base64_table_dec[str & 0xFF]) > 63) { - break; - } - res |= dec << 8; - - // Reshuffle and repack into 3-byte output format: - res = be32_to_cpu(res); - - // Store back: - *(uint32_t *)o = res; - - c += 4; - o += 3; - outl += 3; - srclen -= 4; -} +// If we have native uint32's, pick off 4 bytes at a time for as long as we +// can, but make sure that we quit before seeing any == markers at the end of +// the string. Also, because we write a zero at the end of the output, ensure +// that there are at least 2 valid bytes of input data remaining to close the +// gap. 
4 + 2 + 2 = 8 bytes: +while (srclen >= 8) +{ + uint32_t str, res, dec; + + // Load string: + str = *(uint32_t *)c; + + // Shuffle bytes to 32-bit bigendian: + str = cpu_to_be32(str); + + // Lookup each byte in the decoding table; if we encounter any + // "invalid" values, fall back on the bytewise code: + if ((dec = neon32_base64_table_dec[str >> 24]) > 63) { + break; + } + res = dec << 26; + + if ((dec = neon32_base64_table_dec[(str >> 16) & 0xFF]) > 63) { + break; + } + res |= dec << 20; + + if ((dec = neon32_base64_table_dec[(str >> 8) & 0xFF]) > 63) { + break; + } + res |= dec << 14; + + if ((dec = neon32_base64_table_dec[str & 0xFF]) > 63) { + break; + } + res |= dec << 8; + + // Reshuffle and repack into 3-byte output format: + res = be32_to_cpu(res); + + // Store back: + *(uint32_t *)o = res; + + c += 4; + o += 3; + outl += 3; + srclen -= 4; +} diff --git a/contrib/libs/base64/neon32/enc_head.c b/contrib/libs/base64/neon32/enc_head.c index 122ad246b1..2b8b88eba3 100644 --- a/contrib/libs/base64/neon32/enc_head.c +++ b/contrib/libs/base64/neon32/enc_head.c @@ -1,23 +1,23 @@ -// Assume that *out is large enough to contain the output. -// Theoretically it should be 4/3 the length of src. -const uint8_t *c = (const uint8_t *)src; -uint8_t *o = (uint8_t *)out; - -// Use local temporaries to avoid cache thrashing: -size_t outl = 0; -struct neon32_base64_state st; -st.bytes = state->bytes; -st.carry = state->carry; - -// Turn three bytes into four 6-bit numbers: -// in[0] = 00111111 -// in[1] = 00112222 -// in[2] = 00222233 -// in[3] = 00333333 - -// Duff's device, a for() loop inside a switch() statement. Legal! -switch (st.bytes) -{ - for (;;) - { - case 0: +// Assume that *out is large enough to contain the output. +// Theoretically it should be 4/3 the length of src. 
+const uint8_t *c = (const uint8_t *)src; +uint8_t *o = (uint8_t *)out; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct neon32_base64_state st; +st.bytes = state->bytes; +st.carry = state->carry; + +// Turn three bytes into four 6-bit numbers: +// in[0] = 00111111 +// in[1] = 00112222 +// in[2] = 00222233 +// in[3] = 00333333 + +// Duff's device, a for() loop inside a switch() statement. Legal! +switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/neon32/enc_neon.c b/contrib/libs/base64/neon32/enc_neon.c index effb7f9e07..05d7eb41b5 100644 --- a/contrib/libs/base64/neon32/enc_neon.c +++ b/contrib/libs/base64/neon32/enc_neon.c @@ -1,23 +1,23 @@ -// If we have ARM NEON support, pick off 48 bytes at a time: -while (srclen >= 48) -{ - uint8x16x3_t str; - uint8x16x4_t res; - - // Load 48 bytes and deinterleave: - str = vld3q_u8((uint8_t *)c); - - // Reshuffle: - res = enc_reshuffle(str); - - // Translate reshuffled bytes to the Base64 alphabet: - res = enc_translate(res); - - // Interleave and store result: - vst4q_u8((uint8_t *)o, res); - - c += 48; // 3 * 16 bytes of input - o += 64; // 4 * 16 bytes of output - outl += 64; - srclen -= 48; -} +// If we have ARM NEON support, pick off 48 bytes at a time: +while (srclen >= 48) +{ + uint8x16x3_t str; + uint8x16x4_t res; + + // Load 48 bytes and deinterleave: + str = vld3q_u8((uint8_t *)c); + + // Reshuffle: + res = enc_reshuffle(str); + + // Translate reshuffled bytes to the Base64 alphabet: + res = enc_translate(res); + + // Interleave and store result: + vst4q_u8((uint8_t *)o, res); + + c += 48; // 3 * 16 bytes of input + o += 64; // 4 * 16 bytes of output + outl += 64; + srclen -= 48; +} diff --git a/contrib/libs/base64/neon32/enc_tail.c b/contrib/libs/base64/neon32/enc_tail.c index 83a5d897e2..f19ae5f736 100644 --- a/contrib/libs/base64/neon32/enc_tail.c +++ b/contrib/libs/base64/neon32/enc_tail.c @@ -1,28 +1,28 @@ - if (srclen-- == 0) { - break; - } - *o++ = 
neon32_base64_table_enc[*c >> 2]; - st.carry = (*c++ << 4) & 0x30; - st.bytes++; - outl += 1; - - case 1: if (srclen-- == 0) { - break; - } - *o++ = neon32_base64_table_enc[st.carry | (*c >> 4)]; - st.carry = (*c++ << 2) & 0x3C; - st.bytes++; - outl += 1; - - case 2: if (srclen-- == 0) { - break; - } - *o++ = neon32_base64_table_enc[st.carry | (*c >> 6)]; - *o++ = neon32_base64_table_enc[*c++ & 0x3F]; - st.bytes = 0; - outl += 2; - } -} -state->bytes = st.bytes; -state->carry = st.carry; -*outlen = outl; + if (srclen-- == 0) { + break; + } + *o++ = neon32_base64_table_enc[*c >> 2]; + st.carry = (*c++ << 4) & 0x30; + st.bytes++; + outl += 1; + + case 1: if (srclen-- == 0) { + break; + } + *o++ = neon32_base64_table_enc[st.carry | (*c >> 4)]; + st.carry = (*c++ << 2) & 0x3C; + st.bytes++; + outl += 1; + + case 2: if (srclen-- == 0) { + break; + } + *o++ = neon32_base64_table_enc[st.carry | (*c >> 6)]; + *o++ = neon32_base64_table_enc[*c++ & 0x3F]; + st.bytes = 0; + outl += 2; + } +} +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; diff --git a/contrib/libs/base64/neon32/enc_uint32.c b/contrib/libs/base64/neon32/enc_uint32.c index a9f49375bd..c7bc3cd234 100644 --- a/contrib/libs/base64/neon32/enc_uint32.c +++ b/contrib/libs/base64/neon32/enc_uint32.c @@ -1,24 +1,24 @@ -// If we have 32-bit ints, pick off 3 bytes at a time for as long as we can, -// but ensure that there are at least 4 bytes available to avoid segfaulting: -while (srclen >= 4) -{ - // Load string: - uint32_t str = *(uint32_t *)c; - - // Reorder to 32-bit big-endian, if not already in that format. 
The - // workset must be in big-endian, otherwise the shifted bits do not - // carry over properly among adjacent bytes: - str = cpu_to_be32(str); - - // Shift input by 6 bytes each round and mask in only the lower 6 bits; - // look up the character in the Base64 encoding table and write it to - // the output location: - *o++ = neon32_base64_table_enc[(str >> 26) & 0x3F]; - *o++ = neon32_base64_table_enc[(str >> 20) & 0x3F]; - *o++ = neon32_base64_table_enc[(str >> 14) & 0x3F]; - *o++ = neon32_base64_table_enc[(str >> 8) & 0x3F]; - - c += 3; // 3 bytes of input - outl += 4; // 4 bytes of output - srclen -= 3; -} +// If we have 32-bit ints, pick off 3 bytes at a time for as long as we can, +// but ensure that there are at least 4 bytes available to avoid segfaulting: +while (srclen >= 4) +{ + // Load string: + uint32_t str = *(uint32_t *)c; + + // Reorder to 32-bit big-endian, if not already in that format. The + // workset must be in big-endian, otherwise the shifted bits do not + // carry over properly among adjacent bytes: + str = cpu_to_be32(str); + + // Shift input by 6 bytes each round and mask in only the lower 6 bits; + // look up the character in the Base64 encoding table and write it to + // the output location: + *o++ = neon32_base64_table_enc[(str >> 26) & 0x3F]; + *o++ = neon32_base64_table_enc[(str >> 20) & 0x3F]; + *o++ = neon32_base64_table_enc[(str >> 14) & 0x3F]; + *o++ = neon32_base64_table_enc[(str >> 8) & 0x3F]; + + c += 3; // 3 bytes of input + outl += 4; // 4 bytes of output + srclen -= 3; +} diff --git a/contrib/libs/base64/neon32/lib.c b/contrib/libs/base64/neon32/lib.c index 10f92c5032..52271e925a 100644 --- a/contrib/libs/base64/neon32/lib.c +++ b/contrib/libs/base64/neon32/lib.c @@ -1,121 +1,121 @@ -#include <stdint.h> -#include <stddef.h> - -#include "libbase64.h" -#include "codecs.h" - -const uint8_t -neon32_base64_table_enc[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - -// In the lookup table 
below, note that the value for '=' (character 61) is -// 254, not 255. This character is used for in-band signaling of the end of -// the datastream, and we will use that later. The characters A-Z, a-z, 0-9 -// and + / are mapped to their "decoded" values. The other bytes all map to -// the value 255, which flags them as "invalid input". - -const uint8_t -neon32_base64_table_dec[] = -{ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 - 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 - 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, -}; - -void -neon32_base64_stream_encode_init (struct neon32_base64_state *state) -{ - state->eof = 0; - state->bytes = 0; - state->carry = 0; -} - -void -neon32_base64_stream_encode_final - ( struct neon32_base64_state *state - , char *out - , size_t *outlen - ) -{ - uint8_t *o 
= (uint8_t *)out; - - if (state->bytes == 1) { - *o++ = neon32_base64_table_enc[state->carry]; - *o++ = '='; - *o++ = '='; - *outlen = 3; - return; - } - if (state->bytes == 2) { - *o++ = neon32_base64_table_enc[state->carry]; - *o++ = '='; - *outlen = 2; - return; - } - *outlen = 0; -} - -void -neon32_base64_stream_decode_init (struct neon32_base64_state *state) -{ - state->eof = 0; - state->bytes = 0; - state->carry = 0; -} - -void -neon32_base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - size_t s; - size_t t; - struct neon32_base64_state state; - - // Init the stream reader: - neon32_base64_stream_encode_init(&state); - - // Feed the whole string to the stream reader: - neon32_base64_stream_encode(&state, src, srclen, out, &s); - - // Finalize the stream by writing trailer if any: - neon32_base64_stream_encode_final(&state, out + s, &t); - - // Final output length is stream length plus tail: - *outlen = s + t; -} - -int -neon32_base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - struct neon32_base64_state state; - - // Init the stream reader: - neon32_base64_stream_decode_init(&state); - - // Feed the whole string to the stream reader: - return neon32_base64_stream_decode(&state, src, srclen, out, outlen); -} +#include <stdint.h> +#include <stddef.h> + +#include "libbase64.h" +#include "codecs.h" + +const uint8_t +neon32_base64_table_enc[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +// In the lookup table below, note that the value for '=' (character 61) is +// 254, not 255. This character is used for in-band signaling of the end of +// the datastream, and we will use that later. The characters A-Z, a-z, 0-9 +// and + / are mapped to their "decoded" values. The other bytes all map to +// the value 255, which flags them as "invalid input". 
+ +const uint8_t +neon32_base64_table_dec[] = +{ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +}; + +void +neon32_base64_stream_encode_init (struct neon32_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +neon32_base64_stream_encode_final + ( struct neon32_base64_state *state + , char *out + , size_t *outlen + ) +{ + uint8_t *o = (uint8_t *)out; + + if (state->bytes == 1) { + *o++ = neon32_base64_table_enc[state->carry]; + *o++ = '='; + *o++ = '='; + *outlen = 3; + return; + } + if (state->bytes == 2) { + *o++ = neon32_base64_table_enc[state->carry]; + *o++ = '='; + *outlen = 2; + return; + } + *outlen = 0; +} + +void +neon32_base64_stream_decode_init (struct 
neon32_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +neon32_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + size_t s; + size_t t; + struct neon32_base64_state state; + + // Init the stream reader: + neon32_base64_stream_encode_init(&state); + + // Feed the whole string to the stream reader: + neon32_base64_stream_encode(&state, src, srclen, out, &s); + + // Finalize the stream by writing trailer if any: + neon32_base64_stream_encode_final(&state, out + s, &t); + + // Final output length is stream length plus tail: + *outlen = s + t; +} + +int +neon32_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + struct neon32_base64_state state; + + // Init the stream reader: + neon32_base64_stream_decode_init(&state); + + // Feed the whole string to the stream reader: + return neon32_base64_stream_decode(&state, src, srclen, out, outlen); +} diff --git a/contrib/libs/base64/neon32/libbase64.h b/contrib/libs/base64/neon32/libbase64.h index b78dcc4a7e..fa975550d8 100644 --- a/contrib/libs/base64/neon32/libbase64.h +++ b/contrib/libs/base64/neon32/libbase64.h @@ -1,89 +1,89 @@ -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -struct neon32_base64_state { - int eof; - int bytes; - unsigned char carry; -}; - -/* Wrapper function to encode a plain string of given length. Output is written - * to *out without trailing zero. Output length in bytes is written to *outlen. - * The buffer in `out` has been allocated by the caller and is at least 4/3 the - * size of the input. See above for `flags`; set to 0 for default operation: */ -void neon32_base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Call this before calling base64_stream_encode() to init the state. 
See above - * for `flags`; set to 0 for default operation: */ -void neon32_base64_stream_encode_init - ( struct neon32_base64_state *state - ) ; - -/* Encodes the block of data of given length at `src`, into the buffer at - * `out`. Caller is responsible for allocating a large enough out-buffer; it - * must be at least 4/3 the size of the in-buffer, but take some margin. Places - * the number of new bytes written into `outlen` (which is set to zero when the - * function starts). Does not zero-terminate or finalize the output. */ -void neon32_base64_stream_encode - ( struct neon32_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Finalizes the output begun by previous calls to `base64_stream_encode()`. - * Adds the required end-of-stream markers if appropriate. `outlen` is modified - * and will contain the number of new bytes written at `out` (which will quite - * often be zero). */ -void neon32_base64_stream_encode_final - ( struct neon32_base64_state *state - , char *out - , size_t *outlen - ) ; - -/* Wrapper function to decode a plain string of given length. Output is written - * to *out without trailing zero. Output length in bytes is written to *outlen. - * The buffer in `out` has been allocated by the caller and is at least 3/4 the - * size of the input. See above for `flags`, set to 0 for default operation: */ -int neon32_base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Call this before calling base64_stream_decode() to init the state. See above - * for `flags`; set to 0 for default operation: */ -void neon32_base64_stream_decode_init - ( struct neon32_base64_state *state - ) ; - -/* Decodes the block of data of given length at `src`, into the buffer at - * `out`. Caller is responsible for allocating a large enough out-buffer; it - * must be at least 3/4 the size of the in-buffer, but take some margin. 
Places - * the number of new bytes written into `outlen` (which is set to zero when the - * function starts). Does not zero-terminate the output. Returns 1 if all is - * well, and 0 if a decoding error was found, such as an invalid character. - * Returns -1 if the chosen codec is not included in the current build. Used by - * the test harness to check whether a codec is available for testing. */ -int neon32_base64_stream_decode - ( struct neon32_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -#ifdef __cplusplus -} -#endif - +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct neon32_base64_state { + int eof; + int bytes; + unsigned char carry; +}; + +/* Wrapper function to encode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 4/3 the + * size of the input. See above for `flags`; set to 0 for default operation: */ +void neon32_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling base64_stream_encode() to init the state. See above + * for `flags`; set to 0 for default operation: */ +void neon32_base64_stream_encode_init + ( struct neon32_base64_state *state + ) ; + +/* Encodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 4/3 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate or finalize the output. */ +void neon32_base64_stream_encode + ( struct neon32_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Finalizes the output begun by previous calls to `base64_stream_encode()`. 
+ * Adds the required end-of-stream markers if appropriate. `outlen` is modified + * and will contain the number of new bytes written at `out` (which will quite + * often be zero). */ +void neon32_base64_stream_encode_final + ( struct neon32_base64_state *state + , char *out + , size_t *outlen + ) ; + +/* Wrapper function to decode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 3/4 the + * size of the input. See above for `flags`, set to 0 for default operation: */ +int neon32_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling base64_stream_decode() to init the state. See above + * for `flags`; set to 0 for default operation: */ +void neon32_base64_stream_decode_init + ( struct neon32_base64_state *state + ) ; + +/* Decodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 3/4 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate the output. Returns 1 if all is + * well, and 0 if a decoding error was found, such as an invalid character. + * Returns -1 if the chosen codec is not included in the current build. Used by + * the test harness to check whether a codec is available for testing. 
*/ +int neon32_base64_stream_decode + ( struct neon32_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +#ifdef __cplusplus +} +#endif + diff --git a/contrib/libs/base64/neon32/ya.make b/contrib/libs/base64/neon32/ya.make index d17e5a0e70..435b1127bb 100644 --- a/contrib/libs/base64/neon32/ya.make +++ b/contrib/libs/base64/neon32/ya.make @@ -1,11 +1,11 @@ -OWNER( - yazevnul +OWNER( + yazevnul g:contrib g:cpp-contrib -) - -LIBRARY() - +) + +LIBRARY() + LICENSE( BSD-2-Clause AND MIT @@ -13,15 +13,15 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -NO_UTIL() - -SRCS( - codec_neon32.c - lib.c -) - +NO_UTIL() + +SRCS( + codec_neon32.c + lib.c +) + IF (OS_LINUX OR OS_DARWIN OR OS_ANDROID) CONLYFLAGS(-std=c11) -ENDIF() - -END() +ENDIF() + +END() diff --git a/contrib/libs/base64/neon64/codec_neon64.c b/contrib/libs/base64/neon64/codec_neon64.c index 0a954a6712..7a352c3adf 100644 --- a/contrib/libs/base64/neon64/codec_neon64.c +++ b/contrib/libs/base64/neon64/codec_neon64.c @@ -1,98 +1,98 @@ -#if (defined(__ARM_NEON) && !defined(__ARM_NEON__)) -#define __ARM_NEON__ -#endif - -#include <stdint.h> -#include <stddef.h> -#include <stdlib.h> -#ifdef __ARM_NEON__ -#include <arm_neon.h> -#endif - -#include "libbase64.h" -#include "codecs.h" - -#if (defined(__aarch64__) && defined(__ARM_NEON__)) - -#define CMPGT(s,n) vcgtq_u8((s), vdupq_n_u8(n)) -#define CMPEQ(s,n) vceqq_u8((s), vdupq_n_u8(n)) -#define REPLACE(s,n) vandq_u8((s), vdupq_n_u8(n)) -#define RANGE(s,a,b) vandq_u8(vcgeq_u8((s), vdupq_n_u8(a)), vcleq_u8((s), vdupq_n_u8(b))) - -// With this transposed encoding table, we can use -// a 64-byte lookup to do the encoding. -// Read the table top to bottom, left to right. 
-static const char *neon64_base64_table_enc_transposed = -{ - "AQgw" - "BRhx" - "CSiy" - "DTjz" - "EUk0" - "FVl1" - "GWm2" - "HXn3" - "IYo4" - "JZp5" - "Kaq6" - "Lbr7" - "Mcs8" - "Ndt9" - "Oeu+" - "Pfv/" -}; -#endif - -// Stride size is so large on these NEON 64-bit functions -// (48 bytes encode, 64 bytes decode) that we inline the -// uint64 codec to stay performant on smaller inputs. - -void -neon64_base64_stream_encode - ( struct neon64_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ -#if (defined(__aarch64__) && defined(__ARM_NEON__)) - uint8x16x4_t tbl_enc = vld4q_u8((uint8_t const*)neon64_base64_table_enc_transposed); - - #include "enc_head.c" - #include "enc_neon.c" - #include "enc_uint64.c" - #include "enc_tail.c" -#else - (void)state; - (void)src; - (void)srclen; - (void)out; - (void)outlen; - abort(); -#endif -} - -int -neon64_base64_stream_decode - ( struct neon64_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ -#if (defined(__aarch64__) && defined(__ARM_NEON__)) - #include "dec_head.c" - #include "dec_neon.c" - #include "dec_uint64.c" - #include "dec_tail.c" -#else - (void)state; - (void)src; - (void)srclen; - (void)out; - (void)outlen; - abort(); -#endif -} +#if (defined(__ARM_NEON) && !defined(__ARM_NEON__)) +#define __ARM_NEON__ +#endif + +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> +#ifdef __ARM_NEON__ +#include <arm_neon.h> +#endif + +#include "libbase64.h" +#include "codecs.h" + +#if (defined(__aarch64__) && defined(__ARM_NEON__)) + +#define CMPGT(s,n) vcgtq_u8((s), vdupq_n_u8(n)) +#define CMPEQ(s,n) vceqq_u8((s), vdupq_n_u8(n)) +#define REPLACE(s,n) vandq_u8((s), vdupq_n_u8(n)) +#define RANGE(s,a,b) vandq_u8(vcgeq_u8((s), vdupq_n_u8(a)), vcleq_u8((s), vdupq_n_u8(b))) + +// With this transposed encoding table, we can use +// a 64-byte lookup to do the encoding. +// Read the table top to bottom, left to right. 
+static const char *neon64_base64_table_enc_transposed = +{ + "AQgw" + "BRhx" + "CSiy" + "DTjz" + "EUk0" + "FVl1" + "GWm2" + "HXn3" + "IYo4" + "JZp5" + "Kaq6" + "Lbr7" + "Mcs8" + "Ndt9" + "Oeu+" + "Pfv/" +}; +#endif + +// Stride size is so large on these NEON 64-bit functions +// (48 bytes encode, 64 bytes decode) that we inline the +// uint64 codec to stay performant on smaller inputs. + +void +neon64_base64_stream_encode + ( struct neon64_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#if (defined(__aarch64__) && defined(__ARM_NEON__)) + uint8x16x4_t tbl_enc = vld4q_u8((uint8_t const*)neon64_base64_table_enc_transposed); + + #include "enc_head.c" + #include "enc_neon.c" + #include "enc_uint64.c" + #include "enc_tail.c" +#else + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} + +int +neon64_base64_stream_decode + ( struct neon64_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#if (defined(__aarch64__) && defined(__ARM_NEON__)) + #include "dec_head.c" + #include "dec_neon.c" + #include "dec_uint64.c" + #include "dec_tail.c" +#else + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} diff --git a/contrib/libs/base64/neon64/codecs.h b/contrib/libs/base64/neon64/codecs.h index 0b8138c5bb..d97269a4a5 100644 --- a/contrib/libs/base64/neon64/codecs.h +++ b/contrib/libs/base64/neon64/codecs.h @@ -1,35 +1,35 @@ -#pragma once - -// Define machine endianness. 
This is for GCC: -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) - #define BASE64_NEON64_LITTLE_ENDIAN 1 -#else - #define BASE64_NEON64_LITTLE_ENDIAN 0 -#endif - -// This is for Clang: -#ifdef __LITTLE_ENDIAN__ - #define BASE64_NEON64_LITTLE_ENDIAN 1 -#endif - -#ifdef __BIG_ENDIAN__ - #define BASE64_NEON64_LITTLE_ENDIAN 0 -#endif - -// Endian conversion functions -#if BASE64_NEON64_LITTLE_ENDIAN - #define cpu_to_be32(x) __builtin_bswap32(x) - #define cpu_to_be64(x) __builtin_bswap64(x) - #define be32_to_cpu(x) __builtin_bswap32(x) - #define be64_to_cpu(x) __builtin_bswap64(x) -#else - #define cpu_to_be32(x) (x) - #define cpu_to_be64(x) (x) - #define be32_to_cpu(x) (x) - #define be64_to_cpu(x) (x) -#endif - -// These tables are used by all codecs -// for fallback plain encoding/decoding: -extern const uint8_t neon64_base64_table_enc[]; -extern const uint8_t neon64_base64_table_dec[]; +#pragma once + +// Define machine endianness. This is for GCC: +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + #define BASE64_NEON64_LITTLE_ENDIAN 1 +#else + #define BASE64_NEON64_LITTLE_ENDIAN 0 +#endif + +// This is for Clang: +#ifdef __LITTLE_ENDIAN__ + #define BASE64_NEON64_LITTLE_ENDIAN 1 +#endif + +#ifdef __BIG_ENDIAN__ + #define BASE64_NEON64_LITTLE_ENDIAN 0 +#endif + +// Endian conversion functions +#if BASE64_NEON64_LITTLE_ENDIAN + #define cpu_to_be32(x) __builtin_bswap32(x) + #define cpu_to_be64(x) __builtin_bswap64(x) + #define be32_to_cpu(x) __builtin_bswap32(x) + #define be64_to_cpu(x) __builtin_bswap64(x) +#else + #define cpu_to_be32(x) (x) + #define cpu_to_be64(x) (x) + #define be32_to_cpu(x) (x) + #define be64_to_cpu(x) (x) +#endif + +// These tables are used by all codecs +// for fallback plain encoding/decoding: +extern const uint8_t neon64_base64_table_enc[]; +extern const uint8_t neon64_base64_table_dec[]; diff --git a/contrib/libs/base64/neon64/dec_head.c b/contrib/libs/base64/neon64/dec_head.c index 89decb1be7..10f0def5be 100644 --- 
a/contrib/libs/base64/neon64/dec_head.c +++ b/contrib/libs/base64/neon64/dec_head.c @@ -1,29 +1,29 @@ -int ret = 0; -const uint8_t *c = (const uint8_t *)src; -uint8_t *o = (uint8_t *)out; -uint8_t q; - -// Use local temporaries to avoid cache thrashing: -size_t outl = 0; -struct neon64_base64_state st; -st.eof = state->eof; -st.bytes = state->bytes; -st.carry = state->carry; - -// If we previously saw an EOF or an invalid character, bail out: -if (st.eof) { - *outlen = 0; - return 0; -} - -// Turn four 6-bit numbers into three bytes: -// out[0] = 11111122 -// out[1] = 22223333 -// out[2] = 33444444 - -// Duff's device again: -switch (st.bytes) -{ - for (;;) - { - case 0: +int ret = 0; +const uint8_t *c = (const uint8_t *)src; +uint8_t *o = (uint8_t *)out; +uint8_t q; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct neon64_base64_state st; +st.eof = state->eof; +st.bytes = state->bytes; +st.carry = state->carry; + +// If we previously saw an EOF or an invalid character, bail out: +if (st.eof) { + *outlen = 0; + return 0; +} + +// Turn four 6-bit numbers into three bytes: +// out[0] = 11111122 +// out[1] = 22223333 +// out[2] = 33444444 + +// Duff's device again: +switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/neon64/dec_neon.c b/contrib/libs/base64/neon64/dec_neon.c index 30d846a916..713d8ca9a4 100644 --- a/contrib/libs/base64/neon64/dec_neon.c +++ b/contrib/libs/base64/neon64/dec_neon.c @@ -1,77 +1,77 @@ -// If we have NEON support, pick off 64 bytes at a time for as long as we can. 
-// Unlike the SSE codecs, we don't write trailing zero bytes to output, so we -// don't need to check if we have enough remaining input to cover them: -while (srclen >= 64) -{ - uint8x16x4_t set1, set2, set3, set4, set5, set6, set7, delta; - uint8x16x3_t dec; - - // Load 64 bytes and deinterleave: - uint8x16x4_t str = vld4q_u8((uint8_t *)c); - - // The input consists of six character sets in the Base64 alphabet, - // which we need to map back to the 6-bit values they represent. - // There are three ranges, two singles, and then there's the rest. - // - // # From To Add Characters - // 1 [43] [62] +19 + - // 2 [47] [63] +16 / - // 3 [48..57] [52..61] +4 0..9 - // 4 [65..90] [0..25] -65 A..Z - // 5 [97..122] [26..51] -71 a..z - // (6) Everything else => invalid input - - // Benchmarking on the Raspberry Pi 2B and Clang shows that looping - // generates slightly faster code than explicit unrolling: - for (int i = 0; i < 4; i++) { - set1.val[i] = CMPEQ(str.val[i], '+'); - set2.val[i] = CMPEQ(str.val[i], '/'); - set3.val[i] = RANGE(str.val[i], '0', '9'); - set4.val[i] = RANGE(str.val[i], 'A', 'Z'); - set5.val[i] = RANGE(str.val[i], 'a', 'z'); - set6.val[i] = CMPEQ(str.val[i], '-'); - set7.val[i] = CMPEQ(str.val[i], '_'); - - delta.val[i] = REPLACE(set1.val[i], 19); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set2.val[i], 16)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set3.val[i], 4)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set4.val[i], -65)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set5.val[i], -71)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set6.val[i], 17)); - delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set7.val[i], -32)); - } - - // Check for invalid input: if any of the delta values are zero, - // fall back on bytewise code to do error checking and reporting: - uint8x16_t classified = CMPEQ(delta.val[0], 0); - classified = vorrq_u8(classified, CMPEQ(delta.val[1], 0)); - classified = vorrq_u8(classified, CMPEQ(delta.val[2], 0)); 
- classified = vorrq_u8(classified, CMPEQ(delta.val[3], 0)); - - // Extract both 32-bit halves; check that all bits are zero: - if (vgetq_lane_u32((uint32x4_t)classified, 0) != 0 - || vgetq_lane_u32((uint32x4_t)classified, 1) != 0 - || vgetq_lane_u32((uint32x4_t)classified, 2) != 0 - || vgetq_lane_u32((uint32x4_t)classified, 3) != 0) { - break; - } - - // Now simply add the delta values to the input: - str.val[0] = vaddq_u8(str.val[0], delta.val[0]); - str.val[1] = vaddq_u8(str.val[1], delta.val[1]); - str.val[2] = vaddq_u8(str.val[2], delta.val[2]); - str.val[3] = vaddq_u8(str.val[3], delta.val[3]); - - // Compress four bytes into three: - dec.val[0] = vshlq_n_u8(str.val[0], 2) | vshrq_n_u8(str.val[1], 4); - dec.val[1] = vshlq_n_u8(str.val[1], 4) | vshrq_n_u8(str.val[2], 2); - dec.val[2] = vshlq_n_u8(str.val[2], 6) | str.val[3]; - - // Interleave and store decoded result: - vst3q_u8((uint8_t *)o, dec); - - c += 64; - o += 48; - outl += 48; - srclen -= 64; -} +// If we have NEON support, pick off 64 bytes at a time for as long as we can. +// Unlike the SSE codecs, we don't write trailing zero bytes to output, so we +// don't need to check if we have enough remaining input to cover them: +while (srclen >= 64) +{ + uint8x16x4_t set1, set2, set3, set4, set5, set6, set7, delta; + uint8x16x3_t dec; + + // Load 64 bytes and deinterleave: + uint8x16x4_t str = vld4q_u8((uint8_t *)c); + + // The input consists of six character sets in the Base64 alphabet, + // which we need to map back to the 6-bit values they represent. + // There are three ranges, two singles, and then there's the rest. 
+ // + // # From To Add Characters + // 1 [43] [62] +19 + + // 2 [47] [63] +16 / + // 3 [48..57] [52..61] +4 0..9 + // 4 [65..90] [0..25] -65 A..Z + // 5 [97..122] [26..51] -71 a..z + // (6) Everything else => invalid input + + // Benchmarking on the Raspberry Pi 2B and Clang shows that looping + // generates slightly faster code than explicit unrolling: + for (int i = 0; i < 4; i++) { + set1.val[i] = CMPEQ(str.val[i], '+'); + set2.val[i] = CMPEQ(str.val[i], '/'); + set3.val[i] = RANGE(str.val[i], '0', '9'); + set4.val[i] = RANGE(str.val[i], 'A', 'Z'); + set5.val[i] = RANGE(str.val[i], 'a', 'z'); + set6.val[i] = CMPEQ(str.val[i], '-'); + set7.val[i] = CMPEQ(str.val[i], '_'); + + delta.val[i] = REPLACE(set1.val[i], 19); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set2.val[i], 16)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set3.val[i], 4)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set4.val[i], -65)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set5.val[i], -71)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set6.val[i], 17)); + delta.val[i] = vorrq_u8(delta.val[i], REPLACE(set7.val[i], -32)); + } + + // Check for invalid input: if any of the delta values are zero, + // fall back on bytewise code to do error checking and reporting: + uint8x16_t classified = CMPEQ(delta.val[0], 0); + classified = vorrq_u8(classified, CMPEQ(delta.val[1], 0)); + classified = vorrq_u8(classified, CMPEQ(delta.val[2], 0)); + classified = vorrq_u8(classified, CMPEQ(delta.val[3], 0)); + + // Extract both 32-bit halves; check that all bits are zero: + if (vgetq_lane_u32((uint32x4_t)classified, 0) != 0 + || vgetq_lane_u32((uint32x4_t)classified, 1) != 0 + || vgetq_lane_u32((uint32x4_t)classified, 2) != 0 + || vgetq_lane_u32((uint32x4_t)classified, 3) != 0) { + break; + } + + // Now simply add the delta values to the input: + str.val[0] = vaddq_u8(str.val[0], delta.val[0]); + str.val[1] = vaddq_u8(str.val[1], delta.val[1]); + str.val[2] = vaddq_u8(str.val[2], 
delta.val[2]); + str.val[3] = vaddq_u8(str.val[3], delta.val[3]); + + // Compress four bytes into three: + dec.val[0] = vshlq_n_u8(str.val[0], 2) | vshrq_n_u8(str.val[1], 4); + dec.val[1] = vshlq_n_u8(str.val[1], 4) | vshrq_n_u8(str.val[2], 2); + dec.val[2] = vshlq_n_u8(str.val[2], 6) | str.val[3]; + + // Interleave and store decoded result: + vst3q_u8((uint8_t *)o, dec); + + c += 64; + o += 48; + outl += 48; + srclen -= 64; +} diff --git a/contrib/libs/base64/neon64/dec_tail.c b/contrib/libs/base64/neon64/dec_tail.c index b530aa62cc..90a3eec2d3 100644 --- a/contrib/libs/base64/neon64/dec_tail.c +++ b/contrib/libs/base64/neon64/dec_tail.c @@ -1,65 +1,65 @@ - if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = neon64_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // Treat character '=' as invalid for byte 0: - break; - } - st.carry = q << 2; - st.bytes++; - - case 1: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = neon64_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // Treat character '=' as invalid for byte 1: - break; - } - *o++ = st.carry | (q >> 4); - st.carry = q << 4; - st.bytes++; - outl++; - - case 2: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = neon64_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // When q == 254, the input char is '='. Return 1 and EOF. - // Technically, should check if next byte is also '=', but never mind. - // When q == 255, the input char is invalid. Return 0 and EOF. - ret = (q == 254) ? 1 : 0; - break; - } - *o++ = st.carry | (q >> 2); - st.carry = q << 6; - st.bytes++; - outl++; - - case 3: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = neon64_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // When q == 254, the input char is '='. Return 1 and EOF. - // When q == 255, the input char is invalid. Return 0 and EOF. - ret = (q == 254) ? 
1 : 0; - break; - } - *o++ = st.carry | q; - st.carry = 0; - st.bytes = 0; - outl++; - } -} -state->eof = st.eof; -state->bytes = st.bytes; -state->carry = st.carry; -*outlen = outl; -return ret; + if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = neon64_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 0: + break; + } + st.carry = q << 2; + st.bytes++; + + case 1: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = neon64_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 1: + break; + } + *o++ = st.carry | (q >> 4); + st.carry = q << 4; + st.bytes++; + outl++; + + case 2: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = neon64_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // Technically, should check if next byte is also '=', but never mind. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | (q >> 2); + st.carry = q << 6; + st.bytes++; + outl++; + + case 3: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = neon64_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | q; + st.carry = 0; + st.bytes = 0; + outl++; + } +} +state->eof = st.eof; +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; +return ret; diff --git a/contrib/libs/base64/neon64/dec_uint64.c b/contrib/libs/base64/neon64/dec_uint64.c index 90d134af38..0ea70a3424 100644 --- a/contrib/libs/base64/neon64/dec_uint64.c +++ b/contrib/libs/base64/neon64/dec_uint64.c @@ -1,68 +1,68 @@ -// If we have native uint64's, pick off 8 bytes at a time for as long as we -// can, but make sure that we quit before seeing any == markers at the end of -// the string. 
Also, because we write two zeroes at the end of the output, -// ensure that there are at least 3 valid bytes of input data remaining to -// close the gap. 8 + 2 + 3 = 13 bytes: -while (srclen >= 13) -{ - uint64_t str, res, dec; - - // Load string: - str = *(uint64_t *)c; - - // Shuffle bytes to 64-bit bigendian: - str = cpu_to_be64(str); - - // Lookup each byte in the decoding table; if we encounter any - // "invalid" values, fall back on the bytewise code: - if ((dec = neon64_base64_table_dec[str >> 56]) > 63) { - break; - } - res = dec << 58; - - if ((dec = neon64_base64_table_dec[(str >> 48) & 0xFF]) > 63) { - break; - } - res |= dec << 52; - - if ((dec = neon64_base64_table_dec[(str >> 40) & 0xFF]) > 63) { - break; - } - res |= dec << 46; - - if ((dec = neon64_base64_table_dec[(str >> 32) & 0xFF]) > 63) { - break; - } - res |= dec << 40; - - if ((dec = neon64_base64_table_dec[(str >> 24) & 0xFF]) > 63) { - break; - } - res |= dec << 34; - - if ((dec = neon64_base64_table_dec[(str >> 16) & 0xFF]) > 63) { - break; - } - res |= dec << 28; - - if ((dec = neon64_base64_table_dec[(str >> 8) & 0xFF]) > 63) { - break; - } - res |= dec << 22; - - if ((dec = neon64_base64_table_dec[str & 0xFF]) > 63) { - break; - } - res |= dec << 16; - - // Reshuffle and repack into 6-byte output format: - res = be64_to_cpu(res); - - // Store back: - *(uint64_t *)o = res; - - c += 8; - o += 6; - outl += 6; - srclen -= 8; -} +// If we have native uint64's, pick off 8 bytes at a time for as long as we +// can, but make sure that we quit before seeing any == markers at the end of +// the string. Also, because we write two zeroes at the end of the output, +// ensure that there are at least 3 valid bytes of input data remaining to +// close the gap. 
8 + 2 + 3 = 13 bytes: +while (srclen >= 13) +{ + uint64_t str, res, dec; + + // Load string: + str = *(uint64_t *)c; + + // Shuffle bytes to 64-bit bigendian: + str = cpu_to_be64(str); + + // Lookup each byte in the decoding table; if we encounter any + // "invalid" values, fall back on the bytewise code: + if ((dec = neon64_base64_table_dec[str >> 56]) > 63) { + break; + } + res = dec << 58; + + if ((dec = neon64_base64_table_dec[(str >> 48) & 0xFF]) > 63) { + break; + } + res |= dec << 52; + + if ((dec = neon64_base64_table_dec[(str >> 40) & 0xFF]) > 63) { + break; + } + res |= dec << 46; + + if ((dec = neon64_base64_table_dec[(str >> 32) & 0xFF]) > 63) { + break; + } + res |= dec << 40; + + if ((dec = neon64_base64_table_dec[(str >> 24) & 0xFF]) > 63) { + break; + } + res |= dec << 34; + + if ((dec = neon64_base64_table_dec[(str >> 16) & 0xFF]) > 63) { + break; + } + res |= dec << 28; + + if ((dec = neon64_base64_table_dec[(str >> 8) & 0xFF]) > 63) { + break; + } + res |= dec << 22; + + if ((dec = neon64_base64_table_dec[str & 0xFF]) > 63) { + break; + } + res |= dec << 16; + + // Reshuffle and repack into 6-byte output format: + res = be64_to_cpu(res); + + // Store back: + *(uint64_t *)o = res; + + c += 8; + o += 6; + outl += 6; + srclen -= 8; +} diff --git a/contrib/libs/base64/neon64/enc_head.c b/contrib/libs/base64/neon64/enc_head.c index 9f8147d608..92d5ed50b4 100644 --- a/contrib/libs/base64/neon64/enc_head.c +++ b/contrib/libs/base64/neon64/enc_head.c @@ -1,23 +1,23 @@ -// Assume that *out is large enough to contain the output. -// Theoretically it should be 4/3 the length of src. 
-const uint8_t *c = (const uint8_t *)src; -uint8_t *o = (uint8_t *)out; - -// Use local temporaries to avoid cache thrashing: -size_t outl = 0; -struct neon64_base64_state st; -st.bytes = state->bytes; -st.carry = state->carry; - -// Turn three bytes into four 6-bit numbers: -// in[0] = 00111111 -// in[1] = 00112222 -// in[2] = 00222233 -// in[3] = 00333333 - -// Duff's device, a for() loop inside a switch() statement. Legal! -switch (st.bytes) -{ - for (;;) - { - case 0: +// Assume that *out is large enough to contain the output. +// Theoretically it should be 4/3 the length of src. +const uint8_t *c = (const uint8_t *)src; +uint8_t *o = (uint8_t *)out; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct neon64_base64_state st; +st.bytes = state->bytes; +st.carry = state->carry; + +// Turn three bytes into four 6-bit numbers: +// in[0] = 00111111 +// in[1] = 00112222 +// in[2] = 00222233 +// in[3] = 00333333 + +// Duff's device, a for() loop inside a switch() statement. Legal! 
+switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/neon64/enc_neon.c b/contrib/libs/base64/neon64/enc_neon.c index 2ba5a561e9..9cf28a11f8 100644 --- a/contrib/libs/base64/neon64/enc_neon.c +++ b/contrib/libs/base64/neon64/enc_neon.c @@ -1,37 +1,37 @@ -// If we have ARM NEON support, pick off 48 bytes at a time: -while (srclen >= 48) -{ - uint8x16x3_t str; - uint8x16x4_t res; - - // Load 48 bytes and deinterleave: - str = vld3q_u8((uint8_t *)c); - - // Divide bits of three input bytes over four output bytes: - res.val[0] = vshrq_n_u8(str.val[0], 2); - res.val[1] = vshrq_n_u8(str.val[1], 4) | vshlq_n_u8(str.val[0], 4); - res.val[2] = vshrq_n_u8(str.val[2], 6) | vshlq_n_u8(str.val[1], 2); - res.val[3] = str.val[2]; - - // Clear top two bits: - res.val[0] &= vdupq_n_u8(0x3F); - res.val[1] &= vdupq_n_u8(0x3F); - res.val[2] &= vdupq_n_u8(0x3F); - res.val[3] &= vdupq_n_u8(0x3F); - - // The bits have now been shifted to the right locations; - // translate their values 0..63 to the Base64 alphabet. 
- // Use a 64-byte table lookup: - res.val[0] = vqtbl4q_u8(tbl_enc, res.val[0]); - res.val[1] = vqtbl4q_u8(tbl_enc, res.val[1]); - res.val[2] = vqtbl4q_u8(tbl_enc, res.val[2]); - res.val[3] = vqtbl4q_u8(tbl_enc, res.val[3]); - - // Interleave and store result: - vst4q_u8((uint8_t *)o, res); - - c += 48; // 3 * 16 bytes of input - o += 64; // 4 * 16 bytes of output - outl += 64; - srclen -= 48; -} +// If we have ARM NEON support, pick off 48 bytes at a time: +while (srclen >= 48) +{ + uint8x16x3_t str; + uint8x16x4_t res; + + // Load 48 bytes and deinterleave: + str = vld3q_u8((uint8_t *)c); + + // Divide bits of three input bytes over four output bytes: + res.val[0] = vshrq_n_u8(str.val[0], 2); + res.val[1] = vshrq_n_u8(str.val[1], 4) | vshlq_n_u8(str.val[0], 4); + res.val[2] = vshrq_n_u8(str.val[2], 6) | vshlq_n_u8(str.val[1], 2); + res.val[3] = str.val[2]; + + // Clear top two bits: + res.val[0] &= vdupq_n_u8(0x3F); + res.val[1] &= vdupq_n_u8(0x3F); + res.val[2] &= vdupq_n_u8(0x3F); + res.val[3] &= vdupq_n_u8(0x3F); + + // The bits have now been shifted to the right locations; + // translate their values 0..63 to the Base64 alphabet. 
+ // Use a 64-byte table lookup: + res.val[0] = vqtbl4q_u8(tbl_enc, res.val[0]); + res.val[1] = vqtbl4q_u8(tbl_enc, res.val[1]); + res.val[2] = vqtbl4q_u8(tbl_enc, res.val[2]); + res.val[3] = vqtbl4q_u8(tbl_enc, res.val[3]); + + // Interleave and store result: + vst4q_u8((uint8_t *)o, res); + + c += 48; // 3 * 16 bytes of input + o += 64; // 4 * 16 bytes of output + outl += 64; + srclen -= 48; +} diff --git a/contrib/libs/base64/neon64/enc_tail.c b/contrib/libs/base64/neon64/enc_tail.c index 86b49fa6af..3c97141902 100644 --- a/contrib/libs/base64/neon64/enc_tail.c +++ b/contrib/libs/base64/neon64/enc_tail.c @@ -1,28 +1,28 @@ - if (srclen-- == 0) { - break; - } - *o++ = neon64_base64_table_enc[*c >> 2]; - st.carry = (*c++ << 4) & 0x30; - st.bytes++; - outl += 1; - - case 1: if (srclen-- == 0) { - break; - } - *o++ = neon64_base64_table_enc[st.carry | (*c >> 4)]; - st.carry = (*c++ << 2) & 0x3C; - st.bytes++; - outl += 1; - - case 2: if (srclen-- == 0) { - break; - } - *o++ = neon64_base64_table_enc[st.carry | (*c >> 6)]; - *o++ = neon64_base64_table_enc[*c++ & 0x3F]; - st.bytes = 0; - outl += 2; - } -} -state->bytes = st.bytes; -state->carry = st.carry; -*outlen = outl; + if (srclen-- == 0) { + break; + } + *o++ = neon64_base64_table_enc[*c >> 2]; + st.carry = (*c++ << 4) & 0x30; + st.bytes++; + outl += 1; + + case 1: if (srclen-- == 0) { + break; + } + *o++ = neon64_base64_table_enc[st.carry | (*c >> 4)]; + st.carry = (*c++ << 2) & 0x3C; + st.bytes++; + outl += 1; + + case 2: if (srclen-- == 0) { + break; + } + *o++ = neon64_base64_table_enc[st.carry | (*c >> 6)]; + *o++ = neon64_base64_table_enc[*c++ & 0x3F]; + st.bytes = 0; + outl += 2; + } +} +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; diff --git a/contrib/libs/base64/neon64/enc_uint64.c b/contrib/libs/base64/neon64/enc_uint64.c index 15e4436fe3..2db3c0be97 100644 --- a/contrib/libs/base64/neon64/enc_uint64.c +++ b/contrib/libs/base64/neon64/enc_uint64.c @@ -1,28 +1,28 @@ -// If we have 
64-bit ints, pick off 6 bytes at a time for as long as we can, -// but ensure that there are at least 8 bytes available to avoid segfaulting: -while (srclen >= 8) -{ - // Load string: - uint64_t str = *(uint64_t *)c; - - // Reorder to 64-bit big-endian, if not already in that format. The - // workset must be in big-endian, otherwise the shifted bits do not - // carry over properly among adjacent bytes: - str = cpu_to_be64(str); - - // Shift input by 6 bytes each round and mask in only the lower 6 bits; - // look up the character in the Base64 encoding table and write it to - // the output location: - *o++ = neon64_base64_table_enc[(str >> 58) & 0x3F]; - *o++ = neon64_base64_table_enc[(str >> 52) & 0x3F]; - *o++ = neon64_base64_table_enc[(str >> 46) & 0x3F]; - *o++ = neon64_base64_table_enc[(str >> 40) & 0x3F]; - *o++ = neon64_base64_table_enc[(str >> 34) & 0x3F]; - *o++ = neon64_base64_table_enc[(str >> 28) & 0x3F]; - *o++ = neon64_base64_table_enc[(str >> 22) & 0x3F]; - *o++ = neon64_base64_table_enc[(str >> 16) & 0x3F]; - - c += 6; // 6 bytes of input - outl += 8; // 8 bytes of output - srclen -= 6; -} +// If we have 64-bit ints, pick off 6 bytes at a time for as long as we can, +// but ensure that there are at least 8 bytes available to avoid segfaulting: +while (srclen >= 8) +{ + // Load string: + uint64_t str = *(uint64_t *)c; + + // Reorder to 64-bit big-endian, if not already in that format. 
The + // workset must be in big-endian, otherwise the shifted bits do not + // carry over properly among adjacent bytes: + str = cpu_to_be64(str); + + // Shift input by 6 bytes each round and mask in only the lower 6 bits; + // look up the character in the Base64 encoding table and write it to + // the output location: + *o++ = neon64_base64_table_enc[(str >> 58) & 0x3F]; + *o++ = neon64_base64_table_enc[(str >> 52) & 0x3F]; + *o++ = neon64_base64_table_enc[(str >> 46) & 0x3F]; + *o++ = neon64_base64_table_enc[(str >> 40) & 0x3F]; + *o++ = neon64_base64_table_enc[(str >> 34) & 0x3F]; + *o++ = neon64_base64_table_enc[(str >> 28) & 0x3F]; + *o++ = neon64_base64_table_enc[(str >> 22) & 0x3F]; + *o++ = neon64_base64_table_enc[(str >> 16) & 0x3F]; + + c += 6; // 6 bytes of input + outl += 8; // 8 bytes of output + srclen -= 6; +} diff --git a/contrib/libs/base64/neon64/lib.c b/contrib/libs/base64/neon64/lib.c index 63d514476e..de5d49e711 100644 --- a/contrib/libs/base64/neon64/lib.c +++ b/contrib/libs/base64/neon64/lib.c @@ -1,122 +1,122 @@ -#include <stdint.h> -#include <stddef.h> -#include <stdlib.h> - -#include "libbase64.h" -#include "codecs.h" - -const uint8_t -neon64_base64_table_enc[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - -// In the lookup table below, note that the value for '=' (character 61) is -// 254, not 255. This character is used for in-band signaling of the end of -// the datastream, and we will use that later. The characters A-Z, a-z, 0-9 -// and + / are mapped to their "decoded" values. The other bytes all map to -// the value 255, which flags them as "invalid input". 
- -const uint8_t -neon64_base64_table_dec[] = -{ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 - 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 - 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, -}; - -void -neon64_base64_stream_encode_init (struct neon64_base64_state *state) -{ - state->eof = 0; - state->bytes = 0; - state->carry = 0; -} - -void -neon64_base64_stream_encode_final - ( struct neon64_base64_state *state - , char *out - , size_t *outlen - ) -{ - uint8_t *o = (uint8_t *)out; - - if (state->bytes == 1) { - *o++ = neon64_base64_table_enc[state->carry]; - *o++ = '='; - *o++ = '='; - *outlen = 3; - return; - } - if (state->bytes == 2) { - *o++ = neon64_base64_table_enc[state->carry]; - *o++ = '='; - *outlen = 2; - return; - } - *outlen = 0; -} - -void -neon64_base64_stream_decode_init (struct 
neon64_base64_state *state) -{ - state->eof = 0; - state->bytes = 0; - state->carry = 0; -} - -void -neon64_base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - size_t s; - size_t t; - struct neon64_base64_state state; - - // Init the stream reader: - neon64_base64_stream_encode_init(&state); - - // Feed the whole string to the stream reader: - neon64_base64_stream_encode(&state, src, srclen, out, &s); - - // Finalize the stream by writing trailer if any: - neon64_base64_stream_encode_final(&state, out + s, &t); - - // Final output length is stream length plus tail: - *outlen = s + t; -} - -int -neon64_base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - struct neon64_base64_state state; - - // Init the stream reader: - neon64_base64_stream_decode_init(&state); - - // Feed the whole string to the stream reader: - return neon64_base64_stream_decode(&state, src, srclen, out, outlen); -} +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> + +#include "libbase64.h" +#include "codecs.h" + +const uint8_t +neon64_base64_table_enc[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +// In the lookup table below, note that the value for '=' (character 61) is +// 254, not 255. This character is used for in-band signaling of the end of +// the datastream, and we will use that later. The characters A-Z, a-z, 0-9 +// and + / are mapped to their "decoded" values. The other bytes all map to +// the value 255, which flags them as "invalid input". 
+ +const uint8_t +neon64_base64_table_dec[] = +{ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +}; + +void +neon64_base64_stream_encode_init (struct neon64_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +neon64_base64_stream_encode_final + ( struct neon64_base64_state *state + , char *out + , size_t *outlen + ) +{ + uint8_t *o = (uint8_t *)out; + + if (state->bytes == 1) { + *o++ = neon64_base64_table_enc[state->carry]; + *o++ = '='; + *o++ = '='; + *outlen = 3; + return; + } + if (state->bytes == 2) { + *o++ = neon64_base64_table_enc[state->carry]; + *o++ = '='; + *outlen = 2; + return; + } + *outlen = 0; +} + +void +neon64_base64_stream_decode_init (struct 
neon64_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +neon64_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + size_t s; + size_t t; + struct neon64_base64_state state; + + // Init the stream reader: + neon64_base64_stream_encode_init(&state); + + // Feed the whole string to the stream reader: + neon64_base64_stream_encode(&state, src, srclen, out, &s); + + // Finalize the stream by writing trailer if any: + neon64_base64_stream_encode_final(&state, out + s, &t); + + // Final output length is stream length plus tail: + *outlen = s + t; +} + +int +neon64_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + struct neon64_base64_state state; + + // Init the stream reader: + neon64_base64_stream_decode_init(&state); + + // Feed the whole string to the stream reader: + return neon64_base64_stream_decode(&state, src, srclen, out, outlen); +} diff --git a/contrib/libs/base64/neon64/libbase64.h b/contrib/libs/base64/neon64/libbase64.h index 7566eb7545..a7224965d7 100644 --- a/contrib/libs/base64/neon64/libbase64.h +++ b/contrib/libs/base64/neon64/libbase64.h @@ -1,89 +1,89 @@ -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -struct neon64_base64_state { - int eof; - int bytes; - unsigned char carry; -}; - -/* Wrapper function to encode a plain string of given length. Output is written - * to *out without trailing zero. Output length in bytes is written to *outlen. - * The buffer in `out` has been allocated by the caller and is at least 4/3 the - * size of the input. See above for `flags`; set to 0 for default operation: */ -void neon64_base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Call this before calling base64_stream_encode() to init the state. 
See above - * for `flags`; set to 0 for default operation: */ -void neon64_base64_stream_encode_init - ( struct neon64_base64_state *state - ) ; - -/* Encodes the block of data of given length at `src`, into the buffer at - * `out`. Caller is responsible for allocating a large enough out-buffer; it - * must be at least 4/3 the size of the in-buffer, but take some margin. Places - * the number of new bytes written into `outlen` (which is set to zero when the - * function starts). Does not zero-terminate or finalize the output. */ -void neon64_base64_stream_encode - ( struct neon64_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Finalizes the output begun by previous calls to `base64_stream_encode()`. - * Adds the required end-of-stream markers if appropriate. `outlen` is modified - * and will contain the number of new bytes written at `out` (which will quite - * often be zero). */ -void neon64_base64_stream_encode_final - ( struct neon64_base64_state *state - , char *out - , size_t *outlen - ) ; - -/* Wrapper function to decode a plain string of given length. Output is written - * to *out without trailing zero. Output length in bytes is written to *outlen. - * The buffer in `out` has been allocated by the caller and is at least 3/4 the - * size of the input. See above for `flags`, set to 0 for default operation: */ -int neon64_base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Call this before calling base64_stream_decode() to init the state. See above - * for `flags`; set to 0 for default operation: */ -void neon64_base64_stream_decode_init - ( struct neon64_base64_state *state - ) ; - -/* Decodes the block of data of given length at `src`, into the buffer at - * `out`. Caller is responsible for allocating a large enough out-buffer; it - * must be at least 3/4 the size of the in-buffer, but take some margin. 
Places - * the number of new bytes written into `outlen` (which is set to zero when the - * function starts). Does not zero-terminate the output. Returns 1 if all is - * well, and 0 if a decoding error was found, such as an invalid character. - * Returns -1 if the chosen codec is not included in the current build. Used by - * the test harness to check whether a codec is available for testing. */ -int neon64_base64_stream_decode - ( struct neon64_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -#ifdef __cplusplus -} -#endif - +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct neon64_base64_state { + int eof; + int bytes; + unsigned char carry; +}; + +/* Wrapper function to encode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 4/3 the + * size of the input. See above for `flags`; set to 0 for default operation: */ +void neon64_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling base64_stream_encode() to init the state. See above + * for `flags`; set to 0 for default operation: */ +void neon64_base64_stream_encode_init + ( struct neon64_base64_state *state + ) ; + +/* Encodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 4/3 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate or finalize the output. */ +void neon64_base64_stream_encode + ( struct neon64_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Finalizes the output begun by previous calls to `base64_stream_encode()`. 
+ * Adds the required end-of-stream markers if appropriate. `outlen` is modified + * and will contain the number of new bytes written at `out` (which will quite + * often be zero). */ +void neon64_base64_stream_encode_final + ( struct neon64_base64_state *state + , char *out + , size_t *outlen + ) ; + +/* Wrapper function to decode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 3/4 the + * size of the input. See above for `flags`, set to 0 for default operation: */ +int neon64_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling base64_stream_decode() to init the state. See above + * for `flags`; set to 0 for default operation: */ +void neon64_base64_stream_decode_init + ( struct neon64_base64_state *state + ) ; + +/* Decodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 3/4 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate the output. Returns 1 if all is + * well, and 0 if a decoding error was found, such as an invalid character. + * Returns -1 if the chosen codec is not included in the current build. Used by + * the test harness to check whether a codec is available for testing. 
*/ +int neon64_base64_stream_decode + ( struct neon64_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +#ifdef __cplusplus +} +#endif + diff --git a/contrib/libs/base64/neon64/ya.make b/contrib/libs/base64/neon64/ya.make index 2b171abf95..82d821b4ee 100644 --- a/contrib/libs/base64/neon64/ya.make +++ b/contrib/libs/base64/neon64/ya.make @@ -1,11 +1,11 @@ -OWNER( - yazevnul +OWNER( + yazevnul g:contrib g:cpp-contrib -) - -LIBRARY() - +) + +LIBRARY() + LICENSE( BSD-2-Clause AND MIT @@ -13,20 +13,20 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -NO_UTIL() - -SRCS( - codec_neon64.c - lib.c -) - -IF (ARCH_AARCH64 OR ARCH_ARM64) +NO_UTIL() + +SRCS( + codec_neon64.c + lib.c +) + +IF (ARCH_AARCH64 OR ARCH_ARM64) IF (OS_LINUX OR OS_DARWIN OR OS_ANDROID) CONLYFLAGS( -march=armv8-a -std=c11 ) - ENDIF() -ENDIF() - -END() + ENDIF() +ENDIF() + +END() diff --git a/contrib/libs/base64/plain32/codec_plain.c b/contrib/libs/base64/plain32/codec_plain.c index 0960e8dfb9..740d343468 100644 --- a/contrib/libs/base64/plain32/codec_plain.c +++ b/contrib/libs/base64/plain32/codec_plain.c @@ -1,35 +1,35 @@ -#include <stdint.h> -#include <stddef.h> -#include <stdlib.h> +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> #include <string.h> - -#include "libbase64.h" -#include "codecs.h" - -void -plain32_base64_stream_encode - ( struct plain32_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - #include "enc_head.c" - #include "enc_uint32.c" - #include "enc_tail.c" -} - -int -plain32_base64_stream_decode - ( struct plain32_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - #include "dec_head.c" - #include "dec_uint32.c" - #include "dec_tail.c" -} + +#include "libbase64.h" +#include "codecs.h" + +void +plain32_base64_stream_encode + ( struct plain32_base64_state *state + , const char *src + , size_t srclen + , char *out + , 
size_t *outlen + ) +{ + #include "enc_head.c" + #include "enc_uint32.c" + #include "enc_tail.c" +} + +int +plain32_base64_stream_decode + ( struct plain32_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + #include "dec_head.c" + #include "dec_uint32.c" + #include "dec_tail.c" +} diff --git a/contrib/libs/base64/plain32/codecs.h b/contrib/libs/base64/plain32/codecs.h index 233814b09f..0b31c97cd7 100644 --- a/contrib/libs/base64/plain32/codecs.h +++ b/contrib/libs/base64/plain32/codecs.h @@ -1,42 +1,42 @@ -#pragma once - -// Define machine endianness. This is for GCC: -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) - #define PLAIN32_LITTLE_ENDIAN 1 -#else - #define PLAIN32_LITTLE_ENDIAN 0 -#endif - -// This is for Clang: -#ifdef __LITTLE_ENDIAN__ - #define PLAIN32_LITTLE_ENDIAN 1 -#endif - -#ifdef __BIG_ENDIAN__ - #define PLAIN32_LITTLE_ENDIAN 0 -#endif - -// Endian conversion functions -#if PLAIN32_LITTLE_ENDIAN -#if defined(_WIN64) || defined(__WIN32__) || defined(_WIN32) - #define cpu_to_be32(x) _byteswap_ulong(x) - #define cpu_to_be64(x) _byteswap_uint64(x) - #define be32_to_cpu(x) _byteswap_ulong(x) - #define be64_to_cpu(x) _byteswap_uint64(x) -#else - #define cpu_to_be32(x) __builtin_bswap32(x) - #define cpu_to_be64(x) __builtin_bswap64(x) - #define be32_to_cpu(x) __builtin_bswap32(x) - #define be64_to_cpu(x) __builtin_bswap64(x) -#endif -#else - #define cpu_to_be32(x) (x) - #define cpu_to_be64(x) (x) - #define be32_to_cpu(x) (x) - #define be64_to_cpu(x) (x) -#endif - -// These tables are used by all codecs -// for fallback plain encoding/decoding: -extern const uint8_t plain32_base64_table_enc[]; -extern const uint8_t plain32_base64_table_dec[]; +#pragma once + +// Define machine endianness. 
This is for GCC: +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + #define PLAIN32_LITTLE_ENDIAN 1 +#else + #define PLAIN32_LITTLE_ENDIAN 0 +#endif + +// This is for Clang: +#ifdef __LITTLE_ENDIAN__ + #define PLAIN32_LITTLE_ENDIAN 1 +#endif + +#ifdef __BIG_ENDIAN__ + #define PLAIN32_LITTLE_ENDIAN 0 +#endif + +// Endian conversion functions +#if PLAIN32_LITTLE_ENDIAN +#if defined(_WIN64) || defined(__WIN32__) || defined(_WIN32) + #define cpu_to_be32(x) _byteswap_ulong(x) + #define cpu_to_be64(x) _byteswap_uint64(x) + #define be32_to_cpu(x) _byteswap_ulong(x) + #define be64_to_cpu(x) _byteswap_uint64(x) +#else + #define cpu_to_be32(x) __builtin_bswap32(x) + #define cpu_to_be64(x) __builtin_bswap64(x) + #define be32_to_cpu(x) __builtin_bswap32(x) + #define be64_to_cpu(x) __builtin_bswap64(x) +#endif +#else + #define cpu_to_be32(x) (x) + #define cpu_to_be64(x) (x) + #define be32_to_cpu(x) (x) + #define be64_to_cpu(x) (x) +#endif + +// These tables are used by all codecs +// for fallback plain encoding/decoding: +extern const uint8_t plain32_base64_table_enc[]; +extern const uint8_t plain32_base64_table_dec[]; diff --git a/contrib/libs/base64/plain32/dec_head.c b/contrib/libs/base64/plain32/dec_head.c index d95b8b477b..472608fddf 100644 --- a/contrib/libs/base64/plain32/dec_head.c +++ b/contrib/libs/base64/plain32/dec_head.c @@ -1,29 +1,29 @@ -int ret = 0; -const uint8_t *c = (const uint8_t *)src; -uint8_t *o = (uint8_t *)out; -uint8_t q; - -// Use local temporaries to avoid cache thrashing: -size_t outl = 0; -struct plain32_base64_state st; -st.eof = state->eof; -st.bytes = state->bytes; -st.carry = state->carry; - -// If we previously saw an EOF or an invalid character, bail out: -if (st.eof) { - *outlen = 0; - return 0; -} - -// Turn four 6-bit numbers into three bytes: -// out[0] = 11111122 -// out[1] = 22223333 -// out[2] = 33444444 - -// Duff's device again: -switch (st.bytes) -{ - for (;;) - { - case 0: +int ret = 0; +const uint8_t *c = (const uint8_t *)src; 
+uint8_t *o = (uint8_t *)out; +uint8_t q; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct plain32_base64_state st; +st.eof = state->eof; +st.bytes = state->bytes; +st.carry = state->carry; + +// If we previously saw an EOF or an invalid character, bail out: +if (st.eof) { + *outlen = 0; + return 0; +} + +// Turn four 6-bit numbers into three bytes: +// out[0] = 11111122 +// out[1] = 22223333 +// out[2] = 33444444 + +// Duff's device again: +switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/plain32/dec_tail.c b/contrib/libs/base64/plain32/dec_tail.c index 27870a94f0..06ab784877 100644 --- a/contrib/libs/base64/plain32/dec_tail.c +++ b/contrib/libs/base64/plain32/dec_tail.c @@ -1,65 +1,65 @@ - if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = plain32_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // Treat character '=' as invalid for byte 0: - break; - } - st.carry = q << 2; - st.bytes++; - - case 1: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = plain32_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // Treat character '=' as invalid for byte 1: - break; - } - *o++ = st.carry | (q >> 4); - st.carry = q << 4; - st.bytes++; - outl++; - - case 2: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = plain32_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // When q == 254, the input char is '='. Return 1 and EOF. - // Technically, should check if next byte is also '=', but never mind. - // When q == 255, the input char is invalid. Return 0 and EOF. - ret = (q == 254) ? 1 : 0; - break; - } - *o++ = st.carry | (q >> 2); - st.carry = q << 6; - st.bytes++; - outl++; - - case 3: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = plain32_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // When q == 254, the input char is '='. Return 1 and EOF. - // When q == 255, the input char is invalid. Return 0 and EOF. - ret = (q == 254) ? 
1 : 0; - break; - } - *o++ = st.carry | q; - st.carry = 0; - st.bytes = 0; - outl++; - } -} -state->eof = st.eof; -state->bytes = st.bytes; -state->carry = st.carry; -*outlen = outl; -return ret; + if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = plain32_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 0: + break; + } + st.carry = q << 2; + st.bytes++; + + case 1: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = plain32_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 1: + break; + } + *o++ = st.carry | (q >> 4); + st.carry = q << 4; + st.bytes++; + outl++; + + case 2: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = plain32_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // Technically, should check if next byte is also '=', but never mind. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | (q >> 2); + st.carry = q << 6; + st.bytes++; + outl++; + + case 3: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = plain32_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | q; + st.carry = 0; + st.bytes = 0; + outl++; + } +} +state->eof = st.eof; +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; +return ret; diff --git a/contrib/libs/base64/plain32/dec_uint32.c b/contrib/libs/base64/plain32/dec_uint32.c index 58fd78c742..db701d73d4 100644 --- a/contrib/libs/base64/plain32/dec_uint32.c +++ b/contrib/libs/base64/plain32/dec_uint32.c @@ -1,50 +1,50 @@ -// If we have native uint32's, pick off 4 bytes at a time for as long as we -// can, but make sure that we quit before seeing any == markers at the end of -// the string. 
Also, because we write a zero at the end of the output, ensure -// that there are at least 2 valid bytes of input data remaining to close the -// gap. 4 + 2 + 2 = 8 bytes: -while (srclen >= 8) -{ - uint32_t str, res, dec; - - // Load string: +// If we have native uint32's, pick off 4 bytes at a time for as long as we +// can, but make sure that we quit before seeing any == markers at the end of +// the string. Also, because we write a zero at the end of the output, ensure +// that there are at least 2 valid bytes of input data remaining to close the +// gap. 4 + 2 + 2 = 8 bytes: +while (srclen >= 8) +{ + uint32_t str, res, dec; + + // Load string: //str = *(uint32_t *)c; memcpy(&str, c, sizeof(str)); - - // Shuffle bytes to 32-bit bigendian: - str = cpu_to_be32(str); - - // Lookup each byte in the decoding table; if we encounter any - // "invalid" values, fall back on the bytewise code: - if ((dec = plain32_base64_table_dec[str >> 24]) > 63) { - break; - } - res = dec << 26; - - if ((dec = plain32_base64_table_dec[(str >> 16) & 0xFF]) > 63) { - break; - } - res |= dec << 20; - - if ((dec = plain32_base64_table_dec[(str >> 8) & 0xFF]) > 63) { - break; - } - res |= dec << 14; - - if ((dec = plain32_base64_table_dec[str & 0xFF]) > 63) { - break; - } - res |= dec << 8; - - // Reshuffle and repack into 3-byte output format: - res = be32_to_cpu(res); - - // Store back: + + // Shuffle bytes to 32-bit bigendian: + str = cpu_to_be32(str); + + // Lookup each byte in the decoding table; if we encounter any + // "invalid" values, fall back on the bytewise code: + if ((dec = plain32_base64_table_dec[str >> 24]) > 63) { + break; + } + res = dec << 26; + + if ((dec = plain32_base64_table_dec[(str >> 16) & 0xFF]) > 63) { + break; + } + res |= dec << 20; + + if ((dec = plain32_base64_table_dec[(str >> 8) & 0xFF]) > 63) { + break; + } + res |= dec << 14; + + if ((dec = plain32_base64_table_dec[str & 0xFF]) > 63) { + break; + } + res |= dec << 8; + + // Reshuffle and repack into 
3-byte output format: + res = be32_to_cpu(res); + + // Store back: //*(uint32_t *)o = res; memcpy(o, &res, sizeof(res)); - - c += 4; - o += 3; - outl += 3; - srclen -= 4; -} + + c += 4; + o += 3; + outl += 3; + srclen -= 4; +} diff --git a/contrib/libs/base64/plain32/enc_head.c b/contrib/libs/base64/plain32/enc_head.c index e06f2b727a..451f1bdad0 100644 --- a/contrib/libs/base64/plain32/enc_head.c +++ b/contrib/libs/base64/plain32/enc_head.c @@ -1,23 +1,23 @@ -// Assume that *out is large enough to contain the output. -// Theoretically it should be 4/3 the length of src. -const uint8_t *c = (const uint8_t *)src; -uint8_t *o = (uint8_t *)out; - -// Use local temporaries to avoid cache thrashing: -size_t outl = 0; -struct plain32_base64_state st; -st.bytes = state->bytes; -st.carry = state->carry; - -// Turn three bytes into four 6-bit numbers: -// in[0] = 00111111 -// in[1] = 00112222 -// in[2] = 00222233 -// in[3] = 00333333 - -// Duff's device, a for() loop inside a switch() statement. Legal! -switch (st.bytes) -{ - for (;;) - { - case 0: +// Assume that *out is large enough to contain the output. +// Theoretically it should be 4/3 the length of src. +const uint8_t *c = (const uint8_t *)src; +uint8_t *o = (uint8_t *)out; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct plain32_base64_state st; +st.bytes = state->bytes; +st.carry = state->carry; + +// Turn three bytes into four 6-bit numbers: +// in[0] = 00111111 +// in[1] = 00112222 +// in[2] = 00222233 +// in[3] = 00333333 + +// Duff's device, a for() loop inside a switch() statement. Legal! 
+switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/plain32/enc_tail.c b/contrib/libs/base64/plain32/enc_tail.c index f5a8c2756a..5840068264 100644 --- a/contrib/libs/base64/plain32/enc_tail.c +++ b/contrib/libs/base64/plain32/enc_tail.c @@ -1,28 +1,28 @@ - if (srclen-- == 0) { - break; - } - *o++ = plain32_base64_table_enc[*c >> 2]; - st.carry = (*c++ << 4) & 0x30; - st.bytes++; - outl += 1; - - case 1: if (srclen-- == 0) { - break; - } - *o++ = plain32_base64_table_enc[st.carry | (*c >> 4)]; - st.carry = (*c++ << 2) & 0x3C; - st.bytes++; - outl += 1; - - case 2: if (srclen-- == 0) { - break; - } - *o++ = plain32_base64_table_enc[st.carry | (*c >> 6)]; - *o++ = plain32_base64_table_enc[*c++ & 0x3F]; - st.bytes = 0; - outl += 2; - } -} -state->bytes = st.bytes; -state->carry = st.carry; -*outlen = outl; + if (srclen-- == 0) { + break; + } + *o++ = plain32_base64_table_enc[*c >> 2]; + st.carry = (*c++ << 4) & 0x30; + st.bytes++; + outl += 1; + + case 1: if (srclen-- == 0) { + break; + } + *o++ = plain32_base64_table_enc[st.carry | (*c >> 4)]; + st.carry = (*c++ << 2) & 0x3C; + st.bytes++; + outl += 1; + + case 2: if (srclen-- == 0) { + break; + } + *o++ = plain32_base64_table_enc[st.carry | (*c >> 6)]; + *o++ = plain32_base64_table_enc[*c++ & 0x3F]; + st.bytes = 0; + outl += 2; + } +} +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; diff --git a/contrib/libs/base64/plain32/enc_uint32.c b/contrib/libs/base64/plain32/enc_uint32.c index a22f599ce4..1dbe5fbe53 100644 --- a/contrib/libs/base64/plain32/enc_uint32.c +++ b/contrib/libs/base64/plain32/enc_uint32.c @@ -1,27 +1,27 @@ -// If we have 32-bit ints, pick off 3 bytes at a time for as long as we can, -// but ensure that there are at least 4 bytes available to avoid segfaulting: -while (srclen >= 4) -{ - // Load string: +// If we have 32-bit ints, pick off 3 bytes at a time for as long as we can, +// but ensure that there are at least 4 bytes available to avoid 
segfaulting: +while (srclen >= 4) +{ + // Load string: //uint32_t str = *(uint32_t *)c; uint32_t str; - + memcpy(&str, c, sizeof(str)); - // Reorder to 32-bit big-endian, if not already in that format. The - // workset must be in big-endian, otherwise the shifted bits do not - // carry over properly among adjacent bytes: - str = cpu_to_be32(str); - - // Shift input by 6 bytes each round and mask in only the lower 6 bits; - // look up the character in the Base64 encoding table and write it to - // the output location: - *o++ = plain32_base64_table_enc[(str >> 26) & 0x3F]; - *o++ = plain32_base64_table_enc[(str >> 20) & 0x3F]; - *o++ = plain32_base64_table_enc[(str >> 14) & 0x3F]; - *o++ = plain32_base64_table_enc[(str >> 8) & 0x3F]; - - c += 3; // 3 bytes of input - outl += 4; // 4 bytes of output - srclen -= 3; -} + // Reorder to 32-bit big-endian, if not already in that format. The + // workset must be in big-endian, otherwise the shifted bits do not + // carry over properly among adjacent bytes: + str = cpu_to_be32(str); + + // Shift input by 6 bytes each round and mask in only the lower 6 bits; + // look up the character in the Base64 encoding table and write it to + // the output location: + *o++ = plain32_base64_table_enc[(str >> 26) & 0x3F]; + *o++ = plain32_base64_table_enc[(str >> 20) & 0x3F]; + *o++ = plain32_base64_table_enc[(str >> 14) & 0x3F]; + *o++ = plain32_base64_table_enc[(str >> 8) & 0x3F]; + + c += 3; // 3 bytes of input + outl += 4; // 4 bytes of output + srclen -= 3; +} diff --git a/contrib/libs/base64/plain32/lib.c b/contrib/libs/base64/plain32/lib.c index d42ef50c36..9eb2e705b4 100644 --- a/contrib/libs/base64/plain32/lib.c +++ b/contrib/libs/base64/plain32/lib.c @@ -1,121 +1,121 @@ -#include <stdint.h> -#include <stddef.h> - -#include "libbase64.h" -#include "codecs.h" - -const uint8_t -plain32_base64_table_enc[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - -// In the lookup table below, note that the 
value for '=' (character 61) is -// 254, not 255. This character is used for in-band signaling of the end of -// the datastream, and we will use that later. The characters A-Z, a-z, 0-9 -// and + / are mapped to their "decoded" values. The other bytes all map to -// the value 255, which flags them as "invalid input". - -const uint8_t -plain32_base64_table_dec[] = -{ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 - 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 - 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, -}; - -void -plain32_base64_stream_encode_init (struct plain32_base64_state *state) -{ - state->eof = 0; - state->bytes = 0; - state->carry = 0; -} - -void -plain32_base64_stream_encode_final - ( struct plain32_base64_state *state - , char *out - , size_t *outlen - ) -{ - uint8_t *o = (uint8_t 
*)out; - - if (state->bytes == 1) { - *o++ = plain32_base64_table_enc[state->carry]; - *o++ = '='; - *o++ = '='; - *outlen = 3; - return; - } - if (state->bytes == 2) { - *o++ = plain32_base64_table_enc[state->carry]; - *o++ = '='; - *outlen = 2; - return; - } - *outlen = 0; -} - -void -plain32_base64_stream_decode_init (struct plain32_base64_state *state) -{ - state->eof = 0; - state->bytes = 0; - state->carry = 0; -} - -void -plain32_base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - size_t s; - size_t t; - struct plain32_base64_state state; - - // Init the stream reader: - plain32_base64_stream_encode_init(&state); - - // Feed the whole string to the stream reader: - plain32_base64_stream_encode(&state, src, srclen, out, &s); - - // Finalize the stream by writing trailer if any: - plain32_base64_stream_encode_final(&state, out + s, &t); - - // Final output length is stream length plus tail: - *outlen = s + t; -} - -int -plain32_base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - struct plain32_base64_state state; - - // Init the stream reader: - plain32_base64_stream_decode_init(&state); - - // Feed the whole string to the stream reader: - return plain32_base64_stream_decode(&state, src, srclen, out, outlen); -} +#include <stdint.h> +#include <stddef.h> + +#include "libbase64.h" +#include "codecs.h" + +const uint8_t +plain32_base64_table_enc[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +// In the lookup table below, note that the value for '=' (character 61) is +// 254, not 255. This character is used for in-band signaling of the end of +// the datastream, and we will use that later. The characters A-Z, a-z, 0-9 +// and + / are mapped to their "decoded" values. The other bytes all map to +// the value 255, which flags them as "invalid input". 
+ +const uint8_t +plain32_base64_table_dec[] = +{ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +}; + +void +plain32_base64_stream_encode_init (struct plain32_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +plain32_base64_stream_encode_final + ( struct plain32_base64_state *state + , char *out + , size_t *outlen + ) +{ + uint8_t *o = (uint8_t *)out; + + if (state->bytes == 1) { + *o++ = plain32_base64_table_enc[state->carry]; + *o++ = '='; + *o++ = '='; + *outlen = 3; + return; + } + if (state->bytes == 2) { + *o++ = plain32_base64_table_enc[state->carry]; + *o++ = '='; + *outlen = 2; + return; + } + *outlen = 0; +} + +void +plain32_base64_stream_decode_init 
(struct plain32_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +plain32_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + size_t s; + size_t t; + struct plain32_base64_state state; + + // Init the stream reader: + plain32_base64_stream_encode_init(&state); + + // Feed the whole string to the stream reader: + plain32_base64_stream_encode(&state, src, srclen, out, &s); + + // Finalize the stream by writing trailer if any: + plain32_base64_stream_encode_final(&state, out + s, &t); + + // Final output length is stream length plus tail: + *outlen = s + t; +} + +int +plain32_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + struct plain32_base64_state state; + + // Init the stream reader: + plain32_base64_stream_decode_init(&state); + + // Feed the whole string to the stream reader: + return plain32_base64_stream_decode(&state, src, srclen, out, outlen); +} diff --git a/contrib/libs/base64/plain32/libbase64.h b/contrib/libs/base64/plain32/libbase64.h index 2c2ec175e0..db54ea4fc2 100644 --- a/contrib/libs/base64/plain32/libbase64.h +++ b/contrib/libs/base64/plain32/libbase64.h @@ -1,89 +1,89 @@ -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -struct plain32_base64_state { - int eof; - int bytes; - unsigned char carry; -}; - -/* Wrapper function to encode a plain string of given length. Output is written - * to *out without trailing zero. Output length in bytes is written to *outlen. - * The buffer in `out` has been allocated by the caller and is at least 4/3 the - * size of the input. See above for `flags`; set to 0 for default operation: */ -void plain32_base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Call this before calling base64_stream_encode() to init the state. 
See above - * for `flags`; set to 0 for default operation: */ -void plain32_base64_stream_encode_init - ( struct plain32_base64_state *state - ) ; - -/* Encodes the block of data of given length at `src`, into the buffer at - * `out`. Caller is responsible for allocating a large enough out-buffer; it - * must be at least 4/3 the size of the in-buffer, but take some margin. Places - * the number of new bytes written into `outlen` (which is set to zero when the - * function starts). Does not zero-terminate or finalize the output. */ -void plain32_base64_stream_encode - ( struct plain32_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Finalizes the output begun by previous calls to `base64_stream_encode()`. - * Adds the required end-of-stream markers if appropriate. `outlen` is modified - * and will contain the number of new bytes written at `out` (which will quite - * often be zero). */ -void plain32_base64_stream_encode_final - ( struct plain32_base64_state *state - , char *out - , size_t *outlen - ) ; - -/* Wrapper function to decode a plain string of given length. Output is written - * to *out without trailing zero. Output length in bytes is written to *outlen. - * The buffer in `out` has been allocated by the caller and is at least 3/4 the - * size of the input. See above for `flags`, set to 0 for default operation: */ -int plain32_base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Call this before calling base64_stream_decode() to init the state. See above - * for `flags`; set to 0 for default operation: */ -void plain32_base64_stream_decode_init - ( struct plain32_base64_state *state - ) ; - -/* Decodes the block of data of given length at `src`, into the buffer at - * `out`. Caller is responsible for allocating a large enough out-buffer; it - * must be at least 3/4 the size of the in-buffer, but take some margin. 
Places - * the number of new bytes written into `outlen` (which is set to zero when the - * function starts). Does not zero-terminate the output. Returns 1 if all is - * well, and 0 if a decoding error was found, such as an invalid character. - * Returns -1 if the chosen codec is not included in the current build. Used by - * the test harness to check whether a codec is available for testing. */ -int plain32_base64_stream_decode - ( struct plain32_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -#ifdef __cplusplus -} -#endif - +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct plain32_base64_state { + int eof; + int bytes; + unsigned char carry; +}; + +/* Wrapper function to encode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 4/3 the + * size of the input. See above for `flags`; set to 0 for default operation: */ +void plain32_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling base64_stream_encode() to init the state. See above + * for `flags`; set to 0 for default operation: */ +void plain32_base64_stream_encode_init + ( struct plain32_base64_state *state + ) ; + +/* Encodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 4/3 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate or finalize the output. */ +void plain32_base64_stream_encode + ( struct plain32_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Finalizes the output begun by previous calls to `base64_stream_encode()`. 
+ * Adds the required end-of-stream markers if appropriate. `outlen` is modified + * and will contain the number of new bytes written at `out` (which will quite + * often be zero). */ +void plain32_base64_stream_encode_final + ( struct plain32_base64_state *state + , char *out + , size_t *outlen + ) ; + +/* Wrapper function to decode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 3/4 the + * size of the input. See above for `flags`, set to 0 for default operation: */ +int plain32_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling base64_stream_decode() to init the state. See above + * for `flags`; set to 0 for default operation: */ +void plain32_base64_stream_decode_init + ( struct plain32_base64_state *state + ) ; + +/* Decodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 3/4 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate the output. Returns 1 if all is + * well, and 0 if a decoding error was found, such as an invalid character. + * Returns -1 if the chosen codec is not included in the current build. Used by + * the test harness to check whether a codec is available for testing. 
*/ +int plain32_base64_stream_decode + ( struct plain32_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +#ifdef __cplusplus +} +#endif + diff --git a/contrib/libs/base64/plain32/ya.make b/contrib/libs/base64/plain32/ya.make index 2055f8f513..1f5a9ad204 100644 --- a/contrib/libs/base64/plain32/ya.make +++ b/contrib/libs/base64/plain32/ya.make @@ -1,11 +1,11 @@ -OWNER( - yazevnul +OWNER( + yazevnul g:contrib g:cpp-contrib -) - -LIBRARY() - +) + +LIBRARY() + LICENSE( BSD-2-Clause AND MIT @@ -13,15 +13,15 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -NO_UTIL() - -SRCS( - codec_plain.c - lib.c -) - +NO_UTIL() + +SRCS( + codec_plain.c + lib.c +) + IF (OS_LINUX OR OS_DARWIN) CONLYFLAGS(-std=c11) -ENDIF() - -END() +ENDIF() + +END() diff --git a/contrib/libs/base64/plain64/codec_plain.c b/contrib/libs/base64/plain64/codec_plain.c index 841bd96bfd..26a5af9097 100644 --- a/contrib/libs/base64/plain64/codec_plain.c +++ b/contrib/libs/base64/plain64/codec_plain.c @@ -1,35 +1,35 @@ -#include <stdint.h> -#include <stddef.h> -#include <stdlib.h> +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> #include <string.h> - -#include "libbase64.h" -#include "codecs.h" - -void -plain64_base64_stream_encode - ( struct plain64_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - #include "enc_head.c" - #include "enc_uint64.c" - #include "enc_tail.c" -} - -int -plain64_base64_stream_decode - ( struct plain64_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - #include "dec_head.c" - #include "dec_uint64.c" - #include "dec_tail.c" -} + +#include "libbase64.h" +#include "codecs.h" + +void +plain64_base64_stream_encode + ( struct plain64_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + #include "enc_head.c" + #include "enc_uint64.c" + #include "enc_tail.c" +} + +int 
+plain64_base64_stream_decode + ( struct plain64_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + #include "dec_head.c" + #include "dec_uint64.c" + #include "dec_tail.c" +} diff --git a/contrib/libs/base64/plain64/codecs.h b/contrib/libs/base64/plain64/codecs.h index 24cfbbc7d2..25430f04c0 100644 --- a/contrib/libs/base64/plain64/codecs.h +++ b/contrib/libs/base64/plain64/codecs.h @@ -1,42 +1,42 @@ -#pragma once - -// Define machine endianness. This is for GCC: -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) - #define PLAIN64_LITTLE_ENDIAN 1 -#else - #define PLAIN64_LITTLE_ENDIAN 0 -#endif - -// This is for Clang: -#ifdef __LITTLE_ENDIAN__ - #define PLAIN64_LITTLE_ENDIAN 1 -#endif - -#ifdef __BIG_ENDIAN__ - #define PLAIN64_LITTLE_ENDIAN 0 -#endif - -// Endian conversion functions -#if PLAIN64_LITTLE_ENDIAN -#if defined(_WIN64) || defined(__WIN32__) || defined(_WIN32) - #define cpu_to_be32(x) _byteswap_ulong(x) - #define cpu_to_be64(x) _byteswap_uint64(x) - #define be32_to_cpu(x) _byteswap_ulong(x) - #define be64_to_cpu(x) _byteswap_uint64(x) -#else - #define cpu_to_be32(x) __builtin_bswap32(x) - #define cpu_to_be64(x) __builtin_bswap64(x) - #define be32_to_cpu(x) __builtin_bswap32(x) - #define be64_to_cpu(x) __builtin_bswap64(x) -#endif -#else - #define cpu_to_be32(x) (x) - #define cpu_to_be64(x) (x) - #define be32_to_cpu(x) (x) - #define be64_to_cpu(x) (x) -#endif - -// These tables are used by all codecs -// for fallback plain encoding/decoding: -extern const uint8_t plain64_base64_table_enc[]; -extern const uint8_t plain64_base64_table_dec[]; +#pragma once + +// Define machine endianness. 
This is for GCC: +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + #define PLAIN64_LITTLE_ENDIAN 1 +#else + #define PLAIN64_LITTLE_ENDIAN 0 +#endif + +// This is for Clang: +#ifdef __LITTLE_ENDIAN__ + #define PLAIN64_LITTLE_ENDIAN 1 +#endif + +#ifdef __BIG_ENDIAN__ + #define PLAIN64_LITTLE_ENDIAN 0 +#endif + +// Endian conversion functions +#if PLAIN64_LITTLE_ENDIAN +#if defined(_WIN64) || defined(__WIN32__) || defined(_WIN32) + #define cpu_to_be32(x) _byteswap_ulong(x) + #define cpu_to_be64(x) _byteswap_uint64(x) + #define be32_to_cpu(x) _byteswap_ulong(x) + #define be64_to_cpu(x) _byteswap_uint64(x) +#else + #define cpu_to_be32(x) __builtin_bswap32(x) + #define cpu_to_be64(x) __builtin_bswap64(x) + #define be32_to_cpu(x) __builtin_bswap32(x) + #define be64_to_cpu(x) __builtin_bswap64(x) +#endif +#else + #define cpu_to_be32(x) (x) + #define cpu_to_be64(x) (x) + #define be32_to_cpu(x) (x) + #define be64_to_cpu(x) (x) +#endif + +// These tables are used by all codecs +// for fallback plain encoding/decoding: +extern const uint8_t plain64_base64_table_enc[]; +extern const uint8_t plain64_base64_table_dec[]; diff --git a/contrib/libs/base64/plain64/dec_head.c b/contrib/libs/base64/plain64/dec_head.c index f343f82eba..b505833d52 100644 --- a/contrib/libs/base64/plain64/dec_head.c +++ b/contrib/libs/base64/plain64/dec_head.c @@ -1,29 +1,29 @@ -int ret = 0; -const uint8_t *c = (const uint8_t *)src; -uint8_t *o = (uint8_t *)out; -uint8_t q; - -// Use local temporaries to avoid cache thrashing: -size_t outl = 0; -struct plain64_base64_state st; -st.eof = state->eof; -st.bytes = state->bytes; -st.carry = state->carry; - -// If we previously saw an EOF or an invalid character, bail out: -if (st.eof) { - *outlen = 0; - return 0; -} - -// Turn four 6-bit numbers into three bytes: -// out[0] = 11111122 -// out[1] = 22223333 -// out[2] = 33444444 - -// Duff's device again: -switch (st.bytes) -{ - for (;;) - { - case 0: +int ret = 0; +const uint8_t *c = (const uint8_t *)src; 
+uint8_t *o = (uint8_t *)out; +uint8_t q; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct plain64_base64_state st; +st.eof = state->eof; +st.bytes = state->bytes; +st.carry = state->carry; + +// If we previously saw an EOF or an invalid character, bail out: +if (st.eof) { + *outlen = 0; + return 0; +} + +// Turn four 6-bit numbers into three bytes: +// out[0] = 11111122 +// out[1] = 22223333 +// out[2] = 33444444 + +// Duff's device again: +switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/plain64/dec_tail.c b/contrib/libs/base64/plain64/dec_tail.c index 66b279e741..7db2408190 100644 --- a/contrib/libs/base64/plain64/dec_tail.c +++ b/contrib/libs/base64/plain64/dec_tail.c @@ -1,65 +1,65 @@ - if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = plain64_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // Treat character '=' as invalid for byte 0: - break; - } - st.carry = q << 2; - st.bytes++; - - case 1: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = plain64_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // Treat character '=' as invalid for byte 1: - break; - } - *o++ = st.carry | (q >> 4); - st.carry = q << 4; - st.bytes++; - outl++; - - case 2: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = plain64_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // When q == 254, the input char is '='. Return 1 and EOF. - // Technically, should check if next byte is also '=', but never mind. - // When q == 255, the input char is invalid. Return 0 and EOF. - ret = (q == 254) ? 1 : 0; - break; - } - *o++ = st.carry | (q >> 2); - st.carry = q << 6; - st.bytes++; - outl++; - - case 3: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = plain64_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // When q == 254, the input char is '='. Return 1 and EOF. - // When q == 255, the input char is invalid. Return 0 and EOF. - ret = (q == 254) ? 
1 : 0; - break; - } - *o++ = st.carry | q; - st.carry = 0; - st.bytes = 0; - outl++; - } -} -state->eof = st.eof; -state->bytes = st.bytes; -state->carry = st.carry; -*outlen = outl; -return ret; + if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = plain64_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 0: + break; + } + st.carry = q << 2; + st.bytes++; + + case 1: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = plain64_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 1: + break; + } + *o++ = st.carry | (q >> 4); + st.carry = q << 4; + st.bytes++; + outl++; + + case 2: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = plain64_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // Technically, should check if next byte is also '=', but never mind. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | (q >> 2); + st.carry = q << 6; + st.bytes++; + outl++; + + case 3: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = plain64_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | q; + st.carry = 0; + st.bytes = 0; + outl++; + } +} +state->eof = st.eof; +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; +return ret; diff --git a/contrib/libs/base64/plain64/dec_uint64.c b/contrib/libs/base64/plain64/dec_uint64.c index a09fca5438..fe26e9881b 100644 --- a/contrib/libs/base64/plain64/dec_uint64.c +++ b/contrib/libs/base64/plain64/dec_uint64.c @@ -1,70 +1,70 @@ -// If we have native uint64's, pick off 8 bytes at a time for as long as we -// can, but make sure that we quit before seeing any == markers at the end of -// the string. 
Also, because we write two zeroes at the end of the output, -// ensure that there are at least 3 valid bytes of input data remaining to -// close the gap. 8 + 2 + 3 = 13 bytes: -while (srclen >= 13) -{ - uint64_t str, res, dec; - - // Load string: +// If we have native uint64's, pick off 8 bytes at a time for as long as we +// can, but make sure that we quit before seeing any == markers at the end of +// the string. Also, because we write two zeroes at the end of the output, +// ensure that there are at least 3 valid bytes of input data remaining to +// close the gap. 8 + 2 + 3 = 13 bytes: +while (srclen >= 13) +{ + uint64_t str, res, dec; + + // Load string: //str = *(uint64_t *)c; memcpy(&str, c, sizeof(str)); - - // Shuffle bytes to 64-bit bigendian: - str = cpu_to_be64(str); - - // Lookup each byte in the decoding table; if we encounter any - // "invalid" values, fall back on the bytewise code: - if ((dec = plain64_base64_table_dec[str >> 56]) > 63) { - break; - } - res = dec << 58; - - if ((dec = plain64_base64_table_dec[(str >> 48) & 0xFF]) > 63) { - break; - } - res |= dec << 52; - - if ((dec = plain64_base64_table_dec[(str >> 40) & 0xFF]) > 63) { - break; - } - res |= dec << 46; - - if ((dec = plain64_base64_table_dec[(str >> 32) & 0xFF]) > 63) { - break; - } - res |= dec << 40; - - if ((dec = plain64_base64_table_dec[(str >> 24) & 0xFF]) > 63) { - break; - } - res |= dec << 34; - - if ((dec = plain64_base64_table_dec[(str >> 16) & 0xFF]) > 63) { - break; - } - res |= dec << 28; - - if ((dec = plain64_base64_table_dec[(str >> 8) & 0xFF]) > 63) { - break; - } - res |= dec << 22; - - if ((dec = plain64_base64_table_dec[str & 0xFF]) > 63) { - break; - } - res |= dec << 16; - - // Reshuffle and repack into 6-byte output format: - res = be64_to_cpu(res); - - // Store back: + + // Shuffle bytes to 64-bit bigendian: + str = cpu_to_be64(str); + + // Lookup each byte in the decoding table; if we encounter any + // "invalid" values, fall back on the bytewise code: + 
if ((dec = plain64_base64_table_dec[str >> 56]) > 63) { + break; + } + res = dec << 58; + + if ((dec = plain64_base64_table_dec[(str >> 48) & 0xFF]) > 63) { + break; + } + res |= dec << 52; + + if ((dec = plain64_base64_table_dec[(str >> 40) & 0xFF]) > 63) { + break; + } + res |= dec << 46; + + if ((dec = plain64_base64_table_dec[(str >> 32) & 0xFF]) > 63) { + break; + } + res |= dec << 40; + + if ((dec = plain64_base64_table_dec[(str >> 24) & 0xFF]) > 63) { + break; + } + res |= dec << 34; + + if ((dec = plain64_base64_table_dec[(str >> 16) & 0xFF]) > 63) { + break; + } + res |= dec << 28; + + if ((dec = plain64_base64_table_dec[(str >> 8) & 0xFF]) > 63) { + break; + } + res |= dec << 22; + + if ((dec = plain64_base64_table_dec[str & 0xFF]) > 63) { + break; + } + res |= dec << 16; + + // Reshuffle and repack into 6-byte output format: + res = be64_to_cpu(res); + + // Store back: //*(uint64_t *)o = res; memcpy(o, &res, sizeof(res)); - - c += 8; - o += 6; - outl += 6; - srclen -= 8; -} + + c += 8; + o += 6; + outl += 6; + srclen -= 8; +} diff --git a/contrib/libs/base64/plain64/enc_head.c b/contrib/libs/base64/plain64/enc_head.c index f898eb080a..037a1fff99 100644 --- a/contrib/libs/base64/plain64/enc_head.c +++ b/contrib/libs/base64/plain64/enc_head.c @@ -1,23 +1,23 @@ -// Assume that *out is large enough to contain the output. -// Theoretically it should be 4/3 the length of src. -const uint8_t *c = (const uint8_t *)src; -uint8_t *o = (uint8_t *)out; - -// Use local temporaries to avoid cache thrashing: -size_t outl = 0; -struct plain64_base64_state st; -st.bytes = state->bytes; -st.carry = state->carry; - -// Turn three bytes into four 6-bit numbers: -// in[0] = 00111111 -// in[1] = 00112222 -// in[2] = 00222233 -// in[3] = 00333333 - -// Duff's device, a for() loop inside a switch() statement. Legal! -switch (st.bytes) -{ - for (;;) - { - case 0: +// Assume that *out is large enough to contain the output. +// Theoretically it should be 4/3 the length of src. 
+const uint8_t *c = (const uint8_t *)src; +uint8_t *o = (uint8_t *)out; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct plain64_base64_state st; +st.bytes = state->bytes; +st.carry = state->carry; + +// Turn three bytes into four 6-bit numbers: +// in[0] = 00111111 +// in[1] = 00112222 +// in[2] = 00222233 +// in[3] = 00333333 + +// Duff's device, a for() loop inside a switch() statement. Legal! +switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/plain64/enc_tail.c b/contrib/libs/base64/plain64/enc_tail.c index af3628192e..f98414b777 100644 --- a/contrib/libs/base64/plain64/enc_tail.c +++ b/contrib/libs/base64/plain64/enc_tail.c @@ -1,28 +1,28 @@ - if (srclen-- == 0) { - break; - } - *o++ = plain64_base64_table_enc[*c >> 2]; - st.carry = (*c++ << 4) & 0x30; - st.bytes++; - outl += 1; - - case 1: if (srclen-- == 0) { - break; - } - *o++ = plain64_base64_table_enc[st.carry | (*c >> 4)]; - st.carry = (*c++ << 2) & 0x3C; - st.bytes++; - outl += 1; - - case 2: if (srclen-- == 0) { - break; - } - *o++ = plain64_base64_table_enc[st.carry | (*c >> 6)]; - *o++ = plain64_base64_table_enc[*c++ & 0x3F]; - st.bytes = 0; - outl += 2; - } -} -state->bytes = st.bytes; -state->carry = st.carry; -*outlen = outl; + if (srclen-- == 0) { + break; + } + *o++ = plain64_base64_table_enc[*c >> 2]; + st.carry = (*c++ << 4) & 0x30; + st.bytes++; + outl += 1; + + case 1: if (srclen-- == 0) { + break; + } + *o++ = plain64_base64_table_enc[st.carry | (*c >> 4)]; + st.carry = (*c++ << 2) & 0x3C; + st.bytes++; + outl += 1; + + case 2: if (srclen-- == 0) { + break; + } + *o++ = plain64_base64_table_enc[st.carry | (*c >> 6)]; + *o++ = plain64_base64_table_enc[*c++ & 0x3F]; + st.bytes = 0; + outl += 2; + } +} +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; diff --git a/contrib/libs/base64/plain64/enc_uint64.c b/contrib/libs/base64/plain64/enc_uint64.c index 8fd60e3b79..3d5955af24 100644 --- 
a/contrib/libs/base64/plain64/enc_uint64.c +++ b/contrib/libs/base64/plain64/enc_uint64.c @@ -1,31 +1,31 @@ -// If we have 64-bit ints, pick off 6 bytes at a time for as long as we can, -// but ensure that there are at least 8 bytes available to avoid segfaulting: -while (srclen >= 8) -{ - // Load string: +// If we have 64-bit ints, pick off 6 bytes at a time for as long as we can, +// but ensure that there are at least 8 bytes available to avoid segfaulting: +while (srclen >= 8) +{ + // Load string: //uint64_t str = *(uint64_t *)c; uint64_t str; - + memcpy(&str, c, sizeof(str)); - // Reorder to 64-bit big-endian, if not already in that format. The - // workset must be in big-endian, otherwise the shifted bits do not - // carry over properly among adjacent bytes: - str = cpu_to_be64(str); - - // Shift input by 6 bytes each round and mask in only the lower 6 bits; - // look up the character in the Base64 encoding table and write it to - // the output location: - *o++ = plain64_base64_table_enc[(str >> 58) & 0x3F]; - *o++ = plain64_base64_table_enc[(str >> 52) & 0x3F]; - *o++ = plain64_base64_table_enc[(str >> 46) & 0x3F]; - *o++ = plain64_base64_table_enc[(str >> 40) & 0x3F]; - *o++ = plain64_base64_table_enc[(str >> 34) & 0x3F]; - *o++ = plain64_base64_table_enc[(str >> 28) & 0x3F]; - *o++ = plain64_base64_table_enc[(str >> 22) & 0x3F]; - *o++ = plain64_base64_table_enc[(str >> 16) & 0x3F]; - - c += 6; // 6 bytes of input - outl += 8; // 8 bytes of output - srclen -= 6; -} + // Reorder to 64-bit big-endian, if not already in that format. 
The + // workset must be in big-endian, otherwise the shifted bits do not + // carry over properly among adjacent bytes: + str = cpu_to_be64(str); + + // Shift input by 6 bytes each round and mask in only the lower 6 bits; + // look up the character in the Base64 encoding table and write it to + // the output location: + *o++ = plain64_base64_table_enc[(str >> 58) & 0x3F]; + *o++ = plain64_base64_table_enc[(str >> 52) & 0x3F]; + *o++ = plain64_base64_table_enc[(str >> 46) & 0x3F]; + *o++ = plain64_base64_table_enc[(str >> 40) & 0x3F]; + *o++ = plain64_base64_table_enc[(str >> 34) & 0x3F]; + *o++ = plain64_base64_table_enc[(str >> 28) & 0x3F]; + *o++ = plain64_base64_table_enc[(str >> 22) & 0x3F]; + *o++ = plain64_base64_table_enc[(str >> 16) & 0x3F]; + + c += 6; // 6 bytes of input + outl += 8; // 8 bytes of output + srclen -= 6; +} diff --git a/contrib/libs/base64/plain64/lib.c b/contrib/libs/base64/plain64/lib.c index bb434ad5ec..99d91d2c3c 100644 --- a/contrib/libs/base64/plain64/lib.c +++ b/contrib/libs/base64/plain64/lib.c @@ -1,121 +1,121 @@ -#include <stdint.h> -#include <stddef.h> - -#include "libbase64.h" -#include "codecs.h" - -const uint8_t -plain64_base64_table_enc[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - -// In the lookup table below, note that the value for '=' (character 61) is -// 254, not 255. This character is used for in-band signaling of the end of -// the datastream, and we will use that later. The characters A-Z, a-z, 0-9 -// and + / are mapped to their "decoded" values. The other bytes all map to -// the value 255, which flags them as "invalid input". 
- -const uint8_t -plain64_base64_table_dec[] = -{ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 - 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 - 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, -}; - -void -plain64_base64_stream_encode_init (struct plain64_base64_state *state) -{ - state->eof = 0; - state->bytes = 0; - state->carry = 0; -} - -void -plain64_base64_stream_encode_final - ( struct plain64_base64_state *state - , char *out - , size_t *outlen - ) -{ - uint8_t *o = (uint8_t *)out; - - if (state->bytes == 1) { - *o++ = plain64_base64_table_enc[state->carry]; - *o++ = '='; - *o++ = '='; - *outlen = 3; - return; - } - if (state->bytes == 2) { - *o++ = plain64_base64_table_enc[state->carry]; - *o++ = '='; - *outlen = 2; - return; - } - *outlen = 0; -} - -void -plain64_base64_stream_decode_init 
(struct plain64_base64_state *state) -{ - state->eof = 0; - state->bytes = 0; - state->carry = 0; -} - -void -plain64_base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - size_t s; - size_t t; - struct plain64_base64_state state; - - // Init the stream reader: - plain64_base64_stream_encode_init(&state); - - // Feed the whole string to the stream reader: - plain64_base64_stream_encode(&state, src, srclen, out, &s); - - // Finalize the stream by writing trailer if any: - plain64_base64_stream_encode_final(&state, out + s, &t); - - // Final output length is stream length plus tail: - *outlen = s + t; -} - -int -plain64_base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - struct plain64_base64_state state; - - // Init the stream reader: - plain64_base64_stream_decode_init(&state); - - // Feed the whole string to the stream reader: - return plain64_base64_stream_decode(&state, src, srclen, out, outlen); -} +#include <stdint.h> +#include <stddef.h> + +#include "libbase64.h" +#include "codecs.h" + +const uint8_t +plain64_base64_table_enc[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +// In the lookup table below, note that the value for '=' (character 61) is +// 254, not 255. This character is used for in-band signaling of the end of +// the datastream, and we will use that later. The characters A-Z, a-z, 0-9 +// and + / are mapped to their "decoded" values. The other bytes all map to +// the value 255, which flags them as "invalid input". 
+ +const uint8_t +plain64_base64_table_dec[] = +{ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +}; + +void +plain64_base64_stream_encode_init (struct plain64_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +plain64_base64_stream_encode_final + ( struct plain64_base64_state *state + , char *out + , size_t *outlen + ) +{ + uint8_t *o = (uint8_t *)out; + + if (state->bytes == 1) { + *o++ = plain64_base64_table_enc[state->carry]; + *o++ = '='; + *o++ = '='; + *outlen = 3; + return; + } + if (state->bytes == 2) { + *o++ = plain64_base64_table_enc[state->carry]; + *o++ = '='; + *outlen = 2; + return; + } + *outlen = 0; +} + +void +plain64_base64_stream_decode_init 
(struct plain64_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +plain64_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + size_t s; + size_t t; + struct plain64_base64_state state; + + // Init the stream reader: + plain64_base64_stream_encode_init(&state); + + // Feed the whole string to the stream reader: + plain64_base64_stream_encode(&state, src, srclen, out, &s); + + // Finalize the stream by writing trailer if any: + plain64_base64_stream_encode_final(&state, out + s, &t); + + // Final output length is stream length plus tail: + *outlen = s + t; +} + +int +plain64_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + struct plain64_base64_state state; + + // Init the stream reader: + plain64_base64_stream_decode_init(&state); + + // Feed the whole string to the stream reader: + return plain64_base64_stream_decode(&state, src, srclen, out, outlen); +} diff --git a/contrib/libs/base64/plain64/libbase64.h b/contrib/libs/base64/plain64/libbase64.h index b91b6af924..27a9ce8626 100644 --- a/contrib/libs/base64/plain64/libbase64.h +++ b/contrib/libs/base64/plain64/libbase64.h @@ -1,89 +1,89 @@ -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -struct plain64_base64_state { - int eof; - int bytes; - unsigned char carry; -}; - -/* Wrapper function to encode a plain string of given length. Output is written - * to *out without trailing zero. Output length in bytes is written to *outlen. - * The buffer in `out` has been allocated by the caller and is at least 4/3 the - * size of the input. See above for `flags`; set to 0 for default operation: */ -void plain64_base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Call this before calling base64_stream_encode() to init the state. 
See above - * for `flags`; set to 0 for default operation: */ -void plain64_base64_stream_encode_init - ( struct plain64_base64_state *state - ) ; - -/* Encodes the block of data of given length at `src`, into the buffer at - * `out`. Caller is responsible for allocating a large enough out-buffer; it - * must be at least 4/3 the size of the in-buffer, but take some margin. Places - * the number of new bytes written into `outlen` (which is set to zero when the - * function starts). Does not zero-terminate or finalize the output. */ -void plain64_base64_stream_encode - ( struct plain64_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Finalizes the output begun by previous calls to `base64_stream_encode()`. - * Adds the required end-of-stream markers if appropriate. `outlen` is modified - * and will contain the number of new bytes written at `out` (which will quite - * often be zero). */ -void plain64_base64_stream_encode_final - ( struct plain64_base64_state *state - , char *out - , size_t *outlen - ) ; - -/* Wrapper function to decode a plain string of given length. Output is written - * to *out without trailing zero. Output length in bytes is written to *outlen. - * The buffer in `out` has been allocated by the caller and is at least 3/4 the - * size of the input. See above for `flags`, set to 0 for default operation: */ -int plain64_base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Call this before calling base64_stream_decode() to init the state. See above - * for `flags`; set to 0 for default operation: */ -void plain64_base64_stream_decode_init - ( struct plain64_base64_state *state - ) ; - -/* Decodes the block of data of given length at `src`, into the buffer at - * `out`. Caller is responsible for allocating a large enough out-buffer; it - * must be at least 3/4 the size of the in-buffer, but take some margin. 
Places - * the number of new bytes written into `outlen` (which is set to zero when the - * function starts). Does not zero-terminate the output. Returns 1 if all is - * well, and 0 if a decoding error was found, such as an invalid character. - * Returns -1 if the chosen codec is not included in the current build. Used by - * the test harness to check whether a codec is available for testing. */ -int plain64_base64_stream_decode - ( struct plain64_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -#ifdef __cplusplus -} -#endif - +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct plain64_base64_state { + int eof; + int bytes; + unsigned char carry; +}; + +/* Wrapper function to encode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 4/3 the + * size of the input. See above for `flags`; set to 0 for default operation: */ +void plain64_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling base64_stream_encode() to init the state. See above + * for `flags`; set to 0 for default operation: */ +void plain64_base64_stream_encode_init + ( struct plain64_base64_state *state + ) ; + +/* Encodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 4/3 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate or finalize the output. */ +void plain64_base64_stream_encode + ( struct plain64_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Finalizes the output begun by previous calls to `base64_stream_encode()`. 
+ * Adds the required end-of-stream markers if appropriate. `outlen` is modified + * and will contain the number of new bytes written at `out` (which will quite + * often be zero). */ +void plain64_base64_stream_encode_final + ( struct plain64_base64_state *state + , char *out + , size_t *outlen + ) ; + +/* Wrapper function to decode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 3/4 the + * size of the input. See above for `flags`, set to 0 for default operation: */ +int plain64_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling base64_stream_decode() to init the state. See above + * for `flags`; set to 0 for default operation: */ +void plain64_base64_stream_decode_init + ( struct plain64_base64_state *state + ) ; + +/* Decodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 3/4 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate the output. Returns 1 if all is + * well, and 0 if a decoding error was found, such as an invalid character. + * Returns -1 if the chosen codec is not included in the current build. Used by + * the test harness to check whether a codec is available for testing. 
*/ +int plain64_base64_stream_decode + ( struct plain64_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +#ifdef __cplusplus +} +#endif + diff --git a/contrib/libs/base64/plain64/ya.make b/contrib/libs/base64/plain64/ya.make index 2055f8f513..1f5a9ad204 100644 --- a/contrib/libs/base64/plain64/ya.make +++ b/contrib/libs/base64/plain64/ya.make @@ -1,11 +1,11 @@ -OWNER( - yazevnul +OWNER( + yazevnul g:contrib g:cpp-contrib -) - -LIBRARY() - +) + +LIBRARY() + LICENSE( BSD-2-Clause AND MIT @@ -13,15 +13,15 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -NO_UTIL() - -SRCS( - codec_plain.c - lib.c -) - +NO_UTIL() + +SRCS( + codec_plain.c + lib.c +) + IF (OS_LINUX OR OS_DARWIN) CONLYFLAGS(-std=c11) -ENDIF() - -END() +ENDIF() + +END() diff --git a/contrib/libs/base64/ssse3/codec_ssse3.c b/contrib/libs/base64/ssse3/codec_ssse3.c index c8c9c72c3f..acf4109e7a 100644 --- a/contrib/libs/base64/ssse3/codec_ssse3.c +++ b/contrib/libs/base64/ssse3/codec_ssse3.c @@ -1,168 +1,168 @@ -#include <stdint.h> -#include <stddef.h> -#include <stdlib.h> - -#include "libbase64.h" -#include "codecs.h" - -#ifdef __SSSE3__ -#include <tmmintrin.h> - -#define CMPGT(s,n) _mm_cmpgt_epi8((s), _mm_set1_epi8(n)) -#define CMPEQ(s,n) _mm_cmpeq_epi8((s), _mm_set1_epi8(n)) -#define REPLACE(s,n) _mm_and_si128((s), _mm_set1_epi8(n)) -#define RANGE(s,a,b) _mm_andnot_si128(CMPGT((s), (b)), CMPGT((s), (a) - 1)) - -static inline __m128i -_mm_bswap_epi32 (const __m128i in) -{ - return _mm_shuffle_epi8(in, _mm_setr_epi8( - 3, 2, 1, 0, - 7, 6, 5, 4, - 11, 10, 9, 8, - 15, 14, 13, 12)); -} - -static inline __m128i -enc_reshuffle (__m128i in) -{ - // Slice into 32-bit chunks and operate on all chunks in parallel. - // All processing is done within the 32-bit chunk. 
First, shuffle: - // before: [eeeeeeff|ccdddddd|bbbbcccc|aaaaaabb] - // after: [00000000|aaaaaabb|bbbbcccc|ccdddddd] - in = _mm_shuffle_epi8(in, _mm_set_epi8( - -1, 9, 10, 11, - -1, 6, 7, 8, - -1, 3, 4, 5, - -1, 0, 1, 2)); - - // cd = [00000000|00000000|0000cccc|ccdddddd] - const __m128i cd = _mm_and_si128(in, _mm_set1_epi32(0x00000FFF)); - - // ab = [0000aaaa|aabbbbbb|00000000|00000000] - const __m128i ab = _mm_and_si128(_mm_slli_epi32(in, 4), _mm_set1_epi32(0x0FFF0000)); - - // merged = [0000aaaa|aabbbbbb|0000cccc|ccdddddd] - const __m128i merged = _mm_or_si128(ab, cd); - - // bd = [00000000|00bbbbbb|00000000|00dddddd] - const __m128i bd = _mm_and_si128(merged, _mm_set1_epi32(0x003F003F)); - - // ac = [00aaaaaa|00000000|00cccccc|00000000] - const __m128i ac = _mm_and_si128(_mm_slli_epi32(merged, 2), _mm_set1_epi32(0x3F003F00)); - - // indices = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] - const __m128i indices = _mm_or_si128(ac, bd); - - // return = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] - return _mm_bswap_epi32(indices); -} - -static inline __m128i -enc_translate (const __m128i in) -{ - // Translate values 0..63 to the Base64 alphabet. 
There are five sets: - // # From To Abs Delta Characters - // 0 [0..25] [65..90] +65 +65 ABCDEFGHIJKLMNOPQRSTUVWXYZ - // 1 [26..51] [97..122] +71 +6 abcdefghijklmnopqrstuvwxyz - // 2 [52..61] [48..57] -4 -75 0123456789 - // 3 [62] [43] -19 -15 + - // 4 [63] [47] -16 +3 / - - // Create cumulative masks for characters in sets [1,2,3,4], [2,3,4], - // [3,4], and [4]: - const __m128i mask1 = CMPGT(in, 25); - const __m128i mask2 = CMPGT(in, 51); - const __m128i mask3 = CMPGT(in, 61); - const __m128i mask4 = CMPEQ(in, 63); - - // All characters are at least in cumulative set 0, so add 'A': - __m128i out = _mm_add_epi8(in, _mm_set1_epi8(65)); - - // For inputs which are also in any of the other cumulative sets, - // add delta values against the previous set(s) to correct the shift: - out = _mm_add_epi8(out, REPLACE(mask1, 6)); - out = _mm_sub_epi8(out, REPLACE(mask2, 75)); - out = _mm_sub_epi8(out, REPLACE(mask3, 15)); - out = _mm_add_epi8(out, REPLACE(mask4, 3)); - - return out; -} - -static inline __m128i -dec_reshuffle (__m128i in) -{ - // Shuffle bytes to 32-bit bigendian: - in = _mm_bswap_epi32(in); - - // Mask in a single byte per shift: - __m128i mask = _mm_set1_epi32(0x3F000000); - - // Pack bytes together: - __m128i out = _mm_slli_epi32(_mm_and_si128(in, mask), 2); - mask = _mm_srli_epi32(mask, 8); - - out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, mask), 4)); - mask = _mm_srli_epi32(mask, 8); - - out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, mask), 6)); - mask = _mm_srli_epi32(mask, 8); - - out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, mask), 8)); - - // Reshuffle and repack into 12-byte output format: - return _mm_shuffle_epi8(out, _mm_setr_epi8( - 3, 2, 1, - 7, 6, 5, - 11, 10, 9, - 15, 14, 13, - -1, -1, -1, -1)); -} - -#endif // __SSSE3__ - -void -ssse3_base64_stream_encode - ( struct ssse3_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ -#ifdef __SSSE3__ - #include "enc_head.c" 
- #include "enc_ssse3.c" - #include "enc_tail.c" -#else - (void)state; - (void)src; - (void)srclen; - (void)out; - (void)outlen; - abort(); -#endif -} - -int -ssse3_base64_stream_decode - ( struct ssse3_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ -#ifdef __SSSE3__ - #include "dec_head.c" - #include "dec_ssse3.c" - #include "dec_tail.c" -#else - (void)state; - (void)src; - (void)srclen; - (void)out; - (void)outlen; - abort(); -#endif -} +#include <stdint.h> +#include <stddef.h> +#include <stdlib.h> + +#include "libbase64.h" +#include "codecs.h" + +#ifdef __SSSE3__ +#include <tmmintrin.h> + +#define CMPGT(s,n) _mm_cmpgt_epi8((s), _mm_set1_epi8(n)) +#define CMPEQ(s,n) _mm_cmpeq_epi8((s), _mm_set1_epi8(n)) +#define REPLACE(s,n) _mm_and_si128((s), _mm_set1_epi8(n)) +#define RANGE(s,a,b) _mm_andnot_si128(CMPGT((s), (b)), CMPGT((s), (a) - 1)) + +static inline __m128i +_mm_bswap_epi32 (const __m128i in) +{ + return _mm_shuffle_epi8(in, _mm_setr_epi8( + 3, 2, 1, 0, + 7, 6, 5, 4, + 11, 10, 9, 8, + 15, 14, 13, 12)); +} + +static inline __m128i +enc_reshuffle (__m128i in) +{ + // Slice into 32-bit chunks and operate on all chunks in parallel. + // All processing is done within the 32-bit chunk. 
First, shuffle: + // before: [eeeeeeff|ccdddddd|bbbbcccc|aaaaaabb] + // after: [00000000|aaaaaabb|bbbbcccc|ccdddddd] + in = _mm_shuffle_epi8(in, _mm_set_epi8( + -1, 9, 10, 11, + -1, 6, 7, 8, + -1, 3, 4, 5, + -1, 0, 1, 2)); + + // cd = [00000000|00000000|0000cccc|ccdddddd] + const __m128i cd = _mm_and_si128(in, _mm_set1_epi32(0x00000FFF)); + + // ab = [0000aaaa|aabbbbbb|00000000|00000000] + const __m128i ab = _mm_and_si128(_mm_slli_epi32(in, 4), _mm_set1_epi32(0x0FFF0000)); + + // merged = [0000aaaa|aabbbbbb|0000cccc|ccdddddd] + const __m128i merged = _mm_or_si128(ab, cd); + + // bd = [00000000|00bbbbbb|00000000|00dddddd] + const __m128i bd = _mm_and_si128(merged, _mm_set1_epi32(0x003F003F)); + + // ac = [00aaaaaa|00000000|00cccccc|00000000] + const __m128i ac = _mm_and_si128(_mm_slli_epi32(merged, 2), _mm_set1_epi32(0x3F003F00)); + + // indices = [00aaaaaa|00bbbbbb|00cccccc|00dddddd] + const __m128i indices = _mm_or_si128(ac, bd); + + // return = [00dddddd|00cccccc|00bbbbbb|00aaaaaa] + return _mm_bswap_epi32(indices); +} + +static inline __m128i +enc_translate (const __m128i in) +{ + // Translate values 0..63 to the Base64 alphabet. 
There are five sets: + // # From To Abs Delta Characters + // 0 [0..25] [65..90] +65 +65 ABCDEFGHIJKLMNOPQRSTUVWXYZ + // 1 [26..51] [97..122] +71 +6 abcdefghijklmnopqrstuvwxyz + // 2 [52..61] [48..57] -4 -75 0123456789 + // 3 [62] [43] -19 -15 + + // 4 [63] [47] -16 +3 / + + // Create cumulative masks for characters in sets [1,2,3,4], [2,3,4], + // [3,4], and [4]: + const __m128i mask1 = CMPGT(in, 25); + const __m128i mask2 = CMPGT(in, 51); + const __m128i mask3 = CMPGT(in, 61); + const __m128i mask4 = CMPEQ(in, 63); + + // All characters are at least in cumulative set 0, so add 'A': + __m128i out = _mm_add_epi8(in, _mm_set1_epi8(65)); + + // For inputs which are also in any of the other cumulative sets, + // add delta values against the previous set(s) to correct the shift: + out = _mm_add_epi8(out, REPLACE(mask1, 6)); + out = _mm_sub_epi8(out, REPLACE(mask2, 75)); + out = _mm_sub_epi8(out, REPLACE(mask3, 15)); + out = _mm_add_epi8(out, REPLACE(mask4, 3)); + + return out; +} + +static inline __m128i +dec_reshuffle (__m128i in) +{ + // Shuffle bytes to 32-bit bigendian: + in = _mm_bswap_epi32(in); + + // Mask in a single byte per shift: + __m128i mask = _mm_set1_epi32(0x3F000000); + + // Pack bytes together: + __m128i out = _mm_slli_epi32(_mm_and_si128(in, mask), 2); + mask = _mm_srli_epi32(mask, 8); + + out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, mask), 4)); + mask = _mm_srli_epi32(mask, 8); + + out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, mask), 6)); + mask = _mm_srli_epi32(mask, 8); + + out = _mm_or_si128(out, _mm_slli_epi32(_mm_and_si128(in, mask), 8)); + + // Reshuffle and repack into 12-byte output format: + return _mm_shuffle_epi8(out, _mm_setr_epi8( + 3, 2, 1, + 7, 6, 5, + 11, 10, 9, + 15, 14, 13, + -1, -1, -1, -1)); +} + +#endif // __SSSE3__ + +void +ssse3_base64_stream_encode + ( struct ssse3_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#ifdef __SSSE3__ + #include "enc_head.c" 
+ #include "enc_ssse3.c" + #include "enc_tail.c" +#else + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} + +int +ssse3_base64_stream_decode + ( struct ssse3_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ +#ifdef __SSSE3__ + #include "dec_head.c" + #include "dec_ssse3.c" + #include "dec_tail.c" +#else + (void)state; + (void)src; + (void)srclen; + (void)out; + (void)outlen; + abort(); +#endif +} diff --git a/contrib/libs/base64/ssse3/codecs.h b/contrib/libs/base64/ssse3/codecs.h index dcf91c2a60..7b1afac872 100644 --- a/contrib/libs/base64/ssse3/codecs.h +++ b/contrib/libs/base64/ssse3/codecs.h @@ -1,35 +1,35 @@ -#pragma once - -// Define machine endianness. This is for GCC: -#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) - #define BASE64_SSSE3_LITTLE_ENDIAN 1 -#else - #define BASE64_SSSE3_LITTLE_ENDIAN 0 -#endif - -// This is for Clang: -#ifdef __LITTLE_ENDIAN__ - #define BASE64_SSSE3_LITTLE_ENDIAN 1 -#endif - -#ifdef __BIG_ENDIAN__ - #define BASE64_SSSE3_LITTLE_ENDIAN 0 -#endif - -// Endian conversion functions -#if BASE64_SSSE3_LITTLE_ENDIAN - #define cpu_to_be32(x) __builtin_bswap32(x) - #define cpu_to_be64(x) __builtin_bswap64(x) - #define be32_to_cpu(x) __builtin_bswap32(x) - #define be64_to_cpu(x) __builtin_bswap64(x) -#else - #define cpu_to_be32(x) (x) - #define cpu_to_be64(x) (x) - #define be32_to_cpu(x) (x) - #define be64_to_cpu(x) (x) -#endif - -// These tables are used by all codecs -// for fallback plain encoding/decoding: -extern const uint8_t ssse3_base64_table_enc[]; -extern const uint8_t ssse3_base64_table_dec[]; +#pragma once + +// Define machine endianness. 
This is for GCC: +#if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) + #define BASE64_SSSE3_LITTLE_ENDIAN 1 +#else + #define BASE64_SSSE3_LITTLE_ENDIAN 0 +#endif + +// This is for Clang: +#ifdef __LITTLE_ENDIAN__ + #define BASE64_SSSE3_LITTLE_ENDIAN 1 +#endif + +#ifdef __BIG_ENDIAN__ + #define BASE64_SSSE3_LITTLE_ENDIAN 0 +#endif + +// Endian conversion functions +#if BASE64_SSSE3_LITTLE_ENDIAN + #define cpu_to_be32(x) __builtin_bswap32(x) + #define cpu_to_be64(x) __builtin_bswap64(x) + #define be32_to_cpu(x) __builtin_bswap32(x) + #define be64_to_cpu(x) __builtin_bswap64(x) +#else + #define cpu_to_be32(x) (x) + #define cpu_to_be64(x) (x) + #define be32_to_cpu(x) (x) + #define be64_to_cpu(x) (x) +#endif + +// These tables are used by all codecs +// for fallback plain encoding/decoding: +extern const uint8_t ssse3_base64_table_enc[]; +extern const uint8_t ssse3_base64_table_dec[]; diff --git a/contrib/libs/base64/ssse3/dec_head.c b/contrib/libs/base64/ssse3/dec_head.c index cdb284fa39..aa77d52aa6 100644 --- a/contrib/libs/base64/ssse3/dec_head.c +++ b/contrib/libs/base64/ssse3/dec_head.c @@ -1,29 +1,29 @@ -int ret = 0; -const uint8_t *c = (const uint8_t *)src; -uint8_t *o = (uint8_t *)out; -uint8_t q; - -// Use local temporaries to avoid cache thrashing: -size_t outl = 0; -struct ssse3_base64_state st; -st.eof = state->eof; -st.bytes = state->bytes; -st.carry = state->carry; - -// If we previously saw an EOF or an invalid character, bail out: -if (st.eof) { - *outlen = 0; - return 0; -} - -// Turn four 6-bit numbers into three bytes: -// out[0] = 11111122 -// out[1] = 22223333 -// out[2] = 33444444 - -// Duff's device again: -switch (st.bytes) -{ - for (;;) - { - case 0: +int ret = 0; +const uint8_t *c = (const uint8_t *)src; +uint8_t *o = (uint8_t *)out; +uint8_t q; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct ssse3_base64_state st; +st.eof = state->eof; +st.bytes = state->bytes; +st.carry = state->carry; + +// If we previously saw 
an EOF or an invalid character, bail out: +if (st.eof) { + *outlen = 0; + return 0; +} + +// Turn four 6-bit numbers into three bytes: +// out[0] = 11111122 +// out[1] = 22223333 +// out[2] = 33444444 + +// Duff's device again: +switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/ssse3/dec_ssse3.c b/contrib/libs/base64/ssse3/dec_ssse3.c index cf2e2781a4..ce11b3cb1b 100644 --- a/contrib/libs/base64/ssse3/dec_ssse3.c +++ b/contrib/libs/base64/ssse3/dec_ssse3.c @@ -1,58 +1,58 @@ -// If we have SSSE3 support, pick off 16 bytes at a time for as long as we can, -// but make sure that we quit before seeing any == markers at the end of the -// string. Also, because we write four zeroes at the end of the output, ensure -// that there are at least 6 valid bytes of input data remaining to close the -// gap. 16 + 2 + 6 = 24 bytes: -while (srclen >= 24) -{ - // Load string: - __m128i str = _mm_loadu_si128((__m128i *)c); - - // The input consists of six character sets in the Base64 alphabet, - // which we need to map back to the 6-bit values they represent. - // There are three ranges, two singles, and then there's the rest. 
- // - // # From To Add Characters - // 1 [43] [62] +19 + - // 2 [47] [63] +16 / - // 3 [48..57] [52..61] +4 0..9 - // 4 [65..90] [0..25] -65 A..Z - // 5 [97..122] [26..51] -71 a..z - // (6) Everything else => invalid input - - const __m128i set1 = CMPEQ(str, '+'); - const __m128i set2 = CMPEQ(str, '/'); - const __m128i set3 = RANGE(str, '0', '9'); - const __m128i set4 = RANGE(str, 'A', 'Z'); - const __m128i set5 = RANGE(str, 'a', 'z'); - const __m128i set6 = CMPEQ(str, '-'); - const __m128i set7 = CMPEQ(str, '_'); - - __m128i delta = REPLACE(set1, 19); - delta = _mm_or_si128(delta, REPLACE(set2, 16)); - delta = _mm_or_si128(delta, REPLACE(set3, 4)); - delta = _mm_or_si128(delta, REPLACE(set4, -65)); - delta = _mm_or_si128(delta, REPLACE(set5, -71)); - delta = _mm_or_si128(delta, REPLACE(set6, 17)); - delta = _mm_or_si128(delta, REPLACE(set7, -32)); - - // Check for invalid input: if any of the delta values are zero, - // fall back on bytewise code to do error checking and reporting: - if (_mm_movemask_epi8(CMPEQ(delta, 0))) { - break; - } - - // Now simply add the delta values to the input: - str = _mm_add_epi8(str, delta); - - // Reshuffle the input to packed 12-byte output format: - str = dec_reshuffle(str); - - // Store back: - _mm_storeu_si128((__m128i *)o, str); - - c += 16; - o += 12; - outl += 12; - srclen -= 16; -} +// If we have SSSE3 support, pick off 16 bytes at a time for as long as we can, +// but make sure that we quit before seeing any == markers at the end of the +// string. Also, because we write four zeroes at the end of the output, ensure +// that there are at least 6 valid bytes of input data remaining to close the +// gap. 16 + 2 + 6 = 24 bytes: +while (srclen >= 24) +{ + // Load string: + __m128i str = _mm_loadu_si128((__m128i *)c); + + // The input consists of six character sets in the Base64 alphabet, + // which we need to map back to the 6-bit values they represent. + // There are three ranges, two singles, and then there's the rest. 
+ // + // # From To Add Characters + // 1 [43] [62] +19 + + // 2 [47] [63] +16 / + // 3 [48..57] [52..61] +4 0..9 + // 4 [65..90] [0..25] -65 A..Z + // 5 [97..122] [26..51] -71 a..z + // (6) Everything else => invalid input + + const __m128i set1 = CMPEQ(str, '+'); + const __m128i set2 = CMPEQ(str, '/'); + const __m128i set3 = RANGE(str, '0', '9'); + const __m128i set4 = RANGE(str, 'A', 'Z'); + const __m128i set5 = RANGE(str, 'a', 'z'); + const __m128i set6 = CMPEQ(str, '-'); + const __m128i set7 = CMPEQ(str, '_'); + + __m128i delta = REPLACE(set1, 19); + delta = _mm_or_si128(delta, REPLACE(set2, 16)); + delta = _mm_or_si128(delta, REPLACE(set3, 4)); + delta = _mm_or_si128(delta, REPLACE(set4, -65)); + delta = _mm_or_si128(delta, REPLACE(set5, -71)); + delta = _mm_or_si128(delta, REPLACE(set6, 17)); + delta = _mm_or_si128(delta, REPLACE(set7, -32)); + + // Check for invalid input: if any of the delta values are zero, + // fall back on bytewise code to do error checking and reporting: + if (_mm_movemask_epi8(CMPEQ(delta, 0))) { + break; + } + + // Now simply add the delta values to the input: + str = _mm_add_epi8(str, delta); + + // Reshuffle the input to packed 12-byte output format: + str = dec_reshuffle(str); + + // Store back: + _mm_storeu_si128((__m128i *)o, str); + + c += 16; + o += 12; + outl += 12; + srclen -= 16; +} diff --git a/contrib/libs/base64/ssse3/dec_tail.c b/contrib/libs/base64/ssse3/dec_tail.c index 69b6f2c9d3..69e0050710 100644 --- a/contrib/libs/base64/ssse3/dec_tail.c +++ b/contrib/libs/base64/ssse3/dec_tail.c @@ -1,65 +1,65 @@ - if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = ssse3_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // Treat character '=' as invalid for byte 0: - break; - } - st.carry = q << 2; - st.bytes++; - - case 1: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = ssse3_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // Treat character '=' as invalid for byte 1: - break; - } - *o++ = st.carry | (q >> 4); - 
st.carry = q << 4; - st.bytes++; - outl++; - - case 2: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = ssse3_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // When q == 254, the input char is '='. Return 1 and EOF. - // Technically, should check if next byte is also '=', but never mind. - // When q == 255, the input char is invalid. Return 0 and EOF. - ret = (q == 254) ? 1 : 0; - break; - } - *o++ = st.carry | (q >> 2); - st.carry = q << 6; - st.bytes++; - outl++; - - case 3: if (srclen-- == 0) { - ret = 1; - break; - } - if ((q = ssse3_base64_table_dec[*c++]) >= 254) { - st.eof = 1; - // When q == 254, the input char is '='. Return 1 and EOF. - // When q == 255, the input char is invalid. Return 0 and EOF. - ret = (q == 254) ? 1 : 0; - break; - } - *o++ = st.carry | q; - st.carry = 0; - st.bytes = 0; - outl++; - } -} -state->eof = st.eof; -state->bytes = st.bytes; -state->carry = st.carry; -*outlen = outl; -return ret; + if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = ssse3_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 0: + break; + } + st.carry = q << 2; + st.bytes++; + + case 1: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = ssse3_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // Treat character '=' as invalid for byte 1: + break; + } + *o++ = st.carry | (q >> 4); + st.carry = q << 4; + st.bytes++; + outl++; + + case 2: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = ssse3_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // Technically, should check if next byte is also '=', but never mind. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 
1 : 0; + break; + } + *o++ = st.carry | (q >> 2); + st.carry = q << 6; + st.bytes++; + outl++; + + case 3: if (srclen-- == 0) { + ret = 1; + break; + } + if ((q = ssse3_base64_table_dec[*c++]) >= 254) { + st.eof = 1; + // When q == 254, the input char is '='. Return 1 and EOF. + // When q == 255, the input char is invalid. Return 0 and EOF. + ret = (q == 254) ? 1 : 0; + break; + } + *o++ = st.carry | q; + st.carry = 0; + st.bytes = 0; + outl++; + } +} +state->eof = st.eof; +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; +return ret; diff --git a/contrib/libs/base64/ssse3/enc_head.c b/contrib/libs/base64/ssse3/enc_head.c index fc6ba5347e..b8ad582df5 100644 --- a/contrib/libs/base64/ssse3/enc_head.c +++ b/contrib/libs/base64/ssse3/enc_head.c @@ -1,23 +1,23 @@ -// Assume that *out is large enough to contain the output. -// Theoretically it should be 4/3 the length of src. -const uint8_t *c = (const uint8_t *)src; -uint8_t *o = (uint8_t *)out; - -// Use local temporaries to avoid cache thrashing: -size_t outl = 0; -struct ssse3_base64_state st; -st.bytes = state->bytes; -st.carry = state->carry; - -// Turn three bytes into four 6-bit numbers: -// in[0] = 00111111 -// in[1] = 00112222 -// in[2] = 00222233 -// in[3] = 00333333 - -// Duff's device, a for() loop inside a switch() statement. Legal! -switch (st.bytes) -{ - for (;;) - { - case 0: +// Assume that *out is large enough to contain the output. +// Theoretically it should be 4/3 the length of src. +const uint8_t *c = (const uint8_t *)src; +uint8_t *o = (uint8_t *)out; + +// Use local temporaries to avoid cache thrashing: +size_t outl = 0; +struct ssse3_base64_state st; +st.bytes = state->bytes; +st.carry = state->carry; + +// Turn three bytes into four 6-bit numbers: +// in[0] = 00111111 +// in[1] = 00112222 +// in[2] = 00222233 +// in[3] = 00333333 + +// Duff's device, a for() loop inside a switch() statement. Legal! 
+switch (st.bytes) +{ + for (;;) + { + case 0: diff --git a/contrib/libs/base64/ssse3/enc_ssse3.c b/contrib/libs/base64/ssse3/enc_ssse3.c index e8903cf6c4..0518595e61 100644 --- a/contrib/libs/base64/ssse3/enc_ssse3.c +++ b/contrib/libs/base64/ssse3/enc_ssse3.c @@ -1,22 +1,22 @@ -// If we have SSSE3 support, pick off 12 bytes at a time for as long as we can. -// But because we read 16 bytes at a time, ensure we have enough room to do a -// full 16-byte read without segfaulting: -while (srclen >= 16) -{ - // Load string: - __m128i str = _mm_loadu_si128((__m128i *)c); - - // Reshuffle: - str = enc_reshuffle(str); - - // Translate reshuffled bytes to the Base64 alphabet: - str = enc_translate(str); - - // Store: - _mm_storeu_si128((__m128i *)o, str); - - c += 12; // 3 * 4 bytes of input - o += 16; // 4 * 4 bytes of output - outl += 16; - srclen -= 12; -} +// If we have SSSE3 support, pick off 12 bytes at a time for as long as we can. +// But because we read 16 bytes at a time, ensure we have enough room to do a +// full 16-byte read without segfaulting: +while (srclen >= 16) +{ + // Load string: + __m128i str = _mm_loadu_si128((__m128i *)c); + + // Reshuffle: + str = enc_reshuffle(str); + + // Translate reshuffled bytes to the Base64 alphabet: + str = enc_translate(str); + + // Store: + _mm_storeu_si128((__m128i *)o, str); + + c += 12; // 3 * 4 bytes of input + o += 16; // 4 * 4 bytes of output + outl += 16; + srclen -= 12; +} diff --git a/contrib/libs/base64/ssse3/enc_tail.c b/contrib/libs/base64/ssse3/enc_tail.c index 83815b5eec..69e0644af3 100644 --- a/contrib/libs/base64/ssse3/enc_tail.c +++ b/contrib/libs/base64/ssse3/enc_tail.c @@ -1,28 +1,28 @@ - if (srclen-- == 0) { - break; - } - *o++ = ssse3_base64_table_enc[*c >> 2]; - st.carry = (*c++ << 4) & 0x30; - st.bytes++; - outl += 1; - - case 1: if (srclen-- == 0) { - break; - } - *o++ = ssse3_base64_table_enc[st.carry | (*c >> 4)]; - st.carry = (*c++ << 2) & 0x3C; - st.bytes++; - outl += 1; - - case 2: if 
(srclen-- == 0) { - break; - } - *o++ = ssse3_base64_table_enc[st.carry | (*c >> 6)]; - *o++ = ssse3_base64_table_enc[*c++ & 0x3F]; - st.bytes = 0; - outl += 2; - } -} -state->bytes = st.bytes; -state->carry = st.carry; -*outlen = outl; + if (srclen-- == 0) { + break; + } + *o++ = ssse3_base64_table_enc[*c >> 2]; + st.carry = (*c++ << 4) & 0x30; + st.bytes++; + outl += 1; + + case 1: if (srclen-- == 0) { + break; + } + *o++ = ssse3_base64_table_enc[st.carry | (*c >> 4)]; + st.carry = (*c++ << 2) & 0x3C; + st.bytes++; + outl += 1; + + case 2: if (srclen-- == 0) { + break; + } + *o++ = ssse3_base64_table_enc[st.carry | (*c >> 6)]; + *o++ = ssse3_base64_table_enc[*c++ & 0x3F]; + st.bytes = 0; + outl += 2; + } +} +state->bytes = st.bytes; +state->carry = st.carry; +*outlen = outl; diff --git a/contrib/libs/base64/ssse3/lib.c b/contrib/libs/base64/ssse3/lib.c index 3c9cf99fc2..d6f94c4df5 100644 --- a/contrib/libs/base64/ssse3/lib.c +++ b/contrib/libs/base64/ssse3/lib.c @@ -1,121 +1,121 @@ -#include <stdint.h> -#include <stddef.h> - -#include "libbase64.h" -#include "codecs.h" - -const uint8_t -ssse3_base64_table_enc[] = - "ABCDEFGHIJKLMNOPQRSTUVWXYZ" - "abcdefghijklmnopqrstuvwxyz" - "0123456789+/"; - -// In the lookup table below, note that the value for '=' (character 61) is -// 254, not 255. This character is used for in-band signaling of the end of -// the datastream, and we will use that later. The characters A-Z, a-z, 0-9 -// and + / are mapped to their "decoded" values. The other bytes all map to -// the value 255, which flags them as "invalid input". 
- -const uint8_t -ssse3_base64_table_dec[] = -{ - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 - 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 - 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 - 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 - 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, - 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, -}; - -void -ssse3_base64_stream_encode_init (struct ssse3_base64_state *state) -{ - state->eof = 0; - state->bytes = 0; - state->carry = 0; -} - -void -ssse3_base64_stream_encode_final - ( struct ssse3_base64_state *state - , char *out - , size_t *outlen - ) -{ - uint8_t *o = (uint8_t *)out; - - if (state->bytes == 1) { - *o++ = ssse3_base64_table_enc[state->carry]; - *o++ = '='; - *o++ = '='; - *outlen = 3; - return; - } - if (state->bytes == 2) { - *o++ = ssse3_base64_table_enc[state->carry]; - *o++ = '='; - *outlen = 2; - return; - } - *outlen = 0; -} - -void -ssse3_base64_stream_decode_init (struct 
ssse3_base64_state *state) -{ - state->eof = 0; - state->bytes = 0; - state->carry = 0; -} - -void -ssse3_base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - size_t s; - size_t t; - struct ssse3_base64_state state; - - // Init the stream reader: - ssse3_base64_stream_encode_init(&state); - - // Feed the whole string to the stream reader: - ssse3_base64_stream_encode(&state, src, srclen, out, &s); - - // Finalize the stream by writing trailer if any: - ssse3_base64_stream_encode_final(&state, out + s, &t); - - // Final output length is stream length plus tail: - *outlen = s + t; -} - -int -ssse3_base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) -{ - struct ssse3_base64_state state; - - // Init the stream reader: - ssse3_base64_stream_decode_init(&state); - - // Feed the whole string to the stream reader: - return ssse3_base64_stream_decode(&state, src, srclen, out, outlen); -} +#include <stdint.h> +#include <stddef.h> + +#include "libbase64.h" +#include "codecs.h" + +const uint8_t +ssse3_base64_table_enc[] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + +// In the lookup table below, note that the value for '=' (character 61) is +// 254, not 255. This character is used for in-band signaling of the end of +// the datastream, and we will use that later. The characters A-Z, a-z, 0-9 +// and + / are mapped to their "decoded" values. The other bytes all map to +// the value 255, which flags them as "invalid input". 
+ +const uint8_t +ssse3_base64_table_dec[] = +{ + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 0..15 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 16..31 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 62, 254, 62, 255, 63, // 32..47 + 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 255, 255, 255, 254, 255, 255, // 48..63 + 255, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, // 64..79 + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 255, 255, 255, 255, 63, // 80..95 + 255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, // 96..111 + 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 255, 255, 255, 255, 255, // 112..127 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, // 128..143 + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, +}; + +void +ssse3_base64_stream_encode_init (struct ssse3_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +ssse3_base64_stream_encode_final + ( struct ssse3_base64_state *state + , char *out + , size_t *outlen + ) +{ + uint8_t *o = (uint8_t *)out; + + if (state->bytes == 1) { + *o++ = ssse3_base64_table_enc[state->carry]; + *o++ = '='; + *o++ = '='; + *outlen = 3; + return; + } + if (state->bytes == 2) { + *o++ = ssse3_base64_table_enc[state->carry]; + *o++ = '='; + *outlen = 2; + return; + } + *outlen = 0; +} + +void +ssse3_base64_stream_decode_init (struct 
ssse3_base64_state *state) +{ + state->eof = 0; + state->bytes = 0; + state->carry = 0; +} + +void +ssse3_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + size_t s; + size_t t; + struct ssse3_base64_state state; + + // Init the stream reader: + ssse3_base64_stream_encode_init(&state); + + // Feed the whole string to the stream reader: + ssse3_base64_stream_encode(&state, src, srclen, out, &s); + + // Finalize the stream by writing trailer if any: + ssse3_base64_stream_encode_final(&state, out + s, &t); + + // Final output length is stream length plus tail: + *outlen = s + t; +} + +int +ssse3_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) +{ + struct ssse3_base64_state state; + + // Init the stream reader: + ssse3_base64_stream_decode_init(&state); + + // Feed the whole string to the stream reader: + return ssse3_base64_stream_decode(&state, src, srclen, out, outlen); +} diff --git a/contrib/libs/base64/ssse3/libbase64.h b/contrib/libs/base64/ssse3/libbase64.h index 8f19564e47..064ee7e658 100644 --- a/contrib/libs/base64/ssse3/libbase64.h +++ b/contrib/libs/base64/ssse3/libbase64.h @@ -1,89 +1,89 @@ -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -struct ssse3_base64_state { - int eof; - int bytes; - unsigned char carry; -}; - -/* Wrapper function to encode a plain string of given length. Output is written - * to *out without trailing zero. Output length in bytes is written to *outlen. - * The buffer in `out` has been allocated by the caller and is at least 4/3 the - * size of the input. See above for `flags`; set to 0 for default operation: */ -void ssse3_base64_encode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Call this before calling base64_stream_encode() to init the state. 
See above - * for `flags`; set to 0 for default operation: */ -void ssse3_base64_stream_encode_init - ( struct ssse3_base64_state *state - ) ; - -/* Encodes the block of data of given length at `src`, into the buffer at - * `out`. Caller is responsible for allocating a large enough out-buffer; it - * must be at least 4/3 the size of the in-buffer, but take some margin. Places - * the number of new bytes written into `outlen` (which is set to zero when the - * function starts). Does not zero-terminate or finalize the output. */ -void ssse3_base64_stream_encode - ( struct ssse3_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Finalizes the output begun by previous calls to `base64_stream_encode()`. - * Adds the required end-of-stream markers if appropriate. `outlen` is modified - * and will contain the number of new bytes written at `out` (which will quite - * often be zero). */ -void ssse3_base64_stream_encode_final - ( struct ssse3_base64_state *state - , char *out - , size_t *outlen - ) ; - -/* Wrapper function to decode a plain string of given length. Output is written - * to *out without trailing zero. Output length in bytes is written to *outlen. - * The buffer in `out` has been allocated by the caller and is at least 3/4 the - * size of the input. See above for `flags`, set to 0 for default operation: */ -int ssse3_base64_decode - ( const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -/* Call this before calling base64_stream_decode() to init the state. See above - * for `flags`; set to 0 for default operation: */ -void ssse3_base64_stream_decode_init - ( struct ssse3_base64_state *state - ) ; - -/* Decodes the block of data of given length at `src`, into the buffer at - * `out`. Caller is responsible for allocating a large enough out-buffer; it - * must be at least 3/4 the size of the in-buffer, but take some margin. 
Places - * the number of new bytes written into `outlen` (which is set to zero when the - * function starts). Does not zero-terminate the output. Returns 1 if all is - * well, and 0 if a decoding error was found, such as an invalid character. - * Returns -1 if the chosen codec is not included in the current build. Used by - * the test harness to check whether a codec is available for testing. */ -int ssse3_base64_stream_decode - ( struct ssse3_base64_state *state - , const char *src - , size_t srclen - , char *out - , size_t *outlen - ) ; - -#ifdef __cplusplus -} -#endif - +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +struct ssse3_base64_state { + int eof; + int bytes; + unsigned char carry; +}; + +/* Wrapper function to encode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 4/3 the + * size of the input. See above for `flags`; set to 0 for default operation: */ +void ssse3_base64_encode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling base64_stream_encode() to init the state. See above + * for `flags`; set to 0 for default operation: */ +void ssse3_base64_stream_encode_init + ( struct ssse3_base64_state *state + ) ; + +/* Encodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 4/3 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate or finalize the output. */ +void ssse3_base64_stream_encode + ( struct ssse3_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Finalizes the output begun by previous calls to `base64_stream_encode()`. 
+ * Adds the required end-of-stream markers if appropriate. `outlen` is modified + * and will contain the number of new bytes written at `out` (which will quite + * often be zero). */ +void ssse3_base64_stream_encode_final + ( struct ssse3_base64_state *state + , char *out + , size_t *outlen + ) ; + +/* Wrapper function to decode a plain string of given length. Output is written + * to *out without trailing zero. Output length in bytes is written to *outlen. + * The buffer in `out` has been allocated by the caller and is at least 3/4 the + * size of the input. See above for `flags`, set to 0 for default operation: */ +int ssse3_base64_decode + ( const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +/* Call this before calling base64_stream_decode() to init the state. See above + * for `flags`; set to 0 for default operation: */ +void ssse3_base64_stream_decode_init + ( struct ssse3_base64_state *state + ) ; + +/* Decodes the block of data of given length at `src`, into the buffer at + * `out`. Caller is responsible for allocating a large enough out-buffer; it + * must be at least 3/4 the size of the in-buffer, but take some margin. Places + * the number of new bytes written into `outlen` (which is set to zero when the + * function starts). Does not zero-terminate the output. Returns 1 if all is + * well, and 0 if a decoding error was found, such as an invalid character. + * Returns -1 if the chosen codec is not included in the current build. Used by + * the test harness to check whether a codec is available for testing. 
*/ +int ssse3_base64_stream_decode + ( struct ssse3_base64_state *state + , const char *src + , size_t srclen + , char *out + , size_t *outlen + ) ; + +#ifdef __cplusplus +} +#endif + diff --git a/contrib/libs/base64/ssse3/ya.make b/contrib/libs/base64/ssse3/ya.make index 5a3191c9f1..c9cf647001 100644 --- a/contrib/libs/base64/ssse3/ya.make +++ b/contrib/libs/base64/ssse3/ya.make @@ -1,11 +1,11 @@ -OWNER( - yazevnul +OWNER( + yazevnul g:contrib g:cpp-contrib -) - -LIBRARY() - +) + +LIBRARY() + LICENSE( BSD-2-Clause AND MIT @@ -13,14 +13,14 @@ LICENSE( LICENSE_TEXTS(.yandex_meta/licenses.list.txt) -NO_UTIL() - -SRCS( - codec_ssse3.c - lib.c -) - -IF (ARCH_X86_64 OR ARCH_I386) +NO_UTIL() + +SRCS( + codec_ssse3.c + lib.c +) + +IF (ARCH_X86_64 OR ARCH_I386) IF (MSVC AND NOT CLANG_CL) CONLYFLAGS(/D__SSSE3__=1) ELSEIF (CLANG_CL) @@ -30,7 +30,7 @@ IF (ARCH_X86_64 OR ARCH_I386) -mssse3 -std=c11 ) - ENDIF() -ENDIF() - -END() + ENDIF() +ENDIF() + +END() diff --git a/contrib/libs/base64/ya.make b/contrib/libs/base64/ya.make index f86ded70d9..659983fe66 100644 --- a/contrib/libs/base64/ya.make +++ b/contrib/libs/base64/ya.make @@ -1,16 +1,16 @@ -OWNER( - yazevnul +OWNER( + yazevnul g:contrib g:cpp-contrib -) - +) + VERSION(0.3.0) -RECURSE( - avx2 - ssse3 - neon32 - neon64 - plain32 - plain64 -) +RECURSE( + avx2 + ssse3 + neon32 + neon64 + plain32 + plain64 +) diff --git a/contrib/libs/cxxsupp/libcxx/include/iosfwd b/contrib/libs/cxxsupp/libcxx/include/iosfwd index b31529ce6e..477dd97260 100644 --- a/contrib/libs/cxxsupp/libcxx/include/iosfwd +++ b/contrib/libs/cxxsupp/libcxx/include/iosfwd @@ -250,7 +250,7 @@ typedef basic_string<char, char_traits<char>, allocator<char> > string; #ifndef _LIBCPP_HAS_NO_WIDE_CHARACTERS typedef basic_string<wchar_t, char_traits<wchar_t>, allocator<wchar_t> > wstring; #endif -typedef basic_string<char16_t, char_traits<char16_t>, allocator<char16_t> > u16string; +typedef basic_string<char16_t, char_traits<char16_t>, allocator<char16_t> > 
u16string; template <class _CharT, class _Traits, class _Allocator> class _LIBCPP_PREFERRED_NAME(string) _LIBCPP_IF_WIDE_CHARACTERS(_LIBCPP_PREFERRED_NAME(wstring)) basic_string; diff --git a/contrib/libs/cxxsupp/libcxx/include/stlfwd b/contrib/libs/cxxsupp/libcxx/include/stlfwd index 3913623f98..adad6790dc 100644 --- a/contrib/libs/cxxsupp/libcxx/include/stlfwd +++ b/contrib/libs/cxxsupp/libcxx/include/stlfwd @@ -1,13 +1,13 @@ #pragma once -#include <__config> +#include <__config> #include <cstddef> -#include <iosfwd> +#include <iosfwd> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -#pragma GCC system_header -#endif - _LIBCPP_BEGIN_NAMESPACE_STD template <class F, class S> @@ -42,18 +42,18 @@ _LIBCPP_BEGIN_NAMESPACE_STD constexpr void get(...) = delete; - template <class> - class _LIBCPP_TEMPLATE_VIS function; - - template <class _Tp> - struct _LIBCPP_TEMPLATE_VIS default_delete; - - template <class _Tp, class Deleter = default_delete<_Tp>> - class _LIBCPP_TEMPLATE_VIS unique_ptr; - - template <class T> - class _LIBCPP_TEMPLATE_VIS shared_ptr; - + template <class> + class _LIBCPP_TEMPLATE_VIS function; + + template <class _Tp> + struct _LIBCPP_TEMPLATE_VIS default_delete; + + template <class _Tp, class Deleter = default_delete<_Tp>> + class _LIBCPP_TEMPLATE_VIS unique_ptr; + + template <class T> + class _LIBCPP_TEMPLATE_VIS shared_ptr; + /* basic_string is already forward-declared in <iosfwd> */ template<class T, class Traits> diff --git a/contrib/libs/farmhash/test.cc b/contrib/libs/farmhash/test.cc index 5bbf3e7a1e..08d191ae7c 100644 --- a/contrib/libs/farmhash/test.cc +++ b/contrib/libs/farmhash/test.cc @@ -9855,8 +9855,8 @@ int main(int argc, char** argv) { } #endif -Y_UNIT_TEST_SUITE(FarmhashTest) { - Y_UNIT_TEST(Test) { +Y_UNIT_TEST_SUITE(FarmhashTest) { + Y_UNIT_TEST(Test) { UNIT_ASSERT(farmhashccTest::RunTest() == 0); UNIT_ASSERT(farmhashmkTest::RunTest() == 0); 
UNIT_ASSERT(farmhashnaTest::RunTest() == 0); diff --git a/contrib/libs/flatbuffers/ya.make b/contrib/libs/flatbuffers/ya.make index e91b7ebda4..4af01ae72c 100644 --- a/contrib/libs/flatbuffers/ya.make +++ b/contrib/libs/flatbuffers/ya.make @@ -1,7 +1,7 @@ # Generated by devtools/yamaker from nixpkgs 28acaac96f0cc203c63a3d50634541feff7fc31c. -LIBRARY() - +LIBRARY() + OWNER( eeight kirillovs diff --git a/contrib/libs/libunwind/ut/libunwind_ut.cpp b/contrib/libs/libunwind/ut/libunwind_ut.cpp index 3e199d4210..93767c50c8 100644 --- a/contrib/libs/libunwind/ut/libunwind_ut.cpp +++ b/contrib/libs/libunwind/ut/libunwind_ut.cpp @@ -10,14 +10,14 @@ Y_NO_INLINE void Except(int arg, ...) { throw std::exception(); } -Y_UNIT_TEST_SUITE(LibunwindSuite) { +Y_UNIT_TEST_SUITE(LibunwindSuite) { static void Y_NO_INLINE DoTestVarargs() { std::vector<int> v; v.push_back(0); Except(0x11, 0x22, 0x33, 0x44, 0xAA, 0xBB, 0xCC, 0xDD); } - Y_UNIT_TEST(TestVarargs) { + Y_UNIT_TEST(TestVarargs) { try { DoTestVarargs(); } catch (const std::exception& e) { diff --git a/contrib/libs/pire/pire/encoding.cpp b/contrib/libs/pire/pire/encoding.cpp index 9707c10d54..842e2b534d 100644 --- a/contrib/libs/pire/pire/encoding.cpp +++ b/contrib/libs/pire/pire/encoding.cpp @@ -95,7 +95,7 @@ namespace { size_t len; unsigned char* p = (unsigned char*) &*ret.begin(); if (SafeWriteUTF8Char(c, len, p, p + ret.size()) != RECODE_OK) - Y_ASSERT(!"Pire::UTF8::toLocal(): Internal error"); + Y_ASSERT(!"Pire::UTF8::toLocal(): Internal error"); return ret; } diff --git a/contrib/libs/pire/pire/extra/count.cpp b/contrib/libs/pire/pire/extra/count.cpp index 5285aed998..468ff61d92 100644 --- a/contrib/libs/pire/pire/extra/count.cpp +++ b/contrib/libs/pire/pire/extra/count.cpp @@ -796,9 +796,9 @@ CountingScanner::CountingScanner(const Fsm& re, const Fsm& sep) const Fsm::StatesSet& br = sep_re.Destinations(states[curstate].second, letter); if (mr.size() != 1) - Y_ASSERT(!"Wrong transition size for main"); + Y_ASSERT(!"Wrong 
transition size for main"); if (br.size() != 1) - Y_ASSERT(!"Wrong transition size for backup"); + Y_ASSERT(!"Wrong transition size for backup"); NewState ns(*mr.begin(), *br.begin()); PIRE_IFDEBUG(NewState savedNs = ns); diff --git a/contrib/libs/pire/pire/fsm.cpp b/contrib/libs/pire/pire/fsm.cpp index 5df0c7ec4b..984d708dfa 100644 --- a/contrib/libs/pire/pire/fsm.cpp +++ b/contrib/libs/pire/pire/fsm.cpp @@ -80,7 +80,7 @@ void Fsm::DumpState(yostream& s, size_t state) const for (auto&& transitionState : transition.second) { if (transitionState >= Size()) { std::cerr << "WTF?! Transition from " << state << " on letter " << transition.first << " leads to non-existing state " << transitionState << "\n"; - Y_ASSERT(false); + Y_ASSERT(false); } if (Letters().Contains(transition.first)) { const TVector<Char>& letters = Letters().Klass(Letters().Representative(transition.first)); @@ -977,7 +977,7 @@ public: void Connect(size_t from, size_t to, Char letter) { PIRE_IFDEBUG(Cdbg << "Connecting " << from << " --" << letter << "--> " << to << Endl); - Y_ASSERT(mNewTerminals.find(from) == mNewTerminals.end()); + Y_ASSERT(mNewTerminals.find(from) == mNewTerminals.end()); mNewFsm.Connect(from, to, letter); } typedef bool Result; diff --git a/contrib/libs/pire/pire/partition.h b/contrib/libs/pire/pire/partition.h index 39a26b53e4..85a9af8863 100644 --- a/contrib/libs/pire/pire/partition.h +++ b/contrib/libs/pire/pire/partition.h @@ -98,7 +98,7 @@ public: if (it == m_inv.end()) throw Error("Partition::index(): attempted to obtain an index of nonexistent item"); auto it2 = m_set.find(it->second); - Y_ASSERT(it2 != m_set.end()); + Y_ASSERT(it2 != m_set.end()); return it2->second.first; } /// Returns the whole equivalence class of @p t (i.e. 
item @p i @@ -109,7 +109,7 @@ public: if (it == m_inv.end()) throw Error("Partition::index(): attempted to obtain an index of nonexistent item"); auto it2 = m_set.find(it->second); - Y_ASSERT(it2 != m_set.end()); + Y_ASSERT(it2 != m_set.end()); return it2->second.second; } diff --git a/contrib/libs/pire/pire/run.h b/contrib/libs/pire/pire/run.h index 75959379ed..f6e1ff734d 100644 --- a/contrib/libs/pire/pire/run.h +++ b/contrib/libs/pire/pire/run.h @@ -54,7 +54,7 @@ template<class Scanner> PIRE_FORCED_INLINE PIRE_HOT_FUNCTION void Step(const Scanner& scanner, typename Scanner::State& state, Char ch) { - Y_ASSERT(ch < MaxCharUnaligned); + Y_ASSERT(ch < MaxCharUnaligned); typename Scanner::Action a = scanner.Next(state, ch); scanner.TakeAction(state, a); } @@ -112,9 +112,9 @@ namespace Impl { PIRE_FORCED_INLINE PIRE_HOT_FUNCTION Action SafeRunChunk(const Scanner& scanner, typename Scanner::State& state, const size_t* p, size_t pos, size_t size, Pred pred) { - Y_ASSERT(pos <= sizeof(size_t)); - Y_ASSERT(size <= sizeof(size_t)); - Y_ASSERT(pos + size <= sizeof(size_t)); + Y_ASSERT(pos <= sizeof(size_t)); + Y_ASSERT(size <= sizeof(size_t)); + Y_ASSERT(pos + size <= sizeof(size_t)); if (PIRE_UNLIKELY(size == 0)) return Continue; @@ -133,9 +133,9 @@ namespace Impl { PIRE_FORCED_INLINE PIRE_HOT_FUNCTION Action RunChunk(const Scanner& scanner, typename Scanner::State& state, const size_t* p, size_t pos, size_t size, Pred pred) { - Y_ASSERT(pos <= sizeof(size_t)); - Y_ASSERT(size <= sizeof(size_t)); - Y_ASSERT(pos + size <= sizeof(size_t)); + Y_ASSERT(pos <= sizeof(size_t)); + Y_ASSERT(size <= sizeof(size_t)); + Y_ASSERT(pos + size <= sizeof(size_t)); if (PIRE_UNLIKELY(size == 0)) return Continue; diff --git a/contrib/libs/pire/pire/scanner_io.cpp b/contrib/libs/pire/pire/scanner_io.cpp index 4fd5e9792c..3956e3c6ed 100644 --- a/contrib/libs/pire/pire/scanner_io.cpp +++ b/contrib/libs/pire/pire/scanner_io.cpp @@ -43,7 +43,7 @@ void SimpleScanner::Save(yostream* s) const 
SavePodType(s, Empty()); Impl::AlignSave(s, sizeof(Empty())); if (!Empty()) { - Y_ASSERT(m_buffer); + Y_ASSERT(m_buffer); Impl::AlignedSaveArray(s, m_buffer.Get(), BufSize()); } } @@ -77,7 +77,7 @@ void SlowScanner::Save(yostream* s) const SavePodType(s, Empty()); Impl::AlignSave(s, sizeof(Empty())); if (!Empty()) { - Y_ASSERT(!m_vec.empty()); + Y_ASSERT(!m_vec.empty()); Impl::AlignedSaveArray(s, m_letters, MaxChar); Impl::AlignedSaveArray(s, m_finals, m.statesCount); diff --git a/contrib/libs/pire/pire/scanners/loaded.h b/contrib/libs/pire/pire/scanners/loaded.h index 4d72d783a9..120dc403b7 100644 --- a/contrib/libs/pire/pire/scanners/loaded.h +++ b/contrib/libs/pire/pire/scanners/loaded.h @@ -188,9 +188,9 @@ public: void SetJump(size_t oldState, Char c, size_t newState, Action action) { - Y_ASSERT(m_buffer); - Y_ASSERT(oldState < m.statesCount); - Y_ASSERT(newState < m.statesCount); + Y_ASSERT(m_buffer); + Y_ASSERT(oldState < m.statesCount); + Y_ASSERT(newState < m.statesCount); size_t shift = (newState - oldState) * StateSize(); Transition tr; @@ -201,8 +201,8 @@ public: Action RemapAction(Action action) { return action; } - void SetInitial(size_t state) { Y_ASSERT(m_buffer); m.initial = reinterpret_cast<size_t>(m_jumps + state * m.lettersCount); } - void SetTag(size_t state, Tag tag) { Y_ASSERT(m_buffer); m_tags[state] = tag; } + void SetInitial(size_t state) { Y_ASSERT(m_buffer); m.initial = reinterpret_cast<size_t>(m_jumps + state * m.lettersCount); } + void SetTag(size_t state, Tag tag) { Y_ASSERT(m_buffer); m_tags[state] = tag; } void FinishBuild() {} size_t StateIdx(InternalState s) const diff --git a/contrib/libs/pire/pire/scanners/multi.h b/contrib/libs/pire/pire/scanners/multi.h index 52b97aadc1..29679e416e 100644 --- a/contrib/libs/pire/pire/scanners/multi.h +++ b/contrib/libs/pire/pire/scanners/multi.h @@ -170,17 +170,17 @@ public: Action NextTranslated(State& state, Char letter) const { PIRE_IFDEBUG( - Y_ASSERT(state >= (size_t)m_transitions); - 
Y_ASSERT(state < (size_t)(m_transitions + RowSize()*Size())); - Y_ASSERT((state - (size_t)m_transitions) % (RowSize()*sizeof(Transition)) == 0); + Y_ASSERT(state >= (size_t)m_transitions); + Y_ASSERT(state < (size_t)(m_transitions + RowSize()*Size())); + Y_ASSERT((state - (size_t)m_transitions) % (RowSize()*sizeof(Transition)) == 0); ); state = Relocation::Go(state, reinterpret_cast<const Transition*>(state)[letter]); PIRE_IFDEBUG( - Y_ASSERT(state >= (size_t)m_transitions); - Y_ASSERT(state < (size_t)(m_transitions + RowSize()*Size())); - Y_ASSERT((state - (size_t)m_transitions) % (RowSize()*sizeof(Transition)) == 0); + Y_ASSERT(state >= (size_t)m_transitions); + Y_ASSERT(state < (size_t)(m_transitions + RowSize()*Size())); + Y_ASSERT((state - (size_t)m_transitions) % (RowSize()*sizeof(Transition)) == 0); ); return 0; @@ -222,8 +222,8 @@ public: void Swap(Scanner& s) { - Y_ASSERT(m.relocationSignature == s.m.relocationSignature); - Y_ASSERT(m.shortcuttingSignature == s.m.shortcuttingSignature); + Y_ASSERT(m.relocationSignature == s.m.relocationSignature); + Y_ASSERT(m.shortcuttingSignature == s.m.shortcuttingSignature); DoSwap(m_buffer, s.m_buffer); DoSwap(m.statesCount, s.m.statesCount); DoSwap(m.lettersCount, s.m.lettersCount); @@ -413,8 +413,8 @@ protected: // Values in letter-to-leterclass table take into account row header size for (size_t c = 0; c < MaxChar; ++c) { m_letters[c] = s.m_letters[c] - s.HEADER_SIZE + HEADER_SIZE; - Y_ASSERT(c == Epsilon || m_letters[c] >= HEADER_SIZE); - Y_ASSERT(c == Epsilon || m_letters[c] < RowSize()); + Y_ASSERT(c == Epsilon || m_letters[c] >= HEADER_SIZE); + Y_ASSERT(c == Epsilon || m_letters[c] < RowSize()); } memcpy(m_final, s.m_final, m.finalTableSize * sizeof(*m_final)); memcpy(m_finalIndex, s.m_finalIndex, m.statesCount * sizeof(*m_finalIndex)); @@ -433,8 +433,8 @@ protected: size_t destIndex = s.StateIndex(AnotherRelocation::Go(oldstate, os[let + s.HEADER_SIZE])); Transition tr = Relocation::Diff(newstate, 
IndexToState(destIndex)); ns[let + HEADER_SIZE] = tr; - Y_ASSERT(Relocation::Go(newstate, tr) >= (size_t)m_transitions); - Y_ASSERT(Relocation::Go(newstate, tr) < (size_t)(m_transitions + RowSize()*Size())); + Y_ASSERT(Relocation::Go(newstate, tr) >= (size_t)m_transitions); + Y_ASSERT(Relocation::Go(newstate, tr) < (size_t)(m_transitions + RowSize()*Size())); } } } @@ -447,9 +447,9 @@ protected: void SetJump(size_t oldState, Char c, size_t newState, unsigned long /*payload*/ = 0) { - Y_ASSERT(m_buffer); - Y_ASSERT(oldState < m.statesCount); - Y_ASSERT(newState < m.statesCount); + Y_ASSERT(m_buffer); + Y_ASSERT(oldState < m.statesCount); + Y_ASSERT(newState < m.statesCount); m_transitions[oldState * RowSize() + m_letters[c]] = Relocation::Diff(IndexToState(oldState), IndexToState(newState)); @@ -459,20 +459,20 @@ protected: void SetInitial(size_t state) { - Y_ASSERT(m_buffer); + Y_ASSERT(m_buffer); m.initial = IndexToState(state); } void SetTag(size_t state, size_t value) { - Y_ASSERT(m_buffer); + Y_ASSERT(m_buffer); Header(IndexToState(state)).Common.Flags = value; } // Fill shortcut masks for all the states void BuildShortcuts() { - Y_ASSERT(m_buffer); + Y_ASSERT(m_buffer); // Build the mapping from letter classes to characters TVector< TVector<char> > letters(RowSize()); @@ -512,7 +512,7 @@ protected: // Fills final states table and builds shortcuts if possible void FinishBuild() { - Y_ASSERT(m_buffer); + Y_ASSERT(m_buffer); auto finalWriter = m_final; for (size_t state = 0; state != Size(); ++state) { m_finalIndex[state] = finalWriter - m_final; @@ -710,17 +710,17 @@ public: inline const Word& Mask(size_t i, size_t alignOffset) const { - Y_ASSERT(i < ExitMaskCount); - Y_ASSERT(alignOffset < SizeTInMaxSizeWord); + Y_ASSERT(i < ExitMaskCount); + Y_ASSERT(alignOffset < SizeTInMaxSizeWord); const Word* p = (const Word*)(ExitMasksArray + alignOffset + MaskSizeInSizeT * i); - Y_ASSERT(IsAligned(p, sizeof(Word))); + Y_ASSERT(IsAligned(p, sizeof(Word))); return *p; } 
PIRE_FORCED_INLINE PIRE_HOT_FUNCTION size_t Mask(size_t i) const { - Y_ASSERT(i < ExitMaskCount); + Y_ASSERT(i < ExitMaskCount); return ExitMasksArray[MaskSizeInSizeT*i]; } @@ -922,7 +922,7 @@ private: typename ScannerType::State stateBefore = st; for (const char* pos = begin; pos != end; ++pos) { Step(scanner, st, (unsigned char)*pos); - Y_ASSERT(st == stateBefore); + Y_ASSERT(st == stateBefore); } } @@ -951,7 +951,7 @@ public: } // Row size should be a multiple of MaxSizeWord size. Then alignOffset is the same for any state - Y_ASSERT((scanner.RowSize()*sizeof(typename ScannerType::Transition)) % sizeof(MaxSizeWord) == 0); + Y_ASSERT((scanner.RowSize()*sizeof(typename ScannerType::Transition)) % sizeof(MaxSizeWord) == 0); size_t alignOffset = (AlignUp((size_t)scanner.m_transitions, sizeof(Word)) - (size_t)scanner.m_transitions) / sizeof(size_t); bool noShortcut = Shortcutting::NoShortcut(scanner, state); diff --git a/contrib/libs/pire/pire/scanners/simple.h b/contrib/libs/pire/pire/scanners/simple.h index d70e699f15..ef959aeed1 100644 --- a/contrib/libs/pire/pire/scanners/simple.h +++ b/contrib/libs/pire/pire/scanners/simple.h @@ -207,9 +207,9 @@ protected: void SetJump(size_t oldState, Char c, size_t newState) { - Y_ASSERT(m_buffer); - Y_ASSERT(oldState < m.statesCount); - Y_ASSERT(newState < m.statesCount); + Y_ASSERT(m_buffer); + Y_ASSERT(oldState < m.statesCount); + Y_ASSERT(newState < m.statesCount); m_transitions[oldState * STATE_ROW_SIZE + 1 + c] = (((newState - oldState) * STATE_ROW_SIZE) * sizeof(Transition)); } @@ -218,13 +218,13 @@ protected: void SetInitial(size_t state) { - Y_ASSERT(m_buffer); + Y_ASSERT(m_buffer); m.initial = reinterpret_cast<size_t>(m_transitions + state * STATE_ROW_SIZE + 1); } void SetTag(size_t state, size_t tag) { - Y_ASSERT(m_buffer); + Y_ASSERT(m_buffer); m_transitions[state * STATE_ROW_SIZE] = tag; } diff --git a/contrib/libs/pire/pire/scanners/slow.h b/contrib/libs/pire/pire/scanners/slow.h index dceb1ac65b..6adfcb8c1d 
100644 --- a/contrib/libs/pire/pire/scanners/slow.h +++ b/contrib/libs/pire/pire/scanners/slow.h @@ -383,9 +383,9 @@ private: void SetJump(size_t oldState, Char c, size_t newState, unsigned long action) { - Y_ASSERT(!m_vec.empty()); - Y_ASSERT(oldState < m.statesCount); - Y_ASSERT(newState < m.statesCount); + Y_ASSERT(!m_vec.empty()); + Y_ASSERT(oldState < m.statesCount); + Y_ASSERT(newState < m.statesCount); size_t idx = oldState * m.lettersCount + m_letters[c]; m_vec[idx].push_back(newState); diff --git a/contrib/libs/pire/pire/stub/stl.h b/contrib/libs/pire/pire/stub/stl.h index 0211c06277..98ebd9f7c6 100644 --- a/contrib/libs/pire/pire/stub/stl.h +++ b/contrib/libs/pire/pire/stub/stl.h @@ -56,8 +56,8 @@ namespace Pire { Error(const ystring& msg) { *this << msg; } }; - typedef IOutputStream yostream; - typedef IInputStream yistream; + typedef IOutputStream yostream; + typedef IInputStream yistream; template<class Iter> ystring Join(Iter begin, Iter end, const ystring& separator) { return JoinStrings(begin, end, separator); } diff --git a/contrib/libs/protobuf/src/google/protobuf/json_util.cc b/contrib/libs/protobuf/src/google/protobuf/json_util.cc index 7264b06814..450ce1da57 100644 --- a/contrib/libs/protobuf/src/google/protobuf/json_util.cc +++ b/contrib/libs/protobuf/src/google/protobuf/json_util.cc @@ -4,7 +4,7 @@ namespace google { namespace protobuf { namespace io { -void PrintJSONString(IOutputStream& stream, const TProtoStringType& string) { +void PrintJSONString(IOutputStream& stream, const TProtoStringType& string) { stream << '"'; for (const char c: string) { switch(c) { diff --git a/contrib/libs/protobuf/src/google/protobuf/json_util.h b/contrib/libs/protobuf/src/google/protobuf/json_util.h index 52a4a69a57..ad3a4a5bfe 100644 --- a/contrib/libs/protobuf/src/google/protobuf/json_util.h +++ b/contrib/libs/protobuf/src/google/protobuf/json_util.h @@ -6,7 +6,7 @@ namespace google { namespace protobuf { namespace io { -void PrintJSONString(IOutputStream& 
stream, const TProtoStringType& string); +void PrintJSONString(IOutputStream& stream, const TProtoStringType& string); template<class T> struct TAsJSON { @@ -20,7 +20,7 @@ public: }; template<class T> -inline IOutputStream& operator <<(IOutputStream& stream, const TAsJSON<T>& protoAsJSON) { +inline IOutputStream& operator <<(IOutputStream& stream, const TAsJSON<T>& protoAsJSON) { protoAsJSON.T_.PrintJSON(stream); return stream; }; diff --git a/contrib/libs/protobuf/src/google/protobuf/message.h b/contrib/libs/protobuf/src/google/protobuf/message.h index 43c795471a..37d92ea393 100644 --- a/contrib/libs/protobuf/src/google/protobuf/message.h +++ b/contrib/libs/protobuf/src/google/protobuf/message.h @@ -344,7 +344,7 @@ class PROTOBUF_EXPORT Message : public MessageLite { bool SerializeToArcadiaStream(IOutputStream* output) const; bool SerializePartialToArcadiaStream(IOutputStream* output) const; - virtual void PrintJSON(IOutputStream&) const; + virtual void PrintJSON(IOutputStream&) const; io::TAsJSON<Message> AsJSON() const { return io::TAsJSON<Message>(*this); diff --git a/contrib/libs/protobuf/src/google/protobuf/messagext.cc b/contrib/libs/protobuf/src/google/protobuf/messagext.cc index e4cfa141f1..1923205598 100644 --- a/contrib/libs/protobuf/src/google/protobuf/messagext.cc +++ b/contrib/libs/protobuf/src/google/protobuf/messagext.cc @@ -101,7 +101,7 @@ bool TOutputStreamProxy::Write(const void* buffer, int size) { } -void TProtoSerializer::Save(IOutputStream* out, const Message& msg) { +void TProtoSerializer::Save(IOutputStream* out, const Message& msg) { int size = msg.ByteSize(); if (size > MaxSizeBytes) { ythrow yexception() << "Message size " << size << " exceeds " << MaxSizeBytes; @@ -113,11 +113,11 @@ void TProtoSerializer::Save(IOutputStream* out, const Message& msg) { ythrow yexception() << "Cannot write protobuf::Message to output stream"; } -// Reading varint32 directly from IInputStream (might be slow if input requires buffering). 
+// Reading varint32 directly from IInputStream (might be slow if input requires buffering). // Copy-pasted with small modifications from protobuf/io/coded_stream (ReadVarint32FromArray) // Returns true if succeeded, false if stream has ended, throws exception if data is corrupted -static bool ReadVarint32(IInputStream* input, ui32& size) { +static bool ReadVarint32(IInputStream* input, ui32& size) { size_t res; ui8 b; @@ -166,7 +166,7 @@ private: ui8* Buffer; }; -void TProtoSerializer::Load(IInputStream* input, Message& msg) { +void TProtoSerializer::Load(IInputStream* input, Message& msg) { msg.Clear(); MergeFrom(input, msg); } @@ -184,7 +184,7 @@ void TProtoSerializer::MergeFrom(IInputStream* input, Message& msg) { ythrow yexception() << "Cannot read protobuf::Message (" << msg.GetTypeName() << ") from input stream"; } -TProtoReader::TProtoReader(IInputStream* input, const size_t bufferSize) +TProtoReader::TProtoReader(IInputStream* input, const size_t bufferSize) : IStream(input) , Buffer(bufferSize) { diff --git a/contrib/libs/protobuf/src/google/protobuf/messagext.h b/contrib/libs/protobuf/src/google/protobuf/messagext.h index b9cb169187..9176cee1e8 100644 --- a/contrib/libs/protobuf/src/google/protobuf/messagext.h +++ b/contrib/libs/protobuf/src/google/protobuf/messagext.h @@ -2,7 +2,7 @@ #include <google/protobuf/stubs/port.h> #include <google/protobuf/io/coded_stream.h> -#include <util/stream/output.h> +#include <util/stream/output.h> #include <util/generic/buffer.h> #include <google/protobuf/io/zero_copy_stream_impl.h> @@ -32,13 +32,13 @@ namespace io { /// Parse*Seq methods read message size from stream to find a message boundary -/// there is not parse from IInputStream, because it is not push-backable +/// there is not parse from IInputStream, because it is not push-backable bool ParseFromCodedStreamSeq(Message* msg, io::CodedInputStream* input); bool ParseFromZeroCopyStreamSeq(Message* msg, io::ZeroCopyInputStream* input); /// Serialize*Seq methods 
write message size as varint before writing a message -/// there is no serialize to IOutputStream, because it is not push-backable +/// there is no serialize to IOutputStream, because it is not push-backable bool SerializePartialToCodedStreamSeq(const Message* msg, io::CodedOutputStream* output); bool SerializeToCodedStreamSeq(const Message* msg, io::CodedOutputStream* output); @@ -62,7 +62,7 @@ private: class TInputStreamProxy: public io::CopyingInputStream, public TErrorState { public: - inline TInputStreamProxy(IInputStream* slave) + inline TInputStreamProxy(IInputStream* slave) : mSlave(slave) { } @@ -70,12 +70,12 @@ class TInputStreamProxy: public io::CopyingInputStream, public TErrorState { virtual int Read(void* buffer, int size); private: - IInputStream* mSlave; + IInputStream* mSlave; }; class TOutputStreamProxy: public io::CopyingOutputStream, public TErrorState { public: - inline TOutputStreamProxy(IOutputStream* slave) + inline TOutputStreamProxy(IOutputStream* slave) : mSlave(slave) { } @@ -83,13 +83,13 @@ class TOutputStreamProxy: public io::CopyingOutputStream, public TErrorState { virtual bool Write(const void* buffer, int size); private: - IOutputStream* mSlave; + IOutputStream* mSlave; }; class TCopyingInputStreamAdaptor: public TInputStreamProxy, public CopyingInputStreamAdaptor { public: - TCopyingInputStreamAdaptor(IInputStream* inputStream) + TCopyingInputStreamAdaptor(IInputStream* inputStream) : TInputStreamProxy(inputStream) , CopyingInputStreamAdaptor(this) { } @@ -97,7 +97,7 @@ public: class TCopyingOutputStreamAdaptor: public TOutputStreamProxy, public CopyingOutputStreamAdaptor { public: - TCopyingOutputStreamAdaptor(IOutputStream* outputStream) + TCopyingOutputStreamAdaptor(IOutputStream* outputStream) : TOutputStreamProxy(outputStream) , CopyingOutputStreamAdaptor(this) { } @@ -106,8 +106,8 @@ public: class TProtoSerializer { public: - static void Save(IOutputStream* output, const Message& msg); - static void Load(IInputStream* input, 
Message& msg); + static void Save(IOutputStream* output, const Message& msg); + static void Load(IInputStream* input, Message& msg); static void MergeFrom(IInputStream* input, Message& msg); // similar interface for protobuf coded streams @@ -130,7 +130,7 @@ public: */ class TProtoReader { public: - TProtoReader(IInputStream* input, const size_t bufferSize = DefaultBufferSize); + TProtoReader(IInputStream* input, const size_t bufferSize = DefaultBufferSize); /** * Reads protobuf message @@ -143,7 +143,7 @@ public: bool Load(Message& msg); private: - IInputStream* IStream; + IInputStream* IStream; TBuffer Buffer; static const size_t DefaultBufferSize = (1 << 16); @@ -154,11 +154,11 @@ private: } // namespace google // arcadia-style serialization -inline void Save(IOutputStream* output, const google::protobuf::Message& msg) { +inline void Save(IOutputStream* output, const google::protobuf::Message& msg) { google::protobuf::io::TProtoSerializer::Save(output, msg); } -inline void Load(IInputStream* input, google::protobuf::Message& msg) { +inline void Load(IInputStream* input, google::protobuf::Message& msg) { google::protobuf::io::TProtoSerializer::Load(input, msg); } diff --git a/contrib/libs/ya.make b/contrib/libs/ya.make index 7006034df7..9c4640fdcf 100644 --- a/contrib/libs/ya.make +++ b/contrib/libs/ya.make @@ -15,7 +15,7 @@ RECURSE( avs-device-sdk aws-sdk-cpp backtrace - base64 + base64 bdb bdb/ut benchmark @@ -209,9 +209,9 @@ RECURSE( llvm11 llvm12 llvm8 - lmdbxx - lmdbxx/check - lmdbxx/example + lmdbxx + lmdbxx/check + lmdbxx/example lua lua-cjson luajit_21 @@ -269,7 +269,7 @@ RECURSE( pffft pfr picohttpparser - #pire/ut + #pire/ut pixman poco portaudio @@ -277,7 +277,7 @@ RECURSE( proj protobuf protobuf/python - protobuf-mutator + protobuf-mutator protobuf_std protoc_std psimd @@ -312,7 +312,7 @@ RECURSE( sqlite3 srt stan - stan-math + stan-math stan/stan/command stxxl subversion |