diff options
author | e-sidorov <e-sidorov@yandex-team.ru> | 2022-02-10 16:46:06 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:06 +0300 |
commit | ce2ad6f6a6f6025e37fb7f8debe7cefd3aa2307c (patch) | |
tree | 1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /library | |
parent | 1ec091f8998d76a211c6015ba6865a73b29d676a (diff) | |
download | ydb-ce2ad6f6a6f6025e37fb7f8debe7cefd3aa2307c.tar.gz |
Restoring authorship annotation for <e-sidorov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library')
58 files changed, 4304 insertions, 4304 deletions
diff --git a/library/cpp/digest/argonish/AUTHORS b/library/cpp/digest/argonish/AUTHORS index bf88223a88..608109b6da 100644 --- a/library/cpp/digest/argonish/AUTHORS +++ b/library/cpp/digest/argonish/AUTHORS @@ -1,3 +1,3 @@ -The following authors have created the source code of "Argonish" published and distributed by YANDEX LLC as the owner: - -Evgeny Sidorov <e-sidorov@yandex-team.ru> +The following authors have created the source code of "Argonish" published and distributed by YANDEX LLC as the owner: + +Evgeny Sidorov <e-sidorov@yandex-team.ru> diff --git a/library/cpp/digest/argonish/CONTRIBUTING.md b/library/cpp/digest/argonish/CONTRIBUTING.md index 5bf5833cf1..9ebd417e21 100644 --- a/library/cpp/digest/argonish/CONTRIBUTING.md +++ b/library/cpp/digest/argonish/CONTRIBUTING.md @@ -1,35 +1,35 @@ -# Notice to external contributors - - -## General info - -Hello! In order for us (YANDEX LLC) to accept patches and other contributions from you, you will have to adopt our Yandex Contributor License Agreement (the “**CLA**”). The current version of the CLA you may find here: -1) https://yandex.ru/legal/cla/?lang=en (in English) and -2) https://yandex.ru/legal/cla/?lang=ru (in Russian). - -By adopting the CLA, you state the following: - -* You obviously wish and are willingly licensing your contributions to us for our open source projects under the terms of the CLA, -* You has read the terms and conditions of the CLA and agree with them in full, -* You are legally able to provide and license your contributions as stated, -* We may use your contributions for our open source projects and for any other our project too, -* We rely on your assurances concerning the rights of third parties in relation to your contributes. - -If you agree with these principles, please read and adopt our CLA. By providing us your contributions, you hereby declare that you has already read and adopt our CLA, and we may freely merge your contributions with our corresponding open source project and use it in further in accordance with terms and conditions of the CLA. - -## Provide contributions - -If you have already adopted terms and conditions of the CLA, you are able to provide your contributes. When you submit your pull request, please add the following information into it: - -``` -I hereby agree to the terms of the CLA available at: [link]. -``` - -Replace the bracketed text as follows: -* [link] is the link at the current version of the CLA (you may add here a link https://yandex.ru/legal/cla/?lang=en (in English) or a link https://yandex.ru/legal/cla/?lang=ru (in Russian). - -It is enough to provide us such notification at once. - -## Other questions - -If you have any questions, please mail us at opensource@yandex-team.ru. +# Notice to external contributors + + +## General info + +Hello! In order for us (YANDEX LLC) to accept patches and other contributions from you, you will have to adopt our Yandex Contributor License Agreement (the “**CLA**”). The current version of the CLA you may find here: +1) https://yandex.ru/legal/cla/?lang=en (in English) and +2) https://yandex.ru/legal/cla/?lang=ru (in Russian). + +By adopting the CLA, you state the following: + +* You obviously wish and are willingly licensing your contributions to us for our open source projects under the terms of the CLA, +* You has read the terms and conditions of the CLA and agree with them in full, +* You are legally able to provide and license your contributions as stated, +* We may use your contributions for our open source projects and for any other our project too, +* We rely on your assurances concerning the rights of third parties in relation to your contributes. + +If you agree with these principles, please read and adopt our CLA. By providing us your contributions, you hereby declare that you has already read and adopt our CLA, and we may freely merge your contributions with our corresponding open source project and use it in further in accordance with terms and conditions of the CLA. + +## Provide contributions + +If you have already adopted terms and conditions of the CLA, you are able to provide your contributes. When you submit your pull request, please add the following information into it: + +``` +I hereby agree to the terms of the CLA available at: [link]. +``` + +Replace the bracketed text as follows: +* [link] is the link at the current version of the CLA (you may add here a link https://yandex.ru/legal/cla/?lang=en (in English) or a link https://yandex.ru/legal/cla/?lang=ru (in Russian). + +It is enough to provide us such notification at once. + +## Other questions + +If you have any questions, please mail us at opensource@yandex-team.ru. diff --git a/library/cpp/digest/argonish/LICENSE b/library/cpp/digest/argonish/LICENSE index b6ca8327d6..67fc7b16f8 100644 --- a/library/cpp/digest/argonish/LICENSE +++ b/library/cpp/digest/argonish/LICENSE @@ -1,22 +1,22 @@ -Copyright (c) 2017, YANDEX LLC -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, are permitted provided -that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, this list of conditions and -the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and -the following disclaimer in the documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote -products derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, -INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +Copyright (c) 2017, YANDEX LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are permitted provided +that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and +the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and +the following disclaimer in the documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote +products derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, +INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/library/cpp/digest/argonish/README.md b/library/cpp/digest/argonish/README.md index 83bdd9f2a1..7b3ae79cca 100644 --- a/library/cpp/digest/argonish/README.md +++ b/library/cpp/digest/argonish/README.md @@ -1,74 +1,74 @@ -Argonish --------- - -Implementation of argon2 (i, d, id) algorithms with CPU dispatching. The list of features includes: -* C++14 interface -* constexpr and partial templates to get rid of useless branches (give +2% to performance) -* AVX2 implementation of Argon2 (allows to gain +30..40% to performance) -* Vectorized Blake2B implementation (including AVX2 version) -* OpenMP for multithreading in contrast to pthread in the reference implementation - -Acknowledgements ----------------- - -This project uses some ideas and pieces of code from the following projects licensed under CC0: -* https://github.com/P-H-C/phc-winner-argon2 -* https://github.com/BLAKE2/BLAKE2 - -I'm also thankful to the following people whose fruitful feedback improved the project: -* Igor Klevanets (cerevra@yandex-team.ru) - -Benchmark results ------------------ - -On my OS X 10.11, MacBook Pro (Early 2015, Core i5 2,7 GHz, 16 GB 1867 MHz DDR3) for `(Argon2d, 1, 2048, 1)` it gives: - -``` ----- REF ---- -Num | Count | Time -0 | 4555 | 9.93424 -1 | 4556 | 9.94529 ----- SSE2 --- -Num | Count | Time -0 | 6606 | 9.93117 -1 | 6594 | 9.93259 ---- SSSE3 --- -Num | Count | Time -0 | 7393 | 9.93866 -1 | 7392 | 9.94874 ---- SSE41 --- -Num | Count | Time -0 | 7152 | 9.88648 -1 | 7112 | 9.87276 ------AVX2---- -Num | Count | Time -0 | 11120 | 9.9273 -1 | 11138 | 9.94308 -``` - -How to use ----------- - -``` +Argonish +-------- + +Implementation of argon2 (i, d, id) algorithms with CPU dispatching. The list of features includes: +* C++14 interface +* constexpr and partial templates to get rid of useless branches (give +2% to performance) +* AVX2 implementation of Argon2 (allows to gain +30..40% to performance) +* Vectorized Blake2B implementation (including AVX2 version) +* OpenMP for multithreading in contrast to pthread in the reference implementation + +Acknowledgements +---------------- + +This project uses some ideas and pieces of code from the following projects licensed under CC0: +* https://github.com/P-H-C/phc-winner-argon2 +* https://github.com/BLAKE2/BLAKE2 + +I'm also thankful to the following people whose fruitful feedback improved the project: +* Igor Klevanets (cerevra@yandex-team.ru) + +Benchmark results +----------------- + +On my OS X 10.11, MacBook Pro (Early 2015, Core i5 2,7 GHz, 16 GB 1867 MHz DDR3) for `(Argon2d, 1, 2048, 1)` it gives: + +``` +---- REF ---- +Num | Count | Time +0 | 4555 | 9.93424 +1 | 4556 | 9.94529 +---- SSE2 --- +Num | Count | Time +0 | 6606 | 9.93117 +1 | 6594 | 9.93259 +--- SSSE3 --- +Num | Count | Time +0 | 7393 | 9.93866 +1 | 7392 | 9.94874 +--- SSE41 --- +Num | Count | Time +0 | 7152 | 9.88648 +1 | 7112 | 9.87276 +-----AVX2---- +Num | Count | Time +0 | 11120 | 9.9273 +1 | 11138 | 9.94308 +``` + +How to use +---------- + +``` #include <library/cpp/digest/argonish/argon2.h> -... -uint32_t tcost = 1; /* one pass */ -uint32_t mcost = 32; /* in KB */ -uint32_t threads = 1; /* one thread version */ -NArgonish::TArgon2Factory afactory; -THolder<NArgonish::IArgon2Base> argon2 = afactory.Create(NArgonish::EArgon2Type::Argon2d, tcost, mcost, threads); -argon2->Hash(input, insize, salt, saltsize, out, outlen); -... +... +uint32_t tcost = 1; /* one pass */ +uint32_t mcost = 32; /* in KB */ +uint32_t threads = 1; /* one thread version */ +NArgonish::TArgon2Factory afactory; +THolder<NArgonish::IArgon2Base> argon2 = afactory.Create(NArgonish::EArgon2Type::Argon2d, tcost, mcost, threads); +argon2->Hash(input, insize, salt, saltsize, out, outlen); +... #include <library/cpp/digest/argonish/blake2b.h> -... -NArgonish::TBlake2BFactory bfactory; -uint32_t outlen = 32; -THolder<NArgonish::IBlake2Base> blake2b = bfactory.Create(outlen); -blake2b->Update(in, inlen); -blake2b->Final(out, outlen); -``` - -How to add your own Argon2 configuration ----------------------------------------- - -Just modify the `internal/proxy/macro/proxy_macros.h` and add appropriate `ARGON2_INSTANCE_DECL` declaration. +... +NArgonish::TBlake2BFactory bfactory; +uint32_t outlen = 32; +THolder<NArgonish::IBlake2Base> blake2b = bfactory.Create(outlen); +blake2b->Update(in, inlen); +blake2b->Final(out, outlen); +``` + +How to add your own Argon2 configuration +---------------------------------------- + +Just modify the `internal/proxy/macro/proxy_macros.h` and add appropriate `ARGON2_INSTANCE_DECL` declaration. diff --git a/library/cpp/digest/argonish/argon2.h b/library/cpp/digest/argonish/argon2.h index efebed6e35..bbe8ad52f3 100644 --- a/library/cpp/digest/argonish/argon2.h +++ b/library/cpp/digest/argonish/argon2.h @@ -1,147 +1,147 @@ -#pragma once - -#include "common.h" - -#include <util/generic/ptr.h> -#include <util/system/defaults.h> - -namespace NArgonish { - /** - * Type of Argon2 algorithm - */ - enum class EArgon2Type : ui32 { - Argon2d = 0, /// Data dependent version of Argon2 - Argon2i = 1, /// Data independent version of Argon2 - Argon2id = 2 /// Mixed version of Argon2 - }; - - /** - * Interface of all Argon2 instances - */ - class IArgon2Base { - public: - virtual ~IArgon2Base() { - } - /** - * Applies Argon2 algorithm - * @param pwd password - * @param pwdlen password length - * @param salt salt - * @param saltlen salt length - * @param out output - * @param outlen output length - * @param aad additional authenticated data (optional) - * @param aadlen additional authenticated data length (optional) - */ - virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, - ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const = 0; - - /** - * Applies Argon2 algorithm to a password and compares the result with the hash data - * @param pwd password - * @param pwdlen password length - * @param salt salt - * @param saltlen salt length - * @param hash hash value to compare with the result - * @param hashlen hash value length - * @param aad additional authenticated data (optional) - * @param adadlen additional authenticated data length (optional) - * @return true if the Argon2 result equals to the value in hash - */ - virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, - const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 adadlen = 0) const = 0; - - /** - * Applies Argon2 algorithms but allows to pass memory buffer for work. - * This allows to use external memory allocator or reuse already allocated memory buffer. - * @param memory memory buffer for Argon2 calculations - * @param mlen memory buffer len (must be at least the value returned by the GetMemorySize method) - * @param pwd password to hash - * @param pwdlen password length - * @param salt salt - * @param saltlen salt length - * @param out output buffer - * @param outlen output length - * @param aad additional authenticated data (optional) - * @param aadlen additional authenticated data length (optional) - */ - virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, - const ui8* aad = nullptr, ui32 aadlen = 0) const = 0; - /** - * Applies Argon2 algorithm to a password and compares the result with the hash data. - * This method allows to use a custom memory allocator or reuse already allocated memory buffer. - * @param memory memory buffer for Argon2 calculations - * @param mlen memory buffer length - * @param pwd password to hash - * @param pwdlen password length - * @param salt salt - * @param saltlen salt length - * @param hash hash value to compare with the result - * @param hashlen hash value length - * @param aad additional authenticated data (optional) - * @param aadlen additional authenticated data length (optional) - * @return true if the Argon2 result equals to the value in hash - */ - virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, - const ui8* aad = nullptr, ui32 aadlen = 0) const = 0; - - /** - * The function calculates the size of memory required by Argon2 algorithm - * @return memory buffer size - */ - virtual size_t GetMemorySize() const = 0; - }; - - /** - * A factory to create Argon2 instances depending on instruction set, tcost, mcost, the number of threads etc. - */ - class TArgon2Factory { - public: - /** - * Constructs a factory object - * @param skipTest if true then a simple runtime test will be skipped in the constructor (optional) - */ - TArgon2Factory(bool skipTest = false); - - /** - * Creates an instance of Argon2 algorithm. - * The particular optimization is chosen automatically based on the cpuid instruction output. - * @param atype the type of Argon2 algorithm - * @param tcost the number of passes over memory block, must be at least 1 - * @param mcost the size in kilobytes of memory block used by Argon2 - * @param threads the number of threads for parallel version of Argon2 (must be 1,2 or 4) - * @param key a secret key to use for password hashing (optional) - * @param keylen the length of the key (optional) - * @return unique_ptr to Argon2 instance. In case of error std::runtime_excetion is thrown - */ - THolder<IArgon2Base> Create(EArgon2Type atype = EArgon2Type::Argon2d, ui32 tcost = 1, ui32 mcost = 1024, - ui32 threads = 1, const ui8* key = nullptr, ui32 keylen = 0) const; - - /** - * Creates an instance of Argon2 algorithm optimized for the provided instruction set - * @param instructionSet instruction set - * @param atype the type of Argon2 algorithm - * @param tcost the number of passes over memory block, must be at least 1 - * @param mcost the size in kilobytes of memory block used by Argon2 - * @param threads the number of threads for parallel version of Argon2 (must be 1,2 or 4) - * @param key a secret key to use for password hashing (optional) - * @param keylen the length of the key (optional) - * @return unique_ptr to Argon2 instance. In case of error std::runtime_excetion is thrown - */ - THolder<IArgon2Base> Create(EInstructionSet instructionSet, EArgon2Type atype = EArgon2Type::Argon2d, ui32 tcost = 1, - ui32 mcost = 1024, ui32 threads = 1, const ui8* key = nullptr, - ui32 keylen = 0) const; - - /** - * The function returns the best instruction set available on the current CPU - * @return InstructionSet value - */ - EInstructionSet GetInstructionSet() const; - - protected: - EInstructionSet InstructionSet_ = EInstructionSet::REF; - void QuickTest_() const; - }; -} +#pragma once + +#include "common.h" + +#include <util/generic/ptr.h> +#include <util/system/defaults.h> + +namespace NArgonish { + /** + * Type of Argon2 algorithm + */ + enum class EArgon2Type : ui32 { + Argon2d = 0, /// Data dependent version of Argon2 + Argon2i = 1, /// Data independent version of Argon2 + Argon2id = 2 /// Mixed version of Argon2 + }; + + /** + * Interface of all Argon2 instances + */ + class IArgon2Base { + public: + virtual ~IArgon2Base() { + } + /** + * Applies Argon2 algorithm + * @param pwd password + * @param pwdlen password length + * @param salt salt + * @param saltlen salt length + * @param out output + * @param outlen output length + * @param aad additional authenticated data (optional) + * @param aadlen additional authenticated data length (optional) + */ + virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, + ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const = 0; + + /** + * Applies Argon2 algorithm to a password and compares the result with the hash data + * @param pwd password + * @param pwdlen password length + * @param salt salt + * @param saltlen salt length + * @param hash hash value to compare with the result + * @param hashlen hash value length + * @param aad additional authenticated data (optional) + * @param adadlen additional authenticated data length (optional) + * @return true if the Argon2 result equals to the value in hash + */ + virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, + const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 adadlen = 0) const = 0; + + /** + * Applies Argon2 algorithms but allows to pass memory buffer for work. + * This allows to use external memory allocator or reuse already allocated memory buffer. + * @param memory memory buffer for Argon2 calculations + * @param mlen memory buffer len (must be at least the value returned by the GetMemorySize method) + * @param pwd password to hash + * @param pwdlen password length + * @param salt salt + * @param saltlen salt length + * @param out output buffer + * @param outlen output length + * @param aad additional authenticated data (optional) + * @param aadlen additional authenticated data length (optional) + */ + virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, + const ui8* aad = nullptr, ui32 aadlen = 0) const = 0; + /** + * Applies Argon2 algorithm to a password and compares the result with the hash data. + * This method allows to use a custom memory allocator or reuse already allocated memory buffer. + * @param memory memory buffer for Argon2 calculations + * @param mlen memory buffer length + * @param pwd password to hash + * @param pwdlen password length + * @param salt salt + * @param saltlen salt length + * @param hash hash value to compare with the result + * @param hashlen hash value length + * @param aad additional authenticated data (optional) + * @param aadlen additional authenticated data length (optional) + * @return true if the Argon2 result equals to the value in hash + */ + virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, + const ui8* aad = nullptr, ui32 aadlen = 0) const = 0; + + /** + * The function calculates the size of memory required by Argon2 algorithm + * @return memory buffer size + */ + virtual size_t GetMemorySize() const = 0; + }; + + /** + * A factory to create Argon2 instances depending on instruction set, tcost, mcost, the number of threads etc. + */ + class TArgon2Factory { + public: + /** + * Constructs a factory object + * @param skipTest if true then a simple runtime test will be skipped in the constructor (optional) + */ + TArgon2Factory(bool skipTest = false); + + /** + * Creates an instance of Argon2 algorithm. + * The particular optimization is chosen automatically based on the cpuid instruction output. + * @param atype the type of Argon2 algorithm + * @param tcost the number of passes over memory block, must be at least 1 + * @param mcost the size in kilobytes of memory block used by Argon2 + * @param threads the number of threads for parallel version of Argon2 (must be 1,2 or 4) + * @param key a secret key to use for password hashing (optional) + * @param keylen the length of the key (optional) + * @return unique_ptr to Argon2 instance. In case of error std::runtime_excetion is thrown + */ + THolder<IArgon2Base> Create(EArgon2Type atype = EArgon2Type::Argon2d, ui32 tcost = 1, ui32 mcost = 1024, + ui32 threads = 1, const ui8* key = nullptr, ui32 keylen = 0) const; + + /** + * Creates an instance of Argon2 algorithm optimized for the provided instruction set + * @param instructionSet instruction set + * @param atype the type of Argon2 algorithm + * @param tcost the number of passes over memory block, must be at least 1 + * @param mcost the size in kilobytes of memory block used by Argon2 + * @param threads the number of threads for parallel version of Argon2 (must be 1,2 or 4) + * @param key a secret key to use for password hashing (optional) + * @param keylen the length of the key (optional) + * @return unique_ptr to Argon2 instance. In case of error std::runtime_excetion is thrown + */ + THolder<IArgon2Base> Create(EInstructionSet instructionSet, EArgon2Type atype = EArgon2Type::Argon2d, ui32 tcost = 1, + ui32 mcost = 1024, ui32 threads = 1, const ui8* key = nullptr, + ui32 keylen = 0) const; + + /** + * The function returns the best instruction set available on the current CPU + * @return InstructionSet value + */ + EInstructionSet GetInstructionSet() const; + + protected: + EInstructionSet InstructionSet_ = EInstructionSet::REF; + void QuickTest_() const; + }; +} diff --git a/library/cpp/digest/argonish/benchmark/mbench.cpp b/library/cpp/digest/argonish/benchmark/mbench.cpp index 04dc31f974..178c1169c9 100644 --- a/library/cpp/digest/argonish/benchmark/mbench.cpp +++ b/library/cpp/digest/argonish/benchmark/mbench.cpp @@ -1,64 +1,64 @@ #include <library/cpp/testing/benchmark/bench.h> #include <library/cpp/digest/argonish/argon2.h> - -Y_CPU_BENCHMARK(Argon2d_2048_REF, iface) { - NArgonish::TArgon2Factory factory; - auto argon2 = factory.Create(NArgonish::EInstructionSet::REF, NArgonish::EArgon2Type::Argon2d, 1, 2048, 1); - ui8 password[16] = {0x0}; - ui8 salt[16] = {0x01}; - ui8 result[16] = {0}; - - for (ui64 i = 0; i < iface.Iterations(); ++i) { - argon2->Hash(password, sizeof(password), salt, sizeof(salt), result, sizeof(result)); - } -} - -#if !defined(_arm64_) -Y_CPU_BENCHMARK(Argon2d_2048_SSE2, iface) { - NArgonish::TArgon2Factory factory; - auto argon2 = factory.Create(NArgonish::EInstructionSet::SSE2, NArgonish::EArgon2Type::Argon2d, 1, 2048, 1); - ui8 password[16] = {0x0}; - ui8 salt[16] = {0x01}; - ui8 result[16] = {0}; - - for (ui64 i = 0; i < iface.Iterations(); ++i) { - argon2->Hash(password, sizeof(password), salt, sizeof(salt), result, sizeof(result)); - } -} - -Y_CPU_BENCHMARK(Argon2d_2048_SSSE3, iface) { - NArgonish::TArgon2Factory factory; - auto argon2 = factory.Create(NArgonish::EInstructionSet::SSSE3, NArgonish::EArgon2Type::Argon2d, 1, 2048, 1); - ui8 password[16] = {0x0}; - ui8 salt[16] = {0x01}; - ui8 result[16] = {0}; - - for (ui64 i = 0; i < iface.Iterations(); ++i) { - argon2->Hash(password, sizeof(password), salt, sizeof(salt), result, sizeof(result)); - } -} - -Y_CPU_BENCHMARK(Argon2d_2048_SSE41, iface) { - NArgonish::TArgon2Factory factory; - auto argon2 = factory.Create(NArgonish::EInstructionSet::SSE41, NArgonish::EArgon2Type::Argon2d, 1, 2048, 1); - ui8 password[16] = {0x0}; - ui8 salt[16] = {0x01}; - ui8 result[16] = {0}; - - for (ui64 i = 0; i < iface.Iterations(); ++i) { - argon2->Hash(password, sizeof(password), salt, sizeof(salt), result, sizeof(result)); - } -} - -Y_CPU_BENCHMARK(Argon2d_2048_AVX2, iface) { - NArgonish::TArgon2Factory factory; - auto argon2 = factory.Create(NArgonish::EInstructionSet::AVX2, NArgonish::EArgon2Type::Argon2d, 1, 2048, 1); - ui8 password[16] = {0x0}; - ui8 salt[16] = {0x01}; - ui8 result[16] = {0}; - - for (ui64 i = 0; i < iface.Iterations(); ++i) { - argon2->Hash(password, sizeof(password), salt, sizeof(salt), result, sizeof(result)); - } -} -#endif + +Y_CPU_BENCHMARK(Argon2d_2048_REF, iface) { + NArgonish::TArgon2Factory factory; + auto argon2 = factory.Create(NArgonish::EInstructionSet::REF, NArgonish::EArgon2Type::Argon2d, 1, 2048, 1); + ui8 password[16] = {0x0}; + ui8 salt[16] = {0x01}; + ui8 result[16] = {0}; + + for (ui64 i = 0; i < iface.Iterations(); ++i) { + argon2->Hash(password, sizeof(password), salt, sizeof(salt), result, sizeof(result)); + } +} + +#if !defined(_arm64_) +Y_CPU_BENCHMARK(Argon2d_2048_SSE2, iface) { + NArgonish::TArgon2Factory factory; + auto argon2 = factory.Create(NArgonish::EInstructionSet::SSE2, NArgonish::EArgon2Type::Argon2d, 1, 2048, 1); + ui8 password[16] = {0x0}; + ui8 salt[16] = {0x01}; + ui8 result[16] = {0}; + + for (ui64 i = 0; i < iface.Iterations(); ++i) { + argon2->Hash(password, sizeof(password), salt, sizeof(salt), result, sizeof(result)); + } +} + +Y_CPU_BENCHMARK(Argon2d_2048_SSSE3, iface) { + NArgonish::TArgon2Factory factory; + auto argon2 = factory.Create(NArgonish::EInstructionSet::SSSE3, NArgonish::EArgon2Type::Argon2d, 1, 2048, 1); + ui8 password[16] = {0x0}; + ui8 salt[16] = {0x01}; + ui8 result[16] = {0}; + + for (ui64 i = 0; i < iface.Iterations(); ++i) { + argon2->Hash(password, sizeof(password), salt, sizeof(salt), result, sizeof(result)); + } +} + +Y_CPU_BENCHMARK(Argon2d_2048_SSE41, iface) { + NArgonish::TArgon2Factory factory; + auto argon2 = factory.Create(NArgonish::EInstructionSet::SSE41, NArgonish::EArgon2Type::Argon2d, 1, 2048, 1); + ui8 password[16] = {0x0}; + ui8 salt[16] = {0x01}; + ui8 result[16] = {0}; + + for (ui64 i = 0; i < iface.Iterations(); ++i) { + argon2->Hash(password, sizeof(password), salt, sizeof(salt), result, sizeof(result)); + } +} + +Y_CPU_BENCHMARK(Argon2d_2048_AVX2, iface) { + NArgonish::TArgon2Factory factory; + auto argon2 = factory.Create(NArgonish::EInstructionSet::AVX2, NArgonish::EArgon2Type::Argon2d, 1, 2048, 1); + ui8 password[16] = {0x0}; + ui8 salt[16] = {0x01}; + ui8 result[16] = {0}; + + for (ui64 i = 0; i < iface.Iterations(); ++i) { + argon2->Hash(password, sizeof(password), salt, sizeof(salt), result, sizeof(result)); + } +} +#endif diff --git a/library/cpp/digest/argonish/benchmark/ya.make b/library/cpp/digest/argonish/benchmark/ya.make index 176f235780..5aad1b238f 100644 --- a/library/cpp/digest/argonish/benchmark/ya.make +++ b/library/cpp/digest/argonish/benchmark/ya.make @@ -1,13 +1,13 @@ -OWNER(e-sidorov) - +OWNER(e-sidorov) + Y_BENCHMARK() - + PEERDIR( library/cpp/digest/argonish ) - + SRCS( mbench.cpp ) - -END() + +END() diff --git a/library/cpp/digest/argonish/blake2b.h b/library/cpp/digest/argonish/blake2b.h index 559f0b5580..21ca468423 100644 --- a/library/cpp/digest/argonish/blake2b.h +++ b/library/cpp/digest/argonish/blake2b.h @@ -1,78 +1,78 @@ -#pragma once - -#include "common.h" - -#include <util/generic/ptr.h> - -namespace NArgonish { - /** - * Interface for all Blake2B instances - */ - class IBlake2Base { - public: - virtual ~IBlake2Base() { - } - /** - * Updates intermediate hash with an ui32 value - * @param in integer to hash - */ - virtual void Update(ui32 in) = 0; - - /** - * Updates intermediate hash with an array of bytes - * @param pin input - * @param inlen input length - */ - virtual void Update(const void* pin, size_t inlen) = 0; - - /** - * Finalizes the hash calculation and returns the hash value - * @param out output buffer - * @param outlen output buffer length - */ - virtual void Final(void* out, size_t outlen) = 0; - }; - - /** - * A factory that creates Blake2B instances optimized for different instruction sets - */ - class TBlake2BFactory { - public: - /** - * Constructs the factory object - * @param skipTest if true then the constructor skips runtime Blake2B test - */ - TBlake2BFactory(bool skipTest = false); - - /** - * Creates an instance of Blake2B hash algorithm. - * The optimisation is selected automatically based on the cpuid instruction output. - * @param outlen the output buffer length, this value takes part in hashing - * @param key a secret key to make Blake2B work as a keyed hash function - * @param keylen the secret key length - * @return returns an unique_ptr containing Blake2B instance - */ - THolder<IBlake2Base> Create(size_t outlen = 32, const ui8* key = nullptr, size_t keylen = 0) const; - - /** - * Creates an instance of Blake2B hash algorithm optimized for the particular instruction set - * @param instructionSet instruction set - * @param outlen the output buffer length, this value takes part in hashing - * @param key a secret key to make Blake2B work as a keyed hash function - * @param keylen the secret key length - * @return returns an unique_ptr containing Blake2B instance - */ - THolder<IBlake2Base> Create(EInstructionSet instructionSet, size_t outlen = 32, - const ui8* key = nullptr, size_t keylen = 0) const; - - /** - * The function returns the best instruction set available on the current CPU - * @return InstructionSet value - */ - EInstructionSet GetInstructionSet() const; - - protected: - EInstructionSet InstructionSet_ = EInstructionSet::REF; - void QuickTest_() const; - }; -} +#pragma once + +#include "common.h" + +#include <util/generic/ptr.h> + +namespace NArgonish { + /** + * Interface for all Blake2B instances + */ + class IBlake2Base { + public: + virtual ~IBlake2Base() { + } + /** + * Updates intermediate hash with an ui32 value + * @param in integer to hash + */ + virtual void Update(ui32 in) = 0; + + /** + * Updates intermediate hash with an array of bytes + * @param pin input + * @param inlen input length + */ + virtual void Update(const void* pin, size_t inlen) = 0; + + /** + * Finalizes the hash calculation and returns the hash value + * @param out output buffer + * @param outlen output buffer length + */ + virtual void Final(void* out, size_t outlen) = 0; + }; + + /** + * A factory that creates Blake2B instances optimized for different instruction sets + */ + class TBlake2BFactory { + public: + /** + * Constructs the factory object + * @param skipTest if true then the constructor skips runtime Blake2B test + */ + TBlake2BFactory(bool skipTest = false); + + /** + * Creates an instance of Blake2B hash algorithm. + * The optimisation is selected automatically based on the cpuid instruction output. + * @param outlen the output buffer length, this value takes part in hashing + * @param key a secret key to make Blake2B work as a keyed hash function + * @param keylen the secret key length + * @return returns an unique_ptr containing Blake2B instance + */ + THolder<IBlake2Base> Create(size_t outlen = 32, const ui8* key = nullptr, size_t keylen = 0) const; + + /** + * Creates an instance of Blake2B hash algorithm optimized for the particular instruction set + * @param instructionSet instruction set + * @param outlen the output buffer length, this value takes part in hashing + * @param key a secret key to make Blake2B work as a keyed hash function + * @param keylen the secret key length + * @return returns an unique_ptr containing Blake2B instance + */ + THolder<IBlake2Base> Create(EInstructionSet instructionSet, size_t outlen = 32, + const ui8* key = nullptr, size_t keylen = 0) const; + + /** + * The function returns the best instruction set available on the current CPU + * @return InstructionSet value + */ + EInstructionSet GetInstructionSet() const; + + protected: + EInstructionSet InstructionSet_ = EInstructionSet::REF; + void QuickTest_() const; + }; +} diff --git a/library/cpp/digest/argonish/common.h b/library/cpp/digest/argonish/common.h index 64026b2a0e..973d82f13a 100644 --- a/library/cpp/digest/argonish/common.h +++ b/library/cpp/digest/argonish/common.h @@ -1,18 +1,18 @@ -#pragma once - -#include <util/system/defaults.h> - -namespace NArgonish { - /** - * Instruction sets for which Argon2 is optimized - */ - enum class EInstructionSet : ui32 { - REF = 0, /// Reference implementation -#if !defined(_arm64_) - SSE2 = 1, /// SSE2 optimized version - SSSE3 = 2, /// SSSE3 optimized version - SSE41 = 3, /// SSE4.1 optimized version - AVX2 = 4 /// AVX2 optimized version -#endif - }; -} +#pragma once + +#include <util/system/defaults.h> + +namespace NArgonish { + /** + * Instruction sets for which Argon2 is optimized + */ + enum class EInstructionSet : ui32 { + REF = 0, /// Reference implementation +#if !defined(_arm64_) + SSE2 = 1, /// SSE2 optimized version + SSSE3 = 2, /// SSSE3 optimized version + SSE41 = 3, /// SSE4.1 optimized version + AVX2 = 4 /// AVX2 optimized version +#endif + }; +} diff --git a/library/cpp/digest/argonish/factory/factory.cpp b/library/cpp/digest/argonish/factory/factory.cpp index 84ebc78747..c1f5f5ce79 100644 --- a/library/cpp/digest/argonish/factory/factory.cpp +++ b/library/cpp/digest/argonish/factory/factory.cpp @@ -1,222 +1,222 @@ -// -// Created by Evgeny Sidorov on 12/04/17. -// - +// +// Created by Evgeny Sidorov on 12/04/17. +// + #include <library/cpp/digest/argonish/blake2b.h> #include <library/cpp/digest/argonish/argon2.h> #include <library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.h> -#if !defined(_arm64_) +#if !defined(_arm64_) #include <library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.h> #include <library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.h> #include <library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.h> #include <library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.h> -#endif - -#include <util/system/cpu_id.h> -#include <util/generic/yexception.h> - -namespace NArgonish { - static EInstructionSet GetBestSet() { -#if !defined(_arm64_) - if (NX86::HaveAVX2()) { - return EInstructionSet::AVX2; - } - - if (NX86::HaveSSE41()) { - return EInstructionSet::SSE41; - } - - if (NX86::HaveSSSE3()) { - return EInstructionSet::SSSE3; - } - - if (NX86::HaveSSE2()) { - return EInstructionSet::SSE2; - } -#endif - return EInstructionSet::REF; - } - - TArgon2Factory::TArgon2Factory(bool skipTest) { - InstructionSet_ = GetBestSet(); - if (!skipTest) - QuickTest_(); - } - - THolder<IArgon2Base> TArgon2Factory::Create(EInstructionSet instructionSet, EArgon2Type atype, ui32 tcost, - ui32 mcost, ui32 threads, const ui8* key, ui32 keylen) const { - switch (instructionSet) { - case EInstructionSet::REF: - return MakeHolder<TArgon2ProxyREF>(atype, tcost, mcost, threads, key, keylen); -#if !defined(_arm64_) - case EInstructionSet::SSE2: - return MakeHolder<TArgon2ProxySSE2>(atype, tcost, mcost, threads, key, keylen); - case EInstructionSet::SSSE3: - return MakeHolder<TArgon2ProxySSSE3>(atype, tcost, mcost, threads, key, keylen); - case EInstructionSet::SSE41: - return MakeHolder<TArgon2ProxySSSE3>(atype, tcost, mcost, threads, key, keylen); - case EInstructionSet::AVX2: - return MakeHolder<TArgon2ProxyAVX2>(atype, tcost, mcost, threads, key, keylen); -#endif - } - - /* to avoid gcc warning */ - ythrow yexception() << "Invalid instruction set value"; - } - - THolder<IArgon2Base> TArgon2Factory::Create(EArgon2Type atype, ui32 tcost, ui32 mcost, ui32 threads, - const ui8* key, ui32 keylen) const { - return Create(InstructionSet_, atype, tcost, mcost, threads, key, keylen); - } - - EInstructionSet TArgon2Factory::GetInstructionSet() const { - return InstructionSet_; - } - - void TArgon2Factory::QuickTest_() const { - const ui8 password[8] = {'p', 'a', 's', 's', 'w', 'o', 'r', 'd'}; - const ui8 salt[8] = {'s', 'o', 'm', 'e', 's', 'a', 'l', 't'}; - const ui8 test_result[][32] = { - {0x2e, 0x2e, 0x5e, 0x05, 0xfe, 0x57, 0xac, 0x2c, - 0xf4, 0x72, 0xec, 0xd0, 0x45, 0xef, 0x68, 0x7e, - 0x56, 0x2a, 0x98, 0x0f, 0xd5, 0x03, 0x39, 0xb3, - 0x89, 0xc8, 0x70, 0xe1, 0x96, 0x2b, 0xbc, 0x45}, - {0x95, 0x46, 0x6c, 0xc4, 0xf9, 0x2f, 0x87, 0x49, - 0x54, 0x61, 0x7e, 0xec, 0x0a, 0xa1, 0x19, 0x5d, - 0x22, 0x98, 0x0a, 0xbd, 0x62, 0x5e, 0x5c, 0xac, - 0x44, 0x76, 0x3a, 0xe3, 0xa9, 0xcb, 0x6a, 0xb7}, - {0xc8, 0xe9, 0xae, 0xdc, 0x95, 0x6f, 0x6a, 0x7d, - 0xff, 0x0a, 0x4d, 0x42, 0x94, 0x0d, 0xf6, 0x28, - 0x62, 0x3f, 0x32, 0x8e, 0xa1, 0x23, 0x50, 0x05, - 0xab, 0xac, 0x93, 0x3c, 0x57, 0x09, 0x3e, 0x23}}; - - ui8 hash_result[32] = {0}; - for (ui32 atype = (ui32)EArgon2Type::Argon2d; atype <= (ui32)EArgon2Type::Argon2id; ++atype) { - auto argon2d = MakeHolder<TArgon2ProxyREF>((EArgon2Type)atype, 1U, 1024U, 1U); - argon2d->Hash(password, sizeof(password), salt, sizeof(salt), hash_result, sizeof(hash_result)); - if (memcmp(test_result[atype], hash_result, sizeof(hash_result)) != 0) - ythrow yexception() << "Argon2: Runtime test failed for reference implementation"; - } -#if !defined(_arm64_) - if (InstructionSet_ >= EInstructionSet::SSE2) { - for (ui32 atype = (ui32)EArgon2Type::Argon2d; atype <= (ui32)EArgon2Type::Argon2id; ++atype) { - auto argon2d = MakeHolder<TArgon2ProxySSE2>((EArgon2Type)atype, 1U, 1024U, 1U); - argon2d->Hash(password, sizeof(password), salt, sizeof(salt), hash_result, sizeof(hash_result)); - if (memcmp(test_result[atype], hash_result, sizeof(hash_result)) != 0) - ythrow yexception() << "Argon2: Runtime test failed for SSE2 implementation"; - } - } - - if (InstructionSet_ >= EInstructionSet::SSSE3) { - for (ui32 atype = (ui32)EArgon2Type::Argon2d; atype <= (ui32)EArgon2Type::Argon2id; ++atype) { - auto argon2d = MakeHolder<TArgon2ProxySSSE3>((EArgon2Type)atype, 1U, 1024U, 1U); - argon2d->Hash(password, sizeof(password), salt, sizeof(salt), hash_result, sizeof(hash_result)); - if (memcmp(test_result[atype], hash_result, sizeof(hash_result)) != 0) - ythrow yexception() << "Argon2: Runtime test failed for SSSE3 implementation"; - } - } - - if (InstructionSet_ >= EInstructionSet::SSE41) { - for (ui32 atype = (ui32)EArgon2Type::Argon2d; atype <= (ui32)EArgon2Type::Argon2id; ++atype) { - auto argon2d = MakeHolder<TArgon2ProxySSE41>((EArgon2Type)atype, 1U, 1024U, 1U); - argon2d->Hash(password, sizeof(password), salt, sizeof(salt), hash_result, sizeof(hash_result)); - if (memcmp(test_result[atype], hash_result, sizeof(hash_result)) != 0) - ythrow yexception() << "Argon2: Runtime test failed for SSE41 implementation"; - } - } - - if (InstructionSet_ >= EInstructionSet::AVX2) { - for (ui32 atype = (ui32)EArgon2Type::Argon2d; atype <= (ui32)EArgon2Type::Argon2id; ++atype) { - auto argon2d = MakeHolder<TArgon2ProxyAVX2>((EArgon2Type)atype, 1U, 1024U, 1U); - argon2d->Hash(password, sizeof(password), salt, sizeof(salt), hash_result, sizeof(hash_result)); - if (memcmp(test_result[atype], hash_result, sizeof(hash_result)) != 0) - ythrow yexception() << "Argon2: Runtime test failed for AVX2 implementation"; - } - } -#endif - } - - TBlake2BFactory::TBlake2BFactory(bool skipTest) { - InstructionSet_ = GetBestSet(); - if (!skipTest) - QuickTest_(); - } - - THolder<IBlake2Base> TBlake2BFactory::Create(EInstructionSet instructionSet, size_t outlen, const ui8* key, - size_t keylen) const { - switch (instructionSet) { - case EInstructionSet::REF: - return MakeHolder<TBlake2BProxyREF>(outlen, key, keylen); -#if !defined(_arm64_) - case EInstructionSet::SSE2: - return MakeHolder<TBlake2BProxySSE2>(outlen, key, keylen); - case EInstructionSet::SSSE3: - return MakeHolder<TBlake2BProxySSSE3>(outlen, key, keylen); - case EInstructionSet::SSE41: - return MakeHolder<TBlake2BProxySSE41>(outlen, key, keylen); - case EInstructionSet::AVX2: - return MakeHolder<TBlake2BProxyAVX2>(outlen, key, keylen); -#endif - } - - /* to supress gcc warning */ - ythrow yexception() << "Invalid instruction set"; - } - - THolder<IBlake2Base> TBlake2BFactory::Create(size_t outlen, const ui8* key, size_t keylen) const { - return Create(InstructionSet_, outlen, key, keylen); - } - - EInstructionSet TBlake2BFactory::GetInstructionSet() const { - return InstructionSet_; - } - - void TBlake2BFactory::QuickTest_() const { - const char* test_str = "abc"; - const ui8 test_result[] = { - 0xcf, 0x4a, 0xb7, 0x91, 0xc6, 0x2b, 0x8d, 0x2b, - 0x21, 0x09, 0xc9, 0x02, 0x75, 0x28, 0x78, 0x16}; - - ui8 hash_val[16]; - if (InstructionSet_ >= EInstructionSet::REF) { - auto blake2 = MakeHolder<TBlake2BProxyREF>(16U); - blake2->Update(test_str, 3); - blake2->Final(hash_val, 16); - if (memcmp(test_result, hash_val, 16) != 0) - ythrow yexception() << "Blake2B: Runtime test failed for reference implementation"; - } -#if !defined(_arm64_) - if (InstructionSet_ >= EInstructionSet::SSE2) { - auto blake2 = MakeHolder<TBlake2BProxySSE2>(16U); - blake2->Update(test_str, 3); - blake2->Final(hash_val, 16); - if (memcmp(test_result, hash_val, 16) != 0) - ythrow yexception() << "Blake2B: Runtime test failed for SSE2 implementation"; - } - - if (InstructionSet_ >= EInstructionSet::SSSE3) { - auto blake2 = MakeHolder<TBlake2BProxySSSE3>(16U); - blake2->Update(test_str, 3); - blake2->Final(hash_val, 16); - if (memcmp(test_result, hash_val, 16) != 0) - ythrow yexception() << "Blake2B: Runtime test failed for SSSE3 implementation"; - } - - if (InstructionSet_ >= EInstructionSet::SSE41) { - auto blake2 = MakeHolder<TBlake2BProxySSE41>(16U); - blake2->Update(test_str, 3); - blake2->Final(hash_val, 16); - if (memcmp(test_result, hash_val, 16) != 0) - ythrow yexception() << "Blake2B: Runtime test failed for SSE41 implmenetation"; - } - - if (InstructionSet_ >= EInstructionSet::AVX2) { - auto blake2 = MakeHolder<TBlake2BProxyAVX2>(16U); - blake2->Update(test_str, 3); - blake2->Final(hash_val, 16); - if (memcmp(test_result, hash_val, 16) != 0) - ythrow yexception() << "Blake2B: Runtime test failed for AVX2 implementation"; - } -#endif - } -} +#endif + +#include <util/system/cpu_id.h> +#include <util/generic/yexception.h> + +namespace NArgonish { + static EInstructionSet GetBestSet() { +#if !defined(_arm64_) + if (NX86::HaveAVX2()) { + return EInstructionSet::AVX2; + } + + if (NX86::HaveSSE41()) { + return EInstructionSet::SSE41; + } + + if (NX86::HaveSSSE3()) { + return EInstructionSet::SSSE3; + } + + if (NX86::HaveSSE2()) { + return EInstructionSet::SSE2; + } +#endif + return EInstructionSet::REF; + } + + TArgon2Factory::TArgon2Factory(bool skipTest) { + InstructionSet_ = GetBestSet(); + if (!skipTest) + QuickTest_(); + } + + THolder<IArgon2Base> TArgon2Factory::Create(EInstructionSet instructionSet, EArgon2Type atype, ui32 tcost, + ui32 mcost, ui32 threads, const ui8* key, ui32 keylen) const { + switch (instructionSet) { + case EInstructionSet::REF: + return MakeHolder<TArgon2ProxyREF>(atype, tcost, mcost, threads, key, keylen); +#if !defined(_arm64_) + case EInstructionSet::SSE2: + return MakeHolder<TArgon2ProxySSE2>(atype, tcost, mcost, threads, key, keylen); + case EInstructionSet::SSSE3: + return MakeHolder<TArgon2ProxySSSE3>(atype, tcost, mcost, threads, key, keylen); + case EInstructionSet::SSE41: + return MakeHolder<TArgon2ProxySSSE3>(atype, tcost, mcost, threads, key, keylen); + case EInstructionSet::AVX2: + return MakeHolder<TArgon2ProxyAVX2>(atype, tcost, mcost, threads, key, keylen); +#endif + } + + /* to avoid gcc warning */ + ythrow yexception() << "Invalid instruction set value"; + } + + THolder<IArgon2Base> TArgon2Factory::Create(EArgon2Type atype, ui32 tcost, ui32 mcost, ui32 threads, + const ui8* key, ui32 keylen) const { + return Create(InstructionSet_, atype, tcost, mcost, threads, key, keylen); + } + + EInstructionSet TArgon2Factory::GetInstructionSet() const { + return InstructionSet_; + } + + void TArgon2Factory::QuickTest_() const { + const ui8 password[8] = {'p', 'a', 's', 's', 'w', 'o', 'r', 'd'}; + const ui8 salt[8] = {'s', 'o', 'm', 'e', 's', 'a', 'l', 't'}; + const ui8 test_result[][32] = { + {0x2e, 0x2e, 0x5e, 0x05, 0xfe, 0x57, 0xac, 0x2c, + 0xf4, 0x72, 0xec, 0xd0, 0x45, 0xef, 0x68, 0x7e, + 0x56, 0x2a, 0x98, 0x0f, 0xd5, 0x03, 0x39, 0xb3, + 0x89, 0xc8, 0x70, 0xe1, 0x96, 0x2b, 0xbc, 0x45}, + {0x95, 0x46, 0x6c, 0xc4, 0xf9, 0x2f, 0x87, 0x49, + 0x54, 0x61, 0x7e, 0xec, 0x0a, 0xa1, 0x19, 0x5d, + 0x22, 0x98, 0x0a, 0xbd, 0x62, 0x5e, 0x5c, 0xac, + 0x44, 0x76, 0x3a, 0xe3, 0xa9, 0xcb, 0x6a, 0xb7}, + {0xc8, 0xe9, 0xae, 0xdc, 0x95, 0x6f, 0x6a, 0x7d, + 0xff, 0x0a, 0x4d, 0x42, 0x94, 0x0d, 0xf6, 0x28, + 0x62, 0x3f, 0x32, 0x8e, 0xa1, 0x23, 0x50, 0x05, + 0xab, 0xac, 0x93, 0x3c, 0x57, 0x09, 0x3e, 0x23}}; + + ui8 hash_result[32] = {0}; + for (ui32 atype = (ui32)EArgon2Type::Argon2d; atype <= (ui32)EArgon2Type::Argon2id; ++atype) { + auto argon2d = MakeHolder<TArgon2ProxyREF>((EArgon2Type)atype, 1U, 1024U, 1U); + argon2d->Hash(password, sizeof(password), salt, sizeof(salt), hash_result, sizeof(hash_result)); + if (memcmp(test_result[atype], hash_result, sizeof(hash_result)) != 0) + ythrow yexception() << "Argon2: Runtime test failed for reference implementation"; + } +#if !defined(_arm64_) + if (InstructionSet_ >= EInstructionSet::SSE2) { + for (ui32 atype = (ui32)EArgon2Type::Argon2d; atype <= (ui32)EArgon2Type::Argon2id; ++atype) { + auto argon2d = MakeHolder<TArgon2ProxySSE2>((EArgon2Type)atype, 1U, 1024U, 1U); + argon2d->Hash(password, sizeof(password), salt, sizeof(salt), hash_result, sizeof(hash_result)); + if (memcmp(test_result[atype], hash_result, sizeof(hash_result)) != 0) + ythrow yexception() << "Argon2: Runtime test failed for SSE2 implementation"; + } + } + + if (InstructionSet_ >= EInstructionSet::SSSE3) { + for (ui32 atype = (ui32)EArgon2Type::Argon2d; atype <= (ui32)EArgon2Type::Argon2id; ++atype) { + auto argon2d = MakeHolder<TArgon2ProxySSSE3>((EArgon2Type)atype, 1U, 1024U, 1U); + argon2d->Hash(password, sizeof(password), salt, sizeof(salt), hash_result, sizeof(hash_result)); + if (memcmp(test_result[atype], hash_result, sizeof(hash_result)) != 0) + ythrow yexception() << "Argon2: Runtime test failed for SSSE3 implementation"; + } + } + + if (InstructionSet_ >= EInstructionSet::SSE41) { + for (ui32 atype = (ui32)EArgon2Type::Argon2d; atype <= (ui32)EArgon2Type::Argon2id; ++atype) { + auto argon2d = MakeHolder<TArgon2ProxySSE41>((EArgon2Type)atype, 1U, 1024U, 1U); + argon2d->Hash(password, sizeof(password), salt, sizeof(salt), hash_result, sizeof(hash_result)); + if (memcmp(test_result[atype], hash_result, sizeof(hash_result)) != 0) + ythrow yexception() << "Argon2: Runtime test failed for SSE41 implementation"; + } + } + + if (InstructionSet_ >= EInstructionSet::AVX2) { + for (ui32 atype = (ui32)EArgon2Type::Argon2d; atype <= (ui32)EArgon2Type::Argon2id; ++atype) { + auto argon2d = MakeHolder<TArgon2ProxyAVX2>((EArgon2Type)atype, 1U, 1024U, 1U); + argon2d->Hash(password, sizeof(password), salt, sizeof(salt), hash_result, sizeof(hash_result)); + if (memcmp(test_result[atype], hash_result, sizeof(hash_result)) != 0) + ythrow yexception() << "Argon2: Runtime test failed for AVX2 implementation"; + } + } +#endif + } + + TBlake2BFactory::TBlake2BFactory(bool skipTest) { + InstructionSet_ = GetBestSet(); + if (!skipTest) + QuickTest_(); + } + + THolder<IBlake2Base> TBlake2BFactory::Create(EInstructionSet instructionSet, size_t outlen, const ui8* key, + size_t keylen) const { + switch (instructionSet) { + case EInstructionSet::REF: + return MakeHolder<TBlake2BProxyREF>(outlen, key, keylen); +#if !defined(_arm64_) + case EInstructionSet::SSE2: + return MakeHolder<TBlake2BProxySSE2>(outlen, key, keylen); + case EInstructionSet::SSSE3: + return MakeHolder<TBlake2BProxySSSE3>(outlen, key, keylen); + case EInstructionSet::SSE41: + return MakeHolder<TBlake2BProxySSE41>(outlen, key, keylen); + case EInstructionSet::AVX2: + return MakeHolder<TBlake2BProxyAVX2>(outlen, key, keylen); +#endif + } + + /* to supress gcc warning */ + ythrow yexception() << "Invalid instruction set"; + } + + THolder<IBlake2Base> TBlake2BFactory::Create(size_t outlen, const ui8* key, size_t keylen) const { + return Create(InstructionSet_, outlen, key, keylen); + } + + EInstructionSet TBlake2BFactory::GetInstructionSet() const { + return InstructionSet_; + } + + void TBlake2BFactory::QuickTest_() const { + const char* test_str = "abc"; + const ui8 test_result[] = { + 0xcf, 0x4a, 0xb7, 0x91, 0xc6, 0x2b, 0x8d, 0x2b, + 0x21, 0x09, 0xc9, 0x02, 0x75, 0x28, 0x78, 0x16}; + + ui8 hash_val[16]; + if (InstructionSet_ >= EInstructionSet::REF) { + auto blake2 = MakeHolder<TBlake2BProxyREF>(16U); + blake2->Update(test_str, 3); + blake2->Final(hash_val, 16); + if (memcmp(test_result, hash_val, 16) != 0) + ythrow yexception() << "Blake2B: Runtime test failed for reference implementation"; + } +#if !defined(_arm64_) + if (InstructionSet_ >= EInstructionSet::SSE2) { + auto blake2 = MakeHolder<TBlake2BProxySSE2>(16U); + blake2->Update(test_str, 3); + blake2->Final(hash_val, 16); + if (memcmp(test_result, hash_val, 16) != 0) + ythrow yexception() << "Blake2B: Runtime test failed for SSE2 implementation"; + } + + if (InstructionSet_ >= EInstructionSet::SSSE3) { + auto blake2 = MakeHolder<TBlake2BProxySSSE3>(16U); + blake2->Update(test_str, 3); + blake2->Final(hash_val, 16); + if (memcmp(test_result, hash_val, 16) != 0) + ythrow yexception() << "Blake2B: Runtime test failed for SSSE3 implementation"; + } + + if (InstructionSet_ >= EInstructionSet::SSE41) { + auto blake2 = MakeHolder<TBlake2BProxySSE41>(16U); + blake2->Update(test_str, 3); + blake2->Final(hash_val, 16); + if (memcmp(test_result, hash_val, 16) != 0) + ythrow yexception() << "Blake2B: Runtime test failed for SSE41 implmenetation"; + } + + if (InstructionSet_ >= EInstructionSet::AVX2) { + auto blake2 = MakeHolder<TBlake2BProxyAVX2>(16U); + blake2->Update(test_str, 3); + blake2->Final(hash_val, 16); + if (memcmp(test_result, hash_val, 16) != 0) + ythrow yexception() << "Blake2B: Runtime test failed for AVX2 implementation"; + } +#endif + } +} diff --git a/library/cpp/digest/argonish/internal/argon2/argon2_avx2.h b/library/cpp/digest/argonish/internal/argon2/argon2_avx2.h index 4ce2712e85..8bf5367817 100644 --- a/library/cpp/digest/argonish/internal/argon2/argon2_avx2.h +++ b/library/cpp/digest/argonish/internal/argon2/argon2_avx2.h @@ -1,117 +1,117 @@ -#pragma once - -#include <immintrin.h> -#include "argon2_base.h" +#pragma once + +#include <immintrin.h> +#include "argon2_base.h" #include <library/cpp/digest/argonish/internal/blamka/blamka_avx2.h> - -namespace NArgonish { - template <ui32 mcost, ui32 threads> - class TArgon2AVX2 final: public TArgon2<EInstructionSet::AVX2, mcost, threads> { - public: - TArgon2AVX2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) - : TArgon2<EInstructionSet::AVX2, mcost, threads>(atype, tcost, key, keylen) - { - } - - protected: - virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { - __m256i* mdst = (__m256i*)dst; - __m256i* msrc = (__m256i*)src; - - for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) - XorValues(mdst + i, mdst + i, msrc + i); - } - - virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { - memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); - } - - virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool with_xor) const override { - __m256i blockxy[ARGON2_HWORDS_IN_BLOCK]; - __m256i state[ARGON2_HWORDS_IN_BLOCK]; - - memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); - - if (with_xor) { - for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) { - state[i] = _mm256_xor_si256(state[i], _mm256_loadu_si256((const __m256i*)refBlock->V + i)); - blockxy[i] = _mm256_xor_si256(state[i], _mm256_loadu_si256((const __m256i*)nextBlock->V + i)); - } - } else { - for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) { - blockxy[i] = state[i] = _mm256_xor_si256( - state[i], _mm256_loadu_si256((const __m256i*)refBlock->V + i)); - } - } - - /** - * state[ 8*i + 0 ] = ( v0_0, v1_0, v2_0, v3_0) - * state[ 8*i + 1 ] = ( v4_0, v5_0, v6_0, v7_0) - * state[ 8*i + 2 ] = ( v8_0, v9_0, v10_0, v11_0) - * state[ 8*i + 3 ] = (v12_0, v13_0, v14_0, v15_0) - * state[ 8*i + 4 ] = ( v0_1, v1_1, v2_1, v3_1) - * state[ 8*i + 5 ] = ( v4_1, v5_1, v6_1, v7_1) - * state[ 8*i + 6 ] = ( v8_1, v9_1, v10_1, v11_1) - * state[ 8*i + 7 ] = (v12_1, v13_1, v14_1, v15_1) - */ - for (ui32 i = 0; i < 4; ++i) { - BlamkaG1AVX2( - state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], - state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); - BlamkaG2AVX2( - state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], - state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); - DiagonalizeAVX21( - state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - BlamkaG1AVX2( - state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], - state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); - BlamkaG2AVX2( - state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], - state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); - UndiagonalizeAVX21( - state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - } - - /** - * state[ 0 + i] = ( v0_0, v1_0, v0_1, v1_1) - * state[ 4 + i] = ( v2_0, v3_0, v2_1, v3_1) - * state[ 8 + i] = ( v4_0, v5_0, v4_1, v5_1) - * state[12 + i] = ( v6_0, v7_0, v6_1, v7_1) - * state[16 + i] = ( v8_0, v9_0, v8_1, v9_1) - * state[20 + i] = (v10_0, v11_0, v10_1, v11_1) - * state[24 + i] = (v12_0, v13_0, v12_1, v13_1) - * state[28 + i] = (v14_0, v15_0, v14_1, v15_1) - */ - for (ui32 i = 0; i < 4; ++i) { - BlamkaG1AVX2( - state[0 + i], state[4 + i], state[8 + i], state[12 + i], - state[16 + i], state[20 + i], state[24 + i], state[28 + i]); - BlamkaG2AVX2( - state[0 + i], state[4 + i], state[8 + i], state[12 + i], - state[16 + i], state[20 + i], state[24 + i], state[28 + i]); - DiagonalizeAVX22( - state[8 + i], state[12 + i], - state[16 + i], state[20 + i], - state[24 + i], state[28 + i]); - BlamkaG1AVX2( - state[0 + i], state[4 + i], state[8 + i], state[12 + i], - state[16 + i], state[20 + i], state[24 + i], state[28 + i]); - BlamkaG2AVX2( - state[0 + i], state[4 + i], state[8 + i], state[12 + i], - state[16 + i], state[20 + i], state[24 + i], state[28 + i]); - UndiagonalizeAVX22( - state[8 + i], state[12 + i], - state[16 + i], state[20 + i], - state[24 + i], state[28 + i]); - } - - for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) { - state[i] = _mm256_xor_si256(state[i], blockxy[i]); - _mm256_storeu_si256((__m256i*)nextBlock->V + i, state[i]); - } - } - }; -} + +namespace NArgonish { + template <ui32 mcost, ui32 threads> + class TArgon2AVX2 final: public TArgon2<EInstructionSet::AVX2, mcost, threads> { + public: + TArgon2AVX2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) + : TArgon2<EInstructionSet::AVX2, mcost, threads>(atype, tcost, key, keylen) + { + } + + protected: + virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { + __m256i* mdst = (__m256i*)dst; + __m256i* msrc = (__m256i*)src; + + for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) + XorValues(mdst + i, mdst + i, msrc + i); + } + + virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { + memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); + } + + virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool with_xor) const override { + __m256i blockxy[ARGON2_HWORDS_IN_BLOCK]; + __m256i state[ARGON2_HWORDS_IN_BLOCK]; + + memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); + + if (with_xor) { + for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) { + state[i] = _mm256_xor_si256(state[i], _mm256_loadu_si256((const __m256i*)refBlock->V + i)); + blockxy[i] = _mm256_xor_si256(state[i], _mm256_loadu_si256((const __m256i*)nextBlock->V + i)); + } + } else { + for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) { + blockxy[i] = state[i] = _mm256_xor_si256( + state[i], _mm256_loadu_si256((const __m256i*)refBlock->V + i)); + } + } + + /** + * state[ 8*i + 0 ] = ( v0_0, v1_0, v2_0, v3_0) + * state[ 8*i + 1 ] = ( v4_0, v5_0, v6_0, v7_0) + * state[ 8*i + 2 ] = ( v8_0, v9_0, v10_0, v11_0) + * state[ 8*i + 3 ] = (v12_0, v13_0, v14_0, v15_0) + * state[ 8*i + 4 ] = ( v0_1, v1_1, v2_1, v3_1) + * state[ 8*i + 5 ] = ( v4_1, v5_1, v6_1, v7_1) + * state[ 8*i + 6 ] = ( v8_1, v9_1, v10_1, v11_1) + * state[ 8*i + 7 ] = (v12_1, v13_1, v14_1, v15_1) + */ + for (ui32 i = 0; i < 4; ++i) { + BlamkaG1AVX2( + state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], + state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); + BlamkaG2AVX2( + state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], + state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); + DiagonalizeAVX21( + state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + BlamkaG1AVX2( + state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], + state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); + BlamkaG2AVX2( + state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], + state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); + UndiagonalizeAVX21( + state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + } + + /** + * state[ 0 + i] = ( v0_0, v1_0, v0_1, v1_1) + * state[ 4 + i] = ( v2_0, v3_0, v2_1, v3_1) + * state[ 8 + i] = ( v4_0, v5_0, v4_1, v5_1) + * state[12 + i] = ( v6_0, v7_0, v6_1, v7_1) + * state[16 + i] = ( v8_0, v9_0, v8_1, v9_1) + * state[20 + i] = (v10_0, v11_0, v10_1, v11_1) + * state[24 + i] = (v12_0, v13_0, v12_1, v13_1) + * state[28 + i] = (v14_0, v15_0, v14_1, v15_1) + */ + for (ui32 i = 0; i < 4; ++i) { + BlamkaG1AVX2( + state[0 + i], state[4 + i], state[8 + i], state[12 + i], + state[16 + i], state[20 + i], state[24 + i], state[28 + i]); + BlamkaG2AVX2( + state[0 + i], state[4 + i], state[8 + i], state[12 + i], + state[16 + i], state[20 + i], state[24 + i], state[28 + i]); + DiagonalizeAVX22( + state[8 + i], state[12 + i], + state[16 + i], state[20 + i], + state[24 + i], state[28 + i]); + BlamkaG1AVX2( + state[0 + i], state[4 + i], state[8 + i], state[12 + i], + state[16 + i], state[20 + i], state[24 + i], state[28 + i]); + BlamkaG2AVX2( + state[0 + i], state[4 + i], state[8 + i], state[12 + i], + state[16 + i], state[20 + i], state[24 + i], state[28 + i]); + UndiagonalizeAVX22( + state[8 + i], state[12 + i], + state[16 + i], state[20 + i], + state[24 + i], state[28 + i]); + } + + for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) { + state[i] = _mm256_xor_si256(state[i], blockxy[i]); + _mm256_storeu_si256((__m256i*)nextBlock->V + i, state[i]); + } + } + }; +} diff --git a/library/cpp/digest/argonish/internal/argon2/argon2_base.h b/library/cpp/digest/argonish/internal/argon2/argon2_base.h index 8de5b6bb42..2385cc947c 100644 --- a/library/cpp/digest/argonish/internal/argon2/argon2_base.h +++ b/library/cpp/digest/argonish/internal/argon2/argon2_base.h @@ -1,388 +1,388 @@ -#pragma once - -#include <util/generic/yexception.h> +#pragma once + +#include <util/generic/yexception.h> #include <library/cpp/digest/argonish/argon2.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h> #include <library/cpp/threading/poor_man_openmp/thread_helper.h> - -namespace NArgonish { - const ui32 ARGON2_PREHASH_DIGEST_LENGTH = 64; + +namespace NArgonish { + const ui32 ARGON2_PREHASH_DIGEST_LENGTH = 64; const ui32 ARGON2_SECRET_MAX_LENGTH = 64; - const ui32 ARGON2_PREHASH_SEED_LENGTH = 72; - const ui32 ARGON2_BLOCK_SIZE = 1024; - const ui32 ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8; - const ui32 ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16; - const ui32 ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32; - const ui32 ARGON2_ADDRESSES_IN_BLOCK = 128; - const ui32 ARGON2_SYNC_POINTS = 4; - const ui32 ARGON2_SALT_MIN_LEN = 8; - const ui32 ARGON2_MIN_OUTLEN = 4; - - struct TBlock { - ui64 V[ARGON2_QWORDS_IN_BLOCK]; - }; - - template <EInstructionSet instructionSet, ui32 mcost, ui32 threads> - class TArgon2: public IArgon2Base { - public: - TArgon2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) - : SecretLen_(keylen) - , Tcost_(tcost) - , Atype_(atype) - { - if (SecretLen_) - memcpy(Secret_, key, keylen); - } - - virtual ~TArgon2() override { - if (SecretLen_) { - SecureZeroMemory_(Secret_, SecretLen_); - SecretLen_ = 0; - } - } - - virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, - ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { - TArrayHolder<TBlock> buffer(new TBlock[MemoryBlocks_]); - InternalHash_(buffer.Get(), pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); - } - - virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, - const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { - TArrayHolder<ui8> hashResult(new ui8[hashlen]); - Hash(pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen); - - return SecureCompare_(hash, hashResult.Get(), hashlen); - } - - virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, - const ui8* aad = nullptr, ui32 aadlen = 0) const override { - if (memory == nullptr || mlen < sizeof(TBlock) * MemoryBlocks_) - ythrow yexception() << "memory is null or its size is not enough"; - - InternalHash_((TBlock*)memory, pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); - } - - virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, - const ui8* aad = nullptr, ui32 aadlen = 0) const override { - TArrayHolder<ui8> hashResult(new ui8[hashlen]); - HashWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen); - - return SecureCompare_(hashResult.Get(), hash, hashlen); - } - - virtual size_t GetMemorySize() const override { - return MemoryBlocks_ * sizeof(TBlock); - } - - protected: /* Constants */ - ui8 Secret_[ARGON2_SECRET_MAX_LENGTH] = {0}; - ui32 SecretLen_ = 0; - ui32 Tcost_; - EArgon2Type Atype_; - - static constexpr ui32 Lanes_ = threads; - static constexpr ui32 MemoryBlocks_ = (mcost >= 2 * ARGON2_SYNC_POINTS * Lanes_) ? (mcost - mcost % (Lanes_ * ARGON2_SYNC_POINTS)) : 2 * ARGON2_SYNC_POINTS * Lanes_; - static constexpr ui32 SegmentLength_ = MemoryBlocks_ / (Lanes_ * ARGON2_SYNC_POINTS); - static constexpr ui32 LaneLength_ = SegmentLength_ * ARGON2_SYNC_POINTS; - - protected: /* Prototypes */ - virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, - TBlock* nextBlock, bool withXor) const = 0; - - virtual void CopyBlock_(TBlock* dst, const TBlock* src) const = 0; - virtual void XorBlock_(TBlock* dst, const TBlock* src) const = 0; - - protected: /* Static functions */ - static bool SecureCompare_(const ui8* buffer1, const ui8* buffer2, ui32 len) { - bool result = true; - for (ui32 i = 0; i < len; ++i) { - result &= (buffer1[i] == buffer2[i]); - } - return result; - } - - static void SecureZeroMemory_(void* src, size_t len) { - static void* (*const volatile memset_v)(void*, int, size_t) = &memset; - memset_v(src, 0, len); - } - - static void Store32_(ui32 value, void* mem) { - *((ui32*)mem) = value; - } - - static void Blake2BHash64_(ui8 out[BLAKE2B_OUTBYTES], const ui8 in[BLAKE2B_OUTBYTES]) { - TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES); - hash.Update(in, BLAKE2B_OUTBYTES); - hash.Final(out, BLAKE2B_OUTBYTES); - } - - static void ExpandBlockhash_(ui8 expanded[ARGON2_BLOCK_SIZE], const ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]) { - ui8 out_buffer[BLAKE2B_OUTBYTES]; - ui8 in_buffer[BLAKE2B_OUTBYTES]; - const ui32 HALF_OUT_BYTES = BLAKE2B_OUTBYTES / 2; - const ui32 HASH_BLOCKS_COUNT = ((ARGON2_BLOCK_SIZE / HALF_OUT_BYTES)); - - TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES); - hash.Update(ARGON2_BLOCK_SIZE); - hash.Update(blockhash, ARGON2_PREHASH_SEED_LENGTH); - hash.Final(out_buffer, BLAKE2B_OUTBYTES); - - memcpy(expanded, out_buffer, HALF_OUT_BYTES); - - for (ui32 i = 1; i < HASH_BLOCKS_COUNT - 2; ++i) { - memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); - Blake2BHash64_(out_buffer, in_buffer); - memcpy(expanded + (i * HALF_OUT_BYTES), out_buffer, HALF_OUT_BYTES); - } - - Blake2BHash64_(in_buffer, out_buffer); - memcpy(expanded + HALF_OUT_BYTES * (HASH_BLOCKS_COUNT - 2), in_buffer, BLAKE2B_OUTBYTES); - } - - static void Blake2BLong_(ui8* out, ui32 outlen, const ui8* in, ui32 inlen) { - if (outlen < BLAKE2B_OUTBYTES) { - TBlake2B<instructionSet> hash(outlen); - hash.Update(outlen); - hash.Update(in, inlen); - hash.Final(out, outlen); - } else { - ui8 out_buffer[BLAKE2B_OUTBYTES]; - ui8 in_buffer[BLAKE2B_OUTBYTES]; - ui32 toproduce = outlen - BLAKE2B_OUTBYTES / 2; - + const ui32 ARGON2_PREHASH_SEED_LENGTH = 72; + const ui32 ARGON2_BLOCK_SIZE = 1024; + const ui32 ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8; + const ui32 ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16; + const ui32 ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32; + const ui32 ARGON2_ADDRESSES_IN_BLOCK = 128; + const ui32 ARGON2_SYNC_POINTS = 4; + const ui32 ARGON2_SALT_MIN_LEN = 8; + const ui32 ARGON2_MIN_OUTLEN = 4; + + struct TBlock { + ui64 V[ARGON2_QWORDS_IN_BLOCK]; + }; + + template <EInstructionSet instructionSet, ui32 mcost, ui32 threads> + class TArgon2: public IArgon2Base { + public: + TArgon2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) + : SecretLen_(keylen) + , Tcost_(tcost) + , Atype_(atype) + { + if (SecretLen_) + memcpy(Secret_, key, keylen); + } + + virtual ~TArgon2() override { + if (SecretLen_) { + SecureZeroMemory_(Secret_, SecretLen_); + SecretLen_ = 0; + } + } + + virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, + ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { + TArrayHolder<TBlock> buffer(new TBlock[MemoryBlocks_]); + InternalHash_(buffer.Get(), pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); + } + + virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, + const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { + TArrayHolder<ui8> hashResult(new ui8[hashlen]); + Hash(pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen); + + return SecureCompare_(hash, hashResult.Get(), hashlen); + } + + virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, + const ui8* aad = nullptr, ui32 aadlen = 0) const override { + if (memory == nullptr || mlen < sizeof(TBlock) * MemoryBlocks_) + ythrow yexception() << "memory is null or its size is not enough"; + + InternalHash_((TBlock*)memory, pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); + } + + virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, + const ui8* aad = nullptr, ui32 aadlen = 0) const override { + TArrayHolder<ui8> hashResult(new ui8[hashlen]); + HashWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen); + + return SecureCompare_(hashResult.Get(), hash, hashlen); + } + + virtual size_t GetMemorySize() const override { + return MemoryBlocks_ * sizeof(TBlock); + } + + protected: /* Constants */ + ui8 Secret_[ARGON2_SECRET_MAX_LENGTH] = {0}; + ui32 SecretLen_ = 0; + ui32 Tcost_; + EArgon2Type Atype_; + + static constexpr ui32 Lanes_ = threads; + static constexpr ui32 MemoryBlocks_ = (mcost >= 2 * ARGON2_SYNC_POINTS * Lanes_) ? (mcost - mcost % (Lanes_ * ARGON2_SYNC_POINTS)) : 2 * ARGON2_SYNC_POINTS * Lanes_; + static constexpr ui32 SegmentLength_ = MemoryBlocks_ / (Lanes_ * ARGON2_SYNC_POINTS); + static constexpr ui32 LaneLength_ = SegmentLength_ * ARGON2_SYNC_POINTS; + + protected: /* Prototypes */ + virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, + TBlock* nextBlock, bool withXor) const = 0; + + virtual void CopyBlock_(TBlock* dst, const TBlock* src) const = 0; + virtual void XorBlock_(TBlock* dst, const TBlock* src) const = 0; + + protected: /* Static functions */ + static bool SecureCompare_(const ui8* buffer1, const ui8* buffer2, ui32 len) { + bool result = true; + for (ui32 i = 0; i < len; ++i) { + result &= (buffer1[i] == buffer2[i]); + } + return result; + } + + static void SecureZeroMemory_(void* src, size_t len) { + static void* (*const volatile memset_v)(void*, int, size_t) = &memset; + memset_v(src, 0, len); + } + + static void Store32_(ui32 value, void* mem) { + *((ui32*)mem) = value; + } + + static void Blake2BHash64_(ui8 out[BLAKE2B_OUTBYTES], const ui8 in[BLAKE2B_OUTBYTES]) { + TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES); + hash.Update(in, BLAKE2B_OUTBYTES); + hash.Final(out, BLAKE2B_OUTBYTES); + } + + static void ExpandBlockhash_(ui8 expanded[ARGON2_BLOCK_SIZE], const ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]) { + ui8 out_buffer[BLAKE2B_OUTBYTES]; + ui8 in_buffer[BLAKE2B_OUTBYTES]; + const ui32 HALF_OUT_BYTES = BLAKE2B_OUTBYTES / 2; + const ui32 HASH_BLOCKS_COUNT = ((ARGON2_BLOCK_SIZE / HALF_OUT_BYTES)); + + TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES); + hash.Update(ARGON2_BLOCK_SIZE); + hash.Update(blockhash, ARGON2_PREHASH_SEED_LENGTH); + hash.Final(out_buffer, BLAKE2B_OUTBYTES); + + memcpy(expanded, out_buffer, HALF_OUT_BYTES); + + for (ui32 i = 1; i < HASH_BLOCKS_COUNT - 2; ++i) { + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + Blake2BHash64_(out_buffer, in_buffer); + memcpy(expanded + (i * HALF_OUT_BYTES), out_buffer, HALF_OUT_BYTES); + } + + Blake2BHash64_(in_buffer, out_buffer); + memcpy(expanded + HALF_OUT_BYTES * (HASH_BLOCKS_COUNT - 2), in_buffer, BLAKE2B_OUTBYTES); + } + + static void Blake2BLong_(ui8* out, ui32 outlen, const ui8* in, ui32 inlen) { + if (outlen < BLAKE2B_OUTBYTES) { + TBlake2B<instructionSet> hash(outlen); + hash.Update(outlen); + hash.Update(in, inlen); + hash.Final(out, outlen); + } else { + ui8 out_buffer[BLAKE2B_OUTBYTES]; + ui8 in_buffer[BLAKE2B_OUTBYTES]; + ui32 toproduce = outlen - BLAKE2B_OUTBYTES / 2; + TBlake2B<instructionSet> hash1(BLAKE2B_OUTBYTES); hash1.Update(outlen); hash1.Update(in, inlen); hash1.Final(out_buffer, BLAKE2B_OUTBYTES); - - memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); - out += BLAKE2B_OUTBYTES / 2; - - while (toproduce > BLAKE2B_OUTBYTES) { - memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + + memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); + out += BLAKE2B_OUTBYTES / 2; + + while (toproduce > BLAKE2B_OUTBYTES) { + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); TBlake2B<instructionSet> hash2(BLAKE2B_OUTBYTES); hash2.Update(in_buffer, BLAKE2B_OUTBYTES); hash2.Final(out_buffer, BLAKE2B_OUTBYTES); - memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); - out += BLAKE2B_OUTBYTES / 2; - toproduce -= BLAKE2B_OUTBYTES / 2; - } - - memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); - { + memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); + out += BLAKE2B_OUTBYTES / 2; + toproduce -= BLAKE2B_OUTBYTES / 2; + } + + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + { TBlake2B<instructionSet> hash3(toproduce); hash3.Update(in_buffer, BLAKE2B_OUTBYTES); hash3.Final(out_buffer, toproduce); - memcpy(out, out_buffer, toproduce); - } - } - } - - static void InitBlockValue_(TBlock* b, ui8 in) { - memset(b->V, in, sizeof(b->V)); - } - - protected: /* Functions */ - void InternalHash_(TBlock* memory, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, - const ui8* aad, ui32 aadlen) const { - /* - * all parameters checks are in proxy objects - */ - - Initialize_(memory, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen); - FillMemoryBlocks_(memory); - Finalize_(memory, out, outlen); - } - - void InitialHash_(ui8 blockhash[ARGON2_PREHASH_DIGEST_LENGTH], - ui32 outlen, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const { - TBlake2B<instructionSet> hash(ARGON2_PREHASH_DIGEST_LENGTH); - /* lanes, but lanes == threads */ - hash.Update(Lanes_); - /* outlen */ - hash.Update(outlen); - /* m_cost */ - hash.Update(mcost); - /* t_cost */ - hash.Update(Tcost_); - /* version */ - hash.Update(0x00000013); - /* Argon2 type */ - hash.Update((ui32)Atype_); - /* pwdlen */ - hash.Update(pwdlen); - /* pwd */ - hash.Update(pwd, pwdlen); - /* saltlen */ - hash.Update(saltlen); - /* salt */ - if (saltlen) - hash.Update(salt, saltlen); - /* secret */ - hash.Update(SecretLen_); - if (SecretLen_) - hash.Update((void*)Secret_, SecretLen_); - /* aadlen */ - hash.Update(aadlen); - if (aadlen) - hash.Update((void*)aad, aadlen); - hash.Final(blockhash, ARGON2_PREHASH_DIGEST_LENGTH); - } - - void FillFirstBlocks_(TBlock* blocks, ui8* blockhash) const { - for (ui32 l = 0; l < Lanes_; l++) { - /* fill the first block of the lane */ - Store32_(l, blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4); - Store32_(0, blockhash + ARGON2_PREHASH_DIGEST_LENGTH); - ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_]), blockhash); - - /* fill the second block of the lane */ - Store32_(1, blockhash + ARGON2_PREHASH_DIGEST_LENGTH); - ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_ + 1]), blockhash); - } - } - - /* The 'if' will be optimized out as the number of threads is known at the compile time */ - void FillMemoryBlocks_(TBlock* memory) const { - for (ui32 t = 0; t < Tcost_; ++t) { - for (ui32 s = 0; s < ARGON2_SYNC_POINTS; ++s) { - if (Lanes_ == 1) - FillSegment_(memory, t, 0, s); - else { - NYmp::SetThreadCount(Lanes_); - NYmp::ParallelForStaticAutoChunk<ui32>(0, Lanes_, [this, &memory, s, t](int k) { - this->FillSegment_(memory, t, k, s); - }); - } - } - } - } - - void Initialize_(TBlock* memory, ui32 outlen, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const { - ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]; - InitialHash_(blockhash, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen); - FillFirstBlocks_(memory, blockhash); - } - - ui32 ComputeReferenceArea_(ui32 pass, ui32 slice, ui32 index, bool sameLane) const { - ui32 passVal = pass == 0 ? (slice * SegmentLength_) : (LaneLength_ - SegmentLength_); - return sameLane ? passVal + (index - 1) : passVal + (index == 0 ? -1 : 0); - } - - ui32 IndexAlpha_(ui32 pass, ui32 slice, ui32 index, ui32 pseudoRand, bool sameLane) const { - ui32 referenceAreaSize = ComputeReferenceArea_(pass, slice, index, sameLane); - - ui64 relativePosition = pseudoRand; - relativePosition = relativePosition * relativePosition >> 32; - relativePosition = referenceAreaSize - 1 - (referenceAreaSize * relativePosition >> 32); - - ui32 startPosition = 0; - if (pass != 0) - startPosition = (slice == ARGON2_SYNC_POINTS - 1) ? 0 : (slice + 1) * SegmentLength_; - - return (ui32)((startPosition + relativePosition) % LaneLength_); - } - - void NextAddresses_(TBlock* addressBlock, TBlock* inputBlock, const TBlock* zeroBlock) const { - inputBlock->V[6]++; - FillBlock_(zeroBlock, inputBlock, addressBlock, false); - FillBlock_(zeroBlock, addressBlock, addressBlock, false); - } - - void Finalize_(const TBlock* memory, ui8* out, ui32 outlen) const { - TBlock blockhash; - CopyBlock_(&blockhash, memory + LaneLength_ - 1); - - /* XOR the last blocks */ - for (ui32 l = 1; l < Lanes_; ++l) { - ui32 lastBlockInLane = l * LaneLength_ + (LaneLength_ - 1); - XorBlock_(&blockhash, memory + lastBlockInLane); - } - - Blake2BLong_(out, outlen, (ui8*)blockhash.V, ARGON2_BLOCK_SIZE); - } - - /* The switch will be optimized out by the compiler as the type is known at the compile time */ - void FillSegment_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const { - switch (Atype_) { - case EArgon2Type::Argon2d: - FillSegmentD_(memory, pass, lane, slice); - return; - case EArgon2Type::Argon2i: - FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2i); - return; - case EArgon2Type::Argon2id: - if (pass == 0 && slice < ARGON2_SYNC_POINTS / 2) - FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2id); - else - FillSegmentD_(memory, pass, lane, slice); - return; - } - } - - void FillSegmentD_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const { - ui32 startingIndex = (pass == 0 && slice == 0) ? 2 : 0; - ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex; - ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1; - - for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) { - if (currOffset % LaneLength_ == 1) { - prevOffset = currOffset - 1; - } - - ui64 pseudoRand = memory[prevOffset].V[0]; - ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_); - ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane); - - TBlock* refBlock = memory + LaneLength_ * refLane + refIndex; - FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0); - } - } - - void FillSegmentI_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice, EArgon2Type atp) const { - TBlock addressBlock, inputBlock, zeroBlock; - InitBlockValue_(&zeroBlock, 0); - InitBlockValue_(&inputBlock, 0); - - inputBlock.V[0] = pass; - inputBlock.V[1] = lane; - inputBlock.V[2] = slice; - inputBlock.V[3] = MemoryBlocks_; - inputBlock.V[4] = Tcost_; - inputBlock.V[5] = (ui64)atp; - - ui32 startingIndex = 0; - - if (pass == 0 && slice == 0) { - startingIndex = 2; - NextAddresses_(&addressBlock, &inputBlock, &zeroBlock); - } - - ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex; - ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1; - - for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) { - if (currOffset % LaneLength_ == 1) { - prevOffset = currOffset - 1; - } - - if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { - NextAddresses_(&addressBlock, &inputBlock, &zeroBlock); - } - - ui64 pseudoRand = addressBlock.V[i % ARGON2_ADDRESSES_IN_BLOCK]; - ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_); - ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane); - - TBlock* refBlock = memory + LaneLength_ * refLane + refIndex; - FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0); - } - } - }; -} + memcpy(out, out_buffer, toproduce); + } + } + } + + static void InitBlockValue_(TBlock* b, ui8 in) { + memset(b->V, in, sizeof(b->V)); + } + + protected: /* Functions */ + void InternalHash_(TBlock* memory, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, + const ui8* aad, ui32 aadlen) const { + /* + * all parameters checks are in proxy objects + */ + + Initialize_(memory, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen); + FillMemoryBlocks_(memory); + Finalize_(memory, out, outlen); + } + + void InitialHash_(ui8 blockhash[ARGON2_PREHASH_DIGEST_LENGTH], + ui32 outlen, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const { + TBlake2B<instructionSet> hash(ARGON2_PREHASH_DIGEST_LENGTH); + /* lanes, but lanes == threads */ + hash.Update(Lanes_); + /* outlen */ + hash.Update(outlen); + /* m_cost */ + hash.Update(mcost); + /* t_cost */ + hash.Update(Tcost_); + /* version */ + hash.Update(0x00000013); + /* Argon2 type */ + hash.Update((ui32)Atype_); + /* pwdlen */ + hash.Update(pwdlen); + /* pwd */ + hash.Update(pwd, pwdlen); + /* saltlen */ + hash.Update(saltlen); + /* salt */ + if (saltlen) + hash.Update(salt, saltlen); + /* secret */ + hash.Update(SecretLen_); + if (SecretLen_) + hash.Update((void*)Secret_, SecretLen_); + /* aadlen */ + hash.Update(aadlen); + if (aadlen) + hash.Update((void*)aad, aadlen); + hash.Final(blockhash, ARGON2_PREHASH_DIGEST_LENGTH); + } + + void FillFirstBlocks_(TBlock* blocks, ui8* blockhash) const { + for (ui32 l = 0; l < Lanes_; l++) { + /* fill the first block of the lane */ + Store32_(l, blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4); + Store32_(0, blockhash + ARGON2_PREHASH_DIGEST_LENGTH); + ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_]), blockhash); + + /* fill the second block of the lane */ + Store32_(1, blockhash + ARGON2_PREHASH_DIGEST_LENGTH); + ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_ + 1]), blockhash); + } + } + + /* The 'if' will be optimized out as the number of threads is known at the compile time */ + void FillMemoryBlocks_(TBlock* memory) const { + for (ui32 t = 0; t < Tcost_; ++t) { + for (ui32 s = 0; s < ARGON2_SYNC_POINTS; ++s) { + if (Lanes_ == 1) + FillSegment_(memory, t, 0, s); + else { + NYmp::SetThreadCount(Lanes_); + NYmp::ParallelForStaticAutoChunk<ui32>(0, Lanes_, [this, &memory, s, t](int k) { + this->FillSegment_(memory, t, k, s); + }); + } + } + } + } + + void Initialize_(TBlock* memory, ui32 outlen, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const { + ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]; + InitialHash_(blockhash, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen); + FillFirstBlocks_(memory, blockhash); + } + + ui32 ComputeReferenceArea_(ui32 pass, ui32 slice, ui32 index, bool sameLane) const { + ui32 passVal = pass == 0 ? (slice * SegmentLength_) : (LaneLength_ - SegmentLength_); + return sameLane ? passVal + (index - 1) : passVal + (index == 0 ? -1 : 0); + } + + ui32 IndexAlpha_(ui32 pass, ui32 slice, ui32 index, ui32 pseudoRand, bool sameLane) const { + ui32 referenceAreaSize = ComputeReferenceArea_(pass, slice, index, sameLane); + + ui64 relativePosition = pseudoRand; + relativePosition = relativePosition * relativePosition >> 32; + relativePosition = referenceAreaSize - 1 - (referenceAreaSize * relativePosition >> 32); + + ui32 startPosition = 0; + if (pass != 0) + startPosition = (slice == ARGON2_SYNC_POINTS - 1) ? 0 : (slice + 1) * SegmentLength_; + + return (ui32)((startPosition + relativePosition) % LaneLength_); + } + + void NextAddresses_(TBlock* addressBlock, TBlock* inputBlock, const TBlock* zeroBlock) const { + inputBlock->V[6]++; + FillBlock_(zeroBlock, inputBlock, addressBlock, false); + FillBlock_(zeroBlock, addressBlock, addressBlock, false); + } + + void Finalize_(const TBlock* memory, ui8* out, ui32 outlen) const { + TBlock blockhash; + CopyBlock_(&blockhash, memory + LaneLength_ - 1); + + /* XOR the last blocks */ + for (ui32 l = 1; l < Lanes_; ++l) { + ui32 lastBlockInLane = l * LaneLength_ + (LaneLength_ - 1); + XorBlock_(&blockhash, memory + lastBlockInLane); + } + + Blake2BLong_(out, outlen, (ui8*)blockhash.V, ARGON2_BLOCK_SIZE); + } + + /* The switch will be optimized out by the compiler as the type is known at the compile time */ + void FillSegment_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const { + switch (Atype_) { + case EArgon2Type::Argon2d: + FillSegmentD_(memory, pass, lane, slice); + return; + case EArgon2Type::Argon2i: + FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2i); + return; + case EArgon2Type::Argon2id: + if (pass == 0 && slice < ARGON2_SYNC_POINTS / 2) + FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2id); + else + FillSegmentD_(memory, pass, lane, slice); + return; + } + } + + void FillSegmentD_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const { + ui32 startingIndex = (pass == 0 && slice == 0) ? 2 : 0; + ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex; + ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1; + + for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) { + if (currOffset % LaneLength_ == 1) { + prevOffset = currOffset - 1; + } + + ui64 pseudoRand = memory[prevOffset].V[0]; + ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_); + ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane); + + TBlock* refBlock = memory + LaneLength_ * refLane + refIndex; + FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0); + } + } + + void FillSegmentI_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice, EArgon2Type atp) const { + TBlock addressBlock, inputBlock, zeroBlock; + InitBlockValue_(&zeroBlock, 0); + InitBlockValue_(&inputBlock, 0); + + inputBlock.V[0] = pass; + inputBlock.V[1] = lane; + inputBlock.V[2] = slice; + inputBlock.V[3] = MemoryBlocks_; + inputBlock.V[4] = Tcost_; + inputBlock.V[5] = (ui64)atp; + + ui32 startingIndex = 0; + + if (pass == 0 && slice == 0) { + startingIndex = 2; + NextAddresses_(&addressBlock, &inputBlock, &zeroBlock); + } + + ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex; + ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1; + + for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) { + if (currOffset % LaneLength_ == 1) { + prevOffset = currOffset - 1; + } + + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + NextAddresses_(&addressBlock, &inputBlock, &zeroBlock); + } + + ui64 pseudoRand = addressBlock.V[i % ARGON2_ADDRESSES_IN_BLOCK]; + ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_); + ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane); + + TBlock* refBlock = memory + LaneLength_ * refLane + refIndex; + FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0); + } + } + }; +} diff --git a/library/cpp/digest/argonish/internal/argon2/argon2_ref.h b/library/cpp/digest/argonish/internal/argon2/argon2_ref.h index d0635b71ee..8e5e3fa971 100644 --- a/library/cpp/digest/argonish/internal/argon2/argon2_ref.h +++ b/library/cpp/digest/argonish/internal/argon2/argon2_ref.h @@ -1,88 +1,88 @@ -#pragma once - -#include "argon2_base.h" +#pragma once + +#include "argon2_base.h" #include <library/cpp/digest/argonish/internal/rotations/rotations_ref.h> - -namespace NArgonish { - static inline ui64 FBlaMka(ui64 x, ui64 y) { - const ui64 m = 0xFFFFFFFF; - const ui64 xy = (x & m) * (y & m); - return x + y + 2 * xy; - } - - static inline void BlamkaGRef(ui64& a, ui64& b, ui64& c, ui64& d) { - a = FBlaMka(a, b); - d = Rotr(d ^ a, 32); - c = FBlaMka(c, d); - b = Rotr(b ^ c, 24); - a = FBlaMka(a, b); - d = Rotr(d ^ a, 16); - c = FBlaMka(c, d); - b = Rotr(b ^ c, 63); - } - - static inline void BlamkaRoundRef( - ui64& v0, ui64& v1, ui64& v2, ui64& v3, - ui64& v4, ui64& v5, ui64& v6, ui64& v7, - ui64& v8, ui64& v9, ui64& v10, ui64& v11, - ui64& v12, ui64& v13, ui64& v14, ui64& v15) { - BlamkaGRef(v0, v4, v8, v12); - BlamkaGRef(v1, v5, v9, v13); - BlamkaGRef(v2, v6, v10, v14); - BlamkaGRef(v3, v7, v11, v15); - BlamkaGRef(v0, v5, v10, v15); - BlamkaGRef(v1, v6, v11, v12); - BlamkaGRef(v2, v7, v8, v13); - BlamkaGRef(v3, v4, v9, v14); - } - - template <ui32 mcost, ui32 threads> - class TArgon2REF final: public TArgon2<EInstructionSet::REF, mcost, threads> { - public: - TArgon2REF(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) - : TArgon2<EInstructionSet::REF, mcost, threads>(atype, tcost, key, keylen) - { - } - - protected: - virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { - for (ui32 i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { - dst->V[i] ^= src->V[i]; - } - } - - virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { - memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); - } - - virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { - TBlock blockR, blockTmp; - CopyBlock_(&blockR, refBlock); - XorBlock_(&blockR, prevBlock); - CopyBlock_(&blockTmp, &blockR); - - if (withXor) { - XorBlock_(&blockTmp, nextBlock); - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaRoundRef( - blockR.V[16 * i + 0], blockR.V[16 * i + 1], blockR.V[16 * i + 2], blockR.V[16 * i + 3], - blockR.V[16 * i + 4], blockR.V[16 * i + 5], blockR.V[16 * i + 6], blockR.V[16 * i + 7], - blockR.V[16 * i + 8], blockR.V[16 * i + 9], blockR.V[16 * i + 10], blockR.V[16 * i + 11], - blockR.V[16 * i + 12], blockR.V[16 * i + 13], blockR.V[16 * i + 14], blockR.V[16 * i + 15]); - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaRoundRef( - blockR.V[2 * i + 0], blockR.V[2 * i + 1], blockR.V[2 * i + 16], blockR.V[2 * i + 17], - blockR.V[2 * i + 32], blockR.V[2 * i + 33], blockR.V[2 * i + 48], blockR.V[2 * i + 49], - blockR.V[2 * i + 64], blockR.V[2 * i + 65], blockR.V[2 * i + 80], blockR.V[2 * i + 81], - blockR.V[2 * i + 96], blockR.V[2 * i + 97], blockR.V[2 * i + 112], blockR.V[2 * i + 113]); - } - - CopyBlock_(nextBlock, &blockTmp); - XorBlock_(nextBlock, &blockR); - } - }; -} + +namespace NArgonish { + static inline ui64 FBlaMka(ui64 x, ui64 y) { + const ui64 m = 0xFFFFFFFF; + const ui64 xy = (x & m) * (y & m); + return x + y + 2 * xy; + } + + static inline void BlamkaGRef(ui64& a, ui64& b, ui64& c, ui64& d) { + a = FBlaMka(a, b); + d = Rotr(d ^ a, 32); + c = FBlaMka(c, d); + b = Rotr(b ^ c, 24); + a = FBlaMka(a, b); + d = Rotr(d ^ a, 16); + c = FBlaMka(c, d); + b = Rotr(b ^ c, 63); + } + + static inline void BlamkaRoundRef( + ui64& v0, ui64& v1, ui64& v2, ui64& v3, + ui64& v4, ui64& v5, ui64& v6, ui64& v7, + ui64& v8, ui64& v9, ui64& v10, ui64& v11, + ui64& v12, ui64& v13, ui64& v14, ui64& v15) { + BlamkaGRef(v0, v4, v8, v12); + BlamkaGRef(v1, v5, v9, v13); + BlamkaGRef(v2, v6, v10, v14); + BlamkaGRef(v3, v7, v11, v15); + BlamkaGRef(v0, v5, v10, v15); + BlamkaGRef(v1, v6, v11, v12); + BlamkaGRef(v2, v7, v8, v13); + BlamkaGRef(v3, v4, v9, v14); + } + + template <ui32 mcost, ui32 threads> + class TArgon2REF final: public TArgon2<EInstructionSet::REF, mcost, threads> { + public: + TArgon2REF(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) + : TArgon2<EInstructionSet::REF, mcost, threads>(atype, tcost, key, keylen) + { + } + + protected: + virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { + for (ui32 i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + dst->V[i] ^= src->V[i]; + } + } + + virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { + memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); + } + + virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { + TBlock blockR, blockTmp; + CopyBlock_(&blockR, refBlock); + XorBlock_(&blockR, prevBlock); + CopyBlock_(&blockTmp, &blockR); + + if (withXor) { + XorBlock_(&blockTmp, nextBlock); + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaRoundRef( + blockR.V[16 * i + 0], blockR.V[16 * i + 1], blockR.V[16 * i + 2], blockR.V[16 * i + 3], + blockR.V[16 * i + 4], blockR.V[16 * i + 5], blockR.V[16 * i + 6], blockR.V[16 * i + 7], + blockR.V[16 * i + 8], blockR.V[16 * i + 9], blockR.V[16 * i + 10], blockR.V[16 * i + 11], + blockR.V[16 * i + 12], blockR.V[16 * i + 13], blockR.V[16 * i + 14], blockR.V[16 * i + 15]); + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaRoundRef( + blockR.V[2 * i + 0], blockR.V[2 * i + 1], blockR.V[2 * i + 16], blockR.V[2 * i + 17], + blockR.V[2 * i + 32], blockR.V[2 * i + 33], blockR.V[2 * i + 48], blockR.V[2 * i + 49], + blockR.V[2 * i + 64], blockR.V[2 * i + 65], blockR.V[2 * i + 80], blockR.V[2 * i + 81], + blockR.V[2 * i + 96], blockR.V[2 * i + 97], blockR.V[2 * i + 112], blockR.V[2 * i + 113]); + } + + CopyBlock_(nextBlock, &blockTmp); + XorBlock_(nextBlock, &blockR); + } + }; +} diff --git a/library/cpp/digest/argonish/internal/argon2/argon2_sse2.h b/library/cpp/digest/argonish/internal/argon2/argon2_sse2.h index 04fc70c56f..1d2230a657 100644 --- a/library/cpp/digest/argonish/internal/argon2/argon2_sse2.h +++ b/library/cpp/digest/argonish/internal/argon2/argon2_sse2.h @@ -1,101 +1,101 @@ -#pragma once - -#include <emmintrin.h> -#include "argon2_base.h" +#pragma once + +#include <emmintrin.h> +#include "argon2_base.h" #include <library/cpp/digest/argonish/internal/blamka/blamka_sse2.h> - -namespace NArgonish { - template <ui32 mcost, ui32 threads> - class TArgon2SSE2 final: public TArgon2<EInstructionSet::SSE2, mcost, threads> { - public: - TArgon2SSE2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) - : TArgon2<EInstructionSet::SSE2, mcost, threads>(atype, tcost, key, keylen) - { - } - - protected: - virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { - __m128i* mdst = (__m128i*)dst->V; - __m128i* msrc = (__m128i*)src->V; - - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) - XorValues(mdst + i, msrc + i, mdst + i); - } - - virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { - memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); - } - - virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { - __m128i blockxy[ARGON2_OWORDS_IN_BLOCK]; - __m128i state[ARGON2_OWORDS_IN_BLOCK]; - - memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); - - if (withXor) { - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - state[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); - blockxy[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)nextBlock->V + i)); - } - } else { - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - blockxy[i] = state[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); - } - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaG1SSE2( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - BlamkaG2SSE2( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - DiagonalizeSSE2( - state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], state[8 * i + 7]); - BlamkaG1SSE2( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - BlamkaG2SSE2( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - UndiagonalizeSSE2( - state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], state[8 * i + 7]); - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaG1SSE2( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG2SSE2( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - DiagonalizeSSE2( - state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG1SSE2( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG2SSE2( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - UndiagonalizeSSE2( - state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - } - - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - state[i] = _mm_xor_si128(state[i], blockxy[i]); - _mm_storeu_si128((__m128i*)nextBlock->V + i, state[i]); - } - } - }; -} + +namespace NArgonish { + template <ui32 mcost, ui32 threads> + class TArgon2SSE2 final: public TArgon2<EInstructionSet::SSE2, mcost, threads> { + public: + TArgon2SSE2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) + : TArgon2<EInstructionSet::SSE2, mcost, threads>(atype, tcost, key, keylen) + { + } + + protected: + virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { + __m128i* mdst = (__m128i*)dst->V; + __m128i* msrc = (__m128i*)src->V; + + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) + XorValues(mdst + i, msrc + i, mdst + i); + } + + virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { + memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); + } + + virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { + __m128i blockxy[ARGON2_OWORDS_IN_BLOCK]; + __m128i state[ARGON2_OWORDS_IN_BLOCK]; + + memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); + + if (withXor) { + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); + blockxy[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)nextBlock->V + i)); + } + } else { + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + blockxy[i] = state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); + } + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaG1SSE2( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + BlamkaG2SSE2( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + DiagonalizeSSE2( + state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + BlamkaG1SSE2( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + BlamkaG2SSE2( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + UndiagonalizeSSE2( + state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaG1SSE2( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG2SSE2( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + DiagonalizeSSE2( + state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG1SSE2( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG2SSE2( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + UndiagonalizeSSE2( + state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + } + + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + state[i] = _mm_xor_si128(state[i], blockxy[i]); + _mm_storeu_si128((__m128i*)nextBlock->V + i, state[i]); + } + } + }; +} diff --git a/library/cpp/digest/argonish/internal/argon2/argon2_sse41.h b/library/cpp/digest/argonish/internal/argon2/argon2_sse41.h index c9b01915de..1ad35048ea 100644 --- a/library/cpp/digest/argonish/internal/argon2/argon2_sse41.h +++ b/library/cpp/digest/argonish/internal/argon2/argon2_sse41.h @@ -1,101 +1,101 @@ -#pragma once - -#include <smmintrin.h> -#include "argon2_base.h" +#pragma once + +#include <smmintrin.h> +#include "argon2_base.h" #include <library/cpp/digest/argonish/internal/blamka/blamka_ssse3.h> - -namespace NArgonish { - template <ui32 mcost, ui32 threads> - class TArgon2SSE41 final: public TArgon2<EInstructionSet::SSE41, mcost, threads> { - public: - TArgon2SSE41(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) - : TArgon2<EInstructionSet::SSE41, mcost, threads>(atype, tcost, key, keylen) - { - } - - protected: - virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { - __m128i* mdst = (__m128i*)dst->V; - __m128i* msrc = (__m128i*)src->V; - - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) - XorValues(mdst + i, msrc + i, mdst + i); - } - - virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { - memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); - } - - virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { - __m128i blockxy[ARGON2_OWORDS_IN_BLOCK]; - __m128i state[ARGON2_OWORDS_IN_BLOCK]; - - memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); - - if (withXor) { - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - state[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); - blockxy[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)nextBlock->V + i)); - } - } else { - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - blockxy[i] = state[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); - } - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaG1SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - BlamkaG2SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - DiagonalizeSSSE3( - state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], state[8 * i + 7]); - BlamkaG1SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - BlamkaG2SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - UndiagonalizeSSSE3( - state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], state[8 * i + 7]); - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaG1SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG2SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - DiagonalizeSSSE3( - state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG1SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG2SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - UndiagonalizeSSSE3( - state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - } - - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - state[i] = _mm_xor_si128(state[i], blockxy[i]); - _mm_storeu_si128((__m128i*)nextBlock->V + i, state[i]); - } - } - }; -} + +namespace NArgonish { + template <ui32 mcost, ui32 threads> + class TArgon2SSE41 final: public TArgon2<EInstructionSet::SSE41, mcost, threads> { + public: + TArgon2SSE41(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) + : TArgon2<EInstructionSet::SSE41, mcost, threads>(atype, tcost, key, keylen) + { + } + + protected: + virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { + __m128i* mdst = (__m128i*)dst->V; + __m128i* msrc = (__m128i*)src->V; + + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) + XorValues(mdst + i, msrc + i, mdst + i); + } + + virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { + memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); + } + + virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { + __m128i blockxy[ARGON2_OWORDS_IN_BLOCK]; + __m128i state[ARGON2_OWORDS_IN_BLOCK]; + + memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); + + if (withXor) { + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); + blockxy[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)nextBlock->V + i)); + } + } else { + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + blockxy[i] = state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); + } + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaG1SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + BlamkaG2SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + DiagonalizeSSSE3( + state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + BlamkaG1SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + BlamkaG2SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + UndiagonalizeSSSE3( + state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaG1SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG2SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + DiagonalizeSSSE3( + state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG1SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG2SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + UndiagonalizeSSSE3( + state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + } + + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + state[i] = _mm_xor_si128(state[i], blockxy[i]); + _mm_storeu_si128((__m128i*)nextBlock->V + i, state[i]); + } + } + }; +} diff --git a/library/cpp/digest/argonish/internal/argon2/argon2_ssse3.h b/library/cpp/digest/argonish/internal/argon2/argon2_ssse3.h index 714197a90f..a25a416834 100644 --- a/library/cpp/digest/argonish/internal/argon2/argon2_ssse3.h +++ b/library/cpp/digest/argonish/internal/argon2/argon2_ssse3.h @@ -1,102 +1,102 @@ -#pragma once - -#include <emmintrin.h> -#include <tmmintrin.h> -#include "argon2_base.h" +#pragma once + +#include <emmintrin.h> +#include <tmmintrin.h> +#include "argon2_base.h" #include <library/cpp/digest/argonish/internal/blamka/blamka_ssse3.h> - -namespace NArgonish { - template <ui32 mcost, ui32 threads> - class TArgon2SSSE3 final: public TArgon2<EInstructionSet::SSSE3, mcost, threads> { - public: - TArgon2SSSE3(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) - : TArgon2<EInstructionSet::SSSE3, mcost, threads>(atype, tcost, key, keylen) - { - } - - protected: - virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { - __m128i* mdst = (__m128i*)dst->V; - __m128i* msrc = (__m128i*)src->V; - - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) - XorValues(mdst + i, msrc + i, mdst + i); - } - - virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { - memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); - } - - virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { - __m128i blockxy[ARGON2_OWORDS_IN_BLOCK]; - __m128i state[ARGON2_OWORDS_IN_BLOCK]; - - memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); - - if (withXor) { - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - state[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); - blockxy[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)nextBlock->V + i)); - } - } else { - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - blockxy[i] = state[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); - } - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaG1SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - BlamkaG2SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - DiagonalizeSSSE3( - state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], state[8 * i + 7]); - BlamkaG1SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - BlamkaG2SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - UndiagonalizeSSSE3( - state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], state[8 * i + 7]); - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaG1SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG2SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - DiagonalizeSSSE3( - state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG1SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG2SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - UndiagonalizeSSSE3( - state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - } - - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - state[i] = _mm_xor_si128(state[i], blockxy[i]); - _mm_storeu_si128((__m128i*)nextBlock->V + i, state[i]); - } - } - }; -} + +namespace NArgonish { + template <ui32 mcost, ui32 threads> + class TArgon2SSSE3 final: public TArgon2<EInstructionSet::SSSE3, mcost, threads> { + public: + TArgon2SSSE3(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) + : TArgon2<EInstructionSet::SSSE3, mcost, threads>(atype, tcost, key, keylen) + { + } + + protected: + virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { + __m128i* mdst = (__m128i*)dst->V; + __m128i* msrc = (__m128i*)src->V; + + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) + XorValues(mdst + i, msrc + i, mdst + i); + } + + virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { + memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); + } + + virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { + __m128i blockxy[ARGON2_OWORDS_IN_BLOCK]; + __m128i state[ARGON2_OWORDS_IN_BLOCK]; + + memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); + + if (withXor) { + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); + blockxy[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)nextBlock->V + i)); + } + } else { + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + blockxy[i] = state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); + } + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaG1SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + BlamkaG2SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + DiagonalizeSSSE3( + state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + BlamkaG1SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + BlamkaG2SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + UndiagonalizeSSSE3( + state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaG1SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG2SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + DiagonalizeSSSE3( + state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG1SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG2SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + UndiagonalizeSSSE3( + state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + } + + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + state[i] = _mm_xor_si128(state[i], blockxy[i]); + _mm_storeu_si128((__m128i*)nextBlock->V + i, state[i]); + } + } + }; +} diff --git a/library/cpp/digest/argonish/internal/argon2/ya.make b/library/cpp/digest/argonish/internal/argon2/ya.make index 10002edb17..85459865ba 100644 --- a/library/cpp/digest/argonish/internal/argon2/ya.make +++ b/library/cpp/digest/argonish/internal/argon2/ya.make @@ -1,10 +1,10 @@ -LIBRARY() - -OWNER(e-sidorov) - -PEERDIR( +LIBRARY() + +OWNER(e-sidorov) + +PEERDIR( library/cpp/digest/argonish/internal/blamka library/cpp/digest/argonish/internal/blake2b -) - -END() +) + +END() diff --git a/library/cpp/digest/argonish/internal/blake2b/blake2b.h b/library/cpp/digest/argonish/internal/blake2b/blake2b.h index 4dc696c972..3dcfc3fc48 100644 --- a/library/cpp/digest/argonish/internal/blake2b/blake2b.h +++ b/library/cpp/digest/argonish/internal/blake2b/blake2b.h @@ -1,187 +1,187 @@ -#pragma once - -#include <util/generic/yexception.h> -#include <util/system/compiler.h> +#pragma once + +#include <util/generic/yexception.h> +#include <util/system/compiler.h> #include <library/cpp/digest/argonish/blake2b.h> - -namespace NArgonish { - const ui32 BLAKE2B_BLOCKBYTES = 128; - const ui32 BLAKE2B_BLOCKQWORDS = BLAKE2B_BLOCKBYTES / 8; - const ui32 BLAKE2B_OUTBYTES = 64; - const ui32 BLAKE2B_KEYBYTES = 64; - const ui32 BLAKE2B_SALTBYTES = 16; - const ui32 BLAKE2B_PERSONALBYTES = 16; - - template <NArgonish::EInstructionSet instructionSet> - class TBlake2B final: public IBlake2Base { - public: - virtual ~TBlake2B<instructionSet>() { - SecureZeroMemory_((void*)&State_, sizeof(State_)); - SecureZeroMemory_((void*)&Param_, sizeof(Param_)); - } - - EInstructionSet GetInstructionSet() { - return instructionSet; - } - - protected: - struct TBlake2BState { - ui64 H[8]; - ui64 T[2]; - ui64 F[2]; - ui64 Buf[BLAKE2B_BLOCKQWORDS]; - size_t BufLen; - size_t OutLen; - ui8 LastNode; - }; - + +namespace NArgonish { + const ui32 BLAKE2B_BLOCKBYTES = 128; + const ui32 BLAKE2B_BLOCKQWORDS = BLAKE2B_BLOCKBYTES / 8; + const ui32 BLAKE2B_OUTBYTES = 64; + const ui32 BLAKE2B_KEYBYTES = 64; + const ui32 BLAKE2B_SALTBYTES = 16; + const ui32 BLAKE2B_PERSONALBYTES = 16; + + template <NArgonish::EInstructionSet instructionSet> + class TBlake2B final: public IBlake2Base { + public: + virtual ~TBlake2B<instructionSet>() { + SecureZeroMemory_((void*)&State_, sizeof(State_)); + SecureZeroMemory_((void*)&Param_, sizeof(Param_)); + } + + EInstructionSet GetInstructionSet() { + return instructionSet; + } + + protected: + struct TBlake2BState { + ui64 H[8]; + ui64 T[2]; + ui64 F[2]; + ui64 Buf[BLAKE2B_BLOCKQWORDS]; + size_t BufLen; + size_t OutLen; + ui8 LastNode; + }; + struct TBlake2BParam { - ui8 DigestLen; /* 1 */ - ui8 KeyLen; /* 2 */ - ui8 Fanout; /* 3 */ - ui8 Depth; /* 4 */ - ui32 LeafLength; /* 8 */ - ui32 NodeOffset; /* 12 */ - ui32 XofLength; /* 16 */ - ui8 NodeDepth; /* 17 */ - ui8 InnerLength; /* 18 */ - ui8 Reserved[14]; /* 32 */ - ui8 Salt[BLAKE2B_SALTBYTES]; /* 48 */ - ui8 Personal[BLAKE2B_PERSONALBYTES]; /* 64 */ + ui8 DigestLen; /* 1 */ + ui8 KeyLen; /* 2 */ + ui8 Fanout; /* 3 */ + ui8 Depth; /* 4 */ + ui32 LeafLength; /* 8 */ + ui32 NodeOffset; /* 12 */ + ui32 XofLength; /* 16 */ + ui8 NodeDepth; /* 17 */ + ui8 InnerLength; /* 18 */ + ui8 Reserved[14]; /* 32 */ + ui8 Salt[BLAKE2B_SALTBYTES]; /* 48 */ + ui8 Personal[BLAKE2B_PERSONALBYTES]; /* 64 */ } Y_PACKED; - - TBlake2BState State_; - TBlake2BParam Param_; - - protected: - void Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]); - void InitialXor_(ui8* h, const ui8* p); - void* GetIV_() const; - - static void SecureZeroMemory_(void* src, size_t len) { - static void* (*const volatile memsetv)(void*, int, size_t) = &memset; - memsetv(src, 0, len); - } - - void InitParam_() { - memset(&State_, 0, sizeof(State_)); - InitialXor_((ui8*)(State_.H), (const ui8*)(&Param_)); - State_.OutLen = Param_.DigestLen; - } - - void IncrementCounter_(const ui64 inc) { - State_.T[0] += inc; - State_.T[1] += (State_.T[0] < inc) ? 1 : 0; - } - - bool IsLastBlock_() { - return State_.F[0] != 0; - } - - void SetLastNode_() { - State_.F[1] = (ui64)-1; - } - - void SetLastBlock_() { - if (State_.LastNode) - SetLastNode_(); - - State_.F[0] = (ui64)-1; - } - - public: - TBlake2B(size_t outlen) { - /* - * Note that outlen check was moved to proxy class - */ - - Param_.DigestLen = (ui8)outlen; - Param_.KeyLen = 0; - Param_.Fanout = 1; - Param_.Depth = 1; - Param_.LeafLength = 0; - Param_.NodeOffset = 0; - Param_.XofLength = 0; - Param_.NodeDepth = 0; - Param_.InnerLength = 0; - - memset(Param_.Reserved, 0, sizeof(Param_.Reserved)); - memset(Param_.Salt, 0, sizeof(Param_.Salt)); - memset(Param_.Personal, 0, sizeof(Param_.Personal)); - - InitParam_(); - } - - TBlake2B(size_t outlen, const void* key, size_t keylen) { - /** - * Note that key and outlen checks were moved to proxy classes - */ - Param_.DigestLen = (ui8)outlen; - Param_.KeyLen = (ui8)keylen; - Param_.Fanout = 1; - Param_.Depth = 1; - - Param_.LeafLength = 0; - Param_.NodeOffset = 0; - Param_.XofLength = 0; - Param_.NodeDepth = 0; - Param_.InnerLength = 0; - - memset(Param_.Reserved, 0, sizeof(Param_.Reserved)); - memset(Param_.Salt, 0, sizeof(Param_.Salt)); - memset(Param_.Personal, 0, sizeof(Param_.Personal)); - - InitParam_(); - ui8 block[BLAKE2B_BLOCKBYTES] = {0}; - memcpy(block, key, keylen); - Update(block, BLAKE2B_BLOCKBYTES); - SecureZeroMemory_(block, BLAKE2B_BLOCKBYTES); - } - - void Update(ui32 in) override { - Update((const void*)&in, sizeof(in)); - } - - void Update(const void* pin, size_t inlen) override { - const ui8* in = (ui8*)pin; - if (inlen > 0) { - size_t left = State_.BufLen; - size_t fill = BLAKE2B_BLOCKBYTES - left; - if (inlen > fill) { - State_.BufLen = 0; - memcpy((ui8*)State_.Buf + left, in, fill); /* Fill buffer */ - IncrementCounter_(BLAKE2B_BLOCKBYTES); - Compress_(State_.Buf); /* Compress */ - in += fill; - inlen -= fill; - while (inlen > BLAKE2B_BLOCKBYTES) { - /* to fix ubsan's unaligned report */ - ui64 tmpbuf[BLAKE2B_BLOCKQWORDS]; - memcpy(tmpbuf, in, BLAKE2B_BLOCKBYTES); - - IncrementCounter_(BLAKE2B_BLOCKBYTES); - Compress_(tmpbuf); - in += BLAKE2B_BLOCKBYTES; - inlen -= BLAKE2B_BLOCKBYTES; - } - } - memcpy((ui8*)State_.Buf + State_.BufLen, in, inlen); - State_.BufLen += inlen; - } - } - - void Final(void* out, size_t outlen) override { - if (out == nullptr || outlen < State_.OutLen) - ythrow yexception() << "out is null or outlen is too long"; - - if (IsLastBlock_()) - ythrow yexception() << "Final can't be called several times"; - - IncrementCounter_(State_.BufLen); - SetLastBlock_(); - memset((ui8*)State_.Buf + State_.BufLen, 0, BLAKE2B_BLOCKBYTES - State_.BufLen); - Compress_(State_.Buf); - memcpy(out, (void*)&State_.H[0], outlen); - } - }; -} + + TBlake2BState State_; + TBlake2BParam Param_; + + protected: + void Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]); + void InitialXor_(ui8* h, const ui8* p); + void* GetIV_() const; + + static void SecureZeroMemory_(void* src, size_t len) { + static void* (*const volatile memsetv)(void*, int, size_t) = &memset; + memsetv(src, 0, len); + } + + void InitParam_() { + memset(&State_, 0, sizeof(State_)); + InitialXor_((ui8*)(State_.H), (const ui8*)(&Param_)); + State_.OutLen = Param_.DigestLen; + } + + void IncrementCounter_(const ui64 inc) { + State_.T[0] += inc; + State_.T[1] += (State_.T[0] < inc) ? 1 : 0; + } + + bool IsLastBlock_() { + return State_.F[0] != 0; + } + + void SetLastNode_() { + State_.F[1] = (ui64)-1; + } + + void SetLastBlock_() { + if (State_.LastNode) + SetLastNode_(); + + State_.F[0] = (ui64)-1; + } + + public: + TBlake2B(size_t outlen) { + /* + * Note that outlen check was moved to proxy class + */ + + Param_.DigestLen = (ui8)outlen; + Param_.KeyLen = 0; + Param_.Fanout = 1; + Param_.Depth = 1; + Param_.LeafLength = 0; + Param_.NodeOffset = 0; + Param_.XofLength = 0; + Param_.NodeDepth = 0; + Param_.InnerLength = 0; + + memset(Param_.Reserved, 0, sizeof(Param_.Reserved)); + memset(Param_.Salt, 0, sizeof(Param_.Salt)); + memset(Param_.Personal, 0, sizeof(Param_.Personal)); + + InitParam_(); + } + + TBlake2B(size_t outlen, const void* key, size_t keylen) { + /** + * Note that key and outlen checks were moved to proxy classes + */ + Param_.DigestLen = (ui8)outlen; + Param_.KeyLen = (ui8)keylen; + Param_.Fanout = 1; + Param_.Depth = 1; + + Param_.LeafLength = 0; + Param_.NodeOffset = 0; + Param_.XofLength = 0; + Param_.NodeDepth = 0; + Param_.InnerLength = 0; + + memset(Param_.Reserved, 0, sizeof(Param_.Reserved)); + memset(Param_.Salt, 0, sizeof(Param_.Salt)); + memset(Param_.Personal, 0, sizeof(Param_.Personal)); + + InitParam_(); + ui8 block[BLAKE2B_BLOCKBYTES] = {0}; + memcpy(block, key, keylen); + Update(block, BLAKE2B_BLOCKBYTES); + SecureZeroMemory_(block, BLAKE2B_BLOCKBYTES); + } + + void Update(ui32 in) override { + Update((const void*)&in, sizeof(in)); + } + + void Update(const void* pin, size_t inlen) override { + const ui8* in = (ui8*)pin; + if (inlen > 0) { + size_t left = State_.BufLen; + size_t fill = BLAKE2B_BLOCKBYTES - left; + if (inlen > fill) { + State_.BufLen = 0; + memcpy((ui8*)State_.Buf + left, in, fill); /* Fill buffer */ + IncrementCounter_(BLAKE2B_BLOCKBYTES); + Compress_(State_.Buf); /* Compress */ + in += fill; + inlen -= fill; + while (inlen > BLAKE2B_BLOCKBYTES) { + /* to fix ubsan's unaligned report */ + ui64 tmpbuf[BLAKE2B_BLOCKQWORDS]; + memcpy(tmpbuf, in, BLAKE2B_BLOCKBYTES); + + IncrementCounter_(BLAKE2B_BLOCKBYTES); + Compress_(tmpbuf); + in += BLAKE2B_BLOCKBYTES; + inlen -= BLAKE2B_BLOCKBYTES; + } + } + memcpy((ui8*)State_.Buf + State_.BufLen, in, inlen); + State_.BufLen += inlen; + } + } + + void Final(void* out, size_t outlen) override { + if (out == nullptr || outlen < State_.OutLen) + ythrow yexception() << "out is null or outlen is too long"; + + if (IsLastBlock_()) + ythrow yexception() << "Final can't be called several times"; + + IncrementCounter_(State_.BufLen); + SetLastBlock_(); + memset((ui8*)State_.Buf + State_.BufLen, 0, BLAKE2B_BLOCKBYTES - State_.BufLen); + Compress_(State_.Buf); + memcpy(out, (void*)&State_.H[0], outlen); + } + }; +} diff --git a/library/cpp/digest/argonish/internal/blake2b/blake2b_avx2.h b/library/cpp/digest/argonish/internal/blake2b/blake2b_avx2.h index 76eec8cd5a..359ca90ebb 100644 --- a/library/cpp/digest/argonish/internal/blake2b/blake2b_avx2.h +++ b/library/cpp/digest/argonish/internal/blake2b/blake2b_avx2.h @@ -1,104 +1,104 @@ -#pragma once - -#include <immintrin.h> -#include "blake2b.h" +#pragma once + +#include <immintrin.h> +#include "blake2b.h" #include <library/cpp/digest/argonish/internal/rotations/rotations_avx2.h> - -namespace NArgonish { - template <> - void* TBlake2B<EInstructionSet::AVX2>::GetIV_() const { - static const __m256i Iv[2] = { - _mm256_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL, 0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), - _mm256_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL, 0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL)}; - return (void*)Iv; - } - - template <> - void TBlake2B<EInstructionSet::AVX2>::InitialXor_(ui8* h, const ui8* p) { - __m256i* iv = (__m256i*)GetIV_(); - __m256i* m_res = (__m256i*)h; - const __m256i* m_second = (__m256i*)p; - _mm256_storeu_si256(m_res, _mm256_xor_si256(iv[0], _mm256_loadu_si256(m_second))); - _mm256_storeu_si256(m_res + 1, _mm256_xor_si256(iv[1], _mm256_loadu_si256(m_second + 1))); - } - - /* - * a = v0, v1, v2, v3 - * b = v4, v5, v6, v7 - * c = v8, v9, v10, v11 - * d = v12, v13, v14, v15 - */ - static inline void G1AVX2(ui32 r, __m256i& a, __m256i& b, __m256i& c, __m256i& d, const ui64* blk, const __m128i vindex[12][4]) { - a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][0], 8))); - d = Rotr32(_mm256_xor_si256(a, d)); - c = _mm256_add_epi64(c, d); - b = Rotr24(_mm256_xor_si256(b, c)); - - a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][1], 8))); - d = Rotr16(_mm256_xor_si256(a, d)); - c = _mm256_add_epi64(c, d); - b = Rotr63(_mm256_xor_si256(b, c)); - } - - static inline void G2AVX2(ui32 r, __m256i& a, __m256i& b, __m256i& c, __m256i& d, const ui64* blk, const __m128i vindex[12][4]) { - a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][2], 8))); - d = Rotr32(_mm256_xor_si256(a, d)); - c = _mm256_add_epi64(c, d); - b = Rotr24(_mm256_xor_si256(b, c)); - - a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][3], 8))); - d = Rotr16(_mm256_xor_si256(a, d)); - c = _mm256_add_epi64(c, d); - b = Rotr63(_mm256_xor_si256(b, c)); - } - - static inline void Diagonalize(__m256i& b, __m256i& c, __m256i& d) { - b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(0, 3, 2, 1)); - c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); - d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(2, 1, 0, 3)); - } - - static inline void Undiagonalize(__m256i& b, __m256i& c, __m256i& d) { - b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(2, 1, 0, 3)); - c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); - d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(0, 3, 2, 1)); - } - - template <> - void TBlake2B<EInstructionSet::AVX2>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { - static const __m128i VIndex[12][4] = { - {_mm_set_epi32(6, 4, 2, 0), _mm_set_epi32(7, 5, 3, 1), _mm_set_epi32(14, 12, 10, 8), _mm_set_epi32(15, 13, 11, 9)}, - {_mm_set_epi32(13, 9, 4, 14), _mm_set_epi32(6, 15, 8, 10), _mm_set_epi32(5, 11, 0, 1), _mm_set_epi32(3, 7, 2, 12)}, - {_mm_set_epi32(15, 5, 12, 11), _mm_set_epi32(13, 2, 0, 8), _mm_set_epi32(9, 7, 3, 10), _mm_set_epi32(4, 1, 6, 14)}, - {_mm_set_epi32(11, 13, 3, 7), _mm_set_epi32(14, 12, 1, 9), _mm_set_epi32(15, 4, 5, 2), _mm_set_epi32(8, 0, 10, 6)}, - {_mm_set_epi32(10, 2, 5, 9), _mm_set_epi32(15, 4, 7, 0), _mm_set_epi32(3, 6, 11, 14), _mm_set_epi32(13, 8, 12, 1)}, - {_mm_set_epi32(8, 0, 6, 2), _mm_set_epi32(3, 11, 10, 12), _mm_set_epi32(1, 15, 7, 4), _mm_set_epi32(9, 14, 5, 13)}, - {_mm_set_epi32(4, 14, 1, 12), _mm_set_epi32(10, 13, 15, 5), _mm_set_epi32(8, 9, 6, 0), _mm_set_epi32(11, 2, 3, 7)}, - {_mm_set_epi32(3, 12, 7, 13), _mm_set_epi32(9, 1, 14, 11), _mm_set_epi32(2, 8, 15, 5), _mm_set_epi32(10, 6, 4, 0)}, - {_mm_set_epi32(0, 11, 14, 6), _mm_set_epi32(8, 3, 9, 15), _mm_set_epi32(10, 1, 13, 12), _mm_set_epi32(5, 4, 7, 2)}, - {_mm_set_epi32(1, 7, 8, 10), _mm_set_epi32(5, 6, 4, 2), _mm_set_epi32(13, 3, 9, 15), _mm_set_epi32(0, 12, 14, 11)}, - {_mm_set_epi32(6, 4, 2, 0), _mm_set_epi32(7, 5, 3, 1), _mm_set_epi32(14, 12, 10, 8), _mm_set_epi32(15, 13, 11, 9)}, - {_mm_set_epi32(13, 9, 4, 14), _mm_set_epi32(6, 15, 8, 10), _mm_set_epi32(5, 11, 0, 1), _mm_set_epi32(3, 7, 2, 12)}, - }; - - __m256i* iv = (__m256i*)GetIV_(); - __m256i a = _mm256_loadu_si256((__m256i*)&State_.H[0]); - __m256i b = _mm256_loadu_si256((__m256i*)&State_.H[4]); - __m256i c = iv[0]; - __m256i d = _mm256_xor_si256(iv[1], _mm256_loadu_si256((__m256i*)&State_.T[0])); - - for (ui32 r = 0; r < 12; ++r) { - G1AVX2(r, a, b, c, d, block, VIndex); - Diagonalize(b, c, d); - G2AVX2(r, a, b, c, d, block, VIndex); - Undiagonalize(b, c, d); - } - - _mm256_storeu_si256((__m256i*)State_.H, _mm256_xor_si256( - _mm256_loadu_si256((__m256i*)State_.H), - _mm256_xor_si256(a, c))); - _mm256_storeu_si256(((__m256i*)State_.H) + 1, _mm256_xor_si256( - _mm256_loadu_si256(((__m256i*)State_.H) + 1), - _mm256_xor_si256(b, d))); - } -} + +namespace NArgonish { + template <> + void* TBlake2B<EInstructionSet::AVX2>::GetIV_() const { + static const __m256i Iv[2] = { + _mm256_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL, 0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), + _mm256_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL, 0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL)}; + return (void*)Iv; + } + + template <> + void TBlake2B<EInstructionSet::AVX2>::InitialXor_(ui8* h, const ui8* p) { + __m256i* iv = (__m256i*)GetIV_(); + __m256i* m_res = (__m256i*)h; + const __m256i* m_second = (__m256i*)p; + _mm256_storeu_si256(m_res, _mm256_xor_si256(iv[0], _mm256_loadu_si256(m_second))); + _mm256_storeu_si256(m_res + 1, _mm256_xor_si256(iv[1], _mm256_loadu_si256(m_second + 1))); + } + + /* + * a = v0, v1, v2, v3 + * b = v4, v5, v6, v7 + * c = v8, v9, v10, v11 + * d = v12, v13, v14, v15 + */ + static inline void G1AVX2(ui32 r, __m256i& a, __m256i& b, __m256i& c, __m256i& d, const ui64* blk, const __m128i vindex[12][4]) { + a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][0], 8))); + d = Rotr32(_mm256_xor_si256(a, d)); + c = _mm256_add_epi64(c, d); + b = Rotr24(_mm256_xor_si256(b, c)); + + a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][1], 8))); + d = Rotr16(_mm256_xor_si256(a, d)); + c = _mm256_add_epi64(c, d); + b = Rotr63(_mm256_xor_si256(b, c)); + } + + static inline void G2AVX2(ui32 r, __m256i& a, __m256i& b, __m256i& c, __m256i& d, const ui64* blk, const __m128i vindex[12][4]) { + a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][2], 8))); + d = Rotr32(_mm256_xor_si256(a, d)); + c = _mm256_add_epi64(c, d); + b = Rotr24(_mm256_xor_si256(b, c)); + + a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][3], 8))); + d = Rotr16(_mm256_xor_si256(a, d)); + c = _mm256_add_epi64(c, d); + b = Rotr63(_mm256_xor_si256(b, c)); + } + + static inline void Diagonalize(__m256i& b, __m256i& c, __m256i& d) { + b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(0, 3, 2, 1)); + c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); + d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(2, 1, 0, 3)); + } + + static inline void Undiagonalize(__m256i& b, __m256i& c, __m256i& d) { + b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(2, 1, 0, 3)); + c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); + d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(0, 3, 2, 1)); + } + + template <> + void TBlake2B<EInstructionSet::AVX2>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { + static const __m128i VIndex[12][4] = { + {_mm_set_epi32(6, 4, 2, 0), _mm_set_epi32(7, 5, 3, 1), _mm_set_epi32(14, 12, 10, 8), _mm_set_epi32(15, 13, 11, 9)}, + {_mm_set_epi32(13, 9, 4, 14), _mm_set_epi32(6, 15, 8, 10), _mm_set_epi32(5, 11, 0, 1), _mm_set_epi32(3, 7, 2, 12)}, + {_mm_set_epi32(15, 5, 12, 11), _mm_set_epi32(13, 2, 0, 8), _mm_set_epi32(9, 7, 3, 10), _mm_set_epi32(4, 1, 6, 14)}, + {_mm_set_epi32(11, 13, 3, 7), _mm_set_epi32(14, 12, 1, 9), _mm_set_epi32(15, 4, 5, 2), _mm_set_epi32(8, 0, 10, 6)}, + {_mm_set_epi32(10, 2, 5, 9), _mm_set_epi32(15, 4, 7, 0), _mm_set_epi32(3, 6, 11, 14), _mm_set_epi32(13, 8, 12, 1)}, + {_mm_set_epi32(8, 0, 6, 2), _mm_set_epi32(3, 11, 10, 12), _mm_set_epi32(1, 15, 7, 4), _mm_set_epi32(9, 14, 5, 13)}, + {_mm_set_epi32(4, 14, 1, 12), _mm_set_epi32(10, 13, 15, 5), _mm_set_epi32(8, 9, 6, 0), _mm_set_epi32(11, 2, 3, 7)}, + {_mm_set_epi32(3, 12, 7, 13), _mm_set_epi32(9, 1, 14, 11), _mm_set_epi32(2, 8, 15, 5), _mm_set_epi32(10, 6, 4, 0)}, + {_mm_set_epi32(0, 11, 14, 6), _mm_set_epi32(8, 3, 9, 15), _mm_set_epi32(10, 1, 13, 12), _mm_set_epi32(5, 4, 7, 2)}, + {_mm_set_epi32(1, 7, 8, 10), _mm_set_epi32(5, 6, 4, 2), _mm_set_epi32(13, 3, 9, 15), _mm_set_epi32(0, 12, 14, 11)}, + {_mm_set_epi32(6, 4, 2, 0), _mm_set_epi32(7, 5, 3, 1), _mm_set_epi32(14, 12, 10, 8), _mm_set_epi32(15, 13, 11, 9)}, + {_mm_set_epi32(13, 9, 4, 14), _mm_set_epi32(6, 15, 8, 10), _mm_set_epi32(5, 11, 0, 1), _mm_set_epi32(3, 7, 2, 12)}, + }; + + __m256i* iv = (__m256i*)GetIV_(); + __m256i a = _mm256_loadu_si256((__m256i*)&State_.H[0]); + __m256i b = _mm256_loadu_si256((__m256i*)&State_.H[4]); + __m256i c = iv[0]; + __m256i d = _mm256_xor_si256(iv[1], _mm256_loadu_si256((__m256i*)&State_.T[0])); + + for (ui32 r = 0; r < 12; ++r) { + G1AVX2(r, a, b, c, d, block, VIndex); + Diagonalize(b, c, d); + G2AVX2(r, a, b, c, d, block, VIndex); + Undiagonalize(b, c, d); + } + + _mm256_storeu_si256((__m256i*)State_.H, _mm256_xor_si256( + _mm256_loadu_si256((__m256i*)State_.H), + _mm256_xor_si256(a, c))); + _mm256_storeu_si256(((__m256i*)State_.H) + 1, _mm256_xor_si256( + _mm256_loadu_si256(((__m256i*)State_.H) + 1), + _mm256_xor_si256(b, d))); + } +} diff --git a/library/cpp/digest/argonish/internal/blake2b/blake2b_ref.h b/library/cpp/digest/argonish/internal/blake2b/blake2b_ref.h index 1a2306f4a0..ef98ed8fc8 100644 --- a/library/cpp/digest/argonish/internal/blake2b/blake2b_ref.h +++ b/library/cpp/digest/argonish/internal/blake2b/blake2b_ref.h @@ -1,83 +1,83 @@ -#pragma once - -#include "blake2b.h" +#pragma once + +#include "blake2b.h" #include <library/cpp/digest/argonish/internal/rotations/rotations_ref.h> - -namespace NArgonish { - static const ui8 Sigma[12][16] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, - {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, - {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, - {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, - {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, - {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, - {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}}; - - static const ui64 Iv[8] = { - 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, - 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, - 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, - 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL}; - - static inline void GRef(ui64 r, ui64 i, ui64& a, ui64& b, ui64& c, ui64& d, const ui64* m) { - a = a + b + m[Sigma[r][2 * i + 0]]; - d = Rotr(d ^ a, 32); - c = c + d; - b = Rotr(b ^ c, 24); - a = a + b + m[Sigma[r][2 * i + 1]]; - d = Rotr(d ^ a, 16); - c = c + d; - b = Rotr(b ^ c, 63); - } - - static inline void Round(ui64 r, ui64* v, const ui64* m) { - GRef(r, 0, v[0], v[4], v[8], v[12], m); - GRef(r, 1, v[1], v[5], v[9], v[13], m); - GRef(r, 2, v[2], v[6], v[10], v[14], m); - GRef(r, 3, v[3], v[7], v[11], v[15], m); - GRef(r, 4, v[0], v[5], v[10], v[15], m); - GRef(r, 5, v[1], v[6], v[11], v[12], m); - GRef(r, 6, v[2], v[7], v[8], v[13], m); - GRef(r, 7, v[3], v[4], v[9], v[14], m); - } - - template <> - void* TBlake2B<EInstructionSet::REF>::GetIV_() const { - return nullptr; - } - - template <> - void TBlake2B<EInstructionSet::REF>::InitialXor_(ui8* h, const ui8* p) { - for (size_t i = 0; i < 8; ++i) - ((ui64*)h)[i] = Iv[i] ^ ((ui64*)p)[i]; - } - - template <> - void TBlake2B<EInstructionSet::REF>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { - ui64 v[16]; - for (size_t i = 0; i < 8; ++i) { - v[i] = State_.H[i]; - } - - v[8] = Iv[0]; - v[9] = Iv[1]; - v[10] = Iv[2]; - v[11] = Iv[3]; - v[12] = Iv[4] ^ State_.T[0]; - v[13] = Iv[5] ^ State_.T[1]; - v[14] = Iv[6] ^ State_.F[0]; - v[15] = Iv[7] ^ State_.F[1]; - - for (ui64 r = 0; r < 12; ++r) - Round(r, v, block); - - for (size_t i = 0; i < 8; ++i) { - State_.H[i] = State_.H[i] ^ v[i] ^ v[i + 8]; - } - } -} + +namespace NArgonish { + static const ui8 Sigma[12][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}}; + + static const ui64 Iv[8] = { + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL}; + + static inline void GRef(ui64 r, ui64 i, ui64& a, ui64& b, ui64& c, ui64& d, const ui64* m) { + a = a + b + m[Sigma[r][2 * i + 0]]; + d = Rotr(d ^ a, 32); + c = c + d; + b = Rotr(b ^ c, 24); + a = a + b + m[Sigma[r][2 * i + 1]]; + d = Rotr(d ^ a, 16); + c = c + d; + b = Rotr(b ^ c, 63); + } + + static inline void Round(ui64 r, ui64* v, const ui64* m) { + GRef(r, 0, v[0], v[4], v[8], v[12], m); + GRef(r, 1, v[1], v[5], v[9], v[13], m); + GRef(r, 2, v[2], v[6], v[10], v[14], m); + GRef(r, 3, v[3], v[7], v[11], v[15], m); + GRef(r, 4, v[0], v[5], v[10], v[15], m); + GRef(r, 5, v[1], v[6], v[11], v[12], m); + GRef(r, 6, v[2], v[7], v[8], v[13], m); + GRef(r, 7, v[3], v[4], v[9], v[14], m); + } + + template <> + void* TBlake2B<EInstructionSet::REF>::GetIV_() const { + return nullptr; + } + + template <> + void TBlake2B<EInstructionSet::REF>::InitialXor_(ui8* h, const ui8* p) { + for (size_t i = 0; i < 8; ++i) + ((ui64*)h)[i] = Iv[i] ^ ((ui64*)p)[i]; + } + + template <> + void TBlake2B<EInstructionSet::REF>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { + ui64 v[16]; + for (size_t i = 0; i < 8; ++i) { + v[i] = State_.H[i]; + } + + v[8] = Iv[0]; + v[9] = Iv[1]; + v[10] = Iv[2]; + v[11] = Iv[3]; + v[12] = Iv[4] ^ State_.T[0]; + v[13] = Iv[5] ^ State_.T[1]; + v[14] = Iv[6] ^ State_.F[0]; + v[15] = Iv[7] ^ State_.F[1]; + + for (ui64 r = 0; r < 12; ++r) + Round(r, v, block); + + for (size_t i = 0; i < 8; ++i) { + State_.H[i] = State_.H[i] ^ v[i] ^ v[i + 8]; + } + } +} diff --git a/library/cpp/digest/argonish/internal/blake2b/blake2b_sse2.h b/library/cpp/digest/argonish/internal/blake2b/blake2b_sse2.h index 0b4f8f85cc..e85a78044c 100644 --- a/library/cpp/digest/argonish/internal/blake2b/blake2b_sse2.h +++ b/library/cpp/digest/argonish/internal/blake2b/blake2b_sse2.h @@ -1,163 +1,163 @@ -#pragma once - -#include <emmintrin.h> -#include "blake2b.h" +#pragma once + +#include <emmintrin.h> +#include "blake2b.h" #include <library/cpp/digest/argonish/internal/rotations/rotations_sse2.h> - -namespace NArgonish { - template <> - void* TBlake2B<EInstructionSet::SSE2>::GetIV_() const { - static const __m128i Iv[4] = { - _mm_set_epi64x(0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), - _mm_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL), - _mm_set_epi64x(0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL), - _mm_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL)}; - - return (void*)Iv; - } - - static const ui32 Sigma[12][16] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, - {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, - {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, - {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, - {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, - {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, - {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}}; - - static inline void G1( - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, - __m128i& b0, __m128i& b1) { - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); - - row4l = _mm_xor_si128(row4l, row1l); - row4h = _mm_xor_si128(row4h, row1h); - - row4l = Rotr32(row4l); - row4h = Rotr32(row4h); - - row3l = _mm_add_epi64(row3l, row4l); - row3h = _mm_add_epi64(row3h, row4h); - - row2l = _mm_xor_si128(row2l, row3l); - row2h = _mm_xor_si128(row2h, row3h); - - row2l = Rotr24(row2l); - row2h = Rotr24(row2h); - } - - static inline void G2( - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, - __m128i& b0, __m128i& b1) { - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); - - row4l = _mm_xor_si128(row4l, row1l); - row4h = _mm_xor_si128(row4h, row1h); - - row4l = Rotr16(row4l); - row4h = Rotr16(row4h); - - row3l = _mm_add_epi64(row3l, row4l); - row3h = _mm_add_epi64(row3h, row4h); - - row2l = _mm_xor_si128(row2l, row3l); - row2h = _mm_xor_si128(row2h, row3h); - - row2l = Rotr63(row2l); - row2h = Rotr63(row2h); - } - - static inline void Diagonalize( - __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i t0 = row4l; - __m128i t1 = row2l; - row4l = row3l; - row3l = row3h; - row3h = row4l; - row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); - row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); - row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); - row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)); - } - - static inline void Undiagonalize( - __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i t0 = row3l; - row3l = row3h; - row3h = t0; - t0 = row2l; - __m128i t1 = row4l; - row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); - row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); - row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); - row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)); - } - - static inline void Round(int r, const ui64* block_ptr, - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i b0, b1; - b0 = _mm_set_epi64x(block_ptr[Sigma[r][2]], block_ptr[Sigma[r][0]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][6]], block_ptr[Sigma[r][4]]); - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - b0 = _mm_set_epi64x(block_ptr[Sigma[r][3]], block_ptr[Sigma[r][1]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][7]], block_ptr[Sigma[r][5]]); - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - Diagonalize(row2l, row3l, row4l, row2h, row3h, row4h); - b0 = _mm_set_epi64x(block_ptr[Sigma[r][10]], block_ptr[Sigma[r][8]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][14]], block_ptr[Sigma[r][12]]); - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - b0 = _mm_set_epi64x(block_ptr[Sigma[r][11]], block_ptr[Sigma[r][9]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][15]], block_ptr[Sigma[r][13]]); - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - Undiagonalize(row2l, row3l, row4l, row2h, row3h, row4h); - } - - template <> - void TBlake2B<EInstructionSet::SSE2>::InitialXor_(ui8* h, const ui8* p) { - __m128i* m_res = (__m128i*)h; - const __m128i* m_p = (__m128i*)p; - __m128i* iv = (__m128i*)GetIV_(); - - _mm_storeu_si128(m_res + 0, _mm_xor_si128(iv[0], _mm_loadu_si128(m_p + 0))); - _mm_storeu_si128(m_res + 1, _mm_xor_si128(iv[1], _mm_loadu_si128(m_p + 1))); - _mm_storeu_si128(m_res + 2, _mm_xor_si128(iv[2], _mm_loadu_si128(m_p + 2))); - _mm_storeu_si128(m_res + 3, _mm_xor_si128(iv[3], _mm_loadu_si128(m_p + 3))); - } - - template <> - void TBlake2B<EInstructionSet::SSE2>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { - __m128i* iv = (__m128i*)GetIV_(); - __m128i row1l = _mm_loadu_si128((__m128i*)&State_.H[0]); - __m128i row1h = _mm_loadu_si128((__m128i*)&State_.H[2]); - __m128i row2l = _mm_loadu_si128((__m128i*)&State_.H[4]); - __m128i row2h = _mm_loadu_si128((__m128i*)&State_.H[6]); - __m128i row3l = iv[0]; - __m128i row3h = iv[1]; - __m128i row4l = _mm_xor_si128(iv[2], _mm_loadu_si128((__m128i*)&State_.T[0])); - __m128i row4h = _mm_xor_si128(iv[3], _mm_loadu_si128((__m128i*)&State_.F[0])); - - for (int r = 0; r < 12; r++) - Round(r, block, row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); - - _mm_storeu_si128((__m128i*)&State_.H[0], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[0]), _mm_xor_si128(row3l, row1l))); - _mm_storeu_si128((__m128i*)&State_.H[2], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[2]), _mm_xor_si128(row3h, row1h))); - _mm_storeu_si128((__m128i*)&State_.H[4], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[4]), _mm_xor_si128(row4l, row2l))); - _mm_storeu_si128((__m128i*)&State_.H[6], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[6]), _mm_xor_si128(row4h, row2h))); - } -} + +namespace NArgonish { + template <> + void* TBlake2B<EInstructionSet::SSE2>::GetIV_() const { + static const __m128i Iv[4] = { + _mm_set_epi64x(0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), + _mm_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL), + _mm_set_epi64x(0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL), + _mm_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL)}; + + return (void*)Iv; + } + + static const ui32 Sigma[12][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}}; + + static inline void G1( + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, + __m128i& b0, __m128i& b1) { + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); + + row4l = _mm_xor_si128(row4l, row1l); + row4h = _mm_xor_si128(row4h, row1h); + + row4l = Rotr32(row4l); + row4h = Rotr32(row4h); + + row3l = _mm_add_epi64(row3l, row4l); + row3h = _mm_add_epi64(row3h, row4h); + + row2l = _mm_xor_si128(row2l, row3l); + row2h = _mm_xor_si128(row2h, row3h); + + row2l = Rotr24(row2l); + row2h = Rotr24(row2h); + } + + static inline void G2( + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, + __m128i& b0, __m128i& b1) { + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); + + row4l = _mm_xor_si128(row4l, row1l); + row4h = _mm_xor_si128(row4h, row1h); + + row4l = Rotr16(row4l); + row4h = Rotr16(row4h); + + row3l = _mm_add_epi64(row3l, row4l); + row3h = _mm_add_epi64(row3h, row4h); + + row2l = _mm_xor_si128(row2l, row3l); + row2h = _mm_xor_si128(row2h, row3h); + + row2l = Rotr63(row2l); + row2h = Rotr63(row2h); + } + + static inline void Diagonalize( + __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i t0 = row4l; + __m128i t1 = row2l; + row4l = row3l; + row3l = row3h; + row3h = row4l; + row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); + row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); + row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); + row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)); + } + + static inline void Undiagonalize( + __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i t0 = row3l; + row3l = row3h; + row3h = t0; + t0 = row2l; + __m128i t1 = row4l; + row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); + row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); + row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); + row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)); + } + + static inline void Round(int r, const ui64* block_ptr, + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i b0, b1; + b0 = _mm_set_epi64x(block_ptr[Sigma[r][2]], block_ptr[Sigma[r][0]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][6]], block_ptr[Sigma[r][4]]); + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + b0 = _mm_set_epi64x(block_ptr[Sigma[r][3]], block_ptr[Sigma[r][1]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][7]], block_ptr[Sigma[r][5]]); + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + Diagonalize(row2l, row3l, row4l, row2h, row3h, row4h); + b0 = _mm_set_epi64x(block_ptr[Sigma[r][10]], block_ptr[Sigma[r][8]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][14]], block_ptr[Sigma[r][12]]); + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + b0 = _mm_set_epi64x(block_ptr[Sigma[r][11]], block_ptr[Sigma[r][9]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][15]], block_ptr[Sigma[r][13]]); + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + Undiagonalize(row2l, row3l, row4l, row2h, row3h, row4h); + } + + template <> + void TBlake2B<EInstructionSet::SSE2>::InitialXor_(ui8* h, const ui8* p) { + __m128i* m_res = (__m128i*)h; + const __m128i* m_p = (__m128i*)p; + __m128i* iv = (__m128i*)GetIV_(); + + _mm_storeu_si128(m_res + 0, _mm_xor_si128(iv[0], _mm_loadu_si128(m_p + 0))); + _mm_storeu_si128(m_res + 1, _mm_xor_si128(iv[1], _mm_loadu_si128(m_p + 1))); + _mm_storeu_si128(m_res + 2, _mm_xor_si128(iv[2], _mm_loadu_si128(m_p + 2))); + _mm_storeu_si128(m_res + 3, _mm_xor_si128(iv[3], _mm_loadu_si128(m_p + 3))); + } + + template <> + void TBlake2B<EInstructionSet::SSE2>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { + __m128i* iv = (__m128i*)GetIV_(); + __m128i row1l = _mm_loadu_si128((__m128i*)&State_.H[0]); + __m128i row1h = _mm_loadu_si128((__m128i*)&State_.H[2]); + __m128i row2l = _mm_loadu_si128((__m128i*)&State_.H[4]); + __m128i row2h = _mm_loadu_si128((__m128i*)&State_.H[6]); + __m128i row3l = iv[0]; + __m128i row3h = iv[1]; + __m128i row4l = _mm_xor_si128(iv[2], _mm_loadu_si128((__m128i*)&State_.T[0])); + __m128i row4h = _mm_xor_si128(iv[3], _mm_loadu_si128((__m128i*)&State_.F[0])); + + for (int r = 0; r < 12; r++) + Round(r, block, row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); + + _mm_storeu_si128((__m128i*)&State_.H[0], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[0]), _mm_xor_si128(row3l, row1l))); + _mm_storeu_si128((__m128i*)&State_.H[2], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[2]), _mm_xor_si128(row3h, row1h))); + _mm_storeu_si128((__m128i*)&State_.H[4], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[4]), _mm_xor_si128(row4l, row2l))); + _mm_storeu_si128((__m128i*)&State_.H[6], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[6]), _mm_xor_si128(row4h, row2h))); + } +} diff --git a/library/cpp/digest/argonish/internal/blake2b/blake2b_sse41.h b/library/cpp/digest/argonish/internal/blake2b/blake2b_sse41.h index c1103db4c9..1a033bcceb 100644 --- a/library/cpp/digest/argonish/internal/blake2b/blake2b_sse41.h +++ b/library/cpp/digest/argonish/internal/blake2b/blake2b_sse41.h @@ -1,172 +1,172 @@ -#pragma once - -#include <smmintrin.h> -#include "blake2b.h" -#include "load_sse41.h" +#pragma once + +#include <smmintrin.h> +#include "blake2b.h" +#include "load_sse41.h" #include <library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h> - -namespace NArgonish { - template <> - void* TBlake2B<EInstructionSet::SSE41>::GetIV_() const { - static const __m128i Iv[4] = { - _mm_set_epi64x(0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), - _mm_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL), - _mm_set_epi64x(0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL), - _mm_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL)}; - return (void*)Iv; - } - - static inline void G1( - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, - __m128i& b0, __m128i& b1) { - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); - - row4l = _mm_xor_si128(row4l, row1l); - row4h = _mm_xor_si128(row4h, row1h); - - row4l = Rotr32(row4l); - row4h = Rotr32(row4h); - - row3l = _mm_add_epi64(row3l, row4l); - row3h = _mm_add_epi64(row3h, row4h); - - row2l = _mm_xor_si128(row2l, row3l); - row2h = _mm_xor_si128(row2h, row3h); - - row2l = Rotr24(row2l); - row2h = Rotr24(row2h); - } - - static inline void G2( - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, - __m128i& b0, __m128i& b1) { - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); - - row4l = _mm_xor_si128(row4l, row1l); - row4h = _mm_xor_si128(row4h, row1h); - - row4l = Rotr16(row4l); - row4h = Rotr16(row4h); - - row3l = _mm_add_epi64(row3l, row4l); - row3h = _mm_add_epi64(row3h, row4h); - - row2l = _mm_xor_si128(row2l, row3l); - row2h = _mm_xor_si128(row2h, row3h); - - row2l = Rotr63(row2l); - row2h = Rotr63(row2h); - } - - static inline void Diagonalize( - __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i t0 = _mm_alignr_epi8(row2h, row2l, 8); - __m128i t1 = _mm_alignr_epi8(row2l, row2h, 8); - row2l = t0; - row2h = t1; - - t0 = row3l; - row3l = row3h; - row3h = t0; - - t0 = _mm_alignr_epi8(row4h, row4l, 8); - t1 = _mm_alignr_epi8(row4l, row4h, 8); - row4l = t1; - row4h = t0; - } - - static inline void Undiagonalize( - __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i t0 = _mm_alignr_epi8(row2l, row2h, 8); - __m128i t1 = _mm_alignr_epi8(row2h, row2l, 8); - row2l = t0; - row2h = t1; - - t0 = row3l; - row3l = row3h; - row3h = t0; - - t0 = _mm_alignr_epi8(row4l, row4h, 8); - t1 = _mm_alignr_epi8(row4h, row4l, 8); - row4l = t1; - row4h = t0; - } - -#define ROUND(r) \ - LOAD_MSG_##r##_1(b0, b1); \ - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - LOAD_MSG_##r##_2(b0, b1); \ - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - Diagonalize(row2l, row3l, row4l, row2h, row3h, row4h); \ - LOAD_MSG_##r##_3(b0, b1); \ - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - LOAD_MSG_##r##_4(b0, b1); \ - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - Undiagonalize(row2l, row3l, row4l, row2h, row3h, row4h); - - template <> - void TBlake2B<EInstructionSet::SSE41>::InitialXor_(ui8* h, const ui8* p) { - __m128i* m_res = (__m128i*)h; - const __m128i* m_p = (__m128i*)p; - __m128i* iv = (__m128i*)GetIV_(); - - _mm_storeu_si128(m_res + 0, _mm_xor_si128(iv[0], _mm_loadu_si128(m_p + 0))); - _mm_storeu_si128(m_res + 1, _mm_xor_si128(iv[1], _mm_loadu_si128(m_p + 1))); - _mm_storeu_si128(m_res + 2, _mm_xor_si128(iv[2], _mm_loadu_si128(m_p + 2))); - _mm_storeu_si128(m_res + 3, _mm_xor_si128(iv[3], _mm_loadu_si128(m_p + 3))); - } - - template <> - void TBlake2B<EInstructionSet::SSE41>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { - const __m128i* block_ptr = (__m128i*)block; - __m128i* iv = (__m128i*)GetIV_(); - const __m128i m0 = _mm_loadu_si128(block_ptr + 0); - const __m128i m1 = _mm_loadu_si128(block_ptr + 1); - const __m128i m2 = _mm_loadu_si128(block_ptr + 2); - const __m128i m3 = _mm_loadu_si128(block_ptr + 3); - const __m128i m4 = _mm_loadu_si128(block_ptr + 4); - const __m128i m5 = _mm_loadu_si128(block_ptr + 5); - const __m128i m6 = _mm_loadu_si128(block_ptr + 6); - const __m128i m7 = _mm_loadu_si128(block_ptr + 7); - - __m128i row1l = _mm_loadu_si128((__m128i*)&State_.H[0]); - __m128i row1h = _mm_loadu_si128((__m128i*)&State_.H[2]); - __m128i row2l = _mm_loadu_si128((__m128i*)&State_.H[4]); - __m128i row2h = _mm_loadu_si128((__m128i*)&State_.H[6]); - __m128i row3l = iv[0]; - __m128i row3h = iv[1]; - __m128i row4l = _mm_xor_si128(iv[2], _mm_loadu_si128((__m128i*)&State_.T[0])); - __m128i row4h = _mm_xor_si128(iv[3], _mm_loadu_si128((__m128i*)&State_.F[0])); - __m128i b0, b1; - - ROUND(0); - ROUND(1); - ROUND(2); - ROUND(3); - ROUND(4); - ROUND(5); - ROUND(6); - ROUND(7); - ROUND(8); - ROUND(9); - ROUND(10); - ROUND(11); - - _mm_storeu_si128((__m128i*)&State_.H[0], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[0]), _mm_xor_si128(row3l, row1l))); - _mm_storeu_si128((__m128i*)&State_.H[2], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[2]), _mm_xor_si128(row3h, row1h))); - _mm_storeu_si128((__m128i*)&State_.H[4], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[4]), _mm_xor_si128(row4l, row2l))); - _mm_storeu_si128((__m128i*)&State_.H[6], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[6]), _mm_xor_si128(row4h, row2h))); - } - -#undef ROUND -} + +namespace NArgonish { + template <> + void* TBlake2B<EInstructionSet::SSE41>::GetIV_() const { + static const __m128i Iv[4] = { + _mm_set_epi64x(0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), + _mm_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL), + _mm_set_epi64x(0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL), + _mm_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL)}; + return (void*)Iv; + } + + static inline void G1( + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, + __m128i& b0, __m128i& b1) { + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); + + row4l = _mm_xor_si128(row4l, row1l); + row4h = _mm_xor_si128(row4h, row1h); + + row4l = Rotr32(row4l); + row4h = Rotr32(row4h); + + row3l = _mm_add_epi64(row3l, row4l); + row3h = _mm_add_epi64(row3h, row4h); + + row2l = _mm_xor_si128(row2l, row3l); + row2h = _mm_xor_si128(row2h, row3h); + + row2l = Rotr24(row2l); + row2h = Rotr24(row2h); + } + + static inline void G2( + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, + __m128i& b0, __m128i& b1) { + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); + + row4l = _mm_xor_si128(row4l, row1l); + row4h = _mm_xor_si128(row4h, row1h); + + row4l = Rotr16(row4l); + row4h = Rotr16(row4h); + + row3l = _mm_add_epi64(row3l, row4l); + row3h = _mm_add_epi64(row3h, row4h); + + row2l = _mm_xor_si128(row2l, row3l); + row2h = _mm_xor_si128(row2h, row3h); + + row2l = Rotr63(row2l); + row2h = Rotr63(row2h); + } + + static inline void Diagonalize( + __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i t0 = _mm_alignr_epi8(row2h, row2l, 8); + __m128i t1 = _mm_alignr_epi8(row2l, row2h, 8); + row2l = t0; + row2h = t1; + + t0 = row3l; + row3l = row3h; + row3h = t0; + + t0 = _mm_alignr_epi8(row4h, row4l, 8); + t1 = _mm_alignr_epi8(row4l, row4h, 8); + row4l = t1; + row4h = t0; + } + + static inline void Undiagonalize( + __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i t0 = _mm_alignr_epi8(row2l, row2h, 8); + __m128i t1 = _mm_alignr_epi8(row2h, row2l, 8); + row2l = t0; + row2h = t1; + + t0 = row3l; + row3l = row3h; + row3h = t0; + + t0 = _mm_alignr_epi8(row4l, row4h, 8); + t1 = _mm_alignr_epi8(row4h, row4l, 8); + row4l = t1; + row4h = t0; + } + +#define ROUND(r) \ + LOAD_MSG_##r##_1(b0, b1); \ + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + LOAD_MSG_##r##_2(b0, b1); \ + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + Diagonalize(row2l, row3l, row4l, row2h, row3h, row4h); \ + LOAD_MSG_##r##_3(b0, b1); \ + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + LOAD_MSG_##r##_4(b0, b1); \ + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + Undiagonalize(row2l, row3l, row4l, row2h, row3h, row4h); + + template <> + void TBlake2B<EInstructionSet::SSE41>::InitialXor_(ui8* h, const ui8* p) { + __m128i* m_res = (__m128i*)h; + const __m128i* m_p = (__m128i*)p; + __m128i* iv = (__m128i*)GetIV_(); + + _mm_storeu_si128(m_res + 0, _mm_xor_si128(iv[0], _mm_loadu_si128(m_p + 0))); + _mm_storeu_si128(m_res + 1, _mm_xor_si128(iv[1], _mm_loadu_si128(m_p + 1))); + _mm_storeu_si128(m_res + 2, _mm_xor_si128(iv[2], _mm_loadu_si128(m_p + 2))); + _mm_storeu_si128(m_res + 3, _mm_xor_si128(iv[3], _mm_loadu_si128(m_p + 3))); + } + + template <> + void TBlake2B<EInstructionSet::SSE41>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { + const __m128i* block_ptr = (__m128i*)block; + __m128i* iv = (__m128i*)GetIV_(); + const __m128i m0 = _mm_loadu_si128(block_ptr + 0); + const __m128i m1 = _mm_loadu_si128(block_ptr + 1); + const __m128i m2 = _mm_loadu_si128(block_ptr + 2); + const __m128i m3 = _mm_loadu_si128(block_ptr + 3); + const __m128i m4 = _mm_loadu_si128(block_ptr + 4); + const __m128i m5 = _mm_loadu_si128(block_ptr + 5); + const __m128i m6 = _mm_loadu_si128(block_ptr + 6); + const __m128i m7 = _mm_loadu_si128(block_ptr + 7); + + __m128i row1l = _mm_loadu_si128((__m128i*)&State_.H[0]); + __m128i row1h = _mm_loadu_si128((__m128i*)&State_.H[2]); + __m128i row2l = _mm_loadu_si128((__m128i*)&State_.H[4]); + __m128i row2h = _mm_loadu_si128((__m128i*)&State_.H[6]); + __m128i row3l = iv[0]; + __m128i row3h = iv[1]; + __m128i row4l = _mm_xor_si128(iv[2], _mm_loadu_si128((__m128i*)&State_.T[0])); + __m128i row4h = _mm_xor_si128(iv[3], _mm_loadu_si128((__m128i*)&State_.F[0])); + __m128i b0, b1; + + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + ROUND(10); + ROUND(11); + + _mm_storeu_si128((__m128i*)&State_.H[0], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[0]), _mm_xor_si128(row3l, row1l))); + _mm_storeu_si128((__m128i*)&State_.H[2], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[2]), _mm_xor_si128(row3h, row1h))); + _mm_storeu_si128((__m128i*)&State_.H[4], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[4]), _mm_xor_si128(row4l, row2l))); + _mm_storeu_si128((__m128i*)&State_.H[6], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[6]), _mm_xor_si128(row4h, row2h))); + } + +#undef ROUND +} diff --git a/library/cpp/digest/argonish/internal/blake2b/blake2b_ssse3.h b/library/cpp/digest/argonish/internal/blake2b/blake2b_ssse3.h index 24bf8ea31a..4cca5a5e7f 100644 --- a/library/cpp/digest/argonish/internal/blake2b/blake2b_ssse3.h +++ b/library/cpp/digest/argonish/internal/blake2b/blake2b_ssse3.h @@ -1,171 +1,171 @@ -#pragma once - -#include <emmintrin.h> -#include <tmmintrin.h> -#include "blake2b.h" +#pragma once + +#include <emmintrin.h> +#include <tmmintrin.h> +#include "blake2b.h" #include <library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h> - -namespace NArgonish { - template <> - void* TBlake2B<EInstructionSet::SSSE3>::GetIV_() const { - static const __m128i Iv[4] = { - _mm_set_epi64x(0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), - _mm_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL), - _mm_set_epi64x(0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL), - _mm_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL)}; - return (void*)Iv; - } - - static const ui32 Sigma[12][16] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, - {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, - {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, - {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, - {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, - {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, - {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}}; - - static inline void G1( - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, - __m128i& b0, __m128i& b1) { - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); - - row4l = _mm_xor_si128(row4l, row1l); - row4h = _mm_xor_si128(row4h, row1h); - - row4l = Rotr32(row4l); - row4h = Rotr32(row4h); - - row3l = _mm_add_epi64(row3l, row4l); - row3h = _mm_add_epi64(row3h, row4h); - - row2l = _mm_xor_si128(row2l, row3l); - row2h = _mm_xor_si128(row2h, row3h); - - row2l = Rotr24(row2l); - row2h = Rotr24(row2h); - } - - static inline void G2( - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, - __m128i& b0, __m128i& b1) { - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); - - row4l = _mm_xor_si128(row4l, row1l); - row4h = _mm_xor_si128(row4h, row1h); - - row4l = Rotr16(row4l); - row4h = Rotr16(row4h); - - row3l = _mm_add_epi64(row3l, row4l); - row3h = _mm_add_epi64(row3h, row4h); - - row2l = _mm_xor_si128(row2l, row3l); - row2h = _mm_xor_si128(row2h, row3h); - - row2l = Rotr63(row2l); - row2h = Rotr63(row2h); - } - - static inline void Diagonalize( - __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i t0 = _mm_alignr_epi8(row2h, row2l, 8); - __m128i t1 = _mm_alignr_epi8(row2l, row2h, 8); - row2l = t0; - row2h = t1; - - t0 = row3l; - row3l = row3h; - row3h = t0; - - t0 = _mm_alignr_epi8(row4h, row4l, 8); - t1 = _mm_alignr_epi8(row4l, row4h, 8); - row4l = t1; - row4h = t0; - } - - static inline void Undiagonalize( - __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i t0 = _mm_alignr_epi8(row2l, row2h, 8); - __m128i t1 = _mm_alignr_epi8(row2h, row2l, 8); - row2l = t0; - row2h = t1; - - t0 = row3l; - row3l = row3h; - row3h = t0; - - t0 = _mm_alignr_epi8(row4l, row4h, 8); - t1 = _mm_alignr_epi8(row4h, row4l, 8); - row4l = t1; - row4h = t0; - } - - static inline void Round(int r, const ui64* block_ptr, - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i b0, b1; - b0 = _mm_set_epi64x(block_ptr[Sigma[r][2]], block_ptr[Sigma[r][0]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][6]], block_ptr[Sigma[r][4]]); - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - b0 = _mm_set_epi64x(block_ptr[Sigma[r][3]], block_ptr[Sigma[r][1]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][7]], block_ptr[Sigma[r][5]]); - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - Diagonalize(row2l, row3l, row4l, row2h, row3h, row4h); - b0 = _mm_set_epi64x(block_ptr[Sigma[r][10]], block_ptr[Sigma[r][8]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][14]], block_ptr[Sigma[r][12]]); - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - b0 = _mm_set_epi64x(block_ptr[Sigma[r][11]], block_ptr[Sigma[r][9]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][15]], block_ptr[Sigma[r][13]]); - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - Undiagonalize(row2l, row3l, row4l, row2h, row3h, row4h); - } - - template <> - void TBlake2B<EInstructionSet::SSSE3>::InitialXor_(ui8* h, const ui8* p) { - __m128i* m_res = (__m128i*)h; - const __m128i* m_p = (__m128i*)p; - __m128i* iv = (__m128i*)GetIV_(); - - _mm_storeu_si128(m_res + 0, _mm_xor_si128(iv[0], _mm_loadu_si128(m_p + 0))); - _mm_storeu_si128(m_res + 1, _mm_xor_si128(iv[1], _mm_loadu_si128(m_p + 1))); - _mm_storeu_si128(m_res + 2, _mm_xor_si128(iv[2], _mm_loadu_si128(m_p + 2))); - _mm_storeu_si128(m_res + 3, _mm_xor_si128(iv[3], _mm_loadu_si128(m_p + 3))); - } - - template <> - void TBlake2B<EInstructionSet::SSSE3>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { - __m128i* iv = (__m128i*)GetIV_(); - __m128i row1l = _mm_loadu_si128((__m128i*)&State_.H[0]); - __m128i row1h = _mm_loadu_si128((__m128i*)&State_.H[2]); - __m128i row2l = _mm_loadu_si128((__m128i*)&State_.H[4]); - __m128i row2h = _mm_loadu_si128((__m128i*)&State_.H[6]); - __m128i row3l = iv[0]; - __m128i row3h = iv[1]; - __m128i row4l = _mm_xor_si128(iv[2], _mm_loadu_si128((__m128i*)&State_.T[0])); - __m128i row4h = _mm_xor_si128(iv[3], _mm_loadu_si128((__m128i*)&State_.F[0])); - - for (int r = 0; r < 12; ++r) - Round(r, block, row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); - - _mm_storeu_si128((__m128i*)&State_.H[0], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[0]), _mm_xor_si128(row3l, row1l))); - _mm_storeu_si128((__m128i*)&State_.H[2], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[2]), _mm_xor_si128(row3h, row1h))); - _mm_storeu_si128((__m128i*)&State_.H[4], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[4]), _mm_xor_si128(row4l, row2l))); - _mm_storeu_si128((__m128i*)&State_.H[6], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[6]), _mm_xor_si128(row4h, row2h))); - } -} + +namespace NArgonish { + template <> + void* TBlake2B<EInstructionSet::SSSE3>::GetIV_() const { + static const __m128i Iv[4] = { + _mm_set_epi64x(0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), + _mm_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL), + _mm_set_epi64x(0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL), + _mm_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL)}; + return (void*)Iv; + } + + static const ui32 Sigma[12][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}}; + + static inline void G1( + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, + __m128i& b0, __m128i& b1) { + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); + + row4l = _mm_xor_si128(row4l, row1l); + row4h = _mm_xor_si128(row4h, row1h); + + row4l = Rotr32(row4l); + row4h = Rotr32(row4h); + + row3l = _mm_add_epi64(row3l, row4l); + row3h = _mm_add_epi64(row3h, row4h); + + row2l = _mm_xor_si128(row2l, row3l); + row2h = _mm_xor_si128(row2h, row3h); + + row2l = Rotr24(row2l); + row2h = Rotr24(row2h); + } + + static inline void G2( + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, + __m128i& b0, __m128i& b1) { + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); + + row4l = _mm_xor_si128(row4l, row1l); + row4h = _mm_xor_si128(row4h, row1h); + + row4l = Rotr16(row4l); + row4h = Rotr16(row4h); + + row3l = _mm_add_epi64(row3l, row4l); + row3h = _mm_add_epi64(row3h, row4h); + + row2l = _mm_xor_si128(row2l, row3l); + row2h = _mm_xor_si128(row2h, row3h); + + row2l = Rotr63(row2l); + row2h = Rotr63(row2h); + } + + static inline void Diagonalize( + __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i t0 = _mm_alignr_epi8(row2h, row2l, 8); + __m128i t1 = _mm_alignr_epi8(row2l, row2h, 8); + row2l = t0; + row2h = t1; + + t0 = row3l; + row3l = row3h; + row3h = t0; + + t0 = _mm_alignr_epi8(row4h, row4l, 8); + t1 = _mm_alignr_epi8(row4l, row4h, 8); + row4l = t1; + row4h = t0; + } + + static inline void Undiagonalize( + __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i t0 = _mm_alignr_epi8(row2l, row2h, 8); + __m128i t1 = _mm_alignr_epi8(row2h, row2l, 8); + row2l = t0; + row2h = t1; + + t0 = row3l; + row3l = row3h; + row3h = t0; + + t0 = _mm_alignr_epi8(row4l, row4h, 8); + t1 = _mm_alignr_epi8(row4h, row4l, 8); + row4l = t1; + row4h = t0; + } + + static inline void Round(int r, const ui64* block_ptr, + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i b0, b1; + b0 = _mm_set_epi64x(block_ptr[Sigma[r][2]], block_ptr[Sigma[r][0]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][6]], block_ptr[Sigma[r][4]]); + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + b0 = _mm_set_epi64x(block_ptr[Sigma[r][3]], block_ptr[Sigma[r][1]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][7]], block_ptr[Sigma[r][5]]); + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + Diagonalize(row2l, row3l, row4l, row2h, row3h, row4h); + b0 = _mm_set_epi64x(block_ptr[Sigma[r][10]], block_ptr[Sigma[r][8]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][14]], block_ptr[Sigma[r][12]]); + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + b0 = _mm_set_epi64x(block_ptr[Sigma[r][11]], block_ptr[Sigma[r][9]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][15]], block_ptr[Sigma[r][13]]); + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + Undiagonalize(row2l, row3l, row4l, row2h, row3h, row4h); + } + + template <> + void TBlake2B<EInstructionSet::SSSE3>::InitialXor_(ui8* h, const ui8* p) { + __m128i* m_res = (__m128i*)h; + const __m128i* m_p = (__m128i*)p; + __m128i* iv = (__m128i*)GetIV_(); + + _mm_storeu_si128(m_res + 0, _mm_xor_si128(iv[0], _mm_loadu_si128(m_p + 0))); + _mm_storeu_si128(m_res + 1, _mm_xor_si128(iv[1], _mm_loadu_si128(m_p + 1))); + _mm_storeu_si128(m_res + 2, _mm_xor_si128(iv[2], _mm_loadu_si128(m_p + 2))); + _mm_storeu_si128(m_res + 3, _mm_xor_si128(iv[3], _mm_loadu_si128(m_p + 3))); + } + + template <> + void TBlake2B<EInstructionSet::SSSE3>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { + __m128i* iv = (__m128i*)GetIV_(); + __m128i row1l = _mm_loadu_si128((__m128i*)&State_.H[0]); + __m128i row1h = _mm_loadu_si128((__m128i*)&State_.H[2]); + __m128i row2l = _mm_loadu_si128((__m128i*)&State_.H[4]); + __m128i row2h = _mm_loadu_si128((__m128i*)&State_.H[6]); + __m128i row3l = iv[0]; + __m128i row3h = iv[1]; + __m128i row4l = _mm_xor_si128(iv[2], _mm_loadu_si128((__m128i*)&State_.T[0])); + __m128i row4h = _mm_xor_si128(iv[3], _mm_loadu_si128((__m128i*)&State_.F[0])); + + for (int r = 0; r < 12; ++r) + Round(r, block, row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); + + _mm_storeu_si128((__m128i*)&State_.H[0], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[0]), _mm_xor_si128(row3l, row1l))); + _mm_storeu_si128((__m128i*)&State_.H[2], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[2]), _mm_xor_si128(row3h, row1h))); + _mm_storeu_si128((__m128i*)&State_.H[4], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[4]), _mm_xor_si128(row4l, row2l))); + _mm_storeu_si128((__m128i*)&State_.H[6], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[6]), _mm_xor_si128(row4h, row2h))); + } +} diff --git a/library/cpp/digest/argonish/internal/blake2b/load_sse41.h b/library/cpp/digest/argonish/internal/blake2b/load_sse41.h index 9b1f7781f9..060455aac2 100644 --- a/library/cpp/digest/argonish/internal/blake2b/load_sse41.h +++ b/library/cpp/digest/argonish/internal/blake2b/load_sse41.h @@ -1,301 +1,301 @@ -#pragma once - -/* - BLAKE2 reference source code package - optimized C implementations - Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the - terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at - your option. The terms of these licenses can be found at: - - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - - OpenSSL license : https://www.openssl.org/source/license.html - - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - More information about the BLAKE2 hash function can be found at - https://blake2.net. -*/ - -#define LOAD_MSG_0_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m0, m1); \ - b1 = _mm_unpacklo_epi64(m2, m3); \ - } while (0) - -#define LOAD_MSG_0_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m0, m1); \ - b1 = _mm_unpackhi_epi64(m2, m3); \ - } while (0) - -#define LOAD_MSG_0_3(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m4, m5); \ - b1 = _mm_unpacklo_epi64(m6, m7); \ - } while (0) - -#define LOAD_MSG_0_4(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m4, m5); \ - b1 = _mm_unpackhi_epi64(m6, m7); \ - } while (0) - -#define LOAD_MSG_1_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m7, m2); \ - b1 = _mm_unpackhi_epi64(m4, m6); \ - } while (0) - -#define LOAD_MSG_1_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m5, m4); \ - b1 = _mm_alignr_epi8(m3, m7, 8); \ - } while (0) - -#define LOAD_MSG_1_3(b0, b1) \ - do { \ - b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ - b1 = _mm_unpackhi_epi64(m5, m2); \ - } while (0) - -#define LOAD_MSG_1_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m6, m1); \ - b1 = _mm_unpackhi_epi64(m3, m1); \ - } while (0) - -#define LOAD_MSG_2_1(b0, b1) \ - do { \ - b0 = _mm_alignr_epi8(m6, m5, 8); \ - b1 = _mm_unpackhi_epi64(m2, m7); \ - } while (0) - -#define LOAD_MSG_2_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m4, m0); \ - b1 = _mm_blend_epi16(m1, m6, 0xF0); \ - } while (0) - -#define LOAD_MSG_2_3(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m5, m1, 0xF0); \ - b1 = _mm_unpackhi_epi64(m3, m4); \ - } while (0) - -#define LOAD_MSG_2_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m7, m3); \ - b1 = _mm_alignr_epi8(m2, m0, 8); \ - } while (0) - -#define LOAD_MSG_3_1(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m3, m1); \ - b1 = _mm_unpackhi_epi64(m6, m5); \ - } while (0) - -#define LOAD_MSG_3_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m4, m0); \ - b1 = _mm_unpacklo_epi64(m6, m7); \ - } while (0) - -#define LOAD_MSG_3_3(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m1, m2, 0xF0); \ - b1 = _mm_blend_epi16(m2, m7, 0xF0); \ - } while (0) - -#define LOAD_MSG_3_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m3, m5); \ - b1 = _mm_unpacklo_epi64(m0, m4); \ - } while (0) - -#define LOAD_MSG_4_1(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m4, m2); \ - b1 = _mm_unpacklo_epi64(m1, m5); \ - } while (0) - -#define LOAD_MSG_4_2(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m0, m3, 0xF0); \ - b1 = _mm_blend_epi16(m2, m7, 0xF0); \ - } while (0) - -#define LOAD_MSG_4_3(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m7, m5, 0xF0); \ - b1 = _mm_blend_epi16(m3, m1, 0xF0); \ - } while (0) - -#define LOAD_MSG_4_4(b0, b1) \ - do { \ - b0 = _mm_alignr_epi8(m6, m0, 8); \ - b1 = _mm_blend_epi16(m4, m6, 0xF0); \ - } while (0) - -#define LOAD_MSG_5_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m1, m3); \ - b1 = _mm_unpacklo_epi64(m0, m4); \ - } while (0) - -#define LOAD_MSG_5_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m6, m5); \ - b1 = _mm_unpackhi_epi64(m5, m1); \ - } while (0) - -#define LOAD_MSG_5_3(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m2, m3, 0xF0); \ - b1 = _mm_unpackhi_epi64(m7, m0); \ - } while (0) - -#define LOAD_MSG_5_4(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m6, m2); \ - b1 = _mm_blend_epi16(m7, m4, 0xF0); \ - } while (0) - -#define LOAD_MSG_6_1(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m6, m0, 0xF0); \ - b1 = _mm_unpacklo_epi64(m7, m2); \ - } while (0) - -#define LOAD_MSG_6_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m2, m7); \ - b1 = _mm_alignr_epi8(m5, m6, 8); \ - } while (0) - -#define LOAD_MSG_6_3(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m0, m3); \ - b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \ - } while (0) - -#define LOAD_MSG_6_4(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m3, m1); \ - b1 = _mm_blend_epi16(m1, m5, 0xF0); \ - } while (0) - -#define LOAD_MSG_7_1(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m6, m3); \ - b1 = _mm_blend_epi16(m6, m1, 0xF0); \ - } while (0) - -#define LOAD_MSG_7_2(b0, b1) \ - do { \ - b0 = _mm_alignr_epi8(m7, m5, 8); \ - b1 = _mm_unpackhi_epi64(m0, m4); \ - } while (0) - -#define LOAD_MSG_7_3(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m2, m7); \ - b1 = _mm_unpacklo_epi64(m4, m1); \ - } while (0) - -#define LOAD_MSG_7_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m0, m2); \ - b1 = _mm_unpacklo_epi64(m3, m5); \ - } while (0) - -#define LOAD_MSG_8_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m3, m7); \ - b1 = _mm_alignr_epi8(m0, m5, 8); \ - } while (0) - -#define LOAD_MSG_8_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m7, m4); \ - b1 = _mm_alignr_epi8(m4, m1, 8); \ - } while (0) - -#define LOAD_MSG_8_3(b0, b1) \ - do { \ - b0 = m6; \ - b1 = _mm_alignr_epi8(m5, m0, 8); \ - } while (0) - -#define LOAD_MSG_8_4(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m1, m3, 0xF0); \ - b1 = m2; \ - } while (0) - -#define LOAD_MSG_9_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m5, m4); \ - b1 = _mm_unpackhi_epi64(m3, m0); \ - } while (0) - -#define LOAD_MSG_9_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m1, m2); \ - b1 = _mm_blend_epi16(m3, m2, 0xF0); \ - } while (0) - -#define LOAD_MSG_9_3(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m7, m4); \ - b1 = _mm_unpackhi_epi64(m1, m6); \ - } while (0) - -#define LOAD_MSG_9_4(b0, b1) \ - do { \ - b0 = _mm_alignr_epi8(m7, m5, 8); \ - b1 = _mm_unpacklo_epi64(m6, m0); \ - } while (0) - -#define LOAD_MSG_10_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m0, m1); \ - b1 = _mm_unpacklo_epi64(m2, m3); \ - } while (0) - -#define LOAD_MSG_10_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m0, m1); \ - b1 = _mm_unpackhi_epi64(m2, m3); \ - } while (0) - -#define LOAD_MSG_10_3(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m4, m5); \ - b1 = _mm_unpacklo_epi64(m6, m7); \ - } while (0) - -#define LOAD_MSG_10_4(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m4, m5); \ - b1 = _mm_unpackhi_epi64(m6, m7); \ - } while (0) - -#define LOAD_MSG_11_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m7, m2); \ - b1 = _mm_unpackhi_epi64(m4, m6); \ - } while (0) - -#define LOAD_MSG_11_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m5, m4); \ - b1 = _mm_alignr_epi8(m3, m7, 8); \ - } while (0) - -#define LOAD_MSG_11_3(b0, b1) \ - do { \ - b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ - b1 = _mm_unpackhi_epi64(m5, m2); \ - } while (0) - -#define LOAD_MSG_11_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m6, m1); \ - b1 = _mm_unpackhi_epi64(m3, m1); \ - } while (0) +#pragma once + +/* + BLAKE2 reference source code package - optimized C implementations + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +#define LOAD_MSG_0_1(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m0, m1); \ + b1 = _mm_unpacklo_epi64(m2, m3); \ + } while (0) + +#define LOAD_MSG_0_2(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m0, m1); \ + b1 = _mm_unpackhi_epi64(m2, m3); \ + } while (0) + +#define LOAD_MSG_0_3(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m4, m5); \ + b1 = _mm_unpacklo_epi64(m6, m7); \ + } while (0) + +#define LOAD_MSG_0_4(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m4, m5); \ + b1 = _mm_unpackhi_epi64(m6, m7); \ + } while (0) + +#define LOAD_MSG_1_1(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m7, m2); \ + b1 = _mm_unpackhi_epi64(m4, m6); \ + } while (0) + +#define LOAD_MSG_1_2(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m5, m4); \ + b1 = _mm_alignr_epi8(m3, m7, 8); \ + } while (0) + +#define LOAD_MSG_1_3(b0, b1) \ + do { \ + b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ + b1 = _mm_unpackhi_epi64(m5, m2); \ + } while (0) + +#define LOAD_MSG_1_4(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m6, m1); \ + b1 = _mm_unpackhi_epi64(m3, m1); \ + } while (0) + +#define LOAD_MSG_2_1(b0, b1) \ + do { \ + b0 = _mm_alignr_epi8(m6, m5, 8); \ + b1 = _mm_unpackhi_epi64(m2, m7); \ + } while (0) + +#define LOAD_MSG_2_2(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m4, m0); \ + b1 = _mm_blend_epi16(m1, m6, 0xF0); \ + } while (0) + +#define LOAD_MSG_2_3(b0, b1) \ + do { \ + b0 = _mm_blend_epi16(m5, m1, 0xF0); \ + b1 = _mm_unpackhi_epi64(m3, m4); \ + } while (0) + +#define LOAD_MSG_2_4(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m7, m3); \ + b1 = _mm_alignr_epi8(m2, m0, 8); \ + } while (0) + +#define LOAD_MSG_3_1(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m3, m1); \ + b1 = _mm_unpackhi_epi64(m6, m5); \ + } while (0) + +#define LOAD_MSG_3_2(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m4, m0); \ + b1 = _mm_unpacklo_epi64(m6, m7); \ + } while (0) + +#define LOAD_MSG_3_3(b0, b1) \ + do { \ + b0 = _mm_blend_epi16(m1, m2, 0xF0); \ + b1 = _mm_blend_epi16(m2, m7, 0xF0); \ + } while (0) + +#define LOAD_MSG_3_4(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m3, m5); \ + b1 = _mm_unpacklo_epi64(m0, m4); \ + } while (0) + +#define LOAD_MSG_4_1(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m4, m2); \ + b1 = _mm_unpacklo_epi64(m1, m5); \ + } while (0) + +#define LOAD_MSG_4_2(b0, b1) \ + do { \ + b0 = _mm_blend_epi16(m0, m3, 0xF0); \ + b1 = _mm_blend_epi16(m2, m7, 0xF0); \ + } while (0) + +#define LOAD_MSG_4_3(b0, b1) \ + do { \ + b0 = _mm_blend_epi16(m7, m5, 0xF0); \ + b1 = _mm_blend_epi16(m3, m1, 0xF0); \ + } while (0) + +#define LOAD_MSG_4_4(b0, b1) \ + do { \ + b0 = _mm_alignr_epi8(m6, m0, 8); \ + b1 = _mm_blend_epi16(m4, m6, 0xF0); \ + } while (0) + +#define LOAD_MSG_5_1(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m1, m3); \ + b1 = _mm_unpacklo_epi64(m0, m4); \ + } while (0) + +#define LOAD_MSG_5_2(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m6, m5); \ + b1 = _mm_unpackhi_epi64(m5, m1); \ + } while (0) + +#define LOAD_MSG_5_3(b0, b1) \ + do { \ + b0 = _mm_blend_epi16(m2, m3, 0xF0); \ + b1 = _mm_unpackhi_epi64(m7, m0); \ + } while (0) + +#define LOAD_MSG_5_4(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m6, m2); \ + b1 = _mm_blend_epi16(m7, m4, 0xF0); \ + } while (0) + +#define LOAD_MSG_6_1(b0, b1) \ + do { \ + b0 = _mm_blend_epi16(m6, m0, 0xF0); \ + b1 = _mm_unpacklo_epi64(m7, m2); \ + } while (0) + +#define LOAD_MSG_6_2(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m2, m7); \ + b1 = _mm_alignr_epi8(m5, m6, 8); \ + } while (0) + +#define LOAD_MSG_6_3(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m0, m3); \ + b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \ + } while (0) + +#define LOAD_MSG_6_4(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m3, m1); \ + b1 = _mm_blend_epi16(m1, m5, 0xF0); \ + } while (0) + +#define LOAD_MSG_7_1(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m6, m3); \ + b1 = _mm_blend_epi16(m6, m1, 0xF0); \ + } while (0) + +#define LOAD_MSG_7_2(b0, b1) \ + do { \ + b0 = _mm_alignr_epi8(m7, m5, 8); \ + b1 = _mm_unpackhi_epi64(m0, m4); \ + } while (0) + +#define LOAD_MSG_7_3(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m2, m7); \ + b1 = _mm_unpacklo_epi64(m4, m1); \ + } while (0) + +#define LOAD_MSG_7_4(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m0, m2); \ + b1 = _mm_unpacklo_epi64(m3, m5); \ + } while (0) + +#define LOAD_MSG_8_1(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m3, m7); \ + b1 = _mm_alignr_epi8(m0, m5, 8); \ + } while (0) + +#define LOAD_MSG_8_2(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m7, m4); \ + b1 = _mm_alignr_epi8(m4, m1, 8); \ + } while (0) + +#define LOAD_MSG_8_3(b0, b1) \ + do { \ + b0 = m6; \ + b1 = _mm_alignr_epi8(m5, m0, 8); \ + } while (0) + +#define LOAD_MSG_8_4(b0, b1) \ + do { \ + b0 = _mm_blend_epi16(m1, m3, 0xF0); \ + b1 = m2; \ + } while (0) + +#define LOAD_MSG_9_1(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m5, m4); \ + b1 = _mm_unpackhi_epi64(m3, m0); \ + } while (0) + +#define LOAD_MSG_9_2(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m1, m2); \ + b1 = _mm_blend_epi16(m3, m2, 0xF0); \ + } while (0) + +#define LOAD_MSG_9_3(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m7, m4); \ + b1 = _mm_unpackhi_epi64(m1, m6); \ + } while (0) + +#define LOAD_MSG_9_4(b0, b1) \ + do { \ + b0 = _mm_alignr_epi8(m7, m5, 8); \ + b1 = _mm_unpacklo_epi64(m6, m0); \ + } while (0) + +#define LOAD_MSG_10_1(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m0, m1); \ + b1 = _mm_unpacklo_epi64(m2, m3); \ + } while (0) + +#define LOAD_MSG_10_2(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m0, m1); \ + b1 = _mm_unpackhi_epi64(m2, m3); \ + } while (0) + +#define LOAD_MSG_10_3(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m4, m5); \ + b1 = _mm_unpacklo_epi64(m6, m7); \ + } while (0) + +#define LOAD_MSG_10_4(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m4, m5); \ + b1 = _mm_unpackhi_epi64(m6, m7); \ + } while (0) + +#define LOAD_MSG_11_1(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m7, m2); \ + b1 = _mm_unpackhi_epi64(m4, m6); \ + } while (0) + +#define LOAD_MSG_11_2(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m5, m4); \ + b1 = _mm_alignr_epi8(m3, m7, 8); \ + } while (0) + +#define LOAD_MSG_11_3(b0, b1) \ + do { \ + b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ + b1 = _mm_unpackhi_epi64(m5, m2); \ + } while (0) + +#define LOAD_MSG_11_4(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m6, m1); \ + b1 = _mm_unpackhi_epi64(m3, m1); \ + } while (0) diff --git a/library/cpp/digest/argonish/internal/blake2b/ya.make b/library/cpp/digest/argonish/internal/blake2b/ya.make index 1f6d903166..0aa6806b31 100644 --- a/library/cpp/digest/argonish/internal/blake2b/ya.make +++ b/library/cpp/digest/argonish/internal/blake2b/ya.make @@ -1,9 +1,9 @@ -LIBRARY() - -OWNER(e-sidorov) - +LIBRARY() + +OWNER(e-sidorov) + PEERDIR( library/cpp/digest/argonish/internal/rotations ) - -END() + +END() diff --git a/library/cpp/digest/argonish/internal/blamka/blamka_avx2.h b/library/cpp/digest/argonish/internal/blamka/blamka_avx2.h index bb701799c4..02c506d6ff 100644 --- a/library/cpp/digest/argonish/internal/blamka/blamka_avx2.h +++ b/library/cpp/digest/argonish/internal/blamka/blamka_avx2.h @@ -1,136 +1,136 @@ -#pragma once - -#include <immintrin.h> +#pragma once + +#include <immintrin.h> #include <library/cpp/digest/argonish/internal/rotations/rotations_avx2.h> - -namespace NArgonish { - static inline void BlamkaG1AVX2( - __m256i& a0, __m256i& a1, __m256i& b0, __m256i& b1, - __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { - __m256i ml = _mm256_mul_epu32(a0, b0); - ml = _mm256_add_epi64(ml, ml); - a0 = _mm256_add_epi64(a0, _mm256_add_epi64(b0, ml)); - d0 = _mm256_xor_si256(d0, a0); - d0 = Rotr32(d0); - - ml = _mm256_mul_epu32(c0, d0); - ml = _mm256_add_epi64(ml, ml); - c0 = _mm256_add_epi64(c0, _mm256_add_epi64(d0, ml)); - - b0 = _mm256_xor_si256(b0, c0); - b0 = Rotr24(b0); - - ml = _mm256_mul_epu32(a1, b1); - ml = _mm256_add_epi64(ml, ml); - a1 = _mm256_add_epi64(a1, _mm256_add_epi64(b1, ml)); - d1 = _mm256_xor_si256(d1, a1); - d1 = Rotr32(d1); - - ml = _mm256_mul_epu32(c1, d1); - ml = _mm256_add_epi64(ml, ml); - c1 = _mm256_add_epi64(c1, _mm256_add_epi64(d1, ml)); - - b1 = _mm256_xor_si256(b1, c1); - b1 = Rotr24(b1); - } - - static inline void BlamkaG2AVX2( - __m256i& a0, __m256i& a1, __m256i& b0, __m256i& b1, - __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { - __m256i ml = _mm256_mul_epu32(a0, b0); - ml = _mm256_add_epi64(ml, ml); - a0 = _mm256_add_epi64(a0, _mm256_add_epi64(b0, ml)); - d0 = _mm256_xor_si256(d0, a0); - d0 = Rotr16(d0); - - ml = _mm256_mul_epu32(c0, d0); - ml = _mm256_add_epi64(ml, ml); - c0 = _mm256_add_epi64(c0, _mm256_add_epi64(d0, ml)); - b0 = _mm256_xor_si256(b0, c0); - b0 = Rotr63(b0); - - ml = _mm256_mul_epu32(a1, b1); - ml = _mm256_add_epi64(ml, ml); - a1 = _mm256_add_epi64(a1, _mm256_add_epi64(b1, ml)); - d1 = _mm256_xor_si256(d1, a1); - d1 = Rotr16(d1); - - ml = _mm256_mul_epu32(c1, d1); - ml = _mm256_add_epi64(ml, ml); - c1 = _mm256_add_epi64(c1, _mm256_add_epi64(d1, ml)); - b1 = _mm256_xor_si256(b1, c1); - b1 = Rotr63(b1); - } - - /* a = ( v0, v1, v2, v3) */ - /* b = ( v4, v5, v6, v7) */ - /* c = ( v8, v9, v10, v11) */ - /* d = (v12, v13, v14, v15) */ - static inline void DiagonalizeAVX21( - __m256i& b0, __m256i& c0, __m256i& d0, __m256i& b1, __m256i& c1, __m256i& d1) { - /* (v4, v5, v6, v7) -> (v5, v6, v7, v4) */ - b0 = _mm256_permute4x64_epi64(b0, _MM_SHUFFLE(0, 3, 2, 1)); - /* (v8, v9, v10, v11) -> (v10, v11, v8, v9) */ - c0 = _mm256_permute4x64_epi64(c0, _MM_SHUFFLE(1, 0, 3, 2)); - /* (v12, v13, v14, v15) -> (v15, v12, v13, v14) */ - d0 = _mm256_permute4x64_epi64(d0, _MM_SHUFFLE(2, 1, 0, 3)); - - b1 = _mm256_permute4x64_epi64(b1, _MM_SHUFFLE(0, 3, 2, 1)); - c1 = _mm256_permute4x64_epi64(c1, _MM_SHUFFLE(1, 0, 3, 2)); - d1 = _mm256_permute4x64_epi64(d1, _MM_SHUFFLE(2, 1, 0, 3)); - } - - static inline void DiagonalizeAVX22( - __m256i& b0, __m256i& b1, __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { - /* (v4, v5, v6, v7) -> (v5, v6, v7, v4) */ - __m256i tmp1 = _mm256_blend_epi32(b0, b1, 0b11001100); /* v4v7 */ - __m256i tmp2 = _mm256_blend_epi32(b0, b1, 0b00110011); /* v6v5 */ - b1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); /* v7v4 */ - b0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); /* v5v6 */ - - /* (v8, v9, v10, v11) -> (v10, v11, v8, v9) */ - tmp1 = c0; - c0 = c1; - c1 = tmp1; - - /* (v12, v13, v14, v15) -> (v15, v12, v13, v14) */ - tmp1 = _mm256_blend_epi32(d0, d1, 0b11001100); /* v12v15 */ - tmp2 = _mm256_blend_epi32(d0, d1, 0b00110011); /* v14v13 */ - d0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); /* v15v12 */ - d1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); /* v13v14 */ - } - - static inline void UndiagonalizeAVX21( - __m256i& b0, __m256i& c0, __m256i& d0, __m256i& b1, __m256i& c1, __m256i& d1) { - /* (v5, v6, v7, v4) -> (v4, v5, v6, v7) */ - b0 = _mm256_permute4x64_epi64(b0, _MM_SHUFFLE(2, 1, 0, 3)); - /* (v10, v11, v8, v9) -> (v8, v9, v10, v11) */ - c0 = _mm256_permute4x64_epi64(c0, _MM_SHUFFLE(1, 0, 3, 2)); - /* (v15, v12, v13, v14) -> (v12, v13, v14, v15) */ - d0 = _mm256_permute4x64_epi64(d0, _MM_SHUFFLE(0, 3, 2, 1)); - - b1 = _mm256_permute4x64_epi64(b1, _MM_SHUFFLE(2, 1, 0, 3)); - c1 = _mm256_permute4x64_epi64(c1, _MM_SHUFFLE(1, 0, 3, 2)); - d1 = _mm256_permute4x64_epi64(d1, _MM_SHUFFLE(0, 3, 2, 1)); - } - - static inline void UndiagonalizeAVX22( - __m256i& b0, __m256i& b1, __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { - /* (v5, v6, v7, v4) -> (v4, v5, v6, v7) */ - __m256i tmp1 = _mm256_blend_epi32(b0, b1, 0b11001100); /* v5v4 */ - __m256i tmp2 = _mm256_blend_epi32(b0, b1, 0b00110011); /* v7v6 */ - b0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); /* v4v5 */ - b1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); /* v6v7 */ - - /* (v10,v11,v8,v9) -> (v8,v9,v10,v11) */ - tmp1 = c0; - c0 = c1; - c1 = tmp1; - - /* (v15,v12,v13,v14) -> (v12,v13,v14,v15) */ - tmp1 = _mm256_blend_epi32(d0, d1, 0b00110011); /* v13v12 */ - tmp2 = _mm256_blend_epi32(d0, d1, 0b11001100); /* v15v14 */ - d0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); - d1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); - } -} + +namespace NArgonish { + static inline void BlamkaG1AVX2( + __m256i& a0, __m256i& a1, __m256i& b0, __m256i& b1, + __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { + __m256i ml = _mm256_mul_epu32(a0, b0); + ml = _mm256_add_epi64(ml, ml); + a0 = _mm256_add_epi64(a0, _mm256_add_epi64(b0, ml)); + d0 = _mm256_xor_si256(d0, a0); + d0 = Rotr32(d0); + + ml = _mm256_mul_epu32(c0, d0); + ml = _mm256_add_epi64(ml, ml); + c0 = _mm256_add_epi64(c0, _mm256_add_epi64(d0, ml)); + + b0 = _mm256_xor_si256(b0, c0); + b0 = Rotr24(b0); + + ml = _mm256_mul_epu32(a1, b1); + ml = _mm256_add_epi64(ml, ml); + a1 = _mm256_add_epi64(a1, _mm256_add_epi64(b1, ml)); + d1 = _mm256_xor_si256(d1, a1); + d1 = Rotr32(d1); + + ml = _mm256_mul_epu32(c1, d1); + ml = _mm256_add_epi64(ml, ml); + c1 = _mm256_add_epi64(c1, _mm256_add_epi64(d1, ml)); + + b1 = _mm256_xor_si256(b1, c1); + b1 = Rotr24(b1); + } + + static inline void BlamkaG2AVX2( + __m256i& a0, __m256i& a1, __m256i& b0, __m256i& b1, + __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { + __m256i ml = _mm256_mul_epu32(a0, b0); + ml = _mm256_add_epi64(ml, ml); + a0 = _mm256_add_epi64(a0, _mm256_add_epi64(b0, ml)); + d0 = _mm256_xor_si256(d0, a0); + d0 = Rotr16(d0); + + ml = _mm256_mul_epu32(c0, d0); + ml = _mm256_add_epi64(ml, ml); + c0 = _mm256_add_epi64(c0, _mm256_add_epi64(d0, ml)); + b0 = _mm256_xor_si256(b0, c0); + b0 = Rotr63(b0); + + ml = _mm256_mul_epu32(a1, b1); + ml = _mm256_add_epi64(ml, ml); + a1 = _mm256_add_epi64(a1, _mm256_add_epi64(b1, ml)); + d1 = _mm256_xor_si256(d1, a1); + d1 = Rotr16(d1); + + ml = _mm256_mul_epu32(c1, d1); + ml = _mm256_add_epi64(ml, ml); + c1 = _mm256_add_epi64(c1, _mm256_add_epi64(d1, ml)); + b1 = _mm256_xor_si256(b1, c1); + b1 = Rotr63(b1); + } + + /* a = ( v0, v1, v2, v3) */ + /* b = ( v4, v5, v6, v7) */ + /* c = ( v8, v9, v10, v11) */ + /* d = (v12, v13, v14, v15) */ + static inline void DiagonalizeAVX21( + __m256i& b0, __m256i& c0, __m256i& d0, __m256i& b1, __m256i& c1, __m256i& d1) { + /* (v4, v5, v6, v7) -> (v5, v6, v7, v4) */ + b0 = _mm256_permute4x64_epi64(b0, _MM_SHUFFLE(0, 3, 2, 1)); + /* (v8, v9, v10, v11) -> (v10, v11, v8, v9) */ + c0 = _mm256_permute4x64_epi64(c0, _MM_SHUFFLE(1, 0, 3, 2)); + /* (v12, v13, v14, v15) -> (v15, v12, v13, v14) */ + d0 = _mm256_permute4x64_epi64(d0, _MM_SHUFFLE(2, 1, 0, 3)); + + b1 = _mm256_permute4x64_epi64(b1, _MM_SHUFFLE(0, 3, 2, 1)); + c1 = _mm256_permute4x64_epi64(c1, _MM_SHUFFLE(1, 0, 3, 2)); + d1 = _mm256_permute4x64_epi64(d1, _MM_SHUFFLE(2, 1, 0, 3)); + } + + static inline void DiagonalizeAVX22( + __m256i& b0, __m256i& b1, __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { + /* (v4, v5, v6, v7) -> (v5, v6, v7, v4) */ + __m256i tmp1 = _mm256_blend_epi32(b0, b1, 0b11001100); /* v4v7 */ + __m256i tmp2 = _mm256_blend_epi32(b0, b1, 0b00110011); /* v6v5 */ + b1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); /* v7v4 */ + b0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); /* v5v6 */ + + /* (v8, v9, v10, v11) -> (v10, v11, v8, v9) */ + tmp1 = c0; + c0 = c1; + c1 = tmp1; + + /* (v12, v13, v14, v15) -> (v15, v12, v13, v14) */ + tmp1 = _mm256_blend_epi32(d0, d1, 0b11001100); /* v12v15 */ + tmp2 = _mm256_blend_epi32(d0, d1, 0b00110011); /* v14v13 */ + d0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); /* v15v12 */ + d1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); /* v13v14 */ + } + + static inline void UndiagonalizeAVX21( + __m256i& b0, __m256i& c0, __m256i& d0, __m256i& b1, __m256i& c1, __m256i& d1) { + /* (v5, v6, v7, v4) -> (v4, v5, v6, v7) */ + b0 = _mm256_permute4x64_epi64(b0, _MM_SHUFFLE(2, 1, 0, 3)); + /* (v10, v11, v8, v9) -> (v8, v9, v10, v11) */ + c0 = _mm256_permute4x64_epi64(c0, _MM_SHUFFLE(1, 0, 3, 2)); + /* (v15, v12, v13, v14) -> (v12, v13, v14, v15) */ + d0 = _mm256_permute4x64_epi64(d0, _MM_SHUFFLE(0, 3, 2, 1)); + + b1 = _mm256_permute4x64_epi64(b1, _MM_SHUFFLE(2, 1, 0, 3)); + c1 = _mm256_permute4x64_epi64(c1, _MM_SHUFFLE(1, 0, 3, 2)); + d1 = _mm256_permute4x64_epi64(d1, _MM_SHUFFLE(0, 3, 2, 1)); + } + + static inline void UndiagonalizeAVX22( + __m256i& b0, __m256i& b1, __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { + /* (v5, v6, v7, v4) -> (v4, v5, v6, v7) */ + __m256i tmp1 = _mm256_blend_epi32(b0, b1, 0b11001100); /* v5v4 */ + __m256i tmp2 = _mm256_blend_epi32(b0, b1, 0b00110011); /* v7v6 */ + b0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); /* v4v5 */ + b1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); /* v6v7 */ + + /* (v10,v11,v8,v9) -> (v8,v9,v10,v11) */ + tmp1 = c0; + c0 = c1; + c1 = tmp1; + + /* (v15,v12,v13,v14) -> (v12,v13,v14,v15) */ + tmp1 = _mm256_blend_epi32(d0, d1, 0b00110011); /* v13v12 */ + tmp2 = _mm256_blend_epi32(d0, d1, 0b11001100); /* v15v14 */ + d0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); + d1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); + } +} diff --git a/library/cpp/digest/argonish/internal/blamka/blamka_sse2.h b/library/cpp/digest/argonish/internal/blamka/blamka_sse2.h index b46fc7624a..1b55651b34 100644 --- a/library/cpp/digest/argonish/internal/blamka/blamka_sse2.h +++ b/library/cpp/digest/argonish/internal/blamka/blamka_sse2.h @@ -1,95 +1,95 @@ -#pragma once - +#pragma once + #include <library/cpp/digest/argonish/internal/rotations/rotations_sse2.h> - -namespace NArgonish { - static inline void BlamkaG1SSE2( - __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, - __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i ml = _mm_mul_epu32(a0, b0); - ml = _mm_add_epi64(ml, ml); - a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); - - ml = _mm_mul_epu32(a1, b1); - ml = _mm_add_epi64(ml, ml); - a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); - - d0 = _mm_xor_si128(d0, a0); - d1 = _mm_xor_si128(d1, a1); - - d0 = Rotr32(d0); - d1 = Rotr32(d1); - - ml = _mm_mul_epu32(c0, d0); - ml = _mm_add_epi64(ml, ml); - c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); - - ml = _mm_mul_epu32(c1, d1); - ml = _mm_add_epi64(ml, ml); - c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); - - b0 = _mm_xor_si128(b0, c0); - b1 = _mm_xor_si128(b1, c1); - - b0 = Rotr24(b0); - b1 = Rotr24(b1); - } - - static inline void BlamkaG2SSE2( - __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, - __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i ml = _mm_mul_epu32(a0, b0); - ml = _mm_add_epi64(ml, ml); - a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); - - ml = _mm_mul_epu32(a1, b1); - ml = _mm_add_epi64(ml, ml); - a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); - - d0 = _mm_xor_si128(d0, a0); - d1 = _mm_xor_si128(d1, a1); - - d0 = Rotr16(d0); - d1 = Rotr16(d1); - - ml = _mm_mul_epu32(c0, d0); - ml = _mm_add_epi64(ml, ml); - c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); - - ml = _mm_mul_epu32(c1, d1); - ml = _mm_add_epi64(ml, ml); - c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); - - b0 = _mm_xor_si128(b0, c0); - b1 = _mm_xor_si128(b1, c1); - - b0 = Rotr63(b0); - b1 = Rotr63(b1); - } - - static inline void DiagonalizeSSE2( - __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i tmp0 = d0; - __m128i tmp1 = b0; - d0 = c0; - c0 = c1; - c1 = d0; - d0 = _mm_unpackhi_epi64(d1, _mm_unpacklo_epi64(tmp0, tmp0)); - d1 = _mm_unpackhi_epi64(tmp0, _mm_unpacklo_epi64(d1, d1)); - b0 = _mm_unpackhi_epi64(b0, _mm_unpacklo_epi64(b1, b1)); - b1 = _mm_unpackhi_epi64(b1, _mm_unpacklo_epi64(tmp1, tmp1)); - } - - static inline void UndiagonalizeSSE2( - __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i tmp0 = c0; - c0 = c1; - c1 = tmp0; - tmp0 = b0; - __m128i tmp1 = d0; - b0 = _mm_unpackhi_epi64(b1, _mm_unpacklo_epi64(b0, b0)); - b1 = _mm_unpackhi_epi64(tmp0, _mm_unpacklo_epi64(b1, b1)); - d0 = _mm_unpackhi_epi64(d0, _mm_unpacklo_epi64(d1, d1)); - d1 = _mm_unpackhi_epi64(d1, _mm_unpacklo_epi64(tmp1, tmp1)); - } -} + +namespace NArgonish { + static inline void BlamkaG1SSE2( + __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, + __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i ml = _mm_mul_epu32(a0, b0); + ml = _mm_add_epi64(ml, ml); + a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); + + ml = _mm_mul_epu32(a1, b1); + ml = _mm_add_epi64(ml, ml); + a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); + + d0 = _mm_xor_si128(d0, a0); + d1 = _mm_xor_si128(d1, a1); + + d0 = Rotr32(d0); + d1 = Rotr32(d1); + + ml = _mm_mul_epu32(c0, d0); + ml = _mm_add_epi64(ml, ml); + c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); + + ml = _mm_mul_epu32(c1, d1); + ml = _mm_add_epi64(ml, ml); + c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); + + b0 = _mm_xor_si128(b0, c0); + b1 = _mm_xor_si128(b1, c1); + + b0 = Rotr24(b0); + b1 = Rotr24(b1); + } + + static inline void BlamkaG2SSE2( + __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, + __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i ml = _mm_mul_epu32(a0, b0); + ml = _mm_add_epi64(ml, ml); + a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); + + ml = _mm_mul_epu32(a1, b1); + ml = _mm_add_epi64(ml, ml); + a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); + + d0 = _mm_xor_si128(d0, a0); + d1 = _mm_xor_si128(d1, a1); + + d0 = Rotr16(d0); + d1 = Rotr16(d1); + + ml = _mm_mul_epu32(c0, d0); + ml = _mm_add_epi64(ml, ml); + c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); + + ml = _mm_mul_epu32(c1, d1); + ml = _mm_add_epi64(ml, ml); + c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); + + b0 = _mm_xor_si128(b0, c0); + b1 = _mm_xor_si128(b1, c1); + + b0 = Rotr63(b0); + b1 = Rotr63(b1); + } + + static inline void DiagonalizeSSE2( + __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i tmp0 = d0; + __m128i tmp1 = b0; + d0 = c0; + c0 = c1; + c1 = d0; + d0 = _mm_unpackhi_epi64(d1, _mm_unpacklo_epi64(tmp0, tmp0)); + d1 = _mm_unpackhi_epi64(tmp0, _mm_unpacklo_epi64(d1, d1)); + b0 = _mm_unpackhi_epi64(b0, _mm_unpacklo_epi64(b1, b1)); + b1 = _mm_unpackhi_epi64(b1, _mm_unpacklo_epi64(tmp1, tmp1)); + } + + static inline void UndiagonalizeSSE2( + __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i tmp0 = c0; + c0 = c1; + c1 = tmp0; + tmp0 = b0; + __m128i tmp1 = d0; + b0 = _mm_unpackhi_epi64(b1, _mm_unpacklo_epi64(b0, b0)); + b1 = _mm_unpackhi_epi64(tmp0, _mm_unpacklo_epi64(b1, b1)); + d0 = _mm_unpackhi_epi64(d0, _mm_unpacklo_epi64(d1, d1)); + d1 = _mm_unpackhi_epi64(d1, _mm_unpacklo_epi64(tmp1, tmp1)); + } +} diff --git a/library/cpp/digest/argonish/internal/blamka/blamka_ssse3.h b/library/cpp/digest/argonish/internal/blamka/blamka_ssse3.h index a7bd0c9539..46e8500cd6 100644 --- a/library/cpp/digest/argonish/internal/blamka/blamka_ssse3.h +++ b/library/cpp/digest/argonish/internal/blamka/blamka_ssse3.h @@ -1,103 +1,103 @@ -#pragma once - +#pragma once + #include <library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h> - -namespace NArgonish { - static inline void BlamkaG1SSSE3( - __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, - __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i ml = _mm_mul_epu32(a0, b0); - ml = _mm_add_epi64(ml, ml); - a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); - - ml = _mm_mul_epu32(a1, b1); - ml = _mm_add_epi64(ml, ml); - a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); - - d0 = _mm_xor_si128(d0, a0); - d1 = _mm_xor_si128(d1, a1); - - d0 = Rotr32(d0); - d1 = Rotr32(d1); - - ml = _mm_mul_epu32(c0, d0); - ml = _mm_add_epi64(ml, ml); - c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); - - ml = _mm_mul_epu32(c1, d1); - ml = _mm_add_epi64(ml, ml); - c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); - - b0 = _mm_xor_si128(b0, c0); - b1 = _mm_xor_si128(b1, c1); - - b0 = Rotr24(b0); - b1 = Rotr24(b1); - } - - static inline void BlamkaG2SSSE3( - __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, - __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i ml = _mm_mul_epu32(a0, b0); - ml = _mm_add_epi64(ml, ml); - a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); - - ml = _mm_mul_epu32(a1, b1); - ml = _mm_add_epi64(ml, ml); - a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); - - d0 = _mm_xor_si128(d0, a0); - d1 = _mm_xor_si128(d1, a1); - - d0 = Rotr16(d0); - d1 = Rotr16(d1); - - ml = _mm_mul_epu32(c0, d0); - ml = _mm_add_epi64(ml, ml); - c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); - - ml = _mm_mul_epu32(c1, d1); - ml = _mm_add_epi64(ml, ml); - c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); - - b0 = _mm_xor_si128(b0, c0); - b1 = _mm_xor_si128(b1, c1); - - b0 = Rotr63(b0); - b1 = Rotr63(b1); - } - - static inline void DiagonalizeSSSE3( - __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i t0 = _mm_alignr_epi8(b1, b0, 8); - __m128i t1 = _mm_alignr_epi8(b0, b1, 8); - b0 = t0; - b1 = t1; - - t0 = c0; - c0 = c1; - c1 = t0; - - t0 = _mm_alignr_epi8(d1, d0, 8); - t1 = _mm_alignr_epi8(d0, d1, 8); - d0 = t1; - d1 = t0; - } - - static inline void UndiagonalizeSSSE3( - __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i t0 = _mm_alignr_epi8(b0, b1, 8); - __m128i t1 = _mm_alignr_epi8(b1, b0, 8); - b0 = t0; - b1 = t1; - - t0 = c0; - c0 = c1; - c1 = t0; - - t0 = _mm_alignr_epi8(d0, d1, 8); - t1 = _mm_alignr_epi8(d1, d0, 8); - d0 = t1; - d1 = t0; - } -} + +namespace NArgonish { + static inline void BlamkaG1SSSE3( + __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, + __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i ml = _mm_mul_epu32(a0, b0); + ml = _mm_add_epi64(ml, ml); + a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); + + ml = _mm_mul_epu32(a1, b1); + ml = _mm_add_epi64(ml, ml); + a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); + + d0 = _mm_xor_si128(d0, a0); + d1 = _mm_xor_si128(d1, a1); + + d0 = Rotr32(d0); + d1 = Rotr32(d1); + + ml = _mm_mul_epu32(c0, d0); + ml = _mm_add_epi64(ml, ml); + c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); + + ml = _mm_mul_epu32(c1, d1); + ml = _mm_add_epi64(ml, ml); + c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); + + b0 = _mm_xor_si128(b0, c0); + b1 = _mm_xor_si128(b1, c1); + + b0 = Rotr24(b0); + b1 = Rotr24(b1); + } + + static inline void BlamkaG2SSSE3( + __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, + __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i ml = _mm_mul_epu32(a0, b0); + ml = _mm_add_epi64(ml, ml); + a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); + + ml = _mm_mul_epu32(a1, b1); + ml = _mm_add_epi64(ml, ml); + a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); + + d0 = _mm_xor_si128(d0, a0); + d1 = _mm_xor_si128(d1, a1); + + d0 = Rotr16(d0); + d1 = Rotr16(d1); + + ml = _mm_mul_epu32(c0, d0); + ml = _mm_add_epi64(ml, ml); + c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); + + ml = _mm_mul_epu32(c1, d1); + ml = _mm_add_epi64(ml, ml); + c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); + + b0 = _mm_xor_si128(b0, c0); + b1 = _mm_xor_si128(b1, c1); + + b0 = Rotr63(b0); + b1 = Rotr63(b1); + } + + static inline void DiagonalizeSSSE3( + __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i t0 = _mm_alignr_epi8(b1, b0, 8); + __m128i t1 = _mm_alignr_epi8(b0, b1, 8); + b0 = t0; + b1 = t1; + + t0 = c0; + c0 = c1; + c1 = t0; + + t0 = _mm_alignr_epi8(d1, d0, 8); + t1 = _mm_alignr_epi8(d0, d1, 8); + d0 = t1; + d1 = t0; + } + + static inline void UndiagonalizeSSSE3( + __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i t0 = _mm_alignr_epi8(b0, b1, 8); + __m128i t1 = _mm_alignr_epi8(b1, b0, 8); + b0 = t0; + b1 = t1; + + t0 = c0; + c0 = c1; + c1 = t0; + + t0 = _mm_alignr_epi8(d0, d1, 8); + t1 = _mm_alignr_epi8(d1, d0, 8); + d0 = t1; + d1 = t0; + } +} diff --git a/library/cpp/digest/argonish/internal/blamka/ya.make b/library/cpp/digest/argonish/internal/blamka/ya.make index 1f6d903166..0aa6806b31 100644 --- a/library/cpp/digest/argonish/internal/blamka/ya.make +++ b/library/cpp/digest/argonish/internal/blamka/ya.make @@ -1,9 +1,9 @@ -LIBRARY() - -OWNER(e-sidorov) - +LIBRARY() + +OWNER(e-sidorov) + PEERDIR( library/cpp/digest/argonish/internal/rotations ) - -END() + +END() diff --git a/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.cpp b/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.cpp index 8d320063f4..c1cf004f58 100644 --- a/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.cpp +++ b/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.cpp @@ -1,18 +1,18 @@ -// -// Created by Evgeny Sidorov on 12/04/17. -// - -#include "proxy_avx2.h" +// +// Created by Evgeny Sidorov on 12/04/17. +// + +#include "proxy_avx2.h" #include <library/cpp/digest/argonish/internal/argon2/argon2_base.h> #include <library/cpp/digest/argonish/internal/argon2/argon2_avx2.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b_avx2.h> - -#define ZEROUPPER _mm256_zeroupper(); - -namespace NArgonish { - ARGON2_PROXY_CLASS_IMPL(AVX2) - BLAKE2B_PROXY_CLASS_IMPL(AVX2) -} - -#undef ZEROUPPER + +#define ZEROUPPER _mm256_zeroupper(); + +namespace NArgonish { + ARGON2_PROXY_CLASS_IMPL(AVX2) + BLAKE2B_PROXY_CLASS_IMPL(AVX2) +} + +#undef ZEROUPPER diff --git a/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.h b/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.h index fca23250a2..eec0094563 100644 --- a/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.h +++ b/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.h @@ -1,11 +1,11 @@ -#pragma once - -#include <util/generic/yexception.h> +#pragma once + +#include <util/generic/yexception.h> #include <library/cpp/digest/argonish/argon2.h> #include <library/cpp/digest/argonish/blake2b.h> #include <library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h> - -namespace NArgonish { - ARGON2_PROXY_CLASS_DECL(AVX2) - BLAKE2B_PROXY_CLASS_DECL(AVX2) -} + +namespace NArgonish { + ARGON2_PROXY_CLASS_DECL(AVX2) + BLAKE2B_PROXY_CLASS_DECL(AVX2) +} diff --git a/library/cpp/digest/argonish/internal/proxies/avx2/ya.make b/library/cpp/digest/argonish/internal/proxies/avx2/ya.make index 94ce211e06..53f814c48d 100644 --- a/library/cpp/digest/argonish/internal/proxies/avx2/ya.make +++ b/library/cpp/digest/argonish/internal/proxies/avx2/ya.make @@ -1,18 +1,18 @@ -OWNER(e-sidorov) - -LIBRARY() - -NO_UTIL() - -IF (ARCH_X86_64 OR ARCH_I386) - PEERDIR( - library/cpp/digest/argonish/internal/proxies/macro - library/cpp/digest/argonish/internal/argon2 - library/cpp/digest/argonish/internal/blake2b - ) - SRC_CPP_AVX2( - proxy_avx2.cpp - ) -ENDIF() - -END() +OWNER(e-sidorov) + +LIBRARY() + +NO_UTIL() + +IF (ARCH_X86_64 OR ARCH_I386) + PEERDIR( + library/cpp/digest/argonish/internal/proxies/macro + library/cpp/digest/argonish/internal/argon2 + library/cpp/digest/argonish/internal/blake2b + ) + SRC_CPP_AVX2( + proxy_avx2.cpp + ) +ENDIF() + +END() diff --git a/library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h b/library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h index d9bddf55bd..5ed5f53b4f 100644 --- a/library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h +++ b/library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h @@ -1,194 +1,194 @@ -#pragma once - -// -// Created by Evgeny Sidorov on 12/04/17. -// -/** - * ZEROUPPER macro is only used for AVX2 instruction set to clear up the upper half of YMM registers - * It's done to avoid performance penalty when CPU switches to non-AVX2 code (according to Agner) - * and the post at https://software.intel.com/en-us/articles/intel-avx-state-transitions-migrating-sse-code-to-avx - */ - -#define ARGON2_PROXY_CLASS_DECL(IS) \ - class TArgon2Proxy##IS final: public IArgon2Base { \ - public: \ - TArgon2Proxy##IS(EArgon2Type atype, ui32 tcost, ui32 mcost, ui32 threads, \ - const ui8* key = nullptr, ui32 keylen = 0); \ - virtual ~TArgon2Proxy##IS(); \ - \ - virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \ - ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ - virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \ - const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ - virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \ - const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, \ - const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ - virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \ - const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, \ - const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ - virtual size_t GetMemorySize() const override; \ - \ - protected: \ - THolder<IArgon2Base> argon2; \ - }; - -#define ARGON2_INSTANCE_DECL(IS_val, mcost_val, threads_val) \ - if (mcost == mcost_val && threads == threads_val) { \ - argon2 = MakeHolder<TArgon2##IS_val<mcost_val, threads_val>>(atype, tcost, key, keylen); \ - return; \ - } - -#define ARGON2_PROXY_CLASS_IMPL(IS) \ - TArgon2Proxy##IS::TArgon2Proxy##IS(EArgon2Type atype, ui32 tcost, ui32 mcost, ui32 threads, \ - const ui8* key, ui32 keylen) { \ - if ((key == nullptr && keylen > 0) || keylen > ARGON2_SECRET_MAX_LENGTH) \ - ythrow yexception() << "key is null or keylen equals 0 or key is too long"; \ - \ - ARGON2_INSTANCE_DECL(IS, 1, 1) \ +#pragma once + +// +// Created by Evgeny Sidorov on 12/04/17. +// +/** + * ZEROUPPER macro is only used for AVX2 instruction set to clear up the upper half of YMM registers + * It's done to avoid performance penalty when CPU switches to non-AVX2 code (according to Agner) + * and the post at https://software.intel.com/en-us/articles/intel-avx-state-transitions-migrating-sse-code-to-avx + */ + +#define ARGON2_PROXY_CLASS_DECL(IS) \ + class TArgon2Proxy##IS final: public IArgon2Base { \ + public: \ + TArgon2Proxy##IS(EArgon2Type atype, ui32 tcost, ui32 mcost, ui32 threads, \ + const ui8* key = nullptr, ui32 keylen = 0); \ + virtual ~TArgon2Proxy##IS(); \ + \ + virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \ + ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ + virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \ + const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ + virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \ + const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, \ + const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ + virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \ + const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, \ + const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ + virtual size_t GetMemorySize() const override; \ + \ + protected: \ + THolder<IArgon2Base> argon2; \ + }; + +#define ARGON2_INSTANCE_DECL(IS_val, mcost_val, threads_val) \ + if (mcost == mcost_val && threads == threads_val) { \ + argon2 = MakeHolder<TArgon2##IS_val<mcost_val, threads_val>>(atype, tcost, key, keylen); \ + return; \ + } + +#define ARGON2_PROXY_CLASS_IMPL(IS) \ + TArgon2Proxy##IS::TArgon2Proxy##IS(EArgon2Type atype, ui32 tcost, ui32 mcost, ui32 threads, \ + const ui8* key, ui32 keylen) { \ + if ((key == nullptr && keylen > 0) || keylen > ARGON2_SECRET_MAX_LENGTH) \ + ythrow yexception() << "key is null or keylen equals 0 or key is too long"; \ + \ + ARGON2_INSTANCE_DECL(IS, 1, 1) \ ARGON2_INSTANCE_DECL(IS, 8, 1) \ - ARGON2_INSTANCE_DECL(IS, 16, 1) \ - ARGON2_INSTANCE_DECL(IS, 32, 1) \ - ARGON2_INSTANCE_DECL(IS, 64, 1) \ + ARGON2_INSTANCE_DECL(IS, 16, 1) \ + ARGON2_INSTANCE_DECL(IS, 32, 1) \ + ARGON2_INSTANCE_DECL(IS, 64, 1) \ ARGON2_INSTANCE_DECL(IS, 128, 1) \ ARGON2_INSTANCE_DECL(IS, 256, 1) \ - ARGON2_INSTANCE_DECL(IS, 512, 1) \ - ARGON2_INSTANCE_DECL(IS, 1024, 1) \ - ARGON2_INSTANCE_DECL(IS, 2048, 1) \ - ARGON2_INSTANCE_DECL(IS, 4096, 1) \ - ARGON2_INSTANCE_DECL(IS, 8192, 1) \ - ARGON2_INSTANCE_DECL(IS, 16384, 1) \ - ARGON2_INSTANCE_DECL(IS, 32768, 1) \ - ARGON2_INSTANCE_DECL(IS, 65536, 1) \ - ARGON2_INSTANCE_DECL(IS, 131072, 1) \ - ARGON2_INSTANCE_DECL(IS, 262144, 1) \ - ARGON2_INSTANCE_DECL(IS, 524288, 1) \ - ARGON2_INSTANCE_DECL(IS, 1048576, 1) \ - ARGON2_INSTANCE_DECL(IS, 1, 2) \ - ARGON2_INSTANCE_DECL(IS, 32, 2) \ - ARGON2_INSTANCE_DECL(IS, 64, 2) \ - ARGON2_INSTANCE_DECL(IS, 512, 2) \ - ARGON2_INSTANCE_DECL(IS, 1024, 2) \ - ARGON2_INSTANCE_DECL(IS, 2048, 2) \ - ARGON2_INSTANCE_DECL(IS, 4096, 2) \ - ARGON2_INSTANCE_DECL(IS, 8192, 2) \ - ARGON2_INSTANCE_DECL(IS, 16384, 2) \ - ARGON2_INSTANCE_DECL(IS, 32768, 2) \ - ARGON2_INSTANCE_DECL(IS, 65536, 2) \ - ARGON2_INSTANCE_DECL(IS, 131072, 2) \ - ARGON2_INSTANCE_DECL(IS, 262144, 2) \ - ARGON2_INSTANCE_DECL(IS, 524288, 2) \ - ARGON2_INSTANCE_DECL(IS, 1048576, 2) \ - ARGON2_INSTANCE_DECL(IS, 1, 4) \ - ARGON2_INSTANCE_DECL(IS, 32, 4) \ - ARGON2_INSTANCE_DECL(IS, 64, 4) \ - ARGON2_INSTANCE_DECL(IS, 512, 4) \ - ARGON2_INSTANCE_DECL(IS, 1024, 4) \ - ARGON2_INSTANCE_DECL(IS, 2048, 4) \ - ARGON2_INSTANCE_DECL(IS, 4096, 4) \ - ARGON2_INSTANCE_DECL(IS, 8192, 4) \ - ARGON2_INSTANCE_DECL(IS, 16384, 4) \ - ARGON2_INSTANCE_DECL(IS, 32768, 4) \ - ARGON2_INSTANCE_DECL(IS, 65536, 4) \ - ARGON2_INSTANCE_DECL(IS, 131072, 4) \ - ARGON2_INSTANCE_DECL(IS, 262144, 4) \ - ARGON2_INSTANCE_DECL(IS, 524288, 4) \ - ARGON2_INSTANCE_DECL(IS, 1048576, 4) \ - \ - ythrow yexception() << "These parameters are not supported. Please add the corresponding ARGON2_INSTANCE_DECL macro"; \ - } \ - \ - TArgon2Proxy##IS::~TArgon2Proxy##IS() { \ - } \ - \ - void TArgon2Proxy##IS::Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \ - ui8* out, ui32 outlen, const ui8* aad, ui32 aadlen) const { \ - if (saltlen < ARGON2_SALT_MIN_LEN) \ - ythrow yexception() << "salt is too short"; \ - if (outlen < ARGON2_MIN_OUTLEN) \ - ythrow yexception() << "output length is too short"; \ - \ - argon2->Hash(pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); \ - ZEROUPPER \ - } \ - \ - bool TArgon2Proxy##IS::Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \ - const ui8* hash, ui32 hashlen, const ui8* aad, ui32 aadlen) const { \ - if (saltlen < ARGON2_SALT_MIN_LEN) \ - ythrow yexception() << "salt is too short"; \ - if (hashlen < ARGON2_MIN_OUTLEN) \ - ythrow yexception() << "hash length is too short"; \ - \ - return argon2->Verify(pwd, pwdlen, salt, saltlen, hash, hashlen, aad, aadlen); \ - ZEROUPPER \ - } \ - \ - void TArgon2Proxy##IS::HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \ - const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, \ - const ui8* aad, ui32 aadlen) const { \ - if (saltlen < ARGON2_SALT_MIN_LEN) \ - ythrow yexception() << "salt is too short"; \ - if (outlen < ARGON2_MIN_OUTLEN) \ - ythrow yexception() << "output length is too short"; \ - \ - argon2->HashWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); \ - ZEROUPPER \ - } \ - \ - bool TArgon2Proxy##IS::VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \ - const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, \ - const ui8* aad, ui32 aadlen) const { \ - if (saltlen < ARGON2_SALT_MIN_LEN) \ - ythrow yexception() << "salt is too short"; \ - if (hashlen < ARGON2_MIN_OUTLEN) \ - ythrow yexception() << "hash length is too short"; \ - \ - return argon2->VerifyWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, hash, hashlen, aad, aadlen); \ - ZEROUPPER \ - } \ - \ - size_t TArgon2Proxy##IS::GetMemorySize() const { \ - return argon2->GetMemorySize(); \ - } - -#define BLAKE2B_PROXY_CLASS_DECL(IS) \ - class TBlake2BProxy##IS final: public IBlake2Base { \ - public: \ - TBlake2BProxy##IS(size_t outlen, const void* key = nullptr, size_t keylen = 0); \ - virtual void Update(ui32 in) override; \ - virtual void Update(const void* pin, size_t inlen) override; \ - virtual void Final(void* out, size_t outlen) override; \ - \ - protected: \ - THolder<IBlake2Base> blake2; \ - }; - -#define BLAKE2B_PROXY_CLASS_IMPL(IS) \ - TBlake2BProxy##IS::TBlake2BProxy##IS(size_t outlen, const void* key, size_t keylen) { \ - if (!outlen || outlen > BLAKE2B_OUTBYTES) \ - ythrow yexception() << "outlen equals 0 or too long"; \ - \ - if (key == nullptr) { \ - blake2 = MakeHolder<TBlake2B<EInstructionSet::IS>>(outlen); \ - return; \ - } \ - \ - if (!key || !keylen || keylen > BLAKE2B_KEYBYTES) \ - ythrow yexception() << "key is null or too long"; \ - \ - blake2 = MakeHolder<TBlake2B<EInstructionSet::IS>>(outlen, key, keylen); \ - } \ - \ - void TBlake2BProxy##IS::Update(ui32 in) { \ - blake2->Update(in); \ - ZEROUPPER \ - } \ - \ - void TBlake2BProxy##IS::Update(const void* pin, size_t inlen) { \ - blake2->Update(pin, inlen); \ - ZEROUPPER \ - } \ - \ - void TBlake2BProxy##IS::Final(void* out, size_t outlen) { \ - blake2->Final(out, outlen); \ - ZEROUPPER \ - } + ARGON2_INSTANCE_DECL(IS, 512, 1) \ + ARGON2_INSTANCE_DECL(IS, 1024, 1) \ + ARGON2_INSTANCE_DECL(IS, 2048, 1) \ + ARGON2_INSTANCE_DECL(IS, 4096, 1) \ + ARGON2_INSTANCE_DECL(IS, 8192, 1) \ + ARGON2_INSTANCE_DECL(IS, 16384, 1) \ + ARGON2_INSTANCE_DECL(IS, 32768, 1) \ + ARGON2_INSTANCE_DECL(IS, 65536, 1) \ + ARGON2_INSTANCE_DECL(IS, 131072, 1) \ + ARGON2_INSTANCE_DECL(IS, 262144, 1) \ + ARGON2_INSTANCE_DECL(IS, 524288, 1) \ + ARGON2_INSTANCE_DECL(IS, 1048576, 1) \ + ARGON2_INSTANCE_DECL(IS, 1, 2) \ + ARGON2_INSTANCE_DECL(IS, 32, 2) \ + ARGON2_INSTANCE_DECL(IS, 64, 2) \ + ARGON2_INSTANCE_DECL(IS, 512, 2) \ + ARGON2_INSTANCE_DECL(IS, 1024, 2) \ + ARGON2_INSTANCE_DECL(IS, 2048, 2) \ + ARGON2_INSTANCE_DECL(IS, 4096, 2) \ + ARGON2_INSTANCE_DECL(IS, 8192, 2) \ + ARGON2_INSTANCE_DECL(IS, 16384, 2) \ + ARGON2_INSTANCE_DECL(IS, 32768, 2) \ + ARGON2_INSTANCE_DECL(IS, 65536, 2) \ + ARGON2_INSTANCE_DECL(IS, 131072, 2) \ + ARGON2_INSTANCE_DECL(IS, 262144, 2) \ + ARGON2_INSTANCE_DECL(IS, 524288, 2) \ + ARGON2_INSTANCE_DECL(IS, 1048576, 2) \ + ARGON2_INSTANCE_DECL(IS, 1, 4) \ + ARGON2_INSTANCE_DECL(IS, 32, 4) \ + ARGON2_INSTANCE_DECL(IS, 64, 4) \ + ARGON2_INSTANCE_DECL(IS, 512, 4) \ + ARGON2_INSTANCE_DECL(IS, 1024, 4) \ + ARGON2_INSTANCE_DECL(IS, 2048, 4) \ + ARGON2_INSTANCE_DECL(IS, 4096, 4) \ + ARGON2_INSTANCE_DECL(IS, 8192, 4) \ + ARGON2_INSTANCE_DECL(IS, 16384, 4) \ + ARGON2_INSTANCE_DECL(IS, 32768, 4) \ + ARGON2_INSTANCE_DECL(IS, 65536, 4) \ + ARGON2_INSTANCE_DECL(IS, 131072, 4) \ + ARGON2_INSTANCE_DECL(IS, 262144, 4) \ + ARGON2_INSTANCE_DECL(IS, 524288, 4) \ + ARGON2_INSTANCE_DECL(IS, 1048576, 4) \ + \ + ythrow yexception() << "These parameters are not supported. Please add the corresponding ARGON2_INSTANCE_DECL macro"; \ + } \ + \ + TArgon2Proxy##IS::~TArgon2Proxy##IS() { \ + } \ + \ + void TArgon2Proxy##IS::Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \ + ui8* out, ui32 outlen, const ui8* aad, ui32 aadlen) const { \ + if (saltlen < ARGON2_SALT_MIN_LEN) \ + ythrow yexception() << "salt is too short"; \ + if (outlen < ARGON2_MIN_OUTLEN) \ + ythrow yexception() << "output length is too short"; \ + \ + argon2->Hash(pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); \ + ZEROUPPER \ + } \ + \ + bool TArgon2Proxy##IS::Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \ + const ui8* hash, ui32 hashlen, const ui8* aad, ui32 aadlen) const { \ + if (saltlen < ARGON2_SALT_MIN_LEN) \ + ythrow yexception() << "salt is too short"; \ + if (hashlen < ARGON2_MIN_OUTLEN) \ + ythrow yexception() << "hash length is too short"; \ + \ + return argon2->Verify(pwd, pwdlen, salt, saltlen, hash, hashlen, aad, aadlen); \ + ZEROUPPER \ + } \ + \ + void TArgon2Proxy##IS::HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \ + const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, \ + const ui8* aad, ui32 aadlen) const { \ + if (saltlen < ARGON2_SALT_MIN_LEN) \ + ythrow yexception() << "salt is too short"; \ + if (outlen < ARGON2_MIN_OUTLEN) \ + ythrow yexception() << "output length is too short"; \ + \ + argon2->HashWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); \ + ZEROUPPER \ + } \ + \ + bool TArgon2Proxy##IS::VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \ + const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, \ + const ui8* aad, ui32 aadlen) const { \ + if (saltlen < ARGON2_SALT_MIN_LEN) \ + ythrow yexception() << "salt is too short"; \ + if (hashlen < ARGON2_MIN_OUTLEN) \ + ythrow yexception() << "hash length is too short"; \ + \ + return argon2->VerifyWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, hash, hashlen, aad, aadlen); \ + ZEROUPPER \ + } \ + \ + size_t TArgon2Proxy##IS::GetMemorySize() const { \ + return argon2->GetMemorySize(); \ + } + +#define BLAKE2B_PROXY_CLASS_DECL(IS) \ + class TBlake2BProxy##IS final: public IBlake2Base { \ + public: \ + TBlake2BProxy##IS(size_t outlen, const void* key = nullptr, size_t keylen = 0); \ + virtual void Update(ui32 in) override; \ + virtual void Update(const void* pin, size_t inlen) override; \ + virtual void Final(void* out, size_t outlen) override; \ + \ + protected: \ + THolder<IBlake2Base> blake2; \ + }; + +#define BLAKE2B_PROXY_CLASS_IMPL(IS) \ + TBlake2BProxy##IS::TBlake2BProxy##IS(size_t outlen, const void* key, size_t keylen) { \ + if (!outlen || outlen > BLAKE2B_OUTBYTES) \ + ythrow yexception() << "outlen equals 0 or too long"; \ + \ + if (key == nullptr) { \ + blake2 = MakeHolder<TBlake2B<EInstructionSet::IS>>(outlen); \ + return; \ + } \ + \ + if (!key || !keylen || keylen > BLAKE2B_KEYBYTES) \ + ythrow yexception() << "key is null or too long"; \ + \ + blake2 = MakeHolder<TBlake2B<EInstructionSet::IS>>(outlen, key, keylen); \ + } \ + \ + void TBlake2BProxy##IS::Update(ui32 in) { \ + blake2->Update(in); \ + ZEROUPPER \ + } \ + \ + void TBlake2BProxy##IS::Update(const void* pin, size_t inlen) { \ + blake2->Update(pin, inlen); \ + ZEROUPPER \ + } \ + \ + void TBlake2BProxy##IS::Final(void* out, size_t outlen) { \ + blake2->Final(out, outlen); \ + ZEROUPPER \ + } diff --git a/library/cpp/digest/argonish/internal/proxies/macro/ya.make b/library/cpp/digest/argonish/internal/proxies/macro/ya.make index b2b79b2b2a..5f639d4571 100644 --- a/library/cpp/digest/argonish/internal/proxies/macro/ya.make +++ b/library/cpp/digest/argonish/internal/proxies/macro/ya.make @@ -1,5 +1,5 @@ -LIBRARY() - -OWNER(e-sidorov) - -END() +LIBRARY() + +OWNER(e-sidorov) + +END() diff --git a/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.cpp b/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.cpp index 55832396be..0bc51866fd 100644 --- a/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.cpp +++ b/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.cpp @@ -1,20 +1,20 @@ -// -// Created by Evgeny Sidorov on 12/04/17. -// - -#include "proxy_ref.h" +// +// Created by Evgeny Sidorov on 12/04/17. +// + +#include "proxy_ref.h" #include <library/cpp/digest/argonish/internal/argon2/argon2_base.h> #include <library/cpp/digest/argonish/internal/argon2/argon2_ref.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b_ref.h> - -#include <stdexcept> - -#define ZEROUPPER ; - -namespace NArgonish { - ARGON2_PROXY_CLASS_IMPL(REF) - BLAKE2B_PROXY_CLASS_IMPL(REF) -} - -#undef ZEROUPPER + +#include <stdexcept> + +#define ZEROUPPER ; + +namespace NArgonish { + ARGON2_PROXY_CLASS_IMPL(REF) + BLAKE2B_PROXY_CLASS_IMPL(REF) +} + +#undef ZEROUPPER diff --git a/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.h b/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.h index c9217a986c..821abc50cd 100644 --- a/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.h +++ b/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.h @@ -1,11 +1,11 @@ -#pragma once - -#include <util/generic/yexception.h> +#pragma once + +#include <util/generic/yexception.h> #include <library/cpp/digest/argonish/argon2.h> #include <library/cpp/digest/argonish/blake2b.h> #include <library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h> - -namespace NArgonish { - ARGON2_PROXY_CLASS_DECL(REF) - BLAKE2B_PROXY_CLASS_DECL(REF) -} + +namespace NArgonish { + ARGON2_PROXY_CLASS_DECL(REF) + BLAKE2B_PROXY_CLASS_DECL(REF) +} diff --git a/library/cpp/digest/argonish/internal/proxies/ref/ya.make b/library/cpp/digest/argonish/internal/proxies/ref/ya.make index 08ac4bb77d..7a15f44611 100644 --- a/library/cpp/digest/argonish/internal/proxies/ref/ya.make +++ b/library/cpp/digest/argonish/internal/proxies/ref/ya.make @@ -1,17 +1,17 @@ -OWNER(e-sidorov) - -LIBRARY() - -NO_UTIL() - -PEERDIR( +OWNER(e-sidorov) + +LIBRARY() + +NO_UTIL() + +PEERDIR( library/cpp/digest/argonish/internal/proxies/macro library/cpp/digest/argonish/internal/argon2 library/cpp/digest/argonish/internal/blake2b -) - -SRCS( - proxy_ref.cpp -) - -END() +) + +SRCS( + proxy_ref.cpp +) + +END() diff --git a/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.cpp b/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.cpp index d56396cee8..3e63c9ad62 100644 --- a/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.cpp +++ b/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.cpp @@ -1,18 +1,18 @@ -// -// Created by Evgeny Sidorov on 12/04/17. -// - -#include "proxy_sse2.h" +// +// Created by Evgeny Sidorov on 12/04/17. +// + +#include "proxy_sse2.h" #include <library/cpp/digest/argonish/internal/argon2/argon2_base.h> #include <library/cpp/digest/argonish/internal/argon2/argon2_sse2.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b_sse2.h> - -#define ZEROUPPER ; - -namespace NArgonish { - ARGON2_PROXY_CLASS_IMPL(SSE2) - BLAKE2B_PROXY_CLASS_IMPL(SSE2) -} - -#undef ZEROUPPER + +#define ZEROUPPER ; + +namespace NArgonish { + ARGON2_PROXY_CLASS_IMPL(SSE2) + BLAKE2B_PROXY_CLASS_IMPL(SSE2) +} + +#undef ZEROUPPER diff --git a/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.h b/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.h index 553b5797a8..a2b74cd9a7 100644 --- a/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.h +++ b/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.h @@ -1,11 +1,11 @@ -#pragma once - -#include <util/generic/yexception.h> +#pragma once + +#include <util/generic/yexception.h> #include <library/cpp/digest/argonish/argon2.h> #include <library/cpp/digest/argonish/blake2b.h> #include <library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h> - -namespace NArgonish { - ARGON2_PROXY_CLASS_DECL(SSE2) - BLAKE2B_PROXY_CLASS_DECL(SSE2) -} + +namespace NArgonish { + ARGON2_PROXY_CLASS_DECL(SSE2) + BLAKE2B_PROXY_CLASS_DECL(SSE2) +} diff --git a/library/cpp/digest/argonish/internal/proxies/sse2/ya.make b/library/cpp/digest/argonish/internal/proxies/sse2/ya.make index 1529a982fa..1c752f0dd5 100644 --- a/library/cpp/digest/argonish/internal/proxies/sse2/ya.make +++ b/library/cpp/digest/argonish/internal/proxies/sse2/ya.make @@ -1,18 +1,18 @@ -OWNER(e-sidorov) - -LIBRARY() - -NO_UTIL() - -IF (ARCH_X86_64 OR ARCH_I386) - PEERDIR( - library/cpp/digest/argonish/internal/proxies/macro - library/cpp/digest/argonish/internal/argon2 - library/cpp/digest/argonish/internal/blake2b - ) - SRC_CPP_SSE2( - proxy_sse2.cpp - ) -ENDIF() - -END() +OWNER(e-sidorov) + +LIBRARY() + +NO_UTIL() + +IF (ARCH_X86_64 OR ARCH_I386) + PEERDIR( + library/cpp/digest/argonish/internal/proxies/macro + library/cpp/digest/argonish/internal/argon2 + library/cpp/digest/argonish/internal/blake2b + ) + SRC_CPP_SSE2( + proxy_sse2.cpp + ) +ENDIF() + +END() diff --git a/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.cpp b/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.cpp index fe1b28bf24..b633ad8cbf 100644 --- a/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.cpp +++ b/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.cpp @@ -1,18 +1,18 @@ -// -// Created by Evgeny Sidorov on 12/04/17. -// - -#include "proxy_sse41.h" +// +// Created by Evgeny Sidorov on 12/04/17. +// + +#include "proxy_sse41.h" #include <library/cpp/digest/argonish/internal/argon2/argon2_base.h> #include <library/cpp/digest/argonish/internal/argon2/argon2_sse41.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b_sse41.h> - -#define ZEROUPPER ; - -namespace NArgonish { - ARGON2_PROXY_CLASS_IMPL(SSE41) - BLAKE2B_PROXY_CLASS_IMPL(SSE41) -} - -#undef ZEROUPPER + +#define ZEROUPPER ; + +namespace NArgonish { + ARGON2_PROXY_CLASS_IMPL(SSE41) + BLAKE2B_PROXY_CLASS_IMPL(SSE41) +} + +#undef ZEROUPPER diff --git a/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.h b/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.h index c56f41750c..2a4b6614aa 100644 --- a/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.h +++ b/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.h @@ -1,11 +1,11 @@ -#pragma once - -#include <util/generic/yexception.h> +#pragma once + +#include <util/generic/yexception.h> #include <library/cpp/digest/argonish/argon2.h> #include <library/cpp/digest/argonish/blake2b.h> #include <library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h> - -namespace NArgonish { - ARGON2_PROXY_CLASS_DECL(SSE41) - BLAKE2B_PROXY_CLASS_DECL(SSE41) -} + +namespace NArgonish { + ARGON2_PROXY_CLASS_DECL(SSE41) + BLAKE2B_PROXY_CLASS_DECL(SSE41) +} diff --git a/library/cpp/digest/argonish/internal/proxies/sse41/ya.make b/library/cpp/digest/argonish/internal/proxies/sse41/ya.make index 5da63f0bbf..16a9922016 100644 --- a/library/cpp/digest/argonish/internal/proxies/sse41/ya.make +++ b/library/cpp/digest/argonish/internal/proxies/sse41/ya.make @@ -1,18 +1,18 @@ -OWNER(e-sidorov) - -LIBRARY() - -NO_UTIL() - -IF (ARCH_X86_64 OR ARCH_I386) - PEERDIR( - library/cpp/digest/argonish/internal/proxies/macro - library/cpp/digest/argonish/internal/argon2 - library/cpp/digest/argonish/internal/blake2b - ) - SRC_CPP_SSE41( - proxy_sse41.cpp - ) -ENDIF() - -END() +OWNER(e-sidorov) + +LIBRARY() + +NO_UTIL() + +IF (ARCH_X86_64 OR ARCH_I386) + PEERDIR( + library/cpp/digest/argonish/internal/proxies/macro + library/cpp/digest/argonish/internal/argon2 + library/cpp/digest/argonish/internal/blake2b + ) + SRC_CPP_SSE41( + proxy_sse41.cpp + ) +ENDIF() + +END() diff --git a/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.cpp b/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.cpp index 24b70e22d3..d77b55737c 100644 --- a/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.cpp +++ b/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.cpp @@ -1,18 +1,18 @@ -// -// Created by Evgeny Sidorov on 12/04/17. -// - -#include "proxy_ssse3.h" +// +// Created by Evgeny Sidorov on 12/04/17. +// + +#include "proxy_ssse3.h" #include <library/cpp/digest/argonish/internal/argon2/argon2_base.h> #include <library/cpp/digest/argonish/internal/argon2/argon2_ssse3.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b_ssse3.h> - -#define ZEROUPPER ; - -namespace NArgonish { - ARGON2_PROXY_CLASS_IMPL(SSSE3) - BLAKE2B_PROXY_CLASS_IMPL(SSSE3) -} - -#undef ZEROUPPER + +#define ZEROUPPER ; + +namespace NArgonish { + ARGON2_PROXY_CLASS_IMPL(SSSE3) + BLAKE2B_PROXY_CLASS_IMPL(SSSE3) +} + +#undef ZEROUPPER diff --git a/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.h b/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.h index 93be69e3c6..994133e88e 100644 --- a/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.h +++ b/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.h @@ -1,11 +1,11 @@ -#pragma once - -#include <util/generic/yexception.h> +#pragma once + +#include <util/generic/yexception.h> #include <library/cpp/digest/argonish/argon2.h> #include <library/cpp/digest/argonish/blake2b.h> #include <library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h> - -namespace NArgonish { - ARGON2_PROXY_CLASS_DECL(SSSE3) - BLAKE2B_PROXY_CLASS_DECL(SSSE3) -} + +namespace NArgonish { + ARGON2_PROXY_CLASS_DECL(SSSE3) + BLAKE2B_PROXY_CLASS_DECL(SSSE3) +} diff --git a/library/cpp/digest/argonish/internal/proxies/ssse3/ya.make b/library/cpp/digest/argonish/internal/proxies/ssse3/ya.make index e585a09fca..82d5116559 100644 --- a/library/cpp/digest/argonish/internal/proxies/ssse3/ya.make +++ b/library/cpp/digest/argonish/internal/proxies/ssse3/ya.make @@ -1,19 +1,19 @@ -LIBRARY() - -OWNER(e-sidorov) - -NO_UTIL() - -IF (ARCH_X86_64 OR ARCH_I386) - PEERDIR( - library/cpp/digest/argonish/internal/proxies/macro - library/cpp/digest/argonish/internal/argon2 - library/cpp/digest/argonish/internal/blake2b - ) - - SRC_CPP_SSSE3( - proxy_ssse3.cpp - ) -ENDIF() - -END() +LIBRARY() + +OWNER(e-sidorov) + +NO_UTIL() + +IF (ARCH_X86_64 OR ARCH_I386) + PEERDIR( + library/cpp/digest/argonish/internal/proxies/macro + library/cpp/digest/argonish/internal/argon2 + library/cpp/digest/argonish/internal/blake2b + ) + + SRC_CPP_SSSE3( + proxy_ssse3.cpp + ) +ENDIF() + +END() diff --git a/library/cpp/digest/argonish/internal/proxies/ya.make b/library/cpp/digest/argonish/internal/proxies/ya.make index f7cceda5f0..62bb1bcc50 100644 --- a/library/cpp/digest/argonish/internal/proxies/ya.make +++ b/library/cpp/digest/argonish/internal/proxies/ya.make @@ -1,8 +1,8 @@ -RECURSE( - avx2 - ref - sse2 - sse41 - ssse3 - macro -) +RECURSE( + avx2 + ref + sse2 + sse41 + ssse3 + macro +) diff --git a/library/cpp/digest/argonish/internal/rotations/rotations_avx2.h b/library/cpp/digest/argonish/internal/rotations/rotations_avx2.h index 6d1910d34c..81cd171f59 100644 --- a/library/cpp/digest/argonish/internal/rotations/rotations_avx2.h +++ b/library/cpp/digest/argonish/internal/rotations/rotations_avx2.h @@ -1,30 +1,30 @@ -#pragma once - -#include <immintrin.h> - -namespace NArgonish { - static inline void XorValues(__m256i* result, const __m256i* val1, const __m256i* val2) { - _mm256_storeu_si256(result, _mm256_xor_si256( - _mm256_loadu_si256(val1), _mm256_loadu_si256(val2))); - } - - static inline __m256i Rotr32(__m256i x) { - return _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1)); - } - - static inline __m256i Rotr24(__m256i x) { - return _mm256_shuffle_epi8(x, _mm256_setr_epi8( - 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, - 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)); - } - - static inline __m256i Rotr16(__m256i x) { - return _mm256_shuffle_epi8(x, _mm256_setr_epi8( - 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, - 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)); - } - - static inline __m256i Rotr63(__m256i x) { - return _mm256_xor_si256(_mm256_srli_epi64(x, 63), _mm256_add_epi64(x, x)); - } -} +#pragma once + +#include <immintrin.h> + +namespace NArgonish { + static inline void XorValues(__m256i* result, const __m256i* val1, const __m256i* val2) { + _mm256_storeu_si256(result, _mm256_xor_si256( + _mm256_loadu_si256(val1), _mm256_loadu_si256(val2))); + } + + static inline __m256i Rotr32(__m256i x) { + return _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1)); + } + + static inline __m256i Rotr24(__m256i x) { + return _mm256_shuffle_epi8(x, _mm256_setr_epi8( + 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10, + 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)); + } + + static inline __m256i Rotr16(__m256i x) { + return _mm256_shuffle_epi8(x, _mm256_setr_epi8( + 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9, + 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)); + } + + static inline __m256i Rotr63(__m256i x) { + return _mm256_xor_si256(_mm256_srli_epi64(x, 63), _mm256_add_epi64(x, x)); + } +} diff --git a/library/cpp/digest/argonish/internal/rotations/rotations_ref.h b/library/cpp/digest/argonish/internal/rotations/rotations_ref.h index 82ffcae640..6f59e233a5 100644 --- a/library/cpp/digest/argonish/internal/rotations/rotations_ref.h +++ b/library/cpp/digest/argonish/internal/rotations/rotations_ref.h @@ -1,7 +1,7 @@ -#pragma once - -namespace NArgonish { - static inline ui64 Rotr(const ui64 w, const unsigned c) { - return (w >> c) | (w << (64 - c)); - } -} +#pragma once + +namespace NArgonish { + static inline ui64 Rotr(const ui64 w, const unsigned c) { + return (w >> c) | (w << (64 - c)); + } +} diff --git a/library/cpp/digest/argonish/internal/rotations/rotations_sse2.h b/library/cpp/digest/argonish/internal/rotations/rotations_sse2.h index 9af07b67f5..55a10a31b0 100644 --- a/library/cpp/digest/argonish/internal/rotations/rotations_sse2.h +++ b/library/cpp/digest/argonish/internal/rotations/rotations_sse2.h @@ -1,27 +1,27 @@ -#pragma once - -#include <emmintrin.h> - -namespace NArgonish { - static inline void XorValues(__m128i* result, const __m128i* val1, const __m128i* val2) { - _mm_storeu_si128(result, _mm_xor_si128( - _mm_loadu_si128(val1), - _mm_loadu_si128(val2))); - } - - static inline __m128i Rotr32(__m128i x) { - return _mm_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1)); - } - - static inline __m128i Rotr24(__m128i x) { - return _mm_xor_si128(_mm_srli_epi64(x, 24), _mm_slli_epi64(x, 40)); - } - - static inline __m128i Rotr16(__m128i x) { - return _mm_xor_si128(_mm_srli_epi64(x, 16), _mm_slli_epi64(x, 48)); - } - - static inline __m128i Rotr63(__m128i x) { - return _mm_xor_si128(_mm_srli_epi64(x, 63), _mm_add_epi64(x, x)); - } -} +#pragma once + +#include <emmintrin.h> + +namespace NArgonish { + static inline void XorValues(__m128i* result, const __m128i* val1, const __m128i* val2) { + _mm_storeu_si128(result, _mm_xor_si128( + _mm_loadu_si128(val1), + _mm_loadu_si128(val2))); + } + + static inline __m128i Rotr32(__m128i x) { + return _mm_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1)); + } + + static inline __m128i Rotr24(__m128i x) { + return _mm_xor_si128(_mm_srli_epi64(x, 24), _mm_slli_epi64(x, 40)); + } + + static inline __m128i Rotr16(__m128i x) { + return _mm_xor_si128(_mm_srli_epi64(x, 16), _mm_slli_epi64(x, 48)); + } + + static inline __m128i Rotr63(__m128i x) { + return _mm_xor_si128(_mm_srli_epi64(x, 63), _mm_add_epi64(x, x)); + } +} diff --git a/library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h b/library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h index 88669dc76a..39c9c5491b 100644 --- a/library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h +++ b/library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h @@ -1,28 +1,28 @@ -#pragma once - -#include <emmintrin.h> -#include <tmmintrin.h> - -namespace NArgonish { - static inline void XorValues(__m128i* result, __m128i* val1, __m128i* val2) { - _mm_storeu_si128(result, _mm_xor_si128( - _mm_loadu_si128(val1), - _mm_loadu_si128(val2))); - } - - static inline __m128i Rotr32(__m128i x) { - return _mm_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1)); - } - - static inline __m128i Rotr24(__m128i x) { - return _mm_shuffle_epi8(x, _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)); - } - - static inline __m128i Rotr16(__m128i x) { - return _mm_shuffle_epi8(x, _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)); - } - - static inline __m128i Rotr63(__m128i x) { - return _mm_xor_si128(_mm_srli_epi64(x, 63), _mm_add_epi64(x, x)); - } -} +#pragma once + +#include <emmintrin.h> +#include <tmmintrin.h> + +namespace NArgonish { + static inline void XorValues(__m128i* result, __m128i* val1, __m128i* val2) { + _mm_storeu_si128(result, _mm_xor_si128( + _mm_loadu_si128(val1), + _mm_loadu_si128(val2))); + } + + static inline __m128i Rotr32(__m128i x) { + return _mm_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1)); + } + + static inline __m128i Rotr24(__m128i x) { + return _mm_shuffle_epi8(x, _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10)); + } + + static inline __m128i Rotr16(__m128i x) { + return _mm_shuffle_epi8(x, _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9)); + } + + static inline __m128i Rotr63(__m128i x) { + return _mm_xor_si128(_mm_srli_epi64(x, 63), _mm_add_epi64(x, x)); + } +} diff --git a/library/cpp/digest/argonish/internal/rotations/ya.make b/library/cpp/digest/argonish/internal/rotations/ya.make index b2b79b2b2a..5f639d4571 100644 --- a/library/cpp/digest/argonish/internal/rotations/ya.make +++ b/library/cpp/digest/argonish/internal/rotations/ya.make @@ -1,5 +1,5 @@ -LIBRARY() - -OWNER(e-sidorov) - -END() +LIBRARY() + +OWNER(e-sidorov) + +END() diff --git a/library/cpp/digest/argonish/internal/ya.make b/library/cpp/digest/argonish/internal/ya.make index 35003e964e..4a69395970 100644 --- a/library/cpp/digest/argonish/internal/ya.make +++ b/library/cpp/digest/argonish/internal/ya.make @@ -1,7 +1,7 @@ -RECURSE( - proxies - argon2 - blake2b - blamka - rotations -) +RECURSE( + proxies + argon2 + blake2b + blamka + rotations +) diff --git a/library/cpp/digest/argonish/ut/ut.cpp b/library/cpp/digest/argonish/ut/ut.cpp index 74417eec62..12ef530a18 100644 --- a/library/cpp/digest/argonish/ut/ut.cpp +++ b/library/cpp/digest/argonish/ut/ut.cpp @@ -1,9 +1,9 @@ #include <library/cpp/digest/argonish/argon2.h> #include <library/cpp/digest/argonish/blake2b.h> #include <library/cpp/testing/unittest/registar.h> - + Y_UNIT_TEST_SUITE(ArgonishTest) { - const ui8 GenKatPassword[32] = { + const ui8 GenKatPassword[32] = { 0x01, 0x01, 0x01, @@ -36,9 +36,9 @@ Y_UNIT_TEST_SUITE(ArgonishTest) { 0x01, 0x01, 0x01, - }; - - const ui8 GenKatSalt[16] = { + }; + + const ui8 GenKatSalt[16] = { 0x02, 0x02, 0x02, @@ -55,9 +55,9 @@ Y_UNIT_TEST_SUITE(ArgonishTest) { 0x02, 0x02, 0x02, - }; - - const ui8 GenKatSecret[8] = { + }; + + const ui8 GenKatSecret[8] = { 0x03, 0x03, 0x03, @@ -66,9 +66,9 @@ Y_UNIT_TEST_SUITE(ArgonishTest) { 0x03, 0x03, 0x03, - }; - - const ui8 FrPassword[13] = { + }; + + const ui8 FrPassword[13] = { 'm', 'e', 'g', @@ -82,9 +82,9 @@ Y_UNIT_TEST_SUITE(ArgonishTest) { 'o', 'r', 'd', - }; - - const ui8 FrSecret[16] = { + }; + + const ui8 FrSecret[16] = { 'm', 'e', 'g', @@ -101,9 +101,9 @@ Y_UNIT_TEST_SUITE(ArgonishTest) { 'e', 'y', '2', - }; - - const ui8 FrSalt[9] = { + }; + + const ui8 FrSalt[9] = { 'm', 'e', 'g', @@ -113,9 +113,9 @@ Y_UNIT_TEST_SUITE(ArgonishTest) { 'a', 'l', 't', - }; - - const ui8 GenKatAAD[12] = { + }; + + const ui8 GenKatAAD[12] = { 0x04, 0x04, 0x04, @@ -128,422 +128,422 @@ Y_UNIT_TEST_SUITE(ArgonishTest) { 0x04, 0x04, 0x04, - }; - + }; + Y_UNIT_TEST(Argon2_Fr_Test) { - const ui32 mcost = 16; - const ui32 tcost = 1; - TArrayHolder<ui8> memory(new ui8[mcost * 1024]); + const ui32 mcost = 16; + const ui32 tcost = 1; + TArrayHolder<ui8> memory(new ui8[mcost * 1024]); const ui8 TResult[165] = { - 0xe6, 0xff, 0x7b, 0xa1, 0xfa, 0x93, 0x0a, 0x51, - 0x24, 0xf3, 0xbc, 0xc4, 0x98, 0xe2, 0x32, 0x08, - 0x22, 0x7d, 0x4d, 0xf9, 0xe7, 0x2c, 0xd2, 0xd8, - 0x21, 0x6b, 0x3f, 0xf3, 0xfc, 0x3d, 0xa2, 0x79, - 0xb8, 0xdb, 0xfe, 0xfc, 0x18, 0xfa, 0x33, 0x08, - 0x45, 0x03, 0x5f, 0x21, 0x0d, 0xaa, 0x05, 0x5f, - 0x57, 0x6f, 0xc5, 0x4c, 0xba, 0xe7, 0xcd, 0xcf, - 0x8e, 0xc3, 0xc1, 0xab, 0x40, 0xea, 0x18, 0xca, - 0xe4, 0xa1, 0x08, 0x23, 0x54, 0x8d, 0xc0, 0x39, - 0x2c, 0xdd, 0x7e, 0x1f, 0x06, 0x6a, 0x2c, 0x25, - 0xe3, 0x7b, 0xbd, 0x45, 0xa3, 0xd8, 0xeb, 0x4c, - 0x44, 0xc3, 0x00, 0x52, 0x84, 0x35, 0x6c, 0x48, - 0xb2, 0xbc, 0x43, 0xd6, 0x58, 0xa9, 0x85, 0x78, - 0xe0, 0x2a, 0x6c, 0x0b, 0x42, 0x9a, 0x60, 0x62, - 0xc8, 0xe8, 0x62, 0x5d, 0xab, 0x3c, 0xfc, 0xba, - 0xe2, 0x84, 0x11, 0xda, 0x68, 0x79, 0x36, 0x13, - 0x84, 0x78, 0x6f, 0x47, 0x6a, 0x01, 0xd9, 0x94, - 0x8c, 0x6a, 0x81, 0xdc, 0x59, 0x82, 0xcd, 0x99, - 0x9b, 0x0a, 0x11, 0x8f, 0x1c, 0x09, 0x25, 0x2f, - 0x07, 0xf4, 0x50, 0xf9, 0x3f, 0xd7, 0xe7, 0x7b, - 0x0c, 0xc9, 0xe4, 0xc8, 0xe9}; - - try { - NArgonish::TArgon2Factory factory; - NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { - auto argon2d = factory.Create((NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, tcost, mcost, 1, - FrSecret, sizeof(FrSecret)); + 0xe6, 0xff, 0x7b, 0xa1, 0xfa, 0x93, 0x0a, 0x51, + 0x24, 0xf3, 0xbc, 0xc4, 0x98, 0xe2, 0x32, 0x08, + 0x22, 0x7d, 0x4d, 0xf9, 0xe7, 0x2c, 0xd2, 0xd8, + 0x21, 0x6b, 0x3f, 0xf3, 0xfc, 0x3d, 0xa2, 0x79, + 0xb8, 0xdb, 0xfe, 0xfc, 0x18, 0xfa, 0x33, 0x08, + 0x45, 0x03, 0x5f, 0x21, 0x0d, 0xaa, 0x05, 0x5f, + 0x57, 0x6f, 0xc5, 0x4c, 0xba, 0xe7, 0xcd, 0xcf, + 0x8e, 0xc3, 0xc1, 0xab, 0x40, 0xea, 0x18, 0xca, + 0xe4, 0xa1, 0x08, 0x23, 0x54, 0x8d, 0xc0, 0x39, + 0x2c, 0xdd, 0x7e, 0x1f, 0x06, 0x6a, 0x2c, 0x25, + 0xe3, 0x7b, 0xbd, 0x45, 0xa3, 0xd8, 0xeb, 0x4c, + 0x44, 0xc3, 0x00, 0x52, 0x84, 0x35, 0x6c, 0x48, + 0xb2, 0xbc, 0x43, 0xd6, 0x58, 0xa9, 0x85, 0x78, + 0xe0, 0x2a, 0x6c, 0x0b, 0x42, 0x9a, 0x60, 0x62, + 0xc8, 0xe8, 0x62, 0x5d, 0xab, 0x3c, 0xfc, 0xba, + 0xe2, 0x84, 0x11, 0xda, 0x68, 0x79, 0x36, 0x13, + 0x84, 0x78, 0x6f, 0x47, 0x6a, 0x01, 0xd9, 0x94, + 0x8c, 0x6a, 0x81, 0xdc, 0x59, 0x82, 0xcd, 0x99, + 0x9b, 0x0a, 0x11, 0x8f, 0x1c, 0x09, 0x25, 0x2f, + 0x07, 0xf4, 0x50, 0xf9, 0x3f, 0xd7, 0xe7, 0x7b, + 0x0c, 0xc9, 0xe4, 0xc8, 0xe9}; + + try { + NArgonish::TArgon2Factory factory; + NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { + auto argon2d = factory.Create((NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, tcost, mcost, 1, + FrSecret, sizeof(FrSecret)); ui8 hashResult[sizeof(TResult)]; - argon2d->HashWithCustomMemory(memory.Get(), mcost * 1024, FrPassword, sizeof(FrPassword), - FrSalt, sizeof(FrSalt), hashResult, sizeof(hashResult)); + argon2d->HashWithCustomMemory(memory.Get(), mcost * 1024, FrPassword, sizeof(FrPassword), + FrSalt, sizeof(FrSalt), hashResult, sizeof(hashResult)); UNIT_ASSERT(memcmp(hashResult, TResult, sizeof(hashResult)) == 0); - - UNIT_ASSERT(argon2d->VerifyWithCustomMemory(memory.Get(), mcost * 1024, FrPassword, sizeof(FrPassword), + + UNIT_ASSERT(argon2d->VerifyWithCustomMemory(memory.Get(), mcost * 1024, FrPassword, sizeof(FrPassword), FrSalt, sizeof(FrSalt), TResult, sizeof(TResult))); - } - } catch (...) { - UNIT_FAIL("Argon2 fraction len test fail"); - } - } - + } + } catch (...) { + UNIT_FAIL("Argon2 fraction len test fail"); + } + } + Y_UNIT_TEST(Argon2_Factory_SelfTest) { - try { - NArgonish::TArgon2Factory factory; - factory.GetInstructionSet(); - } catch (...) { - UNIT_FAIL("Argon2 factory self-test fail"); - } - } - + try { + NArgonish::TArgon2Factory factory; + factory.GetInstructionSet(); + } catch (...) { + UNIT_FAIL("Argon2 factory self-test fail"); + } + } + Y_UNIT_TEST(Blake2B_Factory_SelfTest) { - try { - NArgonish::TBlake2BFactory factory; - factory.GetInstructionSet(); - } catch (...) { - UNIT_FAIL("Blake2B factory self-test fail"); - } - } - + try { + NArgonish::TBlake2BFactory factory; + factory.GetInstructionSet(); + } catch (...) { + UNIT_FAIL("Blake2B factory self-test fail"); + } + } + Y_UNIT_TEST(Argon2d) { const ui8 TResult[32] = { - 0x7b, 0xa5, 0xa1, 0x7a, 0x72, 0xf7, 0xe5, 0x99, - 0x77, 0xf7, 0xf2, 0x3d, 0x10, 0xe6, 0x21, 0x89, - 0x8c, 0x63, 0xce, 0xbe, 0xed, 0xda, 0xbd, 0x15, - 0xd8, 0xc6, 0x8f, 0x53, 0xea, 0xb2, 0x1a, 0x32}; - - NArgonish::TArgon2Factory factory; - NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { - ui8 result[32]; - auto argon2d = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, - 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); - - argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x7b, 0xa5, 0xa1, 0x7a, 0x72, 0xf7, 0xe5, 0x99, + 0x77, 0xf7, 0xf2, 0x3d, 0x10, 0xe6, 0x21, 0x89, + 0x8c, 0x63, 0xce, 0xbe, 0xed, 0xda, 0xbd, 0x15, + 0xd8, 0xc6, 0x8f, 0x53, 0xea, 0xb2, 0x1a, 0x32}; + + NArgonish::TArgon2Factory factory; + NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { + ui8 result[32]; + auto argon2d = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, + 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); + + argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2i) { const ui8 TResult[32] = { - 0x87, 0x4d, 0x23, 0xfb, 0x9f, 0x55, 0xe2, 0xff, - 0x66, 0xbc, 0x19, 0x03, 0x46, 0xe7, 0x01, 0x19, - 0x7c, 0x9f, 0x25, 0xd1, 0x1d, 0xa4, 0x5a, 0xad, - 0x0d, 0x5d, 0x24, 0x19, 0x8a, 0xac, 0xd2, 0xbb}; - - NArgonish::TArgon2Factory factory; - NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { - ui8 result[32]; - auto argon2i = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2i, - 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); - - argon2i->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x87, 0x4d, 0x23, 0xfb, 0x9f, 0x55, 0xe2, 0xff, + 0x66, 0xbc, 0x19, 0x03, 0x46, 0xe7, 0x01, 0x19, + 0x7c, 0x9f, 0x25, 0xd1, 0x1d, 0xa4, 0x5a, 0xad, + 0x0d, 0x5d, 0x24, 0x19, 0x8a, 0xac, 0xd2, 0xbb}; + + NArgonish::TArgon2Factory factory; + NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { + ui8 result[32]; + auto argon2i = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2i, + 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); + + argon2i->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2i->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2i->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2id) { const ui8 TResult[32] = { - 0x99, 0xdf, 0xcf, 0xc2, 0x89, 0x76, 0x93, 0x9d, - 0xa2, 0x97, 0x09, 0x44, 0x34, 0xd8, 0x6f, 0xd0, - 0x0c, 0x94, 0x9a, 0x0f, 0x31, 0x8c, 0x22, 0xf0, - 0xcb, 0xb4, 0x69, 0xaa, 0xa8, 0x72, 0x18, 0xba}; - - NArgonish::TArgon2Factory factory; - NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { - ui8 result[32]; - auto argon2id = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2id, - 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); - - argon2id->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x99, 0xdf, 0xcf, 0xc2, 0x89, 0x76, 0x93, 0x9d, + 0xa2, 0x97, 0x09, 0x44, 0x34, 0xd8, 0x6f, 0xd0, + 0x0c, 0x94, 0x9a, 0x0f, 0x31, 0x8c, 0x22, 0xf0, + 0xcb, 0xb4, 0x69, 0xaa, 0xa8, 0x72, 0x18, 0xba}; + + NArgonish::TArgon2Factory factory; + NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { + ui8 result[32]; + auto argon2id = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2id, + 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); + + argon2id->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2id->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2id->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2d_2p) { const ui8 TResult[32] = { - 0x59, 0xb0, 0x94, 0x62, 0xcf, 0xdc, 0xd2, 0xb4, - 0x0a, 0xbd, 0x17, 0x81, 0x0a, 0x47, 0x4a, 0x8e, - 0xc1, 0xab, 0xb7, 0xc1, 0x8d, 0x07, 0x53, 0x7c, - 0xb9, 0x64, 0xa2, 0x59, 0x3f, 0xe9, 0xd9, 0xc5}; - - NArgonish::TArgon2Factory factory; - NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { - ui8 result[32]; - auto argon2d = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, - 2, 32, 1, GenKatSecret, sizeof(GenKatSecret)); - - argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x59, 0xb0, 0x94, 0x62, 0xcf, 0xdc, 0xd2, 0xb4, + 0x0a, 0xbd, 0x17, 0x81, 0x0a, 0x47, 0x4a, 0x8e, + 0xc1, 0xab, 0xb7, 0xc1, 0x8d, 0x07, 0x53, 0x7c, + 0xb9, 0x64, 0xa2, 0x59, 0x3f, 0xe9, 0xd9, 0xc5}; + + NArgonish::TArgon2Factory factory; + NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { + ui8 result[32]; + auto argon2d = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, + 2, 32, 1, GenKatSecret, sizeof(GenKatSecret)); + + argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2i_2p) { const ui8 TResult[32] = { - 0xc1, 0x0f, 0x00, 0x5e, 0xf8, 0x78, 0xc8, 0x07, - 0x0e, 0x2c, 0xc5, 0x2f, 0x57, 0x75, 0x25, 0xc9, - 0x71, 0xc7, 0x30, 0xeb, 0x00, 0x64, 0x4a, 0x4e, - 0x26, 0xd0, 0x6e, 0xad, 0x75, 0x46, 0xe0, 0x44}; - - NArgonish::TArgon2Factory factory; - NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { - ui8 result[32]; - auto argon2i = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2i, - 2, 32, 1, GenKatSecret, sizeof(GenKatSecret)); - - argon2i->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0xc1, 0x0f, 0x00, 0x5e, 0xf8, 0x78, 0xc8, 0x07, + 0x0e, 0x2c, 0xc5, 0x2f, 0x57, 0x75, 0x25, 0xc9, + 0x71, 0xc7, 0x30, 0xeb, 0x00, 0x64, 0x4a, 0x4e, + 0x26, 0xd0, 0x6e, 0xad, 0x75, 0x46, 0xe0, 0x44}; + + NArgonish::TArgon2Factory factory; + NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { + ui8 result[32]; + auto argon2i = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2i, + 2, 32, 1, GenKatSecret, sizeof(GenKatSecret)); + + argon2i->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2i->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2i->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2id_2p) { const ui8 TResult[32] = { - 0x6c, 0x00, 0xb7, 0xa9, 0x00, 0xe5, 0x00, 0x4c, - 0x24, 0x46, 0x9e, 0xc1, 0xe7, 0xc0, 0x1a, 0x99, - 0xb2, 0xb8, 0xf7, 0x73, 0x75, 0xd4, 0xec, 0xa7, - 0xd8, 0x08, 0x42, 0x11, 0xd3, 0x23, 0x6b, 0x7a}; - - NArgonish::TArgon2Factory factory; - NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { - ui8 result[32]; - auto argon2id = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2id, - 2, 32, 1, GenKatSecret, sizeof(GenKatSecret)); - - argon2id->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x6c, 0x00, 0xb7, 0xa9, 0x00, 0xe5, 0x00, 0x4c, + 0x24, 0x46, 0x9e, 0xc1, 0xe7, 0xc0, 0x1a, 0x99, + 0xb2, 0xb8, 0xf7, 0x73, 0x75, 0xd4, 0xec, 0xa7, + 0xd8, 0x08, 0x42, 0x11, 0xd3, 0x23, 0x6b, 0x7a}; + + NArgonish::TArgon2Factory factory; + NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { + ui8 result[32]; + auto argon2id = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2id, + 2, 32, 1, GenKatSecret, sizeof(GenKatSecret)); + + argon2id->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2id->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2id->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2d_2p_2th) { const ui8 TResult[32] = { - 0x2b, 0x47, 0x35, 0x39, 0x4a, 0x40, 0x3c, 0xc9, - 0x05, 0xfb, 0x51, 0x25, 0x96, 0x68, 0x64, 0x43, - 0x02, 0x16, 0x38, 0xa6, 0xc1, 0x58, 0xfc, 0x8d, - 0xbf, 0x35, 0x73, 0x9a, 0xdb, 0x31, 0x0c, 0x60}; - - NArgonish::TArgon2Factory factory; - NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { - ui8 result[32]; - auto argon2d = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, - 2, 32, 2, GenKatSecret, sizeof(GenKatSecret)); - - argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x2b, 0x47, 0x35, 0x39, 0x4a, 0x40, 0x3c, 0xc9, + 0x05, 0xfb, 0x51, 0x25, 0x96, 0x68, 0x64, 0x43, + 0x02, 0x16, 0x38, 0xa6, 0xc1, 0x58, 0xfc, 0x8d, + 0xbf, 0x35, 0x73, 0x9a, 0xdb, 0x31, 0x0c, 0x60}; + + NArgonish::TArgon2Factory factory; + NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { + ui8 result[32]; + auto argon2d = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, + 2, 32, 2, GenKatSecret, sizeof(GenKatSecret)); + + argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2id_2p_4th) { const ui8 TResult[32] = { - 0x4f, 0x93, 0xb5, 0xad, 0x78, 0xa4, 0xa9, 0x49, - 0xfb, 0xe3, 0x55, 0x96, 0xd5, 0xa0, 0xc2, 0xab, - 0x6f, 0x52, 0x2d, 0x2d, 0x29, 0xbc, 0x98, 0x49, - 0xca, 0x92, 0xaa, 0xae, 0xba, 0x05, 0x29, 0xd8}; - - NArgonish::TArgon2Factory factory; - NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { - ui8 result[32]; - auto argon2id = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2id, - 2, 64, 4, GenKatSecret, sizeof(GenKatSecret)); - - argon2id->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x4f, 0x93, 0xb5, 0xad, 0x78, 0xa4, 0xa9, 0x49, + 0xfb, 0xe3, 0x55, 0x96, 0xd5, 0xa0, 0xc2, 0xab, + 0x6f, 0x52, 0x2d, 0x2d, 0x29, 0xbc, 0x98, 0x49, + 0xca, 0x92, 0xaa, 0xae, 0xba, 0x05, 0x29, 0xd8}; + + NArgonish::TArgon2Factory factory; + NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { + ui8 result[32]; + auto argon2id = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2id, + 2, 64, 4, GenKatSecret, sizeof(GenKatSecret)); + + argon2id->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2id->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2id->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2d_2p_4th) { const ui8 TResult[32] = { - 0x8f, 0xa2, 0x7c, 0xed, 0x28, 0x38, 0x79, 0x0f, - 0xba, 0x5c, 0x11, 0x85, 0x1c, 0xdf, 0x90, 0x88, - 0xb2, 0x18, 0x44, 0xd7, 0xf0, 0x4c, 0x97, 0xb2, - 0xca, 0xaf, 0xe4, 0xdc, 0x61, 0x4c, 0xae, 0xb2}; - - NArgonish::TArgon2Factory factory; - NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { - ui8 result[32]; - auto argon2d = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, - 2, 64, 4, GenKatSecret, sizeof(GenKatSecret)); - - argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x8f, 0xa2, 0x7c, 0xed, 0x28, 0x38, 0x79, 0x0f, + 0xba, 0x5c, 0x11, 0x85, 0x1c, 0xdf, 0x90, 0x88, + 0xb2, 0x18, 0x44, 0xd7, 0xf0, 0x4c, 0x97, 0xb2, + 0xca, 0xaf, 0xe4, 0xdc, 0x61, 0x4c, 0xae, 0xb2}; + + NArgonish::TArgon2Factory factory; + NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { + ui8 result[32]; + auto argon2d = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, + 2, 64, 4, GenKatSecret, sizeof(GenKatSecret)); + + argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2i_2p_4th) { const ui8 TResult[32] = { - 0x61, 0x1c, 0x99, 0x3c, 0xb0, 0xb7, 0x23, 0x16, - 0xbd, 0xa2, 0x6c, 0x4c, 0x2f, 0xe8, 0x2d, 0x39, - 0x9c, 0x8f, 0x1c, 0xfd, 0x45, 0xd9, 0x58, 0xa9, - 0xb4, 0x9c, 0x6c, 0x64, 0xaf, 0xf0, 0x79, 0x0b}; - - NArgonish::TArgon2Factory factory; - NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { - ui8 result[32]; - auto argon2i = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2i, - 2, 64, 4, GenKatSecret, sizeof(GenKatSecret)); - - argon2i->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x61, 0x1c, 0x99, 0x3c, 0xb0, 0xb7, 0x23, 0x16, + 0xbd, 0xa2, 0x6c, 0x4c, 0x2f, 0xe8, 0x2d, 0x39, + 0x9c, 0x8f, 0x1c, 0xfd, 0x45, 0xd9, 0x58, 0xa9, + 0xb4, 0x9c, 0x6c, 0x64, 0xaf, 0xf0, 0x79, 0x0b}; + + NArgonish::TArgon2Factory factory; + NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { + ui8 result[32]; + auto argon2i = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2i, + 2, 64, 4, GenKatSecret, sizeof(GenKatSecret)); + + argon2i->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2i->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2i->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2d_128) { const ui8 TResult[128] = { - 0x4e, 0xc4, 0x6c, 0x4e, 0x8c, 0x32, 0x89, 0x65, - 0xf9, 0x82, 0x2b, 0x00, 0x95, 0x00, 0x50, 0x0a, - 0x72, 0x0d, 0xc5, 0x12, 0x8d, 0x6b, 0xbd, 0x84, - 0x7a, 0xf0, 0x78, 0x5d, 0xa6, 0x14, 0xe3, 0xf1, - 0xac, 0x07, 0x1c, 0xca, 0x12, 0x4d, 0x32, 0xa4, - 0x24, 0x08, 0x5e, 0x07, 0x7c, 0x26, 0xb9, 0x1b, - 0x5c, 0xc0, 0xff, 0xb8, 0x7a, 0x20, 0x00, 0xcb, - 0x07, 0x2b, 0xb4, 0x4d, 0x7b, 0x5b, 0x79, 0x9e, - 0xb4, 0x21, 0xcb, 0x63, 0xeb, 0x46, 0xd7, 0x79, - 0x44, 0x9c, 0x9f, 0xee, 0xa4, 0x17, 0xb5, 0x01, - 0x0f, 0x61, 0x7e, 0xd8, 0xec, 0x1b, 0xe3, 0x8b, - 0x9a, 0x74, 0x17, 0x19, 0x9d, 0x80, 0xe9, 0x20, - 0xd4, 0x84, 0xdd, 0x07, 0x40, 0xb2, 0x26, 0xdb, - 0xf7, 0xbe, 0x79, 0x7f, 0x81, 0x59, 0x86, 0xf3, - 0xe9, 0x34, 0xe4, 0x52, 0xcd, 0x33, 0xb9, 0xf8, - 0x9e, 0x62, 0x65, 0x89, 0xbb, 0xce, 0x7d, 0x65}; - - NArgonish::TArgon2Factory factory; - NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { - ui8 result[128]; - auto argon2d = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, - 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); - - argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x4e, 0xc4, 0x6c, 0x4e, 0x8c, 0x32, 0x89, 0x65, + 0xf9, 0x82, 0x2b, 0x00, 0x95, 0x00, 0x50, 0x0a, + 0x72, 0x0d, 0xc5, 0x12, 0x8d, 0x6b, 0xbd, 0x84, + 0x7a, 0xf0, 0x78, 0x5d, 0xa6, 0x14, 0xe3, 0xf1, + 0xac, 0x07, 0x1c, 0xca, 0x12, 0x4d, 0x32, 0xa4, + 0x24, 0x08, 0x5e, 0x07, 0x7c, 0x26, 0xb9, 0x1b, + 0x5c, 0xc0, 0xff, 0xb8, 0x7a, 0x20, 0x00, 0xcb, + 0x07, 0x2b, 0xb4, 0x4d, 0x7b, 0x5b, 0x79, 0x9e, + 0xb4, 0x21, 0xcb, 0x63, 0xeb, 0x46, 0xd7, 0x79, + 0x44, 0x9c, 0x9f, 0xee, 0xa4, 0x17, 0xb5, 0x01, + 0x0f, 0x61, 0x7e, 0xd8, 0xec, 0x1b, 0xe3, 0x8b, + 0x9a, 0x74, 0x17, 0x19, 0x9d, 0x80, 0xe9, 0x20, + 0xd4, 0x84, 0xdd, 0x07, 0x40, 0xb2, 0x26, 0xdb, + 0xf7, 0xbe, 0x79, 0x7f, 0x81, 0x59, 0x86, 0xf3, + 0xe9, 0x34, 0xe4, 0x52, 0xcd, 0x33, 0xb9, 0xf8, + 0x9e, 0x62, 0x65, 0x89, 0xbb, 0xce, 0x7d, 0x65}; + + NArgonish::TArgon2Factory factory; + NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { + ui8 result[128]; + auto argon2d = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, + 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); + + argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Blake2B_16_ABC) { const ui8 TResult[16] = { - 0xcf, 0x4a, 0xb7, 0x91, 0xc6, 0x2b, 0x8d, 0x2b, - 0x21, 0x09, 0xc9, 0x02, 0x75, 0x28, 0x78, 0x16}; - const ui8 data[] = {'a', 'b', 'c'}; - - NArgonish::TBlake2BFactory factory; - NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { + 0xcf, 0x4a, 0xb7, 0x91, 0xc6, 0x2b, 0x8d, 0x2b, + 0x21, 0x09, 0xc9, 0x02, 0x75, 0x28, 0x78, 0x16}; + const ui8 data[] = {'a', 'b', 'c'}; + + NArgonish::TBlake2BFactory factory; + NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { auto blake2b = factory.Create((NArgonish::EInstructionSet)i, sizeof(TResult)); - ui8 hashResult[16] = {0}; - - blake2b->Update(data, sizeof(data)); - blake2b->Final(hashResult, sizeof(hashResult)); - + ui8 hashResult[16] = {0}; + + blake2b->Update(data, sizeof(data)); + blake2b->Final(hashResult, sizeof(hashResult)); + UNIT_ASSERT(memcmp(hashResult, TResult, sizeof(TResult)) == 0); - } - } - + } + } + Y_UNIT_TEST(Blake2B_64_ABC) { const ui8 TResult[64] = { - 0xba, 0x80, 0xa5, 0x3f, 0x98, 0x1c, 0x4d, 0x0d, - 0x6a, 0x27, 0x97, 0xb6, 0x9f, 0x12, 0xf6, 0xe9, - 0x4c, 0x21, 0x2f, 0x14, 0x68, 0x5a, 0xc4, 0xb7, - 0x4b, 0x12, 0xbb, 0x6f, 0xdb, 0xff, 0xa2, 0xd1, - 0x7d, 0x87, 0xc5, 0x39, 0x2a, 0xab, 0x79, 0x2d, - 0xc2, 0x52, 0xd5, 0xde, 0x45, 0x33, 0xcc, 0x95, - 0x18, 0xd3, 0x8a, 0xa8, 0xdb, 0xf1, 0x92, 0x5a, - 0xb9, 0x23, 0x86, 0xed, 0xd4, 0x00, 0x99, 0x23}; - const ui8 data[] = {'a', 'b', 'c'}; - - NArgonish::TBlake2BFactory factory; - NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { + 0xba, 0x80, 0xa5, 0x3f, 0x98, 0x1c, 0x4d, 0x0d, + 0x6a, 0x27, 0x97, 0xb6, 0x9f, 0x12, 0xf6, 0xe9, + 0x4c, 0x21, 0x2f, 0x14, 0x68, 0x5a, 0xc4, 0xb7, + 0x4b, 0x12, 0xbb, 0x6f, 0xdb, 0xff, 0xa2, 0xd1, + 0x7d, 0x87, 0xc5, 0x39, 0x2a, 0xab, 0x79, 0x2d, + 0xc2, 0x52, 0xd5, 0xde, 0x45, 0x33, 0xcc, 0x95, + 0x18, 0xd3, 0x8a, 0xa8, 0xdb, 0xf1, 0x92, 0x5a, + 0xb9, 0x23, 0x86, 0xed, 0xd4, 0x00, 0x99, 0x23}; + const ui8 data[] = {'a', 'b', 'c'}; + + NArgonish::TBlake2BFactory factory; + NArgonish::EInstructionSet maxInstructionSet = factory.GetInstructionSet(); + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)maxInstructionSet; ++i) { auto blake2b = factory.Create((NArgonish::EInstructionSet)i, sizeof(TResult)); - ui8 hashResult[64] = {0}; - - blake2b->Update(data, sizeof(data)); - blake2b->Final(hashResult, sizeof(hashResult)); - + ui8 hashResult[64] = {0}; + + blake2b->Update(data, sizeof(data)); + blake2b->Final(hashResult, sizeof(hashResult)); + UNIT_ASSERT(memcmp(hashResult, TResult, sizeof(TResult)) == 0); - } - } -} + } + } +} diff --git a/library/cpp/digest/argonish/ut/ya.make b/library/cpp/digest/argonish/ut/ya.make index 4167ea5032..3440908799 100644 --- a/library/cpp/digest/argonish/ut/ya.make +++ b/library/cpp/digest/argonish/ut/ya.make @@ -1,13 +1,13 @@ UNITTEST_FOR(library/cpp/digest/argonish) - -OWNER(e-sidorov) - + +OWNER(e-sidorov) + PEERDIR( library/cpp/digest/argonish ) - + SRCS( ut.cpp ) - -END() + +END() diff --git a/library/cpp/digest/argonish/ut_fat/ut.cpp b/library/cpp/digest/argonish/ut_fat/ut.cpp index c69ddfe1fc..41fa001685 100644 --- a/library/cpp/digest/argonish/ut_fat/ut.cpp +++ b/library/cpp/digest/argonish/ut_fat/ut.cpp @@ -1,9 +1,9 @@ #include <library/cpp/digest/argonish/argon2.h> #include <library/cpp/digest/argonish/blake2b.h> #include <library/cpp/testing/unittest/registar.h> - + Y_UNIT_TEST_SUITE(ArgonishTest) { - const ui8 GenKatPassword[32] = { + const ui8 GenKatPassword[32] = { 0x01, 0x01, 0x01, @@ -36,9 +36,9 @@ Y_UNIT_TEST_SUITE(ArgonishTest) { 0x01, 0x01, 0x01, - }; - - const ui8 GenKatSalt[16] = { + }; + + const ui8 GenKatSalt[16] = { 0x02, 0x02, 0x02, @@ -55,9 +55,9 @@ Y_UNIT_TEST_SUITE(ArgonishTest) { 0x02, 0x02, 0x02, - }; - - const ui8 GenKatSecret[8] = { + }; + + const ui8 GenKatSecret[8] = { 0x03, 0x03, 0x03, @@ -66,9 +66,9 @@ Y_UNIT_TEST_SUITE(ArgonishTest) { 0x03, 0x03, 0x03, - }; - - const ui8 GenKatAAD[12] = { + }; + + const ui8 GenKatAAD[12] = { 0x04, 0x04, 0x04, @@ -81,361 +81,361 @@ Y_UNIT_TEST_SUITE(ArgonishTest) { 0x04, 0x04, 0x04, - }; - - constexpr NArgonish::EInstructionSet MaxArch = -#if !defined(_arm64_) - NArgonish::EInstructionSet::AVX2 -#else - NArgonish::EInstructionSet::REF -#endif - ; - + }; + + constexpr NArgonish::EInstructionSet MaxArch = +#if !defined(_arm64_) + NArgonish::EInstructionSet::AVX2 +#else + NArgonish::EInstructionSet::REF +#endif + ; + Y_UNIT_TEST(Argon2_Factory_SelfTest) { - try { - NArgonish::TArgon2Factory factory; - } catch (...) { - UNIT_FAIL("Argon2 factory self-test fail"); - } - } - + try { + NArgonish::TArgon2Factory factory; + } catch (...) { + UNIT_FAIL("Argon2 factory self-test fail"); + } + } + Y_UNIT_TEST(Argon2d) { const ui8 TResult[32] = { - 0x7b, 0xa5, 0xa1, 0x7a, 0x72, 0xf7, 0xe5, 0x99, - 0x77, 0xf7, 0xf2, 0x3d, 0x10, 0xe6, 0x21, 0x89, - 0x8c, 0x63, 0xce, 0xbe, 0xed, 0xda, 0xbd, 0x15, - 0xd8, 0xc6, 0x8f, 0x53, 0xea, 0xb2, 0x1a, 0x32}; - - NArgonish::TArgon2Factory factory; - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { - ui8 result[32]; - auto argon2d = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, - 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); - - argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x7b, 0xa5, 0xa1, 0x7a, 0x72, 0xf7, 0xe5, 0x99, + 0x77, 0xf7, 0xf2, 0x3d, 0x10, 0xe6, 0x21, 0x89, + 0x8c, 0x63, 0xce, 0xbe, 0xed, 0xda, 0xbd, 0x15, + 0xd8, 0xc6, 0x8f, 0x53, 0xea, 0xb2, 0x1a, 0x32}; + + NArgonish::TArgon2Factory factory; + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { + ui8 result[32]; + auto argon2d = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, + 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); + + argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2i) { const ui8 TResult[32] = { - 0x87, 0x4d, 0x23, 0xfb, 0x9f, 0x55, 0xe2, 0xff, - 0x66, 0xbc, 0x19, 0x03, 0x46, 0xe7, 0x01, 0x19, - 0x7c, 0x9f, 0x25, 0xd1, 0x1d, 0xa4, 0x5a, 0xad, - 0x0d, 0x5d, 0x24, 0x19, 0x8a, 0xac, 0xd2, 0xbb}; - - NArgonish::TArgon2Factory factory; - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { - ui8 result[32]; - auto argon2i = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2i, - 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); - - argon2i->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x87, 0x4d, 0x23, 0xfb, 0x9f, 0x55, 0xe2, 0xff, + 0x66, 0xbc, 0x19, 0x03, 0x46, 0xe7, 0x01, 0x19, + 0x7c, 0x9f, 0x25, 0xd1, 0x1d, 0xa4, 0x5a, 0xad, + 0x0d, 0x5d, 0x24, 0x19, 0x8a, 0xac, 0xd2, 0xbb}; + + NArgonish::TArgon2Factory factory; + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { + ui8 result[32]; + auto argon2i = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2i, + 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); + + argon2i->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2i->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2i->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2id) { const ui8 TResult[32] = { - 0x99, 0xdf, 0xcf, 0xc2, 0x89, 0x76, 0x93, 0x9d, - 0xa2, 0x97, 0x09, 0x44, 0x34, 0xd8, 0x6f, 0xd0, - 0x0c, 0x94, 0x9a, 0x0f, 0x31, 0x8c, 0x22, 0xf0, - 0xcb, 0xb4, 0x69, 0xaa, 0xa8, 0x72, 0x18, 0xba}; - - NArgonish::TArgon2Factory factory; - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { - ui8 result[32]; - auto argon2id = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2id, - 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); - - argon2id->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x99, 0xdf, 0xcf, 0xc2, 0x89, 0x76, 0x93, 0x9d, + 0xa2, 0x97, 0x09, 0x44, 0x34, 0xd8, 0x6f, 0xd0, + 0x0c, 0x94, 0x9a, 0x0f, 0x31, 0x8c, 0x22, 0xf0, + 0xcb, 0xb4, 0x69, 0xaa, 0xa8, 0x72, 0x18, 0xba}; + + NArgonish::TArgon2Factory factory; + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { + ui8 result[32]; + auto argon2id = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2id, + 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); + + argon2id->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2id->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2id->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2d_2p) { const ui8 TResult[32] = { - 0x59, 0xb0, 0x94, 0x62, 0xcf, 0xdc, 0xd2, 0xb4, - 0x0a, 0xbd, 0x17, 0x81, 0x0a, 0x47, 0x4a, 0x8e, - 0xc1, 0xab, 0xb7, 0xc1, 0x8d, 0x07, 0x53, 0x7c, - 0xb9, 0x64, 0xa2, 0x59, 0x3f, 0xe9, 0xd9, 0xc5}; - - NArgonish::TArgon2Factory factory; - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { - ui8 result[32]; - auto argon2d = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, - 2, 32, 1, GenKatSecret, sizeof(GenKatSecret)); - - argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x59, 0xb0, 0x94, 0x62, 0xcf, 0xdc, 0xd2, 0xb4, + 0x0a, 0xbd, 0x17, 0x81, 0x0a, 0x47, 0x4a, 0x8e, + 0xc1, 0xab, 0xb7, 0xc1, 0x8d, 0x07, 0x53, 0x7c, + 0xb9, 0x64, 0xa2, 0x59, 0x3f, 0xe9, 0xd9, 0xc5}; + + NArgonish::TArgon2Factory factory; + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { + ui8 result[32]; + auto argon2d = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, + 2, 32, 1, GenKatSecret, sizeof(GenKatSecret)); + + argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2i_2p) { const ui8 TResult[32] = { - 0xc1, 0x0f, 0x00, 0x5e, 0xf8, 0x78, 0xc8, 0x07, - 0x0e, 0x2c, 0xc5, 0x2f, 0x57, 0x75, 0x25, 0xc9, - 0x71, 0xc7, 0x30, 0xeb, 0x00, 0x64, 0x4a, 0x4e, - 0x26, 0xd0, 0x6e, 0xad, 0x75, 0x46, 0xe0, 0x44}; - - NArgonish::TArgon2Factory factory; - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { - ui8 result[32]; - auto argon2i = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2i, - 2, 32, 1, GenKatSecret, sizeof(GenKatSecret)); - - argon2i->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0xc1, 0x0f, 0x00, 0x5e, 0xf8, 0x78, 0xc8, 0x07, + 0x0e, 0x2c, 0xc5, 0x2f, 0x57, 0x75, 0x25, 0xc9, + 0x71, 0xc7, 0x30, 0xeb, 0x00, 0x64, 0x4a, 0x4e, + 0x26, 0xd0, 0x6e, 0xad, 0x75, 0x46, 0xe0, 0x44}; + + NArgonish::TArgon2Factory factory; + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { + ui8 result[32]; + auto argon2i = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2i, + 2, 32, 1, GenKatSecret, sizeof(GenKatSecret)); + + argon2i->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2i->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2i->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2id_2p) { const ui8 TResult[32] = { - 0x6c, 0x00, 0xb7, 0xa9, 0x00, 0xe5, 0x00, 0x4c, - 0x24, 0x46, 0x9e, 0xc1, 0xe7, 0xc0, 0x1a, 0x99, - 0xb2, 0xb8, 0xf7, 0x73, 0x75, 0xd4, 0xec, 0xa7, - 0xd8, 0x08, 0x42, 0x11, 0xd3, 0x23, 0x6b, 0x7a}; - - NArgonish::TArgon2Factory factory; - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { - ui8 result[32]; - auto argon2id = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2id, - 2, 32, 1, GenKatSecret, sizeof(GenKatSecret)); - - argon2id->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x6c, 0x00, 0xb7, 0xa9, 0x00, 0xe5, 0x00, 0x4c, + 0x24, 0x46, 0x9e, 0xc1, 0xe7, 0xc0, 0x1a, 0x99, + 0xb2, 0xb8, 0xf7, 0x73, 0x75, 0xd4, 0xec, 0xa7, + 0xd8, 0x08, 0x42, 0x11, 0xd3, 0x23, 0x6b, 0x7a}; + + NArgonish::TArgon2Factory factory; + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { + ui8 result[32]; + auto argon2id = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2id, + 2, 32, 1, GenKatSecret, sizeof(GenKatSecret)); + + argon2id->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2id->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2id->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2d_2p_2th) { const ui8 TResult[32] = { - 0x2b, 0x47, 0x35, 0x39, 0x4a, 0x40, 0x3c, 0xc9, - 0x05, 0xfb, 0x51, 0x25, 0x96, 0x68, 0x64, 0x43, - 0x02, 0x16, 0x38, 0xa6, 0xc1, 0x58, 0xfc, 0x8d, - 0xbf, 0x35, 0x73, 0x9a, 0xdb, 0x31, 0x0c, 0x60}; - - NArgonish::TArgon2Factory factory; - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { - ui8 result[32]; - auto argon2d = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, - 2, 32, 2, GenKatSecret, sizeof(GenKatSecret)); - - argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x2b, 0x47, 0x35, 0x39, 0x4a, 0x40, 0x3c, 0xc9, + 0x05, 0xfb, 0x51, 0x25, 0x96, 0x68, 0x64, 0x43, + 0x02, 0x16, 0x38, 0xa6, 0xc1, 0x58, 0xfc, 0x8d, + 0xbf, 0x35, 0x73, 0x9a, 0xdb, 0x31, 0x0c, 0x60}; + + NArgonish::TArgon2Factory factory; + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { + ui8 result[32]; + auto argon2d = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, + 2, 32, 2, GenKatSecret, sizeof(GenKatSecret)); + + argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2id_2p_4th) { const ui8 TResult[32] = { - 0x4f, 0x93, 0xb5, 0xad, 0x78, 0xa4, 0xa9, 0x49, - 0xfb, 0xe3, 0x55, 0x96, 0xd5, 0xa0, 0xc2, 0xab, - 0x6f, 0x52, 0x2d, 0x2d, 0x29, 0xbc, 0x98, 0x49, - 0xca, 0x92, 0xaa, 0xae, 0xba, 0x05, 0x29, 0xd8}; - - NArgonish::TArgon2Factory factory; - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { - ui8 result[32]; - auto argon2id = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2id, - 2, 64, 4, GenKatSecret, sizeof(GenKatSecret)); - - argon2id->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x4f, 0x93, 0xb5, 0xad, 0x78, 0xa4, 0xa9, 0x49, + 0xfb, 0xe3, 0x55, 0x96, 0xd5, 0xa0, 0xc2, 0xab, + 0x6f, 0x52, 0x2d, 0x2d, 0x29, 0xbc, 0x98, 0x49, + 0xca, 0x92, 0xaa, 0xae, 0xba, 0x05, 0x29, 0xd8}; + + NArgonish::TArgon2Factory factory; + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { + ui8 result[32]; + auto argon2id = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2id, + 2, 64, 4, GenKatSecret, sizeof(GenKatSecret)); + + argon2id->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2id->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2id->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2d_2p_4th) { const ui8 TResult[32] = { - 0x8f, 0xa2, 0x7c, 0xed, 0x28, 0x38, 0x79, 0x0f, - 0xba, 0x5c, 0x11, 0x85, 0x1c, 0xdf, 0x90, 0x88, - 0xb2, 0x18, 0x44, 0xd7, 0xf0, 0x4c, 0x97, 0xb2, - 0xca, 0xaf, 0xe4, 0xdc, 0x61, 0x4c, 0xae, 0xb2}; - - NArgonish::TArgon2Factory factory; - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { - ui8 result[32]; - auto argon2d = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, - 2, 64, 4, GenKatSecret, sizeof(GenKatSecret)); - - argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x8f, 0xa2, 0x7c, 0xed, 0x28, 0x38, 0x79, 0x0f, + 0xba, 0x5c, 0x11, 0x85, 0x1c, 0xdf, 0x90, 0x88, + 0xb2, 0x18, 0x44, 0xd7, 0xf0, 0x4c, 0x97, 0xb2, + 0xca, 0xaf, 0xe4, 0xdc, 0x61, 0x4c, 0xae, 0xb2}; + + NArgonish::TArgon2Factory factory; + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { + ui8 result[32]; + auto argon2d = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, + 2, 64, 4, GenKatSecret, sizeof(GenKatSecret)); + + argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2i_2p_4th) { const ui8 TResult[32] = { - 0x61, 0x1c, 0x99, 0x3c, 0xb0, 0xb7, 0x23, 0x16, - 0xbd, 0xa2, 0x6c, 0x4c, 0x2f, 0xe8, 0x2d, 0x39, - 0x9c, 0x8f, 0x1c, 0xfd, 0x45, 0xd9, 0x58, 0xa9, - 0xb4, 0x9c, 0x6c, 0x64, 0xaf, 0xf0, 0x79, 0x0b}; - - NArgonish::TArgon2Factory factory; - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { - ui8 result[32]; - auto argon2i = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2i, - 2, 64, 4, GenKatSecret, sizeof(GenKatSecret)); - - argon2i->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x61, 0x1c, 0x99, 0x3c, 0xb0, 0xb7, 0x23, 0x16, + 0xbd, 0xa2, 0x6c, 0x4c, 0x2f, 0xe8, 0x2d, 0x39, + 0x9c, 0x8f, 0x1c, 0xfd, 0x45, 0xd9, 0x58, 0xa9, + 0xb4, 0x9c, 0x6c, 0x64, 0xaf, 0xf0, 0x79, 0x0b}; + + NArgonish::TArgon2Factory factory; + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { + ui8 result[32]; + auto argon2i = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2i, + 2, 64, 4, GenKatSecret, sizeof(GenKatSecret)); + + argon2i->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2i->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2i->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Argon2d_128) { const ui8 TResult[128] = { - 0x4e, 0xc4, 0x6c, 0x4e, 0x8c, 0x32, 0x89, 0x65, - 0xf9, 0x82, 0x2b, 0x00, 0x95, 0x00, 0x50, 0x0a, - 0x72, 0x0d, 0xc5, 0x12, 0x8d, 0x6b, 0xbd, 0x84, - 0x7a, 0xf0, 0x78, 0x5d, 0xa6, 0x14, 0xe3, 0xf1, - 0xac, 0x07, 0x1c, 0xca, 0x12, 0x4d, 0x32, 0xa4, - 0x24, 0x08, 0x5e, 0x07, 0x7c, 0x26, 0xb9, 0x1b, - 0x5c, 0xc0, 0xff, 0xb8, 0x7a, 0x20, 0x00, 0xcb, - 0x07, 0x2b, 0xb4, 0x4d, 0x7b, 0x5b, 0x79, 0x9e, - 0xb4, 0x21, 0xcb, 0x63, 0xeb, 0x46, 0xd7, 0x79, - 0x44, 0x9c, 0x9f, 0xee, 0xa4, 0x17, 0xb5, 0x01, - 0x0f, 0x61, 0x7e, 0xd8, 0xec, 0x1b, 0xe3, 0x8b, - 0x9a, 0x74, 0x17, 0x19, 0x9d, 0x80, 0xe9, 0x20, - 0xd4, 0x84, 0xdd, 0x07, 0x40, 0xb2, 0x26, 0xdb, - 0xf7, 0xbe, 0x79, 0x7f, 0x81, 0x59, 0x86, 0xf3, - 0xe9, 0x34, 0xe4, 0x52, 0xcd, 0x33, 0xb9, 0xf8, - 0x9e, 0x62, 0x65, 0x89, 0xbb, 0xce, 0x7d, 0x65}; - - NArgonish::TArgon2Factory factory; - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { - ui8 result[128]; - auto argon2d = factory.Create( - (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, - 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); - - argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), - result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); - + 0x4e, 0xc4, 0x6c, 0x4e, 0x8c, 0x32, 0x89, 0x65, + 0xf9, 0x82, 0x2b, 0x00, 0x95, 0x00, 0x50, 0x0a, + 0x72, 0x0d, 0xc5, 0x12, 0x8d, 0x6b, 0xbd, 0x84, + 0x7a, 0xf0, 0x78, 0x5d, 0xa6, 0x14, 0xe3, 0xf1, + 0xac, 0x07, 0x1c, 0xca, 0x12, 0x4d, 0x32, 0xa4, + 0x24, 0x08, 0x5e, 0x07, 0x7c, 0x26, 0xb9, 0x1b, + 0x5c, 0xc0, 0xff, 0xb8, 0x7a, 0x20, 0x00, 0xcb, + 0x07, 0x2b, 0xb4, 0x4d, 0x7b, 0x5b, 0x79, 0x9e, + 0xb4, 0x21, 0xcb, 0x63, 0xeb, 0x46, 0xd7, 0x79, + 0x44, 0x9c, 0x9f, 0xee, 0xa4, 0x17, 0xb5, 0x01, + 0x0f, 0x61, 0x7e, 0xd8, 0xec, 0x1b, 0xe3, 0x8b, + 0x9a, 0x74, 0x17, 0x19, 0x9d, 0x80, 0xe9, 0x20, + 0xd4, 0x84, 0xdd, 0x07, 0x40, 0xb2, 0x26, 0xdb, + 0xf7, 0xbe, 0x79, 0x7f, 0x81, 0x59, 0x86, 0xf3, + 0xe9, 0x34, 0xe4, 0x52, 0xcd, 0x33, 0xb9, 0xf8, + 0x9e, 0x62, 0x65, 0x89, 0xbb, 0xce, 0x7d, 0x65}; + + NArgonish::TArgon2Factory factory; + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { + ui8 result[128]; + auto argon2d = factory.Create( + (NArgonish::EInstructionSet)i, NArgonish::EArgon2Type::Argon2d, + 1, 32, 1, GenKatSecret, sizeof(GenKatSecret)); + + argon2d->Hash(GenKatPassword, sizeof(GenKatPassword), GenKatSalt, sizeof(GenKatSalt), + result, sizeof(result), GenKatAAD, sizeof(GenKatAAD)); + UNIT_ASSERT(memcmp(result, TResult, sizeof(result)) == 0); - - UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), - GenKatSalt, sizeof(GenKatSalt), + + UNIT_ASSERT(argon2d->Verify(GenKatPassword, sizeof(GenKatPassword), + GenKatSalt, sizeof(GenKatSalt), TResult, sizeof(TResult), - GenKatAAD, sizeof(GenKatAAD))); - } - } - + GenKatAAD, sizeof(GenKatAAD))); + } + } + Y_UNIT_TEST(Blake2B_16_ABC) { const ui8 TResult[16] = { - 0xcf, 0x4a, 0xb7, 0x91, 0xc6, 0x2b, 0x8d, 0x2b, - 0x21, 0x09, 0xc9, 0x02, 0x75, 0x28, 0x78, 0x16}; - const ui8 data[] = {'a', 'b', 'c'}; - - NArgonish::TBlake2BFactory factory; - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { + 0xcf, 0x4a, 0xb7, 0x91, 0xc6, 0x2b, 0x8d, 0x2b, + 0x21, 0x09, 0xc9, 0x02, 0x75, 0x28, 0x78, 0x16}; + const ui8 data[] = {'a', 'b', 'c'}; + + NArgonish::TBlake2BFactory factory; + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)MaxArch; ++i) { auto blake2b = factory.Create((NArgonish::EInstructionSet)i, sizeof(TResult)); - ui8 hashResult[16] = {0}; - - blake2b->Update(data, sizeof(data)); - blake2b->Final(hashResult, sizeof(hashResult)); - + ui8 hashResult[16] = {0}; + + blake2b->Update(data, sizeof(data)); + blake2b->Final(hashResult, sizeof(hashResult)); + UNIT_ASSERT(memcmp(hashResult, TResult, sizeof(TResult)) == 0); - } - } - + } + } + Y_UNIT_TEST(Blake2B_64_ABC) { const ui8 TResult[64] = { - 0xba, 0x80, 0xa5, 0x3f, 0x98, 0x1c, 0x4d, 0x0d, - 0x6a, 0x27, 0x97, 0xb6, 0x9f, 0x12, 0xf6, 0xe9, - 0x4c, 0x21, 0x2f, 0x14, 0x68, 0x5a, 0xc4, 0xb7, - 0x4b, 0x12, 0xbb, 0x6f, 0xdb, 0xff, 0xa2, 0xd1, - 0x7d, 0x87, 0xc5, 0x39, 0x2a, 0xab, 0x79, 0x2d, - 0xc2, 0x52, 0xd5, 0xde, 0x45, 0x33, 0xcc, 0x95, - 0x18, 0xd3, 0x8a, 0xa8, 0xdb, 0xf1, 0x92, 0x5a, - 0xb9, 0x23, 0x86, 0xed, 0xd4, 0x00, 0x99, 0x23}; - const ui8 data[] = {'a', 'b', 'c'}; - - NArgonish::TBlake2BFactory factory; - for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)(int)MaxArch; ++i) { + 0xba, 0x80, 0xa5, 0x3f, 0x98, 0x1c, 0x4d, 0x0d, + 0x6a, 0x27, 0x97, 0xb6, 0x9f, 0x12, 0xf6, 0xe9, + 0x4c, 0x21, 0x2f, 0x14, 0x68, 0x5a, 0xc4, 0xb7, + 0x4b, 0x12, 0xbb, 0x6f, 0xdb, 0xff, 0xa2, 0xd1, + 0x7d, 0x87, 0xc5, 0x39, 0x2a, 0xab, 0x79, 0x2d, + 0xc2, 0x52, 0xd5, 0xde, 0x45, 0x33, 0xcc, 0x95, + 0x18, 0xd3, 0x8a, 0xa8, 0xdb, 0xf1, 0x92, 0x5a, + 0xb9, 0x23, 0x86, 0xed, 0xd4, 0x00, 0x99, 0x23}; + const ui8 data[] = {'a', 'b', 'c'}; + + NArgonish::TBlake2BFactory factory; + for (int i = (int)NArgonish::EInstructionSet::REF; i <= (int)(int)MaxArch; ++i) { auto blake2b = factory.Create((NArgonish::EInstructionSet)i, sizeof(TResult)); - ui8 hashResult[64] = {0}; - - blake2b->Update(data, sizeof(data)); - blake2b->Final(hashResult, sizeof(hashResult)); - + ui8 hashResult[64] = {0}; + + blake2b->Update(data, sizeof(data)); + blake2b->Final(hashResult, sizeof(hashResult)); + UNIT_ASSERT(memcmp(hashResult, TResult, sizeof(TResult)) == 0); - } - } -} + } + } +} diff --git a/library/cpp/digest/argonish/ut_fat/ya.make b/library/cpp/digest/argonish/ut_fat/ya.make index f56175f4d0..94ebda9225 100644 --- a/library/cpp/digest/argonish/ut_fat/ya.make +++ b/library/cpp/digest/argonish/ut_fat/ya.make @@ -1,15 +1,15 @@ UNITTEST_FOR(library/cpp/digest/argonish) - -OWNER(e-sidorov) - + +OWNER(e-sidorov) + PEERDIR( library/cpp/digest/argonish ) - -SRCS( - ut.cpp -) - + +SRCS( + ut.cpp +) + TAG( sb:intel_e5_2660v4 ya:fat @@ -17,5 +17,5 @@ TAG( ) SIZE(LARGE) - -END() + +END() diff --git a/library/cpp/digest/argonish/ya.make b/library/cpp/digest/argonish/ya.make index e8515b3464..4a0e937279 100644 --- a/library/cpp/digest/argonish/ya.make +++ b/library/cpp/digest/argonish/ya.make @@ -1,25 +1,25 @@ -LIBRARY() - -OWNER(e-sidorov) - -IF (ARCH_X86_64 OR ARCH_I386) - PEERDIR( - library/cpp/threading/poor_man_openmp - library/cpp/digest/argonish/internal/proxies/avx2 - library/cpp/digest/argonish/internal/proxies/ref - library/cpp/digest/argonish/internal/proxies/sse2 - library/cpp/digest/argonish/internal/proxies/sse41 - library/cpp/digest/argonish/internal/proxies/ssse3 - ) -ELSE() - PEERDIR( - library/cpp/threading/poor_man_openmp - library/cpp/digest/argonish/internal/proxies/ref - ) -ENDIF() - -SRCS( - factory/factory.cpp -) - -END() +LIBRARY() + +OWNER(e-sidorov) + +IF (ARCH_X86_64 OR ARCH_I386) + PEERDIR( + library/cpp/threading/poor_man_openmp + library/cpp/digest/argonish/internal/proxies/avx2 + library/cpp/digest/argonish/internal/proxies/ref + library/cpp/digest/argonish/internal/proxies/sse2 + library/cpp/digest/argonish/internal/proxies/sse41 + library/cpp/digest/argonish/internal/proxies/ssse3 + ) +ELSE() + PEERDIR( + library/cpp/threading/poor_man_openmp + library/cpp/digest/argonish/internal/proxies/ref + ) +ENDIF() + +SRCS( + factory/factory.cpp +) + +END() |