author | e-sidorov <e-sidorov@yandex-team.ru> | 2022-02-10 16:46:05 +0300
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:05 +0300
commit | 1ec091f8998d76a211c6015ba6865a73b29d676a (patch)
tree | 6c72f0309888be2dd18d007d19c490ed87740d66 /library/cpp/digest/argonish/internal
parent | 3b241dd57cf58f20bbbd63fa6a0a758dbec09b68 (diff)
download | ydb-1ec091f8998d76a211c6015ba6865a73b29d676a.tar.gz
Restoring authorship annotation for <e-sidorov@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'library/cpp/digest/argonish/internal')
43 files changed, 2909 insertions, 2909 deletions
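For orientation before the diff itself: the headers touched here implement the IArgon2Base interface (Hash, Verify, HashWithCustomMemory, VerifyWithCustomMemory, GetMemorySize), with one engine class per instruction set. A minimal usage sketch follows, assuming only declarations visible in this diff — TArgon2REF from argon2_ref.h, EArgon2Type from argonish's public header, and the Hash/Verify signatures from argon2_base.h; the template parameters (mcost = 1024 blocks, i.e. 1 MiB, threads = 1) are illustrative, and instantiating an engine directly like this is for demonstration only:

```cpp
#include <library/cpp/digest/argonish/internal/argon2/argon2_ref.h>

using namespace NArgonish;

int main() {
    // Portable engine: mcost = 1024 blocks (1 MiB), 1 lane, 1 pass, no secret key.
    TArgon2REF<1024, 1> engine(EArgon2Type::Argon2d, /*tcost=*/1, /*key=*/nullptr, /*keylen=*/0);

    const ui8 pwd[] = "password";
    const ui8 salt[] = "somesalt"; // at least ARGON2_SALT_MIN_LEN (8) bytes
    ui8 tag[32];                   // at least ARGON2_MIN_OUTLEN (4) bytes

    engine.Hash(pwd, sizeof(pwd) - 1, salt, sizeof(salt) - 1, tag, sizeof(tag));

    // Verify() recomputes the tag and compares it with SecureCompare_, which
    // scans every byte regardless of where the first mismatch occurs.
    return engine.Verify(pwd, sizeof(pwd) - 1, salt, sizeof(salt) - 1, tag, sizeof(tag)) ? 0 : 1;
}
```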
diff --git a/library/cpp/digest/argonish/internal/argon2/argon2_avx2.h b/library/cpp/digest/argonish/internal/argon2/argon2_avx2.h index 8bf5367817..4ce2712e85 100644 --- a/library/cpp/digest/argonish/internal/argon2/argon2_avx2.h +++ b/library/cpp/digest/argonish/internal/argon2/argon2_avx2.h @@ -1,117 +1,117 @@ -#pragma once - -#include <immintrin.h> -#include "argon2_base.h" +#pragma once + +#include <immintrin.h> +#include "argon2_base.h" #include <library/cpp/digest/argonish/internal/blamka/blamka_avx2.h> - -namespace NArgonish { - template <ui32 mcost, ui32 threads> - class TArgon2AVX2 final: public TArgon2<EInstructionSet::AVX2, mcost, threads> { - public: - TArgon2AVX2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) - : TArgon2<EInstructionSet::AVX2, mcost, threads>(atype, tcost, key, keylen) - { - } - - protected: - virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { - __m256i* mdst = (__m256i*)dst; - __m256i* msrc = (__m256i*)src; - - for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) - XorValues(mdst + i, mdst + i, msrc + i); - } - - virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { - memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); - } - - virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool with_xor) const override { - __m256i blockxy[ARGON2_HWORDS_IN_BLOCK]; - __m256i state[ARGON2_HWORDS_IN_BLOCK]; - - memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); - - if (with_xor) { - for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) { - state[i] = _mm256_xor_si256(state[i], _mm256_loadu_si256((const __m256i*)refBlock->V + i)); - blockxy[i] = _mm256_xor_si256(state[i], _mm256_loadu_si256((const __m256i*)nextBlock->V + i)); - } - } else { - for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) { - blockxy[i] = state[i] = _mm256_xor_si256( - state[i], _mm256_loadu_si256((const __m256i*)refBlock->V + i)); - } - } - - /** - * state[ 8*i + 0 ] = ( v0_0, v1_0, v2_0, v3_0) - * state[ 8*i + 1 ] = ( v4_0, v5_0, v6_0, v7_0) - * state[ 8*i + 2 ] = ( v8_0, v9_0, v10_0, v11_0) - * state[ 8*i + 3 ] = (v12_0, v13_0, v14_0, v15_0) - * state[ 8*i + 4 ] = ( v0_1, v1_1, v2_1, v3_1) - * state[ 8*i + 5 ] = ( v4_1, v5_1, v6_1, v7_1) - * state[ 8*i + 6 ] = ( v8_1, v9_1, v10_1, v11_1) - * state[ 8*i + 7 ] = (v12_1, v13_1, v14_1, v15_1) - */ - for (ui32 i = 0; i < 4; ++i) { - BlamkaG1AVX2( - state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], - state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); - BlamkaG2AVX2( - state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], - state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); - DiagonalizeAVX21( - state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - BlamkaG1AVX2( - state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], - state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); - BlamkaG2AVX2( - state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], - state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); - UndiagonalizeAVX21( - state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - } - - /** - * state[ 0 + i] = ( v0_0, v1_0, v0_1, v1_1) - * state[ 4 + i] = ( v2_0, v3_0, v2_1, v3_1) - * state[ 8 + i] = ( v4_0, v5_0, v4_1, v5_1) - * state[12 + i] = ( v6_0, v7_0, v6_1, v7_1) - * state[16 + i] = ( v8_0, v9_0, 
v8_1, v9_1) - * state[20 + i] = (v10_0, v11_0, v10_1, v11_1) - * state[24 + i] = (v12_0, v13_0, v12_1, v13_1) - * state[28 + i] = (v14_0, v15_0, v14_1, v15_1) - */ - for (ui32 i = 0; i < 4; ++i) { - BlamkaG1AVX2( - state[0 + i], state[4 + i], state[8 + i], state[12 + i], - state[16 + i], state[20 + i], state[24 + i], state[28 + i]); - BlamkaG2AVX2( - state[0 + i], state[4 + i], state[8 + i], state[12 + i], - state[16 + i], state[20 + i], state[24 + i], state[28 + i]); - DiagonalizeAVX22( - state[8 + i], state[12 + i], - state[16 + i], state[20 + i], - state[24 + i], state[28 + i]); - BlamkaG1AVX2( - state[0 + i], state[4 + i], state[8 + i], state[12 + i], - state[16 + i], state[20 + i], state[24 + i], state[28 + i]); - BlamkaG2AVX2( - state[0 + i], state[4 + i], state[8 + i], state[12 + i], - state[16 + i], state[20 + i], state[24 + i], state[28 + i]); - UndiagonalizeAVX22( - state[8 + i], state[12 + i], - state[16 + i], state[20 + i], - state[24 + i], state[28 + i]); - } - - for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) { - state[i] = _mm256_xor_si256(state[i], blockxy[i]); - _mm256_storeu_si256((__m256i*)nextBlock->V + i, state[i]); - } - } - }; -} + +namespace NArgonish { + template <ui32 mcost, ui32 threads> + class TArgon2AVX2 final: public TArgon2<EInstructionSet::AVX2, mcost, threads> { + public: + TArgon2AVX2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) + : TArgon2<EInstructionSet::AVX2, mcost, threads>(atype, tcost, key, keylen) + { + } + + protected: + virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { + __m256i* mdst = (__m256i*)dst; + __m256i* msrc = (__m256i*)src; + + for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) + XorValues(mdst + i, mdst + i, msrc + i); + } + + virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { + memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); + } + + virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool with_xor) const override { + __m256i blockxy[ARGON2_HWORDS_IN_BLOCK]; + __m256i state[ARGON2_HWORDS_IN_BLOCK]; + + memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); + + if (with_xor) { + for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) { + state[i] = _mm256_xor_si256(state[i], _mm256_loadu_si256((const __m256i*)refBlock->V + i)); + blockxy[i] = _mm256_xor_si256(state[i], _mm256_loadu_si256((const __m256i*)nextBlock->V + i)); + } + } else { + for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) { + blockxy[i] = state[i] = _mm256_xor_si256( + state[i], _mm256_loadu_si256((const __m256i*)refBlock->V + i)); + } + } + + /** + * state[ 8*i + 0 ] = ( v0_0, v1_0, v2_0, v3_0) + * state[ 8*i + 1 ] = ( v4_0, v5_0, v6_0, v7_0) + * state[ 8*i + 2 ] = ( v8_0, v9_0, v10_0, v11_0) + * state[ 8*i + 3 ] = (v12_0, v13_0, v14_0, v15_0) + * state[ 8*i + 4 ] = ( v0_1, v1_1, v2_1, v3_1) + * state[ 8*i + 5 ] = ( v4_1, v5_1, v6_1, v7_1) + * state[ 8*i + 6 ] = ( v8_1, v9_1, v10_1, v11_1) + * state[ 8*i + 7 ] = (v12_1, v13_1, v14_1, v15_1) + */ + for (ui32 i = 0; i < 4; ++i) { + BlamkaG1AVX2( + state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], + state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); + BlamkaG2AVX2( + state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], + state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); + DiagonalizeAVX21( + state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + BlamkaG1AVX2( + state[8 * i + 0], 
state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], + state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); + BlamkaG2AVX2( + state[8 * i + 0], state[8 * i + 4], state[8 * i + 1], state[8 * i + 5], + state[8 * i + 2], state[8 * i + 6], state[8 * i + 3], state[8 * i + 7]); + UndiagonalizeAVX21( + state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + } + + /** + * state[ 0 + i] = ( v0_0, v1_0, v0_1, v1_1) + * state[ 4 + i] = ( v2_0, v3_0, v2_1, v3_1) + * state[ 8 + i] = ( v4_0, v5_0, v4_1, v5_1) + * state[12 + i] = ( v6_0, v7_0, v6_1, v7_1) + * state[16 + i] = ( v8_0, v9_0, v8_1, v9_1) + * state[20 + i] = (v10_0, v11_0, v10_1, v11_1) + * state[24 + i] = (v12_0, v13_0, v12_1, v13_1) + * state[28 + i] = (v14_0, v15_0, v14_1, v15_1) + */ + for (ui32 i = 0; i < 4; ++i) { + BlamkaG1AVX2( + state[0 + i], state[4 + i], state[8 + i], state[12 + i], + state[16 + i], state[20 + i], state[24 + i], state[28 + i]); + BlamkaG2AVX2( + state[0 + i], state[4 + i], state[8 + i], state[12 + i], + state[16 + i], state[20 + i], state[24 + i], state[28 + i]); + DiagonalizeAVX22( + state[8 + i], state[12 + i], + state[16 + i], state[20 + i], + state[24 + i], state[28 + i]); + BlamkaG1AVX2( + state[0 + i], state[4 + i], state[8 + i], state[12 + i], + state[16 + i], state[20 + i], state[24 + i], state[28 + i]); + BlamkaG2AVX2( + state[0 + i], state[4 + i], state[8 + i], state[12 + i], + state[16 + i], state[20 + i], state[24 + i], state[28 + i]); + UndiagonalizeAVX22( + state[8 + i], state[12 + i], + state[16 + i], state[20 + i], + state[24 + i], state[28 + i]); + } + + for (ui32 i = 0; i < ARGON2_HWORDS_IN_BLOCK; ++i) { + state[i] = _mm256_xor_si256(state[i], blockxy[i]); + _mm256_storeu_si256((__m256i*)nextBlock->V + i, state[i]); + } + } + }; +} diff --git a/library/cpp/digest/argonish/internal/argon2/argon2_base.h b/library/cpp/digest/argonish/internal/argon2/argon2_base.h index 2385cc947c..8de5b6bb42 100644 --- a/library/cpp/digest/argonish/internal/argon2/argon2_base.h +++ b/library/cpp/digest/argonish/internal/argon2/argon2_base.h @@ -1,388 +1,388 @@ -#pragma once - -#include <util/generic/yexception.h> +#pragma once + +#include <util/generic/yexception.h> #include <library/cpp/digest/argonish/argon2.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h> #include <library/cpp/threading/poor_man_openmp/thread_helper.h> - -namespace NArgonish { - const ui32 ARGON2_PREHASH_DIGEST_LENGTH = 64; + +namespace NArgonish { + const ui32 ARGON2_PREHASH_DIGEST_LENGTH = 64; const ui32 ARGON2_SECRET_MAX_LENGTH = 64; - const ui32 ARGON2_PREHASH_SEED_LENGTH = 72; - const ui32 ARGON2_BLOCK_SIZE = 1024; - const ui32 ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8; - const ui32 ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16; - const ui32 ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32; - const ui32 ARGON2_ADDRESSES_IN_BLOCK = 128; - const ui32 ARGON2_SYNC_POINTS = 4; - const ui32 ARGON2_SALT_MIN_LEN = 8; - const ui32 ARGON2_MIN_OUTLEN = 4; - - struct TBlock { - ui64 V[ARGON2_QWORDS_IN_BLOCK]; - }; - - template <EInstructionSet instructionSet, ui32 mcost, ui32 threads> - class TArgon2: public IArgon2Base { - public: - TArgon2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) - : SecretLen_(keylen) - , Tcost_(tcost) - , Atype_(atype) - { - if (SecretLen_) - memcpy(Secret_, key, keylen); - } - - virtual ~TArgon2() override { - if (SecretLen_) { - SecureZeroMemory_(Secret_, SecretLen_); - SecretLen_ = 0; - } - } - - virtual void 
Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, - ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { - TArrayHolder<TBlock> buffer(new TBlock[MemoryBlocks_]); - InternalHash_(buffer.Get(), pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); - } - - virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, - const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { - TArrayHolder<ui8> hashResult(new ui8[hashlen]); - Hash(pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen); - - return SecureCompare_(hash, hashResult.Get(), hashlen); - } - - virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, - const ui8* aad = nullptr, ui32 aadlen = 0) const override { - if (memory == nullptr || mlen < sizeof(TBlock) * MemoryBlocks_) - ythrow yexception() << "memory is null or its size is not enough"; - - InternalHash_((TBlock*)memory, pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); - } - - virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, - const ui8* aad = nullptr, ui32 aadlen = 0) const override { - TArrayHolder<ui8> hashResult(new ui8[hashlen]); - HashWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen); - - return SecureCompare_(hashResult.Get(), hash, hashlen); - } - - virtual size_t GetMemorySize() const override { - return MemoryBlocks_ * sizeof(TBlock); - } - - protected: /* Constants */ - ui8 Secret_[ARGON2_SECRET_MAX_LENGTH] = {0}; - ui32 SecretLen_ = 0; - ui32 Tcost_; - EArgon2Type Atype_; - - static constexpr ui32 Lanes_ = threads; - static constexpr ui32 MemoryBlocks_ = (mcost >= 2 * ARGON2_SYNC_POINTS * Lanes_) ? 
(mcost - mcost % (Lanes_ * ARGON2_SYNC_POINTS)) : 2 * ARGON2_SYNC_POINTS * Lanes_; - static constexpr ui32 SegmentLength_ = MemoryBlocks_ / (Lanes_ * ARGON2_SYNC_POINTS); - static constexpr ui32 LaneLength_ = SegmentLength_ * ARGON2_SYNC_POINTS; - - protected: /* Prototypes */ - virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, - TBlock* nextBlock, bool withXor) const = 0; - - virtual void CopyBlock_(TBlock* dst, const TBlock* src) const = 0; - virtual void XorBlock_(TBlock* dst, const TBlock* src) const = 0; - - protected: /* Static functions */ - static bool SecureCompare_(const ui8* buffer1, const ui8* buffer2, ui32 len) { - bool result = true; - for (ui32 i = 0; i < len; ++i) { - result &= (buffer1[i] == buffer2[i]); - } - return result; - } - - static void SecureZeroMemory_(void* src, size_t len) { - static void* (*const volatile memset_v)(void*, int, size_t) = &memset; - memset_v(src, 0, len); - } - - static void Store32_(ui32 value, void* mem) { - *((ui32*)mem) = value; - } - - static void Blake2BHash64_(ui8 out[BLAKE2B_OUTBYTES], const ui8 in[BLAKE2B_OUTBYTES]) { - TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES); - hash.Update(in, BLAKE2B_OUTBYTES); - hash.Final(out, BLAKE2B_OUTBYTES); - } - - static void ExpandBlockhash_(ui8 expanded[ARGON2_BLOCK_SIZE], const ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]) { - ui8 out_buffer[BLAKE2B_OUTBYTES]; - ui8 in_buffer[BLAKE2B_OUTBYTES]; - const ui32 HALF_OUT_BYTES = BLAKE2B_OUTBYTES / 2; - const ui32 HASH_BLOCKS_COUNT = ((ARGON2_BLOCK_SIZE / HALF_OUT_BYTES)); - - TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES); - hash.Update(ARGON2_BLOCK_SIZE); - hash.Update(blockhash, ARGON2_PREHASH_SEED_LENGTH); - hash.Final(out_buffer, BLAKE2B_OUTBYTES); - - memcpy(expanded, out_buffer, HALF_OUT_BYTES); - - for (ui32 i = 1; i < HASH_BLOCKS_COUNT - 2; ++i) { - memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); - Blake2BHash64_(out_buffer, in_buffer); - memcpy(expanded + (i * HALF_OUT_BYTES), out_buffer, HALF_OUT_BYTES); - } - - Blake2BHash64_(in_buffer, out_buffer); - memcpy(expanded + HALF_OUT_BYTES * (HASH_BLOCKS_COUNT - 2), in_buffer, BLAKE2B_OUTBYTES); - } - - static void Blake2BLong_(ui8* out, ui32 outlen, const ui8* in, ui32 inlen) { - if (outlen < BLAKE2B_OUTBYTES) { - TBlake2B<instructionSet> hash(outlen); - hash.Update(outlen); - hash.Update(in, inlen); - hash.Final(out, outlen); - } else { - ui8 out_buffer[BLAKE2B_OUTBYTES]; - ui8 in_buffer[BLAKE2B_OUTBYTES]; - ui32 toproduce = outlen - BLAKE2B_OUTBYTES / 2; - + const ui32 ARGON2_PREHASH_SEED_LENGTH = 72; + const ui32 ARGON2_BLOCK_SIZE = 1024; + const ui32 ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8; + const ui32 ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16; + const ui32 ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32; + const ui32 ARGON2_ADDRESSES_IN_BLOCK = 128; + const ui32 ARGON2_SYNC_POINTS = 4; + const ui32 ARGON2_SALT_MIN_LEN = 8; + const ui32 ARGON2_MIN_OUTLEN = 4; + + struct TBlock { + ui64 V[ARGON2_QWORDS_IN_BLOCK]; + }; + + template <EInstructionSet instructionSet, ui32 mcost, ui32 threads> + class TArgon2: public IArgon2Base { + public: + TArgon2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) + : SecretLen_(keylen) + , Tcost_(tcost) + , Atype_(atype) + { + if (SecretLen_) + memcpy(Secret_, key, keylen); + } + + virtual ~TArgon2() override { + if (SecretLen_) { + SecureZeroMemory_(Secret_, SecretLen_); + SecretLen_ = 0; + } + } + + virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, + ui8* out, ui32 outlen, const ui8* aad = 
nullptr, ui32 aadlen = 0) const override { + TArrayHolder<TBlock> buffer(new TBlock[MemoryBlocks_]); + InternalHash_(buffer.Get(), pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); + } + + virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, + const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { + TArrayHolder<ui8> hashResult(new ui8[hashlen]); + Hash(pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen); + + return SecureCompare_(hash, hashResult.Get(), hashlen); + } + + virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, + const ui8* aad = nullptr, ui32 aadlen = 0) const override { + if (memory == nullptr || mlen < sizeof(TBlock) * MemoryBlocks_) + ythrow yexception() << "memory is null or its size is not enough"; + + InternalHash_((TBlock*)memory, pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); + } + + virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, + const ui8* aad = nullptr, ui32 aadlen = 0) const override { + TArrayHolder<ui8> hashResult(new ui8[hashlen]); + HashWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen); + + return SecureCompare_(hashResult.Get(), hash, hashlen); + } + + virtual size_t GetMemorySize() const override { + return MemoryBlocks_ * sizeof(TBlock); + } + + protected: /* Constants */ + ui8 Secret_[ARGON2_SECRET_MAX_LENGTH] = {0}; + ui32 SecretLen_ = 0; + ui32 Tcost_; + EArgon2Type Atype_; + + static constexpr ui32 Lanes_ = threads; + static constexpr ui32 MemoryBlocks_ = (mcost >= 2 * ARGON2_SYNC_POINTS * Lanes_) ? 
(mcost - mcost % (Lanes_ * ARGON2_SYNC_POINTS)) : 2 * ARGON2_SYNC_POINTS * Lanes_; + static constexpr ui32 SegmentLength_ = MemoryBlocks_ / (Lanes_ * ARGON2_SYNC_POINTS); + static constexpr ui32 LaneLength_ = SegmentLength_ * ARGON2_SYNC_POINTS; + + protected: /* Prototypes */ + virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, + TBlock* nextBlock, bool withXor) const = 0; + + virtual void CopyBlock_(TBlock* dst, const TBlock* src) const = 0; + virtual void XorBlock_(TBlock* dst, const TBlock* src) const = 0; + + protected: /* Static functions */ + static bool SecureCompare_(const ui8* buffer1, const ui8* buffer2, ui32 len) { + bool result = true; + for (ui32 i = 0; i < len; ++i) { + result &= (buffer1[i] == buffer2[i]); + } + return result; + } + + static void SecureZeroMemory_(void* src, size_t len) { + static void* (*const volatile memset_v)(void*, int, size_t) = &memset; + memset_v(src, 0, len); + } + + static void Store32_(ui32 value, void* mem) { + *((ui32*)mem) = value; + } + + static void Blake2BHash64_(ui8 out[BLAKE2B_OUTBYTES], const ui8 in[BLAKE2B_OUTBYTES]) { + TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES); + hash.Update(in, BLAKE2B_OUTBYTES); + hash.Final(out, BLAKE2B_OUTBYTES); + } + + static void ExpandBlockhash_(ui8 expanded[ARGON2_BLOCK_SIZE], const ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]) { + ui8 out_buffer[BLAKE2B_OUTBYTES]; + ui8 in_buffer[BLAKE2B_OUTBYTES]; + const ui32 HALF_OUT_BYTES = BLAKE2B_OUTBYTES / 2; + const ui32 HASH_BLOCKS_COUNT = ((ARGON2_BLOCK_SIZE / HALF_OUT_BYTES)); + + TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES); + hash.Update(ARGON2_BLOCK_SIZE); + hash.Update(blockhash, ARGON2_PREHASH_SEED_LENGTH); + hash.Final(out_buffer, BLAKE2B_OUTBYTES); + + memcpy(expanded, out_buffer, HALF_OUT_BYTES); + + for (ui32 i = 1; i < HASH_BLOCKS_COUNT - 2; ++i) { + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + Blake2BHash64_(out_buffer, in_buffer); + memcpy(expanded + (i * HALF_OUT_BYTES), out_buffer, HALF_OUT_BYTES); + } + + Blake2BHash64_(in_buffer, out_buffer); + memcpy(expanded + HALF_OUT_BYTES * (HASH_BLOCKS_COUNT - 2), in_buffer, BLAKE2B_OUTBYTES); + } + + static void Blake2BLong_(ui8* out, ui32 outlen, const ui8* in, ui32 inlen) { + if (outlen < BLAKE2B_OUTBYTES) { + TBlake2B<instructionSet> hash(outlen); + hash.Update(outlen); + hash.Update(in, inlen); + hash.Final(out, outlen); + } else { + ui8 out_buffer[BLAKE2B_OUTBYTES]; + ui8 in_buffer[BLAKE2B_OUTBYTES]; + ui32 toproduce = outlen - BLAKE2B_OUTBYTES / 2; + TBlake2B<instructionSet> hash1(BLAKE2B_OUTBYTES); hash1.Update(outlen); hash1.Update(in, inlen); hash1.Final(out_buffer, BLAKE2B_OUTBYTES); - - memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); - out += BLAKE2B_OUTBYTES / 2; - - while (toproduce > BLAKE2B_OUTBYTES) { - memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + + memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); + out += BLAKE2B_OUTBYTES / 2; + + while (toproduce > BLAKE2B_OUTBYTES) { + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); TBlake2B<instructionSet> hash2(BLAKE2B_OUTBYTES); hash2.Update(in_buffer, BLAKE2B_OUTBYTES); hash2.Final(out_buffer, BLAKE2B_OUTBYTES); - memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); - out += BLAKE2B_OUTBYTES / 2; - toproduce -= BLAKE2B_OUTBYTES / 2; - } - - memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); - { + memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); + out += BLAKE2B_OUTBYTES / 2; + toproduce -= BLAKE2B_OUTBYTES / 2; + } + + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + { TBlake2B<instructionSet> hash3(toproduce); 
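A note on the Blake2BLong_ function being restored here: it is Argon2's variable-length hash H'. For outlen >= BLAKE2B_OUTBYTES it chains full 64-byte BLAKE2b digests while emitting only the first 32 bytes of each, then finishes with one digest sized to the remainder, so no output byte depends on a truncated internal state. A worked byte count for an assumed outlen of 100:

```cpp
// Emission schedule of Blake2BLong_ for outlen = 100 (illustrative):
//   hash1 over (outlen || in): emit 32 bytes; toproduce = 100 - 32 = 68
//   68 > 64 -> hash2 over the previous 64-byte digest: emit 32; toproduce = 36
//   36 <= 64 -> final hash3 with digest size 36: emit 36
// Total emitted: 32 + 32 + 36 = 100 bytes.
```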
hash3.Update(in_buffer, BLAKE2B_OUTBYTES); hash3.Final(out_buffer, toproduce); - memcpy(out, out_buffer, toproduce); - } - } - } - - static void InitBlockValue_(TBlock* b, ui8 in) { - memset(b->V, in, sizeof(b->V)); - } - - protected: /* Functions */ - void InternalHash_(TBlock* memory, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, - const ui8* aad, ui32 aadlen) const { - /* - * all parameters checks are in proxy objects - */ - - Initialize_(memory, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen); - FillMemoryBlocks_(memory); - Finalize_(memory, out, outlen); - } - - void InitialHash_(ui8 blockhash[ARGON2_PREHASH_DIGEST_LENGTH], - ui32 outlen, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const { - TBlake2B<instructionSet> hash(ARGON2_PREHASH_DIGEST_LENGTH); - /* lanes, but lanes == threads */ - hash.Update(Lanes_); - /* outlen */ - hash.Update(outlen); - /* m_cost */ - hash.Update(mcost); - /* t_cost */ - hash.Update(Tcost_); - /* version */ - hash.Update(0x00000013); - /* Argon2 type */ - hash.Update((ui32)Atype_); - /* pwdlen */ - hash.Update(pwdlen); - /* pwd */ - hash.Update(pwd, pwdlen); - /* saltlen */ - hash.Update(saltlen); - /* salt */ - if (saltlen) - hash.Update(salt, saltlen); - /* secret */ - hash.Update(SecretLen_); - if (SecretLen_) - hash.Update((void*)Secret_, SecretLen_); - /* aadlen */ - hash.Update(aadlen); - if (aadlen) - hash.Update((void*)aad, aadlen); - hash.Final(blockhash, ARGON2_PREHASH_DIGEST_LENGTH); - } - - void FillFirstBlocks_(TBlock* blocks, ui8* blockhash) const { - for (ui32 l = 0; l < Lanes_; l++) { - /* fill the first block of the lane */ - Store32_(l, blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4); - Store32_(0, blockhash + ARGON2_PREHASH_DIGEST_LENGTH); - ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_]), blockhash); - - /* fill the second block of the lane */ - Store32_(1, blockhash + ARGON2_PREHASH_DIGEST_LENGTH); - ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_ + 1]), blockhash); - } - } - - /* The 'if' will be optimized out as the number of threads is known at the compile time */ - void FillMemoryBlocks_(TBlock* memory) const { - for (ui32 t = 0; t < Tcost_; ++t) { - for (ui32 s = 0; s < ARGON2_SYNC_POINTS; ++s) { - if (Lanes_ == 1) - FillSegment_(memory, t, 0, s); - else { - NYmp::SetThreadCount(Lanes_); - NYmp::ParallelForStaticAutoChunk<ui32>(0, Lanes_, [this, &memory, s, t](int k) { - this->FillSegment_(memory, t, k, s); - }); - } - } - } - } - - void Initialize_(TBlock* memory, ui32 outlen, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const { - ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]; - InitialHash_(blockhash, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen); - FillFirstBlocks_(memory, blockhash); - } - - ui32 ComputeReferenceArea_(ui32 pass, ui32 slice, ui32 index, bool sameLane) const { - ui32 passVal = pass == 0 ? (slice * SegmentLength_) : (LaneLength_ - SegmentLength_); - return sameLane ? passVal + (index - 1) : passVal + (index == 0 ? 
-1 : 0); - } - - ui32 IndexAlpha_(ui32 pass, ui32 slice, ui32 index, ui32 pseudoRand, bool sameLane) const { - ui32 referenceAreaSize = ComputeReferenceArea_(pass, slice, index, sameLane); - - ui64 relativePosition = pseudoRand; - relativePosition = relativePosition * relativePosition >> 32; - relativePosition = referenceAreaSize - 1 - (referenceAreaSize * relativePosition >> 32); - - ui32 startPosition = 0; - if (pass != 0) - startPosition = (slice == ARGON2_SYNC_POINTS - 1) ? 0 : (slice + 1) * SegmentLength_; - - return (ui32)((startPosition + relativePosition) % LaneLength_); - } - - void NextAddresses_(TBlock* addressBlock, TBlock* inputBlock, const TBlock* zeroBlock) const { - inputBlock->V[6]++; - FillBlock_(zeroBlock, inputBlock, addressBlock, false); - FillBlock_(zeroBlock, addressBlock, addressBlock, false); - } - - void Finalize_(const TBlock* memory, ui8* out, ui32 outlen) const { - TBlock blockhash; - CopyBlock_(&blockhash, memory + LaneLength_ - 1); - - /* XOR the last blocks */ - for (ui32 l = 1; l < Lanes_; ++l) { - ui32 lastBlockInLane = l * LaneLength_ + (LaneLength_ - 1); - XorBlock_(&blockhash, memory + lastBlockInLane); - } - - Blake2BLong_(out, outlen, (ui8*)blockhash.V, ARGON2_BLOCK_SIZE); - } - - /* The switch will be optimized out by the compiler as the type is known at the compile time */ - void FillSegment_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const { - switch (Atype_) { - case EArgon2Type::Argon2d: - FillSegmentD_(memory, pass, lane, slice); - return; - case EArgon2Type::Argon2i: - FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2i); - return; - case EArgon2Type::Argon2id: - if (pass == 0 && slice < ARGON2_SYNC_POINTS / 2) - FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2id); - else - FillSegmentD_(memory, pass, lane, slice); - return; - } - } - - void FillSegmentD_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const { - ui32 startingIndex = (pass == 0 && slice == 0) ? 2 : 0; - ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex; - ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1; - - for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) { - if (currOffset % LaneLength_ == 1) { - prevOffset = currOffset - 1; - } - - ui64 pseudoRand = memory[prevOffset].V[0]; - ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_); - ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane); - - TBlock* refBlock = memory + LaneLength_ * refLane + refIndex; - FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0); - } - } - - void FillSegmentI_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice, EArgon2Type atp) const { - TBlock addressBlock, inputBlock, zeroBlock; - InitBlockValue_(&zeroBlock, 0); - InitBlockValue_(&inputBlock, 0); - - inputBlock.V[0] = pass; - inputBlock.V[1] = lane; - inputBlock.V[2] = slice; - inputBlock.V[3] = MemoryBlocks_; - inputBlock.V[4] = Tcost_; - inputBlock.V[5] = (ui64)atp; - - ui32 startingIndex = 0; - - if (pass == 0 && slice == 0) { - startingIndex = 2; - NextAddresses_(&addressBlock, &inputBlock, &zeroBlock); - } - - ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex; - ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? 
LaneLength_ : 0) - 1; - - for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) { - if (currOffset % LaneLength_ == 1) { - prevOffset = currOffset - 1; - } - - if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { - NextAddresses_(&addressBlock, &inputBlock, &zeroBlock); - } - - ui64 pseudoRand = addressBlock.V[i % ARGON2_ADDRESSES_IN_BLOCK]; - ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_); - ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane); - - TBlock* refBlock = memory + LaneLength_ * refLane + refIndex; - FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0); - } - } - }; -} + memcpy(out, out_buffer, toproduce); + } + } + } + + static void InitBlockValue_(TBlock* b, ui8 in) { + memset(b->V, in, sizeof(b->V)); + } + + protected: /* Functions */ + void InternalHash_(TBlock* memory, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, + const ui8* aad, ui32 aadlen) const { + /* + * all parameters checks are in proxy objects + */ + + Initialize_(memory, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen); + FillMemoryBlocks_(memory); + Finalize_(memory, out, outlen); + } + + void InitialHash_(ui8 blockhash[ARGON2_PREHASH_DIGEST_LENGTH], + ui32 outlen, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const { + TBlake2B<instructionSet> hash(ARGON2_PREHASH_DIGEST_LENGTH); + /* lanes, but lanes == threads */ + hash.Update(Lanes_); + /* outlen */ + hash.Update(outlen); + /* m_cost */ + hash.Update(mcost); + /* t_cost */ + hash.Update(Tcost_); + /* version */ + hash.Update(0x00000013); + /* Argon2 type */ + hash.Update((ui32)Atype_); + /* pwdlen */ + hash.Update(pwdlen); + /* pwd */ + hash.Update(pwd, pwdlen); + /* saltlen */ + hash.Update(saltlen); + /* salt */ + if (saltlen) + hash.Update(salt, saltlen); + /* secret */ + hash.Update(SecretLen_); + if (SecretLen_) + hash.Update((void*)Secret_, SecretLen_); + /* aadlen */ + hash.Update(aadlen); + if (aadlen) + hash.Update((void*)aad, aadlen); + hash.Final(blockhash, ARGON2_PREHASH_DIGEST_LENGTH); + } + + void FillFirstBlocks_(TBlock* blocks, ui8* blockhash) const { + for (ui32 l = 0; l < Lanes_; l++) { + /* fill the first block of the lane */ + Store32_(l, blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4); + Store32_(0, blockhash + ARGON2_PREHASH_DIGEST_LENGTH); + ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_]), blockhash); + + /* fill the second block of the lane */ + Store32_(1, blockhash + ARGON2_PREHASH_DIGEST_LENGTH); + ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_ + 1]), blockhash); + } + } + + /* The 'if' will be optimized out as the number of threads is known at the compile time */ + void FillMemoryBlocks_(TBlock* memory) const { + for (ui32 t = 0; t < Tcost_; ++t) { + for (ui32 s = 0; s < ARGON2_SYNC_POINTS; ++s) { + if (Lanes_ == 1) + FillSegment_(memory, t, 0, s); + else { + NYmp::SetThreadCount(Lanes_); + NYmp::ParallelForStaticAutoChunk<ui32>(0, Lanes_, [this, &memory, s, t](int k) { + this->FillSegment_(memory, t, k, s); + }); + } + } + } + } + + void Initialize_(TBlock* memory, ui32 outlen, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const { + ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]; + InitialHash_(blockhash, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen); + FillFirstBlocks_(memory, blockhash); + } + + ui32 ComputeReferenceArea_(ui32 pass, ui32 slice, ui32 index, bool sameLane) const { + ui32 
passVal = pass == 0 ? (slice * SegmentLength_) : (LaneLength_ - SegmentLength_); + return sameLane ? passVal + (index - 1) : passVal + (index == 0 ? -1 : 0); + } + + ui32 IndexAlpha_(ui32 pass, ui32 slice, ui32 index, ui32 pseudoRand, bool sameLane) const { + ui32 referenceAreaSize = ComputeReferenceArea_(pass, slice, index, sameLane); + + ui64 relativePosition = pseudoRand; + relativePosition = relativePosition * relativePosition >> 32; + relativePosition = referenceAreaSize - 1 - (referenceAreaSize * relativePosition >> 32); + + ui32 startPosition = 0; + if (pass != 0) + startPosition = (slice == ARGON2_SYNC_POINTS - 1) ? 0 : (slice + 1) * SegmentLength_; + + return (ui32)((startPosition + relativePosition) % LaneLength_); + } + + void NextAddresses_(TBlock* addressBlock, TBlock* inputBlock, const TBlock* zeroBlock) const { + inputBlock->V[6]++; + FillBlock_(zeroBlock, inputBlock, addressBlock, false); + FillBlock_(zeroBlock, addressBlock, addressBlock, false); + } + + void Finalize_(const TBlock* memory, ui8* out, ui32 outlen) const { + TBlock blockhash; + CopyBlock_(&blockhash, memory + LaneLength_ - 1); + + /* XOR the last blocks */ + for (ui32 l = 1; l < Lanes_; ++l) { + ui32 lastBlockInLane = l * LaneLength_ + (LaneLength_ - 1); + XorBlock_(&blockhash, memory + lastBlockInLane); + } + + Blake2BLong_(out, outlen, (ui8*)blockhash.V, ARGON2_BLOCK_SIZE); + } + + /* The switch will be optimized out by the compiler as the type is known at the compile time */ + void FillSegment_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const { + switch (Atype_) { + case EArgon2Type::Argon2d: + FillSegmentD_(memory, pass, lane, slice); + return; + case EArgon2Type::Argon2i: + FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2i); + return; + case EArgon2Type::Argon2id: + if (pass == 0 && slice < ARGON2_SYNC_POINTS / 2) + FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2id); + else + FillSegmentD_(memory, pass, lane, slice); + return; + } + } + + void FillSegmentD_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const { + ui32 startingIndex = (pass == 0 && slice == 0) ? 2 : 0; + ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex; + ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1; + + for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) { + if (currOffset % LaneLength_ == 1) { + prevOffset = currOffset - 1; + } + + ui64 pseudoRand = memory[prevOffset].V[0]; + ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_); + ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane); + + TBlock* refBlock = memory + LaneLength_ * refLane + refIndex; + FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0); + } + } + + void FillSegmentI_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice, EArgon2Type atp) const { + TBlock addressBlock, inputBlock, zeroBlock; + InitBlockValue_(&zeroBlock, 0); + InitBlockValue_(&inputBlock, 0); + + inputBlock.V[0] = pass; + inputBlock.V[1] = lane; + inputBlock.V[2] = slice; + inputBlock.V[3] = MemoryBlocks_; + inputBlock.V[4] = Tcost_; + inputBlock.V[5] = (ui64)atp; + + ui32 startingIndex = 0; + + if (pass == 0 && slice == 0) { + startingIndex = 2; + NextAddresses_(&addressBlock, &inputBlock, &zeroBlock); + } + + ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex; + ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? 
LaneLength_ : 0) - 1; + + for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) { + if (currOffset % LaneLength_ == 1) { + prevOffset = currOffset - 1; + } + + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + NextAddresses_(&addressBlock, &inputBlock, &zeroBlock); + } + + ui64 pseudoRand = addressBlock.V[i % ARGON2_ADDRESSES_IN_BLOCK]; + ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_); + ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane); + + TBlock* refBlock = memory + LaneLength_ * refLane + refIndex; + FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0); + } + } + }; +} diff --git a/library/cpp/digest/argonish/internal/argon2/argon2_ref.h b/library/cpp/digest/argonish/internal/argon2/argon2_ref.h index 8e5e3fa971..d0635b71ee 100644 --- a/library/cpp/digest/argonish/internal/argon2/argon2_ref.h +++ b/library/cpp/digest/argonish/internal/argon2/argon2_ref.h @@ -1,88 +1,88 @@ -#pragma once - -#include "argon2_base.h" +#pragma once + +#include "argon2_base.h" #include <library/cpp/digest/argonish/internal/rotations/rotations_ref.h> - -namespace NArgonish { - static inline ui64 FBlaMka(ui64 x, ui64 y) { - const ui64 m = 0xFFFFFFFF; - const ui64 xy = (x & m) * (y & m); - return x + y + 2 * xy; - } - - static inline void BlamkaGRef(ui64& a, ui64& b, ui64& c, ui64& d) { - a = FBlaMka(a, b); - d = Rotr(d ^ a, 32); - c = FBlaMka(c, d); - b = Rotr(b ^ c, 24); - a = FBlaMka(a, b); - d = Rotr(d ^ a, 16); - c = FBlaMka(c, d); - b = Rotr(b ^ c, 63); - } - - static inline void BlamkaRoundRef( - ui64& v0, ui64& v1, ui64& v2, ui64& v3, - ui64& v4, ui64& v5, ui64& v6, ui64& v7, - ui64& v8, ui64& v9, ui64& v10, ui64& v11, - ui64& v12, ui64& v13, ui64& v14, ui64& v15) { - BlamkaGRef(v0, v4, v8, v12); - BlamkaGRef(v1, v5, v9, v13); - BlamkaGRef(v2, v6, v10, v14); - BlamkaGRef(v3, v7, v11, v15); - BlamkaGRef(v0, v5, v10, v15); - BlamkaGRef(v1, v6, v11, v12); - BlamkaGRef(v2, v7, v8, v13); - BlamkaGRef(v3, v4, v9, v14); - } - - template <ui32 mcost, ui32 threads> - class TArgon2REF final: public TArgon2<EInstructionSet::REF, mcost, threads> { - public: - TArgon2REF(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) - : TArgon2<EInstructionSet::REF, mcost, threads>(atype, tcost, key, keylen) - { - } - - protected: - virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { - for (ui32 i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { - dst->V[i] ^= src->V[i]; - } - } - - virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { - memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); - } - - virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { - TBlock blockR, blockTmp; - CopyBlock_(&blockR, refBlock); - XorBlock_(&blockR, prevBlock); - CopyBlock_(&blockTmp, &blockR); - - if (withXor) { - XorBlock_(&blockTmp, nextBlock); - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaRoundRef( - blockR.V[16 * i + 0], blockR.V[16 * i + 1], blockR.V[16 * i + 2], blockR.V[16 * i + 3], - blockR.V[16 * i + 4], blockR.V[16 * i + 5], blockR.V[16 * i + 6], blockR.V[16 * i + 7], - blockR.V[16 * i + 8], blockR.V[16 * i + 9], blockR.V[16 * i + 10], blockR.V[16 * i + 11], - blockR.V[16 * i + 12], blockR.V[16 * i + 13], blockR.V[16 * i + 14], blockR.V[16 * i + 15]); - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaRoundRef( - blockR.V[2 * i + 0], blockR.V[2 * i + 1], blockR.V[2 * i + 16], blockR.V[2 * i + 17], - 
blockR.V[2 * i + 32], blockR.V[2 * i + 33], blockR.V[2 * i + 48], blockR.V[2 * i + 49], - blockR.V[2 * i + 64], blockR.V[2 * i + 65], blockR.V[2 * i + 80], blockR.V[2 * i + 81], - blockR.V[2 * i + 96], blockR.V[2 * i + 97], blockR.V[2 * i + 112], blockR.V[2 * i + 113]); - } - - CopyBlock_(nextBlock, &blockTmp); - XorBlock_(nextBlock, &blockR); - } - }; -} + +namespace NArgonish { + static inline ui64 FBlaMka(ui64 x, ui64 y) { + const ui64 m = 0xFFFFFFFF; + const ui64 xy = (x & m) * (y & m); + return x + y + 2 * xy; + } + + static inline void BlamkaGRef(ui64& a, ui64& b, ui64& c, ui64& d) { + a = FBlaMka(a, b); + d = Rotr(d ^ a, 32); + c = FBlaMka(c, d); + b = Rotr(b ^ c, 24); + a = FBlaMka(a, b); + d = Rotr(d ^ a, 16); + c = FBlaMka(c, d); + b = Rotr(b ^ c, 63); + } + + static inline void BlamkaRoundRef( + ui64& v0, ui64& v1, ui64& v2, ui64& v3, + ui64& v4, ui64& v5, ui64& v6, ui64& v7, + ui64& v8, ui64& v9, ui64& v10, ui64& v11, + ui64& v12, ui64& v13, ui64& v14, ui64& v15) { + BlamkaGRef(v0, v4, v8, v12); + BlamkaGRef(v1, v5, v9, v13); + BlamkaGRef(v2, v6, v10, v14); + BlamkaGRef(v3, v7, v11, v15); + BlamkaGRef(v0, v5, v10, v15); + BlamkaGRef(v1, v6, v11, v12); + BlamkaGRef(v2, v7, v8, v13); + BlamkaGRef(v3, v4, v9, v14); + } + + template <ui32 mcost, ui32 threads> + class TArgon2REF final: public TArgon2<EInstructionSet::REF, mcost, threads> { + public: + TArgon2REF(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) + : TArgon2<EInstructionSet::REF, mcost, threads>(atype, tcost, key, keylen) + { + } + + protected: + virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { + for (ui32 i = 0; i < ARGON2_QWORDS_IN_BLOCK; ++i) { + dst->V[i] ^= src->V[i]; + } + } + + virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { + memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); + } + + virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { + TBlock blockR, blockTmp; + CopyBlock_(&blockR, refBlock); + XorBlock_(&blockR, prevBlock); + CopyBlock_(&blockTmp, &blockR); + + if (withXor) { + XorBlock_(&blockTmp, nextBlock); + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaRoundRef( + blockR.V[16 * i + 0], blockR.V[16 * i + 1], blockR.V[16 * i + 2], blockR.V[16 * i + 3], + blockR.V[16 * i + 4], blockR.V[16 * i + 5], blockR.V[16 * i + 6], blockR.V[16 * i + 7], + blockR.V[16 * i + 8], blockR.V[16 * i + 9], blockR.V[16 * i + 10], blockR.V[16 * i + 11], + blockR.V[16 * i + 12], blockR.V[16 * i + 13], blockR.V[16 * i + 14], blockR.V[16 * i + 15]); + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaRoundRef( + blockR.V[2 * i + 0], blockR.V[2 * i + 1], blockR.V[2 * i + 16], blockR.V[2 * i + 17], + blockR.V[2 * i + 32], blockR.V[2 * i + 33], blockR.V[2 * i + 48], blockR.V[2 * i + 49], + blockR.V[2 * i + 64], blockR.V[2 * i + 65], blockR.V[2 * i + 80], blockR.V[2 * i + 81], + blockR.V[2 * i + 96], blockR.V[2 * i + 97], blockR.V[2 * i + 112], blockR.V[2 * i + 113]); + } + + CopyBlock_(nextBlock, &blockTmp); + XorBlock_(nextBlock, &blockR); + } + }; +} diff --git a/library/cpp/digest/argonish/internal/argon2/argon2_sse2.h b/library/cpp/digest/argonish/internal/argon2/argon2_sse2.h index 1d2230a657..04fc70c56f 100644 --- a/library/cpp/digest/argonish/internal/argon2/argon2_sse2.h +++ b/library/cpp/digest/argonish/internal/argon2/argon2_sse2.h @@ -1,101 +1,101 @@ -#pragma once - -#include <emmintrin.h> -#include "argon2_base.h" +#pragma once + +#include <emmintrin.h> +#include "argon2_base.h" #include 
<library/cpp/digest/argonish/internal/blamka/blamka_sse2.h> - -namespace NArgonish { - template <ui32 mcost, ui32 threads> - class TArgon2SSE2 final: public TArgon2<EInstructionSet::SSE2, mcost, threads> { - public: - TArgon2SSE2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) - : TArgon2<EInstructionSet::SSE2, mcost, threads>(atype, tcost, key, keylen) - { - } - - protected: - virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { - __m128i* mdst = (__m128i*)dst->V; - __m128i* msrc = (__m128i*)src->V; - - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) - XorValues(mdst + i, msrc + i, mdst + i); - } - - virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { - memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); - } - - virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { - __m128i blockxy[ARGON2_OWORDS_IN_BLOCK]; - __m128i state[ARGON2_OWORDS_IN_BLOCK]; - - memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); - - if (withXor) { - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - state[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); - blockxy[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)nextBlock->V + i)); - } - } else { - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - blockxy[i] = state[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); - } - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaG1SSE2( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - BlamkaG2SSE2( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - DiagonalizeSSE2( - state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], state[8 * i + 7]); - BlamkaG1SSE2( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - BlamkaG2SSE2( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - UndiagonalizeSSE2( - state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], state[8 * i + 7]); - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaG1SSE2( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG2SSE2( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - DiagonalizeSSE2( - state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG1SSE2( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG2SSE2( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - UndiagonalizeSSE2( - state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - } - - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - state[i] = _mm_xor_si128(state[i], blockxy[i]); - 
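Across the SSE2, SSSE3, SSE4.1, and AVX2 kernels in this diff, FillBlock_ treats the 1 KiB block as an 8x8 matrix of 16-byte registers (the AVX2 version packs two rows per 32-byte register): one sweep of BlaMka rounds over the rows, then one over the columns, framed by the R = X xor Y setup and the final feedback XOR, exactly as in the reference FillBlock_ shown earlier. The index arithmetic, spelled out for the SSE2 layout:

```cpp
// SSE2 view of one 1024-byte block: 64 __m128i registers, state[0..63].
//   row round i    touches state[8*i + 0] .. state[8*i + 7]   (stride 1)
//   column round i touches state[8*0 + i] .. state[8*7 + i]   (stride 8)
// Each BlamkaG1/G2 + (Un)Diagonalize pair is one BLAKE2b-style round with
// the additions replaced by FBlaMka(x, y) = x + y + 2 * lo32(x) * lo32(y).
```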
_mm_storeu_si128((__m128i*)nextBlock->V + i, state[i]); - } - } - }; -} + +namespace NArgonish { + template <ui32 mcost, ui32 threads> + class TArgon2SSE2 final: public TArgon2<EInstructionSet::SSE2, mcost, threads> { + public: + TArgon2SSE2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) + : TArgon2<EInstructionSet::SSE2, mcost, threads>(atype, tcost, key, keylen) + { + } + + protected: + virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { + __m128i* mdst = (__m128i*)dst->V; + __m128i* msrc = (__m128i*)src->V; + + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) + XorValues(mdst + i, msrc + i, mdst + i); + } + + virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { + memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); + } + + virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { + __m128i blockxy[ARGON2_OWORDS_IN_BLOCK]; + __m128i state[ARGON2_OWORDS_IN_BLOCK]; + + memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); + + if (withXor) { + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); + blockxy[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)nextBlock->V + i)); + } + } else { + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + blockxy[i] = state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); + } + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaG1SSE2( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + BlamkaG2SSE2( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + DiagonalizeSSE2( + state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + BlamkaG1SSE2( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + BlamkaG2SSE2( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + UndiagonalizeSSE2( + state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaG1SSE2( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG2SSE2( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + DiagonalizeSSE2( + state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG1SSE2( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG2SSE2( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + UndiagonalizeSSE2( + state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + } + + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + state[i] = _mm_xor_si128(state[i], blockxy[i]); + 
_mm_storeu_si128((__m128i*)nextBlock->V + i, state[i]); + } + } + }; +} diff --git a/library/cpp/digest/argonish/internal/argon2/argon2_sse41.h b/library/cpp/digest/argonish/internal/argon2/argon2_sse41.h index 1ad35048ea..c9b01915de 100644 --- a/library/cpp/digest/argonish/internal/argon2/argon2_sse41.h +++ b/library/cpp/digest/argonish/internal/argon2/argon2_sse41.h @@ -1,101 +1,101 @@ -#pragma once - -#include <smmintrin.h> -#include "argon2_base.h" +#pragma once + +#include <smmintrin.h> +#include "argon2_base.h" #include <library/cpp/digest/argonish/internal/blamka/blamka_ssse3.h> - -namespace NArgonish { - template <ui32 mcost, ui32 threads> - class TArgon2SSE41 final: public TArgon2<EInstructionSet::SSE41, mcost, threads> { - public: - TArgon2SSE41(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) - : TArgon2<EInstructionSet::SSE41, mcost, threads>(atype, tcost, key, keylen) - { - } - - protected: - virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { - __m128i* mdst = (__m128i*)dst->V; - __m128i* msrc = (__m128i*)src->V; - - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) - XorValues(mdst + i, msrc + i, mdst + i); - } - - virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { - memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); - } - - virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { - __m128i blockxy[ARGON2_OWORDS_IN_BLOCK]; - __m128i state[ARGON2_OWORDS_IN_BLOCK]; - - memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); - - if (withXor) { - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - state[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); - blockxy[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)nextBlock->V + i)); - } - } else { - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - blockxy[i] = state[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); - } - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaG1SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - BlamkaG2SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - DiagonalizeSSSE3( - state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], state[8 * i + 7]); - BlamkaG1SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - BlamkaG2SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - UndiagonalizeSSSE3( - state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], state[8 * i + 7]); - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaG1SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG2SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - DiagonalizeSSSE3( - state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG1SSSE3( - state[8 * 0 + i], state[8 * 1 + i], 
state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG2SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - UndiagonalizeSSSE3( - state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - } - - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - state[i] = _mm_xor_si128(state[i], blockxy[i]); - _mm_storeu_si128((__m128i*)nextBlock->V + i, state[i]); - } - } - }; -} + +namespace NArgonish { + template <ui32 mcost, ui32 threads> + class TArgon2SSE41 final: public TArgon2<EInstructionSet::SSE41, mcost, threads> { + public: + TArgon2SSE41(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) + : TArgon2<EInstructionSet::SSE41, mcost, threads>(atype, tcost, key, keylen) + { + } + + protected: + virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { + __m128i* mdst = (__m128i*)dst->V; + __m128i* msrc = (__m128i*)src->V; + + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) + XorValues(mdst + i, msrc + i, mdst + i); + } + + virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { + memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); + } + + virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { + __m128i blockxy[ARGON2_OWORDS_IN_BLOCK]; + __m128i state[ARGON2_OWORDS_IN_BLOCK]; + + memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); + + if (withXor) { + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); + blockxy[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)nextBlock->V + i)); + } + } else { + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + blockxy[i] = state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); + } + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaG1SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + BlamkaG2SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + DiagonalizeSSSE3( + state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + BlamkaG1SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + BlamkaG2SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + UndiagonalizeSSSE3( + state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaG1SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG2SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + DiagonalizeSSSE3( + state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG1SSSE3( + state[8 * 0 + i], state[8 * 1 + 
i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG2SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + UndiagonalizeSSSE3( + state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + } + + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + state[i] = _mm_xor_si128(state[i], blockxy[i]); + _mm_storeu_si128((__m128i*)nextBlock->V + i, state[i]); + } + } + }; +} diff --git a/library/cpp/digest/argonish/internal/argon2/argon2_ssse3.h b/library/cpp/digest/argonish/internal/argon2/argon2_ssse3.h index a25a416834..714197a90f 100644 --- a/library/cpp/digest/argonish/internal/argon2/argon2_ssse3.h +++ b/library/cpp/digest/argonish/internal/argon2/argon2_ssse3.h @@ -1,102 +1,102 @@ -#pragma once - -#include <emmintrin.h> -#include <tmmintrin.h> -#include "argon2_base.h" +#pragma once + +#include <emmintrin.h> +#include <tmmintrin.h> +#include "argon2_base.h" #include <library/cpp/digest/argonish/internal/blamka/blamka_ssse3.h> - -namespace NArgonish { - template <ui32 mcost, ui32 threads> - class TArgon2SSSE3 final: public TArgon2<EInstructionSet::SSSE3, mcost, threads> { - public: - TArgon2SSSE3(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) - : TArgon2<EInstructionSet::SSSE3, mcost, threads>(atype, tcost, key, keylen) - { - } - - protected: - virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { - __m128i* mdst = (__m128i*)dst->V; - __m128i* msrc = (__m128i*)src->V; - - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) - XorValues(mdst + i, msrc + i, mdst + i); - } - - virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { - memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); - } - - virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { - __m128i blockxy[ARGON2_OWORDS_IN_BLOCK]; - __m128i state[ARGON2_OWORDS_IN_BLOCK]; - - memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); - - if (withXor) { - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - state[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); - blockxy[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)nextBlock->V + i)); - } - } else { - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - blockxy[i] = state[i] = _mm_xor_si128( - state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); - } - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaG1SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - BlamkaG2SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - DiagonalizeSSSE3( - state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], state[8 * i + 7]); - BlamkaG1SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - BlamkaG2SSSE3( - state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); - UndiagonalizeSSSE3( - state[8 * i + 2], state[8 * i + 3], - state[8 * i + 4], state[8 * i + 5], - state[8 * i + 6], 
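/*
 * FillBlock_'s two round loops apply the BlaMka permutation first to
 * contiguous groups of eight 16-byte registers (state[8*i + j]) and then to
 * groups strided by eight (state[8*j + i]), the two passes over the 8x8
 * register grid that Argon2's compression function G requires. BlamkaG1/G2
 * come from the included blamka_ssse3.h; the scalar primitive they
 * presumably vectorize is Argon2's multiply-hardened G, sketched here with a
 * hypothetical helper name:
 *
 *     ui64 FBlaMka(ui64 x, ui64 y) {
 *         ui64 m = (ui64)(ui32)x * (ui64)(ui32)y; // product of the low halves
 *         return x + y + 2 * m;                   // a + b + 2 * aL * bL
 *     }
 */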
state[8 * i + 7]); - } - - for (ui32 i = 0; i < 8; ++i) { - BlamkaG1SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG2SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - DiagonalizeSSSE3( - state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG1SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - BlamkaG2SSSE3( - state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); - UndiagonalizeSSSE3( - state[8 * 2 + i], state[8 * 3 + i], - state[8 * 4 + i], state[8 * 5 + i], - state[8 * 6 + i], state[8 * 7 + i]); - } - - for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { - state[i] = _mm_xor_si128(state[i], blockxy[i]); - _mm_storeu_si128((__m128i*)nextBlock->V + i, state[i]); - } - } - }; -} + +namespace NArgonish { + template <ui32 mcost, ui32 threads> + class TArgon2SSSE3 final: public TArgon2<EInstructionSet::SSSE3, mcost, threads> { + public: + TArgon2SSSE3(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) + : TArgon2<EInstructionSet::SSSE3, mcost, threads>(atype, tcost, key, keylen) + { + } + + protected: + virtual void XorBlock_(TBlock* dst, const TBlock* src) const override { + __m128i* mdst = (__m128i*)dst->V; + __m128i* msrc = (__m128i*)src->V; + + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) + XorValues(mdst + i, msrc + i, mdst + i); + } + + virtual void CopyBlock_(TBlock* dst, const TBlock* src) const override { + memcpy(dst->V, src->V, sizeof(ui64) * ARGON2_QWORDS_IN_BLOCK); + } + + virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, TBlock* nextBlock, bool withXor) const override { + __m128i blockxy[ARGON2_OWORDS_IN_BLOCK]; + __m128i state[ARGON2_OWORDS_IN_BLOCK]; + + memcpy(state, prevBlock, ARGON2_BLOCK_SIZE); + + if (withXor) { + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); + blockxy[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)nextBlock->V + i)); + } + } else { + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + blockxy[i] = state[i] = _mm_xor_si128( + state[i], _mm_loadu_si128((const __m128i*)refBlock->V + i)); + } + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaG1SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + BlamkaG2SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + DiagonalizeSSSE3( + state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], + state[8 * i + 6], state[8 * i + 7]); + BlamkaG1SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + BlamkaG2SSSE3( + state[8 * i + 0], state[8 * i + 1], state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], state[8 * i + 6], state[8 * i + 7]); + UndiagonalizeSSSE3( + state[8 * i + 2], state[8 * i + 3], + state[8 * i + 4], state[8 * i + 5], + state[8 * i + 
6], state[8 * i + 7]); + } + + for (ui32 i = 0; i < 8; ++i) { + BlamkaG1SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG2SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + DiagonalizeSSSE3( + state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG1SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + BlamkaG2SSSE3( + state[8 * 0 + i], state[8 * 1 + i], state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], state[8 * 6 + i], state[8 * 7 + i]); + UndiagonalizeSSSE3( + state[8 * 2 + i], state[8 * 3 + i], + state[8 * 4 + i], state[8 * 5 + i], + state[8 * 6 + i], state[8 * 7 + i]); + } + + for (ui32 i = 0; i < ARGON2_OWORDS_IN_BLOCK; ++i) { + state[i] = _mm_xor_si128(state[i], blockxy[i]); + _mm_storeu_si128((__m128i*)nextBlock->V + i, state[i]); + } + } + }; +} diff --git a/library/cpp/digest/argonish/internal/argon2/ya.make b/library/cpp/digest/argonish/internal/argon2/ya.make index 85459865ba..10002edb17 100644 --- a/library/cpp/digest/argonish/internal/argon2/ya.make +++ b/library/cpp/digest/argonish/internal/argon2/ya.make @@ -1,10 +1,10 @@ -LIBRARY() - -OWNER(e-sidorov) - -PEERDIR( +LIBRARY() + +OWNER(e-sidorov) + +PEERDIR( library/cpp/digest/argonish/internal/blamka library/cpp/digest/argonish/internal/blake2b -) - -END() +) + +END() diff --git a/library/cpp/digest/argonish/internal/blake2b/blake2b.h b/library/cpp/digest/argonish/internal/blake2b/blake2b.h index 3dcfc3fc48..4dc696c972 100644 --- a/library/cpp/digest/argonish/internal/blake2b/blake2b.h +++ b/library/cpp/digest/argonish/internal/blake2b/blake2b.h @@ -1,187 +1,187 @@ -#pragma once - -#include <util/generic/yexception.h> -#include <util/system/compiler.h> +#pragma once + +#include <util/generic/yexception.h> +#include <util/system/compiler.h> #include <library/cpp/digest/argonish/blake2b.h> - -namespace NArgonish { - const ui32 BLAKE2B_BLOCKBYTES = 128; - const ui32 BLAKE2B_BLOCKQWORDS = BLAKE2B_BLOCKBYTES / 8; - const ui32 BLAKE2B_OUTBYTES = 64; - const ui32 BLAKE2B_KEYBYTES = 64; - const ui32 BLAKE2B_SALTBYTES = 16; - const ui32 BLAKE2B_PERSONALBYTES = 16; - - template <NArgonish::EInstructionSet instructionSet> - class TBlake2B final: public IBlake2Base { - public: - virtual ~TBlake2B<instructionSet>() { - SecureZeroMemory_((void*)&State_, sizeof(State_)); - SecureZeroMemory_((void*)&Param_, sizeof(Param_)); - } - - EInstructionSet GetInstructionSet() { - return instructionSet; - } - - protected: - struct TBlake2BState { - ui64 H[8]; - ui64 T[2]; - ui64 F[2]; - ui64 Buf[BLAKE2B_BLOCKQWORDS]; - size_t BufLen; - size_t OutLen; - ui8 LastNode; - }; - + +namespace NArgonish { + const ui32 BLAKE2B_BLOCKBYTES = 128; + const ui32 BLAKE2B_BLOCKQWORDS = BLAKE2B_BLOCKBYTES / 8; + const ui32 BLAKE2B_OUTBYTES = 64; + const ui32 BLAKE2B_KEYBYTES = 64; + const ui32 BLAKE2B_SALTBYTES = 16; + const ui32 BLAKE2B_PERSONALBYTES = 16; + + template <NArgonish::EInstructionSet instructionSet> + class TBlake2B final: public IBlake2Base { + public: + virtual ~TBlake2B<instructionSet>() { + SecureZeroMemory_((void*)&State_, sizeof(State_)); + SecureZeroMemory_((void*)&Param_, sizeof(Param_)); + } + + EInstructionSet GetInstructionSet() { + 
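/*
 * TBlake2B is specialized once per EInstructionSet value: each blake2b_*.h
 * header in this directory supplies GetIV_/InitialXor_/Compress_ for one
 * instruction set, and GetInstructionSet() reports which backend an object
 * was built against. A minimal sketch of the compile-time dispatch (direct
 * construction shown for illustration; as the constructor comments note,
 * argument validation lives in the proxy classes):
 *
 *     TBlake2B<EInstructionSet::AVX2> h(BLAKE2B_OUTBYTES);
 *     // h.GetInstructionSet() yields EInstructionSet::AVX2
 */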
return instructionSet; + } + + protected: + struct TBlake2BState { + ui64 H[8]; + ui64 T[2]; + ui64 F[2]; + ui64 Buf[BLAKE2B_BLOCKQWORDS]; + size_t BufLen; + size_t OutLen; + ui8 LastNode; + }; + struct TBlake2BParam { - ui8 DigestLen; /* 1 */ - ui8 KeyLen; /* 2 */ - ui8 Fanout; /* 3 */ - ui8 Depth; /* 4 */ - ui32 LeafLength; /* 8 */ - ui32 NodeOffset; /* 12 */ - ui32 XofLength; /* 16 */ - ui8 NodeDepth; /* 17 */ - ui8 InnerLength; /* 18 */ - ui8 Reserved[14]; /* 32 */ - ui8 Salt[BLAKE2B_SALTBYTES]; /* 48 */ - ui8 Personal[BLAKE2B_PERSONALBYTES]; /* 64 */ + ui8 DigestLen; /* 1 */ + ui8 KeyLen; /* 2 */ + ui8 Fanout; /* 3 */ + ui8 Depth; /* 4 */ + ui32 LeafLength; /* 8 */ + ui32 NodeOffset; /* 12 */ + ui32 XofLength; /* 16 */ + ui8 NodeDepth; /* 17 */ + ui8 InnerLength; /* 18 */ + ui8 Reserved[14]; /* 32 */ + ui8 Salt[BLAKE2B_SALTBYTES]; /* 48 */ + ui8 Personal[BLAKE2B_PERSONALBYTES]; /* 64 */ } Y_PACKED; - - TBlake2BState State_; - TBlake2BParam Param_; - - protected: - void Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]); - void InitialXor_(ui8* h, const ui8* p); - void* GetIV_() const; - - static void SecureZeroMemory_(void* src, size_t len) { - static void* (*const volatile memsetv)(void*, int, size_t) = &memset; - memsetv(src, 0, len); - } - - void InitParam_() { - memset(&State_, 0, sizeof(State_)); - InitialXor_((ui8*)(State_.H), (const ui8*)(&Param_)); - State_.OutLen = Param_.DigestLen; - } - - void IncrementCounter_(const ui64 inc) { - State_.T[0] += inc; - State_.T[1] += (State_.T[0] < inc) ? 1 : 0; - } - - bool IsLastBlock_() { - return State_.F[0] != 0; - } - - void SetLastNode_() { - State_.F[1] = (ui64)-1; - } - - void SetLastBlock_() { - if (State_.LastNode) - SetLastNode_(); - - State_.F[0] = (ui64)-1; - } - - public: - TBlake2B(size_t outlen) { - /* - * Note that outlen check was moved to proxy class - */ - - Param_.DigestLen = (ui8)outlen; - Param_.KeyLen = 0; - Param_.Fanout = 1; - Param_.Depth = 1; - Param_.LeafLength = 0; - Param_.NodeOffset = 0; - Param_.XofLength = 0; - Param_.NodeDepth = 0; - Param_.InnerLength = 0; - - memset(Param_.Reserved, 0, sizeof(Param_.Reserved)); - memset(Param_.Salt, 0, sizeof(Param_.Salt)); - memset(Param_.Personal, 0, sizeof(Param_.Personal)); - - InitParam_(); - } - - TBlake2B(size_t outlen, const void* key, size_t keylen) { - /** - * Note that key and outlen checks were moved to proxy classes - */ - Param_.DigestLen = (ui8)outlen; - Param_.KeyLen = (ui8)keylen; - Param_.Fanout = 1; - Param_.Depth = 1; - - Param_.LeafLength = 0; - Param_.NodeOffset = 0; - Param_.XofLength = 0; - Param_.NodeDepth = 0; - Param_.InnerLength = 0; - - memset(Param_.Reserved, 0, sizeof(Param_.Reserved)); - memset(Param_.Salt, 0, sizeof(Param_.Salt)); - memset(Param_.Personal, 0, sizeof(Param_.Personal)); - - InitParam_(); - ui8 block[BLAKE2B_BLOCKBYTES] = {0}; - memcpy(block, key, keylen); - Update(block, BLAKE2B_BLOCKBYTES); - SecureZeroMemory_(block, BLAKE2B_BLOCKBYTES); - } - - void Update(ui32 in) override { - Update((const void*)&in, sizeof(in)); - } - - void Update(const void* pin, size_t inlen) override { - const ui8* in = (ui8*)pin; - if (inlen > 0) { - size_t left = State_.BufLen; - size_t fill = BLAKE2B_BLOCKBYTES - left; - if (inlen > fill) { - State_.BufLen = 0; - memcpy((ui8*)State_.Buf + left, in, fill); /* Fill buffer */ - IncrementCounter_(BLAKE2B_BLOCKBYTES); - Compress_(State_.Buf); /* Compress */ - in += fill; - inlen -= fill; - while (inlen > BLAKE2B_BLOCKBYTES) { - /* to fix ubsan's unaligned report */ - ui64 tmpbuf[BLAKE2B_BLOCKQWORDS]; 
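/*
 * This tmpbuf staging is the fix the ubsan comment refers to: `in` advances
 * by caller-controlled byte counts, so it need not be 8-byte aligned, and
 * compressing through a ui64* cast would perform unaligned reads. Copying
 * through memcpy is the portable idiom; a minimal sketch, with
 * LoadUnaligned64 being a hypothetical helper rather than part of this file:
 *
 *     static ui64 LoadUnaligned64(const ui8* p) {
 *         ui64 v;
 *         memcpy(&v, p, sizeof(v)); // typically folds to one load on x86
 *         return v;
 *     }
 */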
- memcpy(tmpbuf, in, BLAKE2B_BLOCKBYTES); - - IncrementCounter_(BLAKE2B_BLOCKBYTES); - Compress_(tmpbuf); - in += BLAKE2B_BLOCKBYTES; - inlen -= BLAKE2B_BLOCKBYTES; - } - } - memcpy((ui8*)State_.Buf + State_.BufLen, in, inlen); - State_.BufLen += inlen; - } - } - - void Final(void* out, size_t outlen) override { - if (out == nullptr || outlen < State_.OutLen) - ythrow yexception() << "out is null or outlen is too long"; - - if (IsLastBlock_()) - ythrow yexception() << "Final can't be called several times"; - - IncrementCounter_(State_.BufLen); - SetLastBlock_(); - memset((ui8*)State_.Buf + State_.BufLen, 0, BLAKE2B_BLOCKBYTES - State_.BufLen); - Compress_(State_.Buf); - memcpy(out, (void*)&State_.H[0], outlen); - } - }; -} + + TBlake2BState State_; + TBlake2BParam Param_; + + protected: + void Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]); + void InitialXor_(ui8* h, const ui8* p); + void* GetIV_() const; + + static void SecureZeroMemory_(void* src, size_t len) { + static void* (*const volatile memsetv)(void*, int, size_t) = &memset; + memsetv(src, 0, len); + } + + void InitParam_() { + memset(&State_, 0, sizeof(State_)); + InitialXor_((ui8*)(State_.H), (const ui8*)(&Param_)); + State_.OutLen = Param_.DigestLen; + } + + void IncrementCounter_(const ui64 inc) { + State_.T[0] += inc; + State_.T[1] += (State_.T[0] < inc) ? 1 : 0; + } + + bool IsLastBlock_() { + return State_.F[0] != 0; + } + + void SetLastNode_() { + State_.F[1] = (ui64)-1; + } + + void SetLastBlock_() { + if (State_.LastNode) + SetLastNode_(); + + State_.F[0] = (ui64)-1; + } + + public: + TBlake2B(size_t outlen) { + /* + * Note that outlen check was moved to proxy class + */ + + Param_.DigestLen = (ui8)outlen; + Param_.KeyLen = 0; + Param_.Fanout = 1; + Param_.Depth = 1; + Param_.LeafLength = 0; + Param_.NodeOffset = 0; + Param_.XofLength = 0; + Param_.NodeDepth = 0; + Param_.InnerLength = 0; + + memset(Param_.Reserved, 0, sizeof(Param_.Reserved)); + memset(Param_.Salt, 0, sizeof(Param_.Salt)); + memset(Param_.Personal, 0, sizeof(Param_.Personal)); + + InitParam_(); + } + + TBlake2B(size_t outlen, const void* key, size_t keylen) { + /** + * Note that key and outlen checks were moved to proxy classes + */ + Param_.DigestLen = (ui8)outlen; + Param_.KeyLen = (ui8)keylen; + Param_.Fanout = 1; + Param_.Depth = 1; + + Param_.LeafLength = 0; + Param_.NodeOffset = 0; + Param_.XofLength = 0; + Param_.NodeDepth = 0; + Param_.InnerLength = 0; + + memset(Param_.Reserved, 0, sizeof(Param_.Reserved)); + memset(Param_.Salt, 0, sizeof(Param_.Salt)); + memset(Param_.Personal, 0, sizeof(Param_.Personal)); + + InitParam_(); + ui8 block[BLAKE2B_BLOCKBYTES] = {0}; + memcpy(block, key, keylen); + Update(block, BLAKE2B_BLOCKBYTES); + SecureZeroMemory_(block, BLAKE2B_BLOCKBYTES); + } + + void Update(ui32 in) override { + Update((const void*)&in, sizeof(in)); + } + + void Update(const void* pin, size_t inlen) override { + const ui8* in = (ui8*)pin; + if (inlen > 0) { + size_t left = State_.BufLen; + size_t fill = BLAKE2B_BLOCKBYTES - left; + if (inlen > fill) { + State_.BufLen = 0; + memcpy((ui8*)State_.Buf + left, in, fill); /* Fill buffer */ + IncrementCounter_(BLAKE2B_BLOCKBYTES); + Compress_(State_.Buf); /* Compress */ + in += fill; + inlen -= fill; + while (inlen > BLAKE2B_BLOCKBYTES) { + /* to fix ubsan's unaligned report */ + ui64 tmpbuf[BLAKE2B_BLOCKQWORDS]; + memcpy(tmpbuf, in, BLAKE2B_BLOCKBYTES); + + IncrementCounter_(BLAKE2B_BLOCKBYTES); + Compress_(tmpbuf); + in += BLAKE2B_BLOCKBYTES; + inlen -= BLAKE2B_BLOCKBYTES; + } + } + 
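/*
 * Both size checks above use strict `>`, so Update never compresses the most
 * recent full block; it stays in State_.Buf until Final() compresses it with
 * the finalization flag set (SetLastBlock_), as BLAKE2b requires for the last
 * block. Typical streaming use, as a sketch (direct construction shown; the
 * library's public entry points are its proxy classes):
 *
 *     TBlake2B<EInstructionSet::REF> h(BLAKE2B_OUTBYTES);
 *     h.Update(data, len);            // any number of calls, any chunk sizes
 *     h.Final(out, BLAKE2B_OUTBYTES); // throws if invoked a second time
 */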
memcpy((ui8*)State_.Buf + State_.BufLen, in, inlen); + State_.BufLen += inlen; + } + } + + void Final(void* out, size_t outlen) override { + if (out == nullptr || outlen < State_.OutLen) + ythrow yexception() << "out is null or outlen is too long"; + + if (IsLastBlock_()) + ythrow yexception() << "Final can't be called several times"; + + IncrementCounter_(State_.BufLen); + SetLastBlock_(); + memset((ui8*)State_.Buf + State_.BufLen, 0, BLAKE2B_BLOCKBYTES - State_.BufLen); + Compress_(State_.Buf); + memcpy(out, (void*)&State_.H[0], outlen); + } + }; +} diff --git a/library/cpp/digest/argonish/internal/blake2b/blake2b_avx2.h b/library/cpp/digest/argonish/internal/blake2b/blake2b_avx2.h index 359ca90ebb..76eec8cd5a 100644 --- a/library/cpp/digest/argonish/internal/blake2b/blake2b_avx2.h +++ b/library/cpp/digest/argonish/internal/blake2b/blake2b_avx2.h @@ -1,104 +1,104 @@ -#pragma once - -#include <immintrin.h> -#include "blake2b.h" +#pragma once + +#include <immintrin.h> +#include "blake2b.h" #include <library/cpp/digest/argonish/internal/rotations/rotations_avx2.h> - -namespace NArgonish { - template <> - void* TBlake2B<EInstructionSet::AVX2>::GetIV_() const { - static const __m256i Iv[2] = { - _mm256_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL, 0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), - _mm256_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL, 0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL)}; - return (void*)Iv; - } - - template <> - void TBlake2B<EInstructionSet::AVX2>::InitialXor_(ui8* h, const ui8* p) { - __m256i* iv = (__m256i*)GetIV_(); - __m256i* m_res = (__m256i*)h; - const __m256i* m_second = (__m256i*)p; - _mm256_storeu_si256(m_res, _mm256_xor_si256(iv[0], _mm256_loadu_si256(m_second))); - _mm256_storeu_si256(m_res + 1, _mm256_xor_si256(iv[1], _mm256_loadu_si256(m_second + 1))); - } - - /* - * a = v0, v1, v2, v3 - * b = v4, v5, v6, v7 - * c = v8, v9, v10, v11 - * d = v12, v13, v14, v15 - */ - static inline void G1AVX2(ui32 r, __m256i& a, __m256i& b, __m256i& c, __m256i& d, const ui64* blk, const __m128i vindex[12][4]) { - a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][0], 8))); - d = Rotr32(_mm256_xor_si256(a, d)); - c = _mm256_add_epi64(c, d); - b = Rotr24(_mm256_xor_si256(b, c)); - - a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][1], 8))); - d = Rotr16(_mm256_xor_si256(a, d)); - c = _mm256_add_epi64(c, d); - b = Rotr63(_mm256_xor_si256(b, c)); - } - - static inline void G2AVX2(ui32 r, __m256i& a, __m256i& b, __m256i& c, __m256i& d, const ui64* blk, const __m128i vindex[12][4]) { - a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][2], 8))); - d = Rotr32(_mm256_xor_si256(a, d)); - c = _mm256_add_epi64(c, d); - b = Rotr24(_mm256_xor_si256(b, c)); - - a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][3], 8))); - d = Rotr16(_mm256_xor_si256(a, d)); - c = _mm256_add_epi64(c, d); - b = Rotr63(_mm256_xor_si256(b, c)); - } - - static inline void Diagonalize(__m256i& b, __m256i& c, __m256i& d) { - b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(0, 3, 2, 1)); - c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); - d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(2, 1, 0, 3)); - } - - static inline void Undiagonalize(__m256i& b, __m256i& c, __m256i& d) { - b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(2, 1, 0, 3)); - c = _mm256_permute4x64_epi64(c, 
_MM_SHUFFLE(1, 0, 3, 2)); - d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(0, 3, 2, 1)); - } - - template <> - void TBlake2B<EInstructionSet::AVX2>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { - static const __m128i VIndex[12][4] = { - {_mm_set_epi32(6, 4, 2, 0), _mm_set_epi32(7, 5, 3, 1), _mm_set_epi32(14, 12, 10, 8), _mm_set_epi32(15, 13, 11, 9)}, - {_mm_set_epi32(13, 9, 4, 14), _mm_set_epi32(6, 15, 8, 10), _mm_set_epi32(5, 11, 0, 1), _mm_set_epi32(3, 7, 2, 12)}, - {_mm_set_epi32(15, 5, 12, 11), _mm_set_epi32(13, 2, 0, 8), _mm_set_epi32(9, 7, 3, 10), _mm_set_epi32(4, 1, 6, 14)}, - {_mm_set_epi32(11, 13, 3, 7), _mm_set_epi32(14, 12, 1, 9), _mm_set_epi32(15, 4, 5, 2), _mm_set_epi32(8, 0, 10, 6)}, - {_mm_set_epi32(10, 2, 5, 9), _mm_set_epi32(15, 4, 7, 0), _mm_set_epi32(3, 6, 11, 14), _mm_set_epi32(13, 8, 12, 1)}, - {_mm_set_epi32(8, 0, 6, 2), _mm_set_epi32(3, 11, 10, 12), _mm_set_epi32(1, 15, 7, 4), _mm_set_epi32(9, 14, 5, 13)}, - {_mm_set_epi32(4, 14, 1, 12), _mm_set_epi32(10, 13, 15, 5), _mm_set_epi32(8, 9, 6, 0), _mm_set_epi32(11, 2, 3, 7)}, - {_mm_set_epi32(3, 12, 7, 13), _mm_set_epi32(9, 1, 14, 11), _mm_set_epi32(2, 8, 15, 5), _mm_set_epi32(10, 6, 4, 0)}, - {_mm_set_epi32(0, 11, 14, 6), _mm_set_epi32(8, 3, 9, 15), _mm_set_epi32(10, 1, 13, 12), _mm_set_epi32(5, 4, 7, 2)}, - {_mm_set_epi32(1, 7, 8, 10), _mm_set_epi32(5, 6, 4, 2), _mm_set_epi32(13, 3, 9, 15), _mm_set_epi32(0, 12, 14, 11)}, - {_mm_set_epi32(6, 4, 2, 0), _mm_set_epi32(7, 5, 3, 1), _mm_set_epi32(14, 12, 10, 8), _mm_set_epi32(15, 13, 11, 9)}, - {_mm_set_epi32(13, 9, 4, 14), _mm_set_epi32(6, 15, 8, 10), _mm_set_epi32(5, 11, 0, 1), _mm_set_epi32(3, 7, 2, 12)}, - }; - - __m256i* iv = (__m256i*)GetIV_(); - __m256i a = _mm256_loadu_si256((__m256i*)&State_.H[0]); - __m256i b = _mm256_loadu_si256((__m256i*)&State_.H[4]); - __m256i c = iv[0]; - __m256i d = _mm256_xor_si256(iv[1], _mm256_loadu_si256((__m256i*)&State_.T[0])); - - for (ui32 r = 0; r < 12; ++r) { - G1AVX2(r, a, b, c, d, block, VIndex); - Diagonalize(b, c, d); - G2AVX2(r, a, b, c, d, block, VIndex); - Undiagonalize(b, c, d); - } - - _mm256_storeu_si256((__m256i*)State_.H, _mm256_xor_si256( - _mm256_loadu_si256((__m256i*)State_.H), - _mm256_xor_si256(a, c))); - _mm256_storeu_si256(((__m256i*)State_.H) + 1, _mm256_xor_si256( - _mm256_loadu_si256(((__m256i*)State_.H) + 1), - _mm256_xor_si256(b, d))); - } -} + +namespace NArgonish { + template <> + void* TBlake2B<EInstructionSet::AVX2>::GetIV_() const { + static const __m256i Iv[2] = { + _mm256_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL, 0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), + _mm256_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL, 0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL)}; + return (void*)Iv; + } + + template <> + void TBlake2B<EInstructionSet::AVX2>::InitialXor_(ui8* h, const ui8* p) { + __m256i* iv = (__m256i*)GetIV_(); + __m256i* m_res = (__m256i*)h; + const __m256i* m_second = (__m256i*)p; + _mm256_storeu_si256(m_res, _mm256_xor_si256(iv[0], _mm256_loadu_si256(m_second))); + _mm256_storeu_si256(m_res + 1, _mm256_xor_si256(iv[1], _mm256_loadu_si256(m_second + 1))); + } + + /* + * a = v0, v1, v2, v3 + * b = v4, v5, v6, v7 + * c = v8, v9, v10, v11 + * d = v12, v13, v14, v15 + */ + static inline void G1AVX2(ui32 r, __m256i& a, __m256i& b, __m256i& c, __m256i& d, const ui64* blk, const __m128i vindex[12][4]) { + a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][0], 8))); + d = Rotr32(_mm256_xor_si256(a, d)); + c = _mm256_add_epi64(c, 
d); + b = Rotr24(_mm256_xor_si256(b, c)); + + a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][1], 8))); + d = Rotr16(_mm256_xor_si256(a, d)); + c = _mm256_add_epi64(c, d); + b = Rotr63(_mm256_xor_si256(b, c)); + } + + static inline void G2AVX2(ui32 r, __m256i& a, __m256i& b, __m256i& c, __m256i& d, const ui64* blk, const __m128i vindex[12][4]) { + a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][2], 8))); + d = Rotr32(_mm256_xor_si256(a, d)); + c = _mm256_add_epi64(c, d); + b = Rotr24(_mm256_xor_si256(b, c)); + + a = _mm256_add_epi64(a, _mm256_add_epi64(b, _mm256_i32gather_epi64((const long long int*)blk, vindex[r][3], 8))); + d = Rotr16(_mm256_xor_si256(a, d)); + c = _mm256_add_epi64(c, d); + b = Rotr63(_mm256_xor_si256(b, c)); + } + + static inline void Diagonalize(__m256i& b, __m256i& c, __m256i& d) { + b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(0, 3, 2, 1)); + c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); + d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(2, 1, 0, 3)); + } + + static inline void Undiagonalize(__m256i& b, __m256i& c, __m256i& d) { + b = _mm256_permute4x64_epi64(b, _MM_SHUFFLE(2, 1, 0, 3)); + c = _mm256_permute4x64_epi64(c, _MM_SHUFFLE(1, 0, 3, 2)); + d = _mm256_permute4x64_epi64(d, _MM_SHUFFLE(0, 3, 2, 1)); + } + + template <> + void TBlake2B<EInstructionSet::AVX2>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { + static const __m128i VIndex[12][4] = { + {_mm_set_epi32(6, 4, 2, 0), _mm_set_epi32(7, 5, 3, 1), _mm_set_epi32(14, 12, 10, 8), _mm_set_epi32(15, 13, 11, 9)}, + {_mm_set_epi32(13, 9, 4, 14), _mm_set_epi32(6, 15, 8, 10), _mm_set_epi32(5, 11, 0, 1), _mm_set_epi32(3, 7, 2, 12)}, + {_mm_set_epi32(15, 5, 12, 11), _mm_set_epi32(13, 2, 0, 8), _mm_set_epi32(9, 7, 3, 10), _mm_set_epi32(4, 1, 6, 14)}, + {_mm_set_epi32(11, 13, 3, 7), _mm_set_epi32(14, 12, 1, 9), _mm_set_epi32(15, 4, 5, 2), _mm_set_epi32(8, 0, 10, 6)}, + {_mm_set_epi32(10, 2, 5, 9), _mm_set_epi32(15, 4, 7, 0), _mm_set_epi32(3, 6, 11, 14), _mm_set_epi32(13, 8, 12, 1)}, + {_mm_set_epi32(8, 0, 6, 2), _mm_set_epi32(3, 11, 10, 12), _mm_set_epi32(1, 15, 7, 4), _mm_set_epi32(9, 14, 5, 13)}, + {_mm_set_epi32(4, 14, 1, 12), _mm_set_epi32(10, 13, 15, 5), _mm_set_epi32(8, 9, 6, 0), _mm_set_epi32(11, 2, 3, 7)}, + {_mm_set_epi32(3, 12, 7, 13), _mm_set_epi32(9, 1, 14, 11), _mm_set_epi32(2, 8, 15, 5), _mm_set_epi32(10, 6, 4, 0)}, + {_mm_set_epi32(0, 11, 14, 6), _mm_set_epi32(8, 3, 9, 15), _mm_set_epi32(10, 1, 13, 12), _mm_set_epi32(5, 4, 7, 2)}, + {_mm_set_epi32(1, 7, 8, 10), _mm_set_epi32(5, 6, 4, 2), _mm_set_epi32(13, 3, 9, 15), _mm_set_epi32(0, 12, 14, 11)}, + {_mm_set_epi32(6, 4, 2, 0), _mm_set_epi32(7, 5, 3, 1), _mm_set_epi32(14, 12, 10, 8), _mm_set_epi32(15, 13, 11, 9)}, + {_mm_set_epi32(13, 9, 4, 14), _mm_set_epi32(6, 15, 8, 10), _mm_set_epi32(5, 11, 0, 1), _mm_set_epi32(3, 7, 2, 12)}, + }; + + __m256i* iv = (__m256i*)GetIV_(); + __m256i a = _mm256_loadu_si256((__m256i*)&State_.H[0]); + __m256i b = _mm256_loadu_si256((__m256i*)&State_.H[4]); + __m256i c = iv[0]; + __m256i d = _mm256_xor_si256(iv[1], _mm256_loadu_si256((__m256i*)&State_.T[0])); + + for (ui32 r = 0; r < 12; ++r) { + G1AVX2(r, a, b, c, d, block, VIndex); + Diagonalize(b, c, d); + G2AVX2(r, a, b, c, d, block, VIndex); + Undiagonalize(b, c, d); + } + + _mm256_storeu_si256((__m256i*)State_.H, _mm256_xor_si256( + _mm256_loadu_si256((__m256i*)State_.H), + _mm256_xor_si256(a, c))); + _mm256_storeu_si256(((__m256i*)State_.H) + 1, 
_mm256_xor_si256( + _mm256_loadu_si256(((__m256i*)State_.H) + 1), + _mm256_xor_si256(b, d))); + } +} diff --git a/library/cpp/digest/argonish/internal/blake2b/blake2b_ref.h b/library/cpp/digest/argonish/internal/blake2b/blake2b_ref.h index ef98ed8fc8..1a2306f4a0 100644 --- a/library/cpp/digest/argonish/internal/blake2b/blake2b_ref.h +++ b/library/cpp/digest/argonish/internal/blake2b/blake2b_ref.h @@ -1,83 +1,83 @@ -#pragma once - -#include "blake2b.h" +#pragma once + +#include "blake2b.h" #include <library/cpp/digest/argonish/internal/rotations/rotations_ref.h> - -namespace NArgonish { - static const ui8 Sigma[12][16] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, - {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, - {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, - {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, - {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, - {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, - {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}}; - - static const ui64 Iv[8] = { - 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, - 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, - 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, - 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL}; - - static inline void GRef(ui64 r, ui64 i, ui64& a, ui64& b, ui64& c, ui64& d, const ui64* m) { - a = a + b + m[Sigma[r][2 * i + 0]]; - d = Rotr(d ^ a, 32); - c = c + d; - b = Rotr(b ^ c, 24); - a = a + b + m[Sigma[r][2 * i + 1]]; - d = Rotr(d ^ a, 16); - c = c + d; - b = Rotr(b ^ c, 63); - } - - static inline void Round(ui64 r, ui64* v, const ui64* m) { - GRef(r, 0, v[0], v[4], v[8], v[12], m); - GRef(r, 1, v[1], v[5], v[9], v[13], m); - GRef(r, 2, v[2], v[6], v[10], v[14], m); - GRef(r, 3, v[3], v[7], v[11], v[15], m); - GRef(r, 4, v[0], v[5], v[10], v[15], m); - GRef(r, 5, v[1], v[6], v[11], v[12], m); - GRef(r, 6, v[2], v[7], v[8], v[13], m); - GRef(r, 7, v[3], v[4], v[9], v[14], m); - } - - template <> - void* TBlake2B<EInstructionSet::REF>::GetIV_() const { - return nullptr; - } - - template <> - void TBlake2B<EInstructionSet::REF>::InitialXor_(ui8* h, const ui8* p) { - for (size_t i = 0; i < 8; ++i) - ((ui64*)h)[i] = Iv[i] ^ ((ui64*)p)[i]; - } - - template <> - void TBlake2B<EInstructionSet::REF>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { - ui64 v[16]; - for (size_t i = 0; i < 8; ++i) { - v[i] = State_.H[i]; - } - - v[8] = Iv[0]; - v[9] = Iv[1]; - v[10] = Iv[2]; - v[11] = Iv[3]; - v[12] = Iv[4] ^ State_.T[0]; - v[13] = Iv[5] ^ State_.T[1]; - v[14] = Iv[6] ^ State_.F[0]; - v[15] = Iv[7] ^ State_.F[1]; - - for (ui64 r = 0; r < 12; ++r) - Round(r, v, block); - - for (size_t i = 0; i < 8; ++i) { - State_.H[i] = State_.H[i] ^ v[i] ^ v[i + 8]; - } - } -} + +namespace NArgonish { + static const ui8 Sigma[12][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 
8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}}; + + static const ui64 Iv[8] = { + 0x6a09e667f3bcc908ULL, 0xbb67ae8584caa73bULL, + 0x3c6ef372fe94f82bULL, 0xa54ff53a5f1d36f1ULL, + 0x510e527fade682d1ULL, 0x9b05688c2b3e6c1fULL, + 0x1f83d9abfb41bd6bULL, 0x5be0cd19137e2179ULL}; + + static inline void GRef(ui64 r, ui64 i, ui64& a, ui64& b, ui64& c, ui64& d, const ui64* m) { + a = a + b + m[Sigma[r][2 * i + 0]]; + d = Rotr(d ^ a, 32); + c = c + d; + b = Rotr(b ^ c, 24); + a = a + b + m[Sigma[r][2 * i + 1]]; + d = Rotr(d ^ a, 16); + c = c + d; + b = Rotr(b ^ c, 63); + } + + static inline void Round(ui64 r, ui64* v, const ui64* m) { + GRef(r, 0, v[0], v[4], v[8], v[12], m); + GRef(r, 1, v[1], v[5], v[9], v[13], m); + GRef(r, 2, v[2], v[6], v[10], v[14], m); + GRef(r, 3, v[3], v[7], v[11], v[15], m); + GRef(r, 4, v[0], v[5], v[10], v[15], m); + GRef(r, 5, v[1], v[6], v[11], v[12], m); + GRef(r, 6, v[2], v[7], v[8], v[13], m); + GRef(r, 7, v[3], v[4], v[9], v[14], m); + } + + template <> + void* TBlake2B<EInstructionSet::REF>::GetIV_() const { + return nullptr; + } + + template <> + void TBlake2B<EInstructionSet::REF>::InitialXor_(ui8* h, const ui8* p) { + for (size_t i = 0; i < 8; ++i) + ((ui64*)h)[i] = Iv[i] ^ ((ui64*)p)[i]; + } + + template <> + void TBlake2B<EInstructionSet::REF>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { + ui64 v[16]; + for (size_t i = 0; i < 8; ++i) { + v[i] = State_.H[i]; + } + + v[8] = Iv[0]; + v[9] = Iv[1]; + v[10] = Iv[2]; + v[11] = Iv[3]; + v[12] = Iv[4] ^ State_.T[0]; + v[13] = Iv[5] ^ State_.T[1]; + v[14] = Iv[6] ^ State_.F[0]; + v[15] = Iv[7] ^ State_.F[1]; + + for (ui64 r = 0; r < 12; ++r) + Round(r, v, block); + + for (size_t i = 0; i < 8; ++i) { + State_.H[i] = State_.H[i] ^ v[i] ^ v[i + 8]; + } + } +} diff --git a/library/cpp/digest/argonish/internal/blake2b/blake2b_sse2.h b/library/cpp/digest/argonish/internal/blake2b/blake2b_sse2.h index e85a78044c..0b4f8f85cc 100644 --- a/library/cpp/digest/argonish/internal/blake2b/blake2b_sse2.h +++ b/library/cpp/digest/argonish/internal/blake2b/blake2b_sse2.h @@ -1,163 +1,163 @@ -#pragma once - -#include <emmintrin.h> -#include "blake2b.h" +#pragma once + +#include <emmintrin.h> +#include "blake2b.h" #include <library/cpp/digest/argonish/internal/rotations/rotations_sse2.h> - -namespace NArgonish { - template <> - void* TBlake2B<EInstructionSet::SSE2>::GetIV_() const { - static const __m128i Iv[4] = { - _mm_set_epi64x(0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), - _mm_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL), - _mm_set_epi64x(0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL), - _mm_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL)}; - - return (void*)Iv; - } - - static const ui32 Sigma[12][16] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, - {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, - {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, - {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, - {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, - {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, - {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 
11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}}; - - static inline void G1( - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, - __m128i& b0, __m128i& b1) { - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); - - row4l = _mm_xor_si128(row4l, row1l); - row4h = _mm_xor_si128(row4h, row1h); - - row4l = Rotr32(row4l); - row4h = Rotr32(row4h); - - row3l = _mm_add_epi64(row3l, row4l); - row3h = _mm_add_epi64(row3h, row4h); - - row2l = _mm_xor_si128(row2l, row3l); - row2h = _mm_xor_si128(row2h, row3h); - - row2l = Rotr24(row2l); - row2h = Rotr24(row2h); - } - - static inline void G2( - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, - __m128i& b0, __m128i& b1) { - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); - - row4l = _mm_xor_si128(row4l, row1l); - row4h = _mm_xor_si128(row4h, row1h); - - row4l = Rotr16(row4l); - row4h = Rotr16(row4h); - - row3l = _mm_add_epi64(row3l, row4l); - row3h = _mm_add_epi64(row3h, row4h); - - row2l = _mm_xor_si128(row2l, row3l); - row2h = _mm_xor_si128(row2h, row3h); - - row2l = Rotr63(row2l); - row2h = Rotr63(row2h); - } - - static inline void Diagonalize( - __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i t0 = row4l; - __m128i t1 = row2l; - row4l = row3l; - row3l = row3h; - row3h = row4l; - row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); - row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); - row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); - row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)); - } - - static inline void Undiagonalize( - __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i t0 = row3l; - row3l = row3h; - row3h = t0; - t0 = row2l; - __m128i t1 = row4l; - row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); - row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); - row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); - row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)); - } - - static inline void Round(int r, const ui64* block_ptr, - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i b0, b1; - b0 = _mm_set_epi64x(block_ptr[Sigma[r][2]], block_ptr[Sigma[r][0]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][6]], block_ptr[Sigma[r][4]]); - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - b0 = _mm_set_epi64x(block_ptr[Sigma[r][3]], block_ptr[Sigma[r][1]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][7]], block_ptr[Sigma[r][5]]); - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - Diagonalize(row2l, row3l, row4l, row2h, row3h, row4h); - b0 = _mm_set_epi64x(block_ptr[Sigma[r][10]], block_ptr[Sigma[r][8]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][14]], block_ptr[Sigma[r][12]]); - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - b0 = _mm_set_epi64x(block_ptr[Sigma[r][11]], block_ptr[Sigma[r][9]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][15]], block_ptr[Sigma[r][13]]); - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - Undiagonalize(row2l, row3l, 
row4l, row2h, row3h, row4h); - } - - template <> - void TBlake2B<EInstructionSet::SSE2>::InitialXor_(ui8* h, const ui8* p) { - __m128i* m_res = (__m128i*)h; - const __m128i* m_p = (__m128i*)p; - __m128i* iv = (__m128i*)GetIV_(); - - _mm_storeu_si128(m_res + 0, _mm_xor_si128(iv[0], _mm_loadu_si128(m_p + 0))); - _mm_storeu_si128(m_res + 1, _mm_xor_si128(iv[1], _mm_loadu_si128(m_p + 1))); - _mm_storeu_si128(m_res + 2, _mm_xor_si128(iv[2], _mm_loadu_si128(m_p + 2))); - _mm_storeu_si128(m_res + 3, _mm_xor_si128(iv[3], _mm_loadu_si128(m_p + 3))); - } - - template <> - void TBlake2B<EInstructionSet::SSE2>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { - __m128i* iv = (__m128i*)GetIV_(); - __m128i row1l = _mm_loadu_si128((__m128i*)&State_.H[0]); - __m128i row1h = _mm_loadu_si128((__m128i*)&State_.H[2]); - __m128i row2l = _mm_loadu_si128((__m128i*)&State_.H[4]); - __m128i row2h = _mm_loadu_si128((__m128i*)&State_.H[6]); - __m128i row3l = iv[0]; - __m128i row3h = iv[1]; - __m128i row4l = _mm_xor_si128(iv[2], _mm_loadu_si128((__m128i*)&State_.T[0])); - __m128i row4h = _mm_xor_si128(iv[3], _mm_loadu_si128((__m128i*)&State_.F[0])); - - for (int r = 0; r < 12; r++) - Round(r, block, row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); - - _mm_storeu_si128((__m128i*)&State_.H[0], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[0]), _mm_xor_si128(row3l, row1l))); - _mm_storeu_si128((__m128i*)&State_.H[2], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[2]), _mm_xor_si128(row3h, row1h))); - _mm_storeu_si128((__m128i*)&State_.H[4], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[4]), _mm_xor_si128(row4l, row2l))); - _mm_storeu_si128((__m128i*)&State_.H[6], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[6]), _mm_xor_si128(row4h, row2h))); - } -} + +namespace NArgonish { + template <> + void* TBlake2B<EInstructionSet::SSE2>::GetIV_() const { + static const __m128i Iv[4] = { + _mm_set_epi64x(0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), + _mm_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL), + _mm_set_epi64x(0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL), + _mm_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL)}; + + return (void*)Iv; + } + + static const ui32 Sigma[12][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}}; + + static inline void G1( + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, + __m128i& b0, __m128i& b1) { + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); + + row4l = _mm_xor_si128(row4l, row1l); + row4h = _mm_xor_si128(row4h, row1h); + + row4l = Rotr32(row4l); + row4h = Rotr32(row4h); + + row3l = _mm_add_epi64(row3l, row4l); + row3h = _mm_add_epi64(row3h, row4h); + + row2l = _mm_xor_si128(row2l, row3l); + row2h = _mm_xor_si128(row2h, row3h); + + row2l = 
Rotr24(row2l); + row2h = Rotr24(row2h); + } + + static inline void G2( + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, + __m128i& b0, __m128i& b1) { + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); + + row4l = _mm_xor_si128(row4l, row1l); + row4h = _mm_xor_si128(row4h, row1h); + + row4l = Rotr16(row4l); + row4h = Rotr16(row4h); + + row3l = _mm_add_epi64(row3l, row4l); + row3h = _mm_add_epi64(row3h, row4h); + + row2l = _mm_xor_si128(row2l, row3l); + row2h = _mm_xor_si128(row2h, row3h); + + row2l = Rotr63(row2l); + row2h = Rotr63(row2h); + } + + static inline void Diagonalize( + __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i t0 = row4l; + __m128i t1 = row2l; + row4l = row3l; + row3l = row3h; + row3h = row4l; + row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); + row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); + row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); + row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)); + } + + static inline void Undiagonalize( + __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i t0 = row3l; + row3l = row3h; + row3h = t0; + t0 = row2l; + __m128i t1 = row4l; + row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); + row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); + row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); + row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)); + } + + static inline void Round(int r, const ui64* block_ptr, + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i b0, b1; + b0 = _mm_set_epi64x(block_ptr[Sigma[r][2]], block_ptr[Sigma[r][0]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][6]], block_ptr[Sigma[r][4]]); + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + b0 = _mm_set_epi64x(block_ptr[Sigma[r][3]], block_ptr[Sigma[r][1]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][7]], block_ptr[Sigma[r][5]]); + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + Diagonalize(row2l, row3l, row4l, row2h, row3h, row4h); + b0 = _mm_set_epi64x(block_ptr[Sigma[r][10]], block_ptr[Sigma[r][8]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][14]], block_ptr[Sigma[r][12]]); + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + b0 = _mm_set_epi64x(block_ptr[Sigma[r][11]], block_ptr[Sigma[r][9]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][15]], block_ptr[Sigma[r][13]]); + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + Undiagonalize(row2l, row3l, row4l, row2h, row3h, row4h); + } + + template <> + void TBlake2B<EInstructionSet::SSE2>::InitialXor_(ui8* h, const ui8* p) { + __m128i* m_res = (__m128i*)h; + const __m128i* m_p = (__m128i*)p; + __m128i* iv = (__m128i*)GetIV_(); + + _mm_storeu_si128(m_res + 0, _mm_xor_si128(iv[0], _mm_loadu_si128(m_p + 0))); + _mm_storeu_si128(m_res + 1, _mm_xor_si128(iv[1], _mm_loadu_si128(m_p + 1))); + _mm_storeu_si128(m_res + 2, _mm_xor_si128(iv[2], _mm_loadu_si128(m_p + 2))); + _mm_storeu_si128(m_res + 3, _mm_xor_si128(iv[3], _mm_loadu_si128(m_p + 3))); + } + + template <> + void TBlake2B<EInstructionSet::SSE2>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { + __m128i* iv = 
(__m128i*)GetIV_(); + __m128i row1l = _mm_loadu_si128((__m128i*)&State_.H[0]); + __m128i row1h = _mm_loadu_si128((__m128i*)&State_.H[2]); + __m128i row2l = _mm_loadu_si128((__m128i*)&State_.H[4]); + __m128i row2h = _mm_loadu_si128((__m128i*)&State_.H[6]); + __m128i row3l = iv[0]; + __m128i row3h = iv[1]; + __m128i row4l = _mm_xor_si128(iv[2], _mm_loadu_si128((__m128i*)&State_.T[0])); + __m128i row4h = _mm_xor_si128(iv[3], _mm_loadu_si128((__m128i*)&State_.F[0])); + + for (int r = 0; r < 12; r++) + Round(r, block, row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); + + _mm_storeu_si128((__m128i*)&State_.H[0], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[0]), _mm_xor_si128(row3l, row1l))); + _mm_storeu_si128((__m128i*)&State_.H[2], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[2]), _mm_xor_si128(row3h, row1h))); + _mm_storeu_si128((__m128i*)&State_.H[4], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[4]), _mm_xor_si128(row4l, row2l))); + _mm_storeu_si128((__m128i*)&State_.H[6], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[6]), _mm_xor_si128(row4h, row2h))); + } +} diff --git a/library/cpp/digest/argonish/internal/blake2b/blake2b_sse41.h b/library/cpp/digest/argonish/internal/blake2b/blake2b_sse41.h index 1a033bcceb..c1103db4c9 100644 --- a/library/cpp/digest/argonish/internal/blake2b/blake2b_sse41.h +++ b/library/cpp/digest/argonish/internal/blake2b/blake2b_sse41.h @@ -1,172 +1,172 @@ -#pragma once - -#include <smmintrin.h> -#include "blake2b.h" -#include "load_sse41.h" +#pragma once + +#include <smmintrin.h> +#include "blake2b.h" +#include "load_sse41.h" #include <library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h> - -namespace NArgonish { - template <> - void* TBlake2B<EInstructionSet::SSE41>::GetIV_() const { - static const __m128i Iv[4] = { - _mm_set_epi64x(0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), - _mm_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL), - _mm_set_epi64x(0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL), - _mm_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL)}; - return (void*)Iv; - } - - static inline void G1( - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, - __m128i& b0, __m128i& b1) { - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); - - row4l = _mm_xor_si128(row4l, row1l); - row4h = _mm_xor_si128(row4h, row1h); - - row4l = Rotr32(row4l); - row4h = Rotr32(row4h); - - row3l = _mm_add_epi64(row3l, row4l); - row3h = _mm_add_epi64(row3h, row4h); - - row2l = _mm_xor_si128(row2l, row3l); - row2h = _mm_xor_si128(row2h, row3h); - - row2l = Rotr24(row2l); - row2h = Rotr24(row2h); - } - - static inline void G2( - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, - __m128i& b0, __m128i& b1) { - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); - - row4l = _mm_xor_si128(row4l, row1l); - row4h = _mm_xor_si128(row4h, row1h); - - row4l = Rotr16(row4l); - row4h = Rotr16(row4h); - - row3l = _mm_add_epi64(row3l, row4l); - row3h = _mm_add_epi64(row3h, row4h); - - row2l = _mm_xor_si128(row2l, row3l); - row2h = _mm_xor_si128(row2h, row3h); - - row2l = Rotr63(row2l); - row2h = Rotr63(row2h); - } - - static inline void Diagonalize( - __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i t0 
= _mm_alignr_epi8(row2h, row2l, 8); - __m128i t1 = _mm_alignr_epi8(row2l, row2h, 8); - row2l = t0; - row2h = t1; - - t0 = row3l; - row3l = row3h; - row3h = t0; - - t0 = _mm_alignr_epi8(row4h, row4l, 8); - t1 = _mm_alignr_epi8(row4l, row4h, 8); - row4l = t1; - row4h = t0; - } - - static inline void Undiagonalize( - __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i t0 = _mm_alignr_epi8(row2l, row2h, 8); - __m128i t1 = _mm_alignr_epi8(row2h, row2l, 8); - row2l = t0; - row2h = t1; - - t0 = row3l; - row3l = row3h; - row3h = t0; - - t0 = _mm_alignr_epi8(row4l, row4h, 8); - t1 = _mm_alignr_epi8(row4h, row4l, 8); - row4l = t1; - row4h = t0; - } - -#define ROUND(r) \ - LOAD_MSG_##r##_1(b0, b1); \ - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - LOAD_MSG_##r##_2(b0, b1); \ - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - Diagonalize(row2l, row3l, row4l, row2h, row3h, row4h); \ - LOAD_MSG_##r##_3(b0, b1); \ - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - LOAD_MSG_##r##_4(b0, b1); \ - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ - Undiagonalize(row2l, row3l, row4l, row2h, row3h, row4h); - - template <> - void TBlake2B<EInstructionSet::SSE41>::InitialXor_(ui8* h, const ui8* p) { - __m128i* m_res = (__m128i*)h; - const __m128i* m_p = (__m128i*)p; - __m128i* iv = (__m128i*)GetIV_(); - - _mm_storeu_si128(m_res + 0, _mm_xor_si128(iv[0], _mm_loadu_si128(m_p + 0))); - _mm_storeu_si128(m_res + 1, _mm_xor_si128(iv[1], _mm_loadu_si128(m_p + 1))); - _mm_storeu_si128(m_res + 2, _mm_xor_si128(iv[2], _mm_loadu_si128(m_p + 2))); - _mm_storeu_si128(m_res + 3, _mm_xor_si128(iv[3], _mm_loadu_si128(m_p + 3))); - } - - template <> - void TBlake2B<EInstructionSet::SSE41>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { - const __m128i* block_ptr = (__m128i*)block; - __m128i* iv = (__m128i*)GetIV_(); - const __m128i m0 = _mm_loadu_si128(block_ptr + 0); - const __m128i m1 = _mm_loadu_si128(block_ptr + 1); - const __m128i m2 = _mm_loadu_si128(block_ptr + 2); - const __m128i m3 = _mm_loadu_si128(block_ptr + 3); - const __m128i m4 = _mm_loadu_si128(block_ptr + 4); - const __m128i m5 = _mm_loadu_si128(block_ptr + 5); - const __m128i m6 = _mm_loadu_si128(block_ptr + 6); - const __m128i m7 = _mm_loadu_si128(block_ptr + 7); - - __m128i row1l = _mm_loadu_si128((__m128i*)&State_.H[0]); - __m128i row1h = _mm_loadu_si128((__m128i*)&State_.H[2]); - __m128i row2l = _mm_loadu_si128((__m128i*)&State_.H[4]); - __m128i row2h = _mm_loadu_si128((__m128i*)&State_.H[6]); - __m128i row3l = iv[0]; - __m128i row3h = iv[1]; - __m128i row4l = _mm_xor_si128(iv[2], _mm_loadu_si128((__m128i*)&State_.T[0])); - __m128i row4h = _mm_xor_si128(iv[3], _mm_loadu_si128((__m128i*)&State_.F[0])); - __m128i b0, b1; - - ROUND(0); - ROUND(1); - ROUND(2); - ROUND(3); - ROUND(4); - ROUND(5); - ROUND(6); - ROUND(7); - ROUND(8); - ROUND(9); - ROUND(10); - ROUND(11); - - _mm_storeu_si128((__m128i*)&State_.H[0], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[0]), _mm_xor_si128(row3l, row1l))); - _mm_storeu_si128((__m128i*)&State_.H[2], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[2]), _mm_xor_si128(row3h, row1h))); - _mm_storeu_si128((__m128i*)&State_.H[4], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[4]), _mm_xor_si128(row4l, row2l))); - _mm_storeu_si128((__m128i*)&State_.H[6], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[6]), _mm_xor_si128(row4h, row2h))); - } - -#undef ROUND -} + 
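/*
 * Unlike the SSE2 and SSSE3 backends, the SSE4.1 Compress_ never indexes a
 * Sigma table at run time: ROUND(r) expands LOAD_MSG_r_1..LOAD_MSG_r_4 from
 * load_sse41.h, which assemble each round's message pairs from the preloaded
 * m0..m7 registers with shuffles, so the message permutation is fixed at
 * compile time. For round 0 the intent matches the SSE2 gathers; a sketch of
 * that intent, not the literal macro expansion:
 *
 *     b0 = _mm_unpacklo_epi64(m0, m1); // (msg[2], msg[0]), Sigma row 0
 *     b1 = _mm_unpacklo_epi64(m2, m3); // (msg[6], msg[4])
 */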
+namespace NArgonish { + template <> + void* TBlake2B<EInstructionSet::SSE41>::GetIV_() const { + static const __m128i Iv[4] = { + _mm_set_epi64x(0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), + _mm_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL), + _mm_set_epi64x(0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL), + _mm_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL)}; + return (void*)Iv; + } + + static inline void G1( + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, + __m128i& b0, __m128i& b1) { + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); + + row4l = _mm_xor_si128(row4l, row1l); + row4h = _mm_xor_si128(row4h, row1h); + + row4l = Rotr32(row4l); + row4h = Rotr32(row4h); + + row3l = _mm_add_epi64(row3l, row4l); + row3h = _mm_add_epi64(row3h, row4h); + + row2l = _mm_xor_si128(row2l, row3l); + row2h = _mm_xor_si128(row2h, row3h); + + row2l = Rotr24(row2l); + row2h = Rotr24(row2h); + } + + static inline void G2( + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, + __m128i& b0, __m128i& b1) { + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); + + row4l = _mm_xor_si128(row4l, row1l); + row4h = _mm_xor_si128(row4h, row1h); + + row4l = Rotr16(row4l); + row4h = Rotr16(row4h); + + row3l = _mm_add_epi64(row3l, row4l); + row3h = _mm_add_epi64(row3h, row4h); + + row2l = _mm_xor_si128(row2l, row3l); + row2h = _mm_xor_si128(row2h, row3h); + + row2l = Rotr63(row2l); + row2h = Rotr63(row2h); + } + + static inline void Diagonalize( + __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i t0 = _mm_alignr_epi8(row2h, row2l, 8); + __m128i t1 = _mm_alignr_epi8(row2l, row2h, 8); + row2l = t0; + row2h = t1; + + t0 = row3l; + row3l = row3h; + row3h = t0; + + t0 = _mm_alignr_epi8(row4h, row4l, 8); + t1 = _mm_alignr_epi8(row4l, row4h, 8); + row4l = t1; + row4h = t0; + } + + static inline void Undiagonalize( + __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i t0 = _mm_alignr_epi8(row2l, row2h, 8); + __m128i t1 = _mm_alignr_epi8(row2h, row2l, 8); + row2l = t0; + row2h = t1; + + t0 = row3l; + row3l = row3h; + row3h = t0; + + t0 = _mm_alignr_epi8(row4l, row4h, 8); + t1 = _mm_alignr_epi8(row4h, row4l, 8); + row4l = t1; + row4h = t0; + } + +#define ROUND(r) \ + LOAD_MSG_##r##_1(b0, b1); \ + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + LOAD_MSG_##r##_2(b0, b1); \ + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + Diagonalize(row2l, row3l, row4l, row2h, row3h, row4h); \ + LOAD_MSG_##r##_3(b0, b1); \ + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + LOAD_MSG_##r##_4(b0, b1); \ + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); \ + Undiagonalize(row2l, row3l, row4l, row2h, row3h, row4h); + + template <> + void TBlake2B<EInstructionSet::SSE41>::InitialXor_(ui8* h, const ui8* p) { + __m128i* m_res = (__m128i*)h; + const __m128i* m_p = (__m128i*)p; + __m128i* iv = (__m128i*)GetIV_(); + + _mm_storeu_si128(m_res + 0, _mm_xor_si128(iv[0], _mm_loadu_si128(m_p + 0))); + _mm_storeu_si128(m_res + 1, _mm_xor_si128(iv[1], _mm_loadu_si128(m_p + 1))); + _mm_storeu_si128(m_res + 2, _mm_xor_si128(iv[2], _mm_loadu_si128(m_p + 
2))); + _mm_storeu_si128(m_res + 3, _mm_xor_si128(iv[3], _mm_loadu_si128(m_p + 3))); + } + + template <> + void TBlake2B<EInstructionSet::SSE41>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { + const __m128i* block_ptr = (__m128i*)block; + __m128i* iv = (__m128i*)GetIV_(); + const __m128i m0 = _mm_loadu_si128(block_ptr + 0); + const __m128i m1 = _mm_loadu_si128(block_ptr + 1); + const __m128i m2 = _mm_loadu_si128(block_ptr + 2); + const __m128i m3 = _mm_loadu_si128(block_ptr + 3); + const __m128i m4 = _mm_loadu_si128(block_ptr + 4); + const __m128i m5 = _mm_loadu_si128(block_ptr + 5); + const __m128i m6 = _mm_loadu_si128(block_ptr + 6); + const __m128i m7 = _mm_loadu_si128(block_ptr + 7); + + __m128i row1l = _mm_loadu_si128((__m128i*)&State_.H[0]); + __m128i row1h = _mm_loadu_si128((__m128i*)&State_.H[2]); + __m128i row2l = _mm_loadu_si128((__m128i*)&State_.H[4]); + __m128i row2h = _mm_loadu_si128((__m128i*)&State_.H[6]); + __m128i row3l = iv[0]; + __m128i row3h = iv[1]; + __m128i row4l = _mm_xor_si128(iv[2], _mm_loadu_si128((__m128i*)&State_.T[0])); + __m128i row4h = _mm_xor_si128(iv[3], _mm_loadu_si128((__m128i*)&State_.F[0])); + __m128i b0, b1; + + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + ROUND(10); + ROUND(11); + + _mm_storeu_si128((__m128i*)&State_.H[0], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[0]), _mm_xor_si128(row3l, row1l))); + _mm_storeu_si128((__m128i*)&State_.H[2], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[2]), _mm_xor_si128(row3h, row1h))); + _mm_storeu_si128((__m128i*)&State_.H[4], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[4]), _mm_xor_si128(row4l, row2l))); + _mm_storeu_si128((__m128i*)&State_.H[6], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[6]), _mm_xor_si128(row4h, row2h))); + } + +#undef ROUND +} diff --git a/library/cpp/digest/argonish/internal/blake2b/blake2b_ssse3.h b/library/cpp/digest/argonish/internal/blake2b/blake2b_ssse3.h index 4cca5a5e7f..24bf8ea31a 100644 --- a/library/cpp/digest/argonish/internal/blake2b/blake2b_ssse3.h +++ b/library/cpp/digest/argonish/internal/blake2b/blake2b_ssse3.h @@ -1,171 +1,171 @@ -#pragma once - -#include <emmintrin.h> -#include <tmmintrin.h> -#include "blake2b.h" +#pragma once + +#include <emmintrin.h> +#include <tmmintrin.h> +#include "blake2b.h" #include <library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h> - -namespace NArgonish { - template <> - void* TBlake2B<EInstructionSet::SSSE3>::GetIV_() const { - static const __m128i Iv[4] = { - _mm_set_epi64x(0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), - _mm_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL), - _mm_set_epi64x(0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL), - _mm_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL)}; - return (void*)Iv; - } - - static const ui32 Sigma[12][16] = { - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, - {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, - {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, - {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, - {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, - {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, - {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, - {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, - {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, - {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, - {14, 10, 4, 8, 9, 
15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}}; - - static inline void G1( - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, - __m128i& b0, __m128i& b1) { - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); - - row4l = _mm_xor_si128(row4l, row1l); - row4h = _mm_xor_si128(row4h, row1h); - - row4l = Rotr32(row4l); - row4h = Rotr32(row4h); - - row3l = _mm_add_epi64(row3l, row4l); - row3h = _mm_add_epi64(row3h, row4h); - - row2l = _mm_xor_si128(row2l, row3l); - row2h = _mm_xor_si128(row2h, row3h); - - row2l = Rotr24(row2l); - row2h = Rotr24(row2h); - } - - static inline void G2( - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, - __m128i& b0, __m128i& b1) { - row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); - row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); - - row4l = _mm_xor_si128(row4l, row1l); - row4h = _mm_xor_si128(row4h, row1h); - - row4l = Rotr16(row4l); - row4h = Rotr16(row4h); - - row3l = _mm_add_epi64(row3l, row4l); - row3h = _mm_add_epi64(row3h, row4h); - - row2l = _mm_xor_si128(row2l, row3l); - row2h = _mm_xor_si128(row2h, row3h); - - row2l = Rotr63(row2l); - row2h = Rotr63(row2h); - } - - static inline void Diagonalize( - __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i t0 = _mm_alignr_epi8(row2h, row2l, 8); - __m128i t1 = _mm_alignr_epi8(row2l, row2h, 8); - row2l = t0; - row2h = t1; - - t0 = row3l; - row3l = row3h; - row3h = t0; - - t0 = _mm_alignr_epi8(row4h, row4l, 8); - t1 = _mm_alignr_epi8(row4l, row4h, 8); - row4l = t1; - row4h = t0; - } - - static inline void Undiagonalize( - __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i t0 = _mm_alignr_epi8(row2l, row2h, 8); - __m128i t1 = _mm_alignr_epi8(row2h, row2l, 8); - row2l = t0; - row2h = t1; - - t0 = row3l; - row3l = row3h; - row3h = t0; - - t0 = _mm_alignr_epi8(row4l, row4h, 8); - t1 = _mm_alignr_epi8(row4h, row4l, 8); - row4l = t1; - row4h = t0; - } - - static inline void Round(int r, const ui64* block_ptr, - __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, - __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h) { - __m128i b0, b1; - b0 = _mm_set_epi64x(block_ptr[Sigma[r][2]], block_ptr[Sigma[r][0]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][6]], block_ptr[Sigma[r][4]]); - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - b0 = _mm_set_epi64x(block_ptr[Sigma[r][3]], block_ptr[Sigma[r][1]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][7]], block_ptr[Sigma[r][5]]); - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - Diagonalize(row2l, row3l, row4l, row2h, row3h, row4h); - b0 = _mm_set_epi64x(block_ptr[Sigma[r][10]], block_ptr[Sigma[r][8]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][14]], block_ptr[Sigma[r][12]]); - G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - b0 = _mm_set_epi64x(block_ptr[Sigma[r][11]], block_ptr[Sigma[r][9]]); - b1 = _mm_set_epi64x(block_ptr[Sigma[r][15]], block_ptr[Sigma[r][13]]); - G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); - Undiagonalize(row2l, row3l, row4l, row2h, row3h, row4h); - } - - template <> - void TBlake2B<EInstructionSet::SSSE3>::InitialXor_(ui8* h, const ui8* p) { - __m128i* m_res = (__m128i*)h; - const __m128i* m_p = (__m128i*)p; - 
__m128i* iv = (__m128i*)GetIV_(); - - _mm_storeu_si128(m_res + 0, _mm_xor_si128(iv[0], _mm_loadu_si128(m_p + 0))); - _mm_storeu_si128(m_res + 1, _mm_xor_si128(iv[1], _mm_loadu_si128(m_p + 1))); - _mm_storeu_si128(m_res + 2, _mm_xor_si128(iv[2], _mm_loadu_si128(m_p + 2))); - _mm_storeu_si128(m_res + 3, _mm_xor_si128(iv[3], _mm_loadu_si128(m_p + 3))); - } - - template <> - void TBlake2B<EInstructionSet::SSSE3>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { - __m128i* iv = (__m128i*)GetIV_(); - __m128i row1l = _mm_loadu_si128((__m128i*)&State_.H[0]); - __m128i row1h = _mm_loadu_si128((__m128i*)&State_.H[2]); - __m128i row2l = _mm_loadu_si128((__m128i*)&State_.H[4]); - __m128i row2h = _mm_loadu_si128((__m128i*)&State_.H[6]); - __m128i row3l = iv[0]; - __m128i row3h = iv[1]; - __m128i row4l = _mm_xor_si128(iv[2], _mm_loadu_si128((__m128i*)&State_.T[0])); - __m128i row4h = _mm_xor_si128(iv[3], _mm_loadu_si128((__m128i*)&State_.F[0])); - - for (int r = 0; r < 12; ++r) - Round(r, block, row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); - - _mm_storeu_si128((__m128i*)&State_.H[0], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[0]), _mm_xor_si128(row3l, row1l))); - _mm_storeu_si128((__m128i*)&State_.H[2], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[2]), _mm_xor_si128(row3h, row1h))); - _mm_storeu_si128((__m128i*)&State_.H[4], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[4]), _mm_xor_si128(row4l, row2l))); - _mm_storeu_si128((__m128i*)&State_.H[6], - _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[6]), _mm_xor_si128(row4h, row2h))); - } -} + +namespace NArgonish { + template <> + void* TBlake2B<EInstructionSet::SSSE3>::GetIV_() const { + static const __m128i Iv[4] = { + _mm_set_epi64x(0xbb67ae8584caa73bULL, 0x6a09e667f3bcc908ULL), + _mm_set_epi64x(0xa54ff53a5f1d36f1ULL, 0x3c6ef372fe94f82bULL), + _mm_set_epi64x(0x9b05688c2b3e6c1fULL, 0x510e527fade682d1ULL), + _mm_set_epi64x(0x5be0cd19137e2179ULL, 0x1f83d9abfb41bd6bULL)}; + return (void*)Iv; + } + + static const ui32 Sigma[12][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}, + {11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4}, + {7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8}, + {9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13}, + {2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9}, + {12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11}, + {13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10}, + {6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5}, + {10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0}, + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3}}; + + static inline void G1( + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, + __m128i& b0, __m128i& b1) { + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); + + row4l = _mm_xor_si128(row4l, row1l); + row4h = _mm_xor_si128(row4h, row1h); + + row4l = Rotr32(row4l); + row4h = Rotr32(row4h); + + row3l = _mm_add_epi64(row3l, row4l); + row3h = _mm_add_epi64(row3h, row4h); + + row2l = _mm_xor_si128(row2l, row3l); + row2h = _mm_xor_si128(row2h, row3h); + + row2l = Rotr24(row2l); + row2h = Rotr24(row2h); + } + + static inline void G2( + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h, + 
__m128i& b0, __m128i& b1) { + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); + + row4l = _mm_xor_si128(row4l, row1l); + row4h = _mm_xor_si128(row4h, row1h); + + row4l = Rotr16(row4l); + row4h = Rotr16(row4h); + + row3l = _mm_add_epi64(row3l, row4l); + row3h = _mm_add_epi64(row3h, row4h); + + row2l = _mm_xor_si128(row2l, row3l); + row2h = _mm_xor_si128(row2h, row3h); + + row2l = Rotr63(row2l); + row2h = Rotr63(row2h); + } + + static inline void Diagonalize( + __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i t0 = _mm_alignr_epi8(row2h, row2l, 8); + __m128i t1 = _mm_alignr_epi8(row2l, row2h, 8); + row2l = t0; + row2h = t1; + + t0 = row3l; + row3l = row3h; + row3h = t0; + + t0 = _mm_alignr_epi8(row4h, row4l, 8); + t1 = _mm_alignr_epi8(row4l, row4h, 8); + row4l = t1; + row4h = t0; + } + + static inline void Undiagonalize( + __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i t0 = _mm_alignr_epi8(row2l, row2h, 8); + __m128i t1 = _mm_alignr_epi8(row2h, row2l, 8); + row2l = t0; + row2h = t1; + + t0 = row3l; + row3l = row3h; + row3h = t0; + + t0 = _mm_alignr_epi8(row4l, row4h, 8); + t1 = _mm_alignr_epi8(row4h, row4l, 8); + row4l = t1; + row4h = t0; + } + + static inline void Round(int r, const ui64* block_ptr, + __m128i& row1l, __m128i& row2l, __m128i& row3l, __m128i& row4l, + __m128i& row1h, __m128i& row2h, __m128i& row3h, __m128i& row4h) { + __m128i b0, b1; + b0 = _mm_set_epi64x(block_ptr[Sigma[r][2]], block_ptr[Sigma[r][0]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][6]], block_ptr[Sigma[r][4]]); + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + b0 = _mm_set_epi64x(block_ptr[Sigma[r][3]], block_ptr[Sigma[r][1]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][7]], block_ptr[Sigma[r][5]]); + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + Diagonalize(row2l, row3l, row4l, row2h, row3h, row4h); + b0 = _mm_set_epi64x(block_ptr[Sigma[r][10]], block_ptr[Sigma[r][8]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][14]], block_ptr[Sigma[r][12]]); + G1(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + b0 = _mm_set_epi64x(block_ptr[Sigma[r][11]], block_ptr[Sigma[r][9]]); + b1 = _mm_set_epi64x(block_ptr[Sigma[r][15]], block_ptr[Sigma[r][13]]); + G2(row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h, b0, b1); + Undiagonalize(row2l, row3l, row4l, row2h, row3h, row4h); + } + + template <> + void TBlake2B<EInstructionSet::SSSE3>::InitialXor_(ui8* h, const ui8* p) { + __m128i* m_res = (__m128i*)h; + const __m128i* m_p = (__m128i*)p; + __m128i* iv = (__m128i*)GetIV_(); + + _mm_storeu_si128(m_res + 0, _mm_xor_si128(iv[0], _mm_loadu_si128(m_p + 0))); + _mm_storeu_si128(m_res + 1, _mm_xor_si128(iv[1], _mm_loadu_si128(m_p + 1))); + _mm_storeu_si128(m_res + 2, _mm_xor_si128(iv[2], _mm_loadu_si128(m_p + 2))); + _mm_storeu_si128(m_res + 3, _mm_xor_si128(iv[3], _mm_loadu_si128(m_p + 3))); + } + + template <> + void TBlake2B<EInstructionSet::SSSE3>::Compress_(const ui64 block[BLAKE2B_BLOCKQWORDS]) { + __m128i* iv = (__m128i*)GetIV_(); + __m128i row1l = _mm_loadu_si128((__m128i*)&State_.H[0]); + __m128i row1h = _mm_loadu_si128((__m128i*)&State_.H[2]); + __m128i row2l = _mm_loadu_si128((__m128i*)&State_.H[4]); + __m128i row2h = _mm_loadu_si128((__m128i*)&State_.H[6]); + __m128i row3l = iv[0]; + __m128i row3h = iv[1]; + __m128i row4l = _mm_xor_si128(iv[2], 
_mm_loadu_si128((__m128i*)&State_.T[0])); + __m128i row4h = _mm_xor_si128(iv[3], _mm_loadu_si128((__m128i*)&State_.F[0])); + + for (int r = 0; r < 12; ++r) + Round(r, block, row1l, row2l, row3l, row4l, row1h, row2h, row3h, row4h); + + _mm_storeu_si128((__m128i*)&State_.H[0], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[0]), _mm_xor_si128(row3l, row1l))); + _mm_storeu_si128((__m128i*)&State_.H[2], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[2]), _mm_xor_si128(row3h, row1h))); + _mm_storeu_si128((__m128i*)&State_.H[4], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[4]), _mm_xor_si128(row4l, row2l))); + _mm_storeu_si128((__m128i*)&State_.H[6], + _mm_xor_si128(_mm_loadu_si128((__m128i*)&State_.H[6]), _mm_xor_si128(row4h, row2h))); + } +} diff --git a/library/cpp/digest/argonish/internal/blake2b/load_sse41.h b/library/cpp/digest/argonish/internal/blake2b/load_sse41.h index 060455aac2..9b1f7781f9 100644 --- a/library/cpp/digest/argonish/internal/blake2b/load_sse41.h +++ b/library/cpp/digest/argonish/internal/blake2b/load_sse41.h @@ -1,301 +1,301 @@ -#pragma once - -/* - BLAKE2 reference source code package - optimized C implementations - Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the - terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at - your option. The terms of these licenses can be found at: - - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 - - OpenSSL license : https://www.openssl.org/source/license.html - - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 - More information about the BLAKE2 hash function can be found at - https://blake2.net. -*/ - -#define LOAD_MSG_0_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m0, m1); \ - b1 = _mm_unpacklo_epi64(m2, m3); \ - } while (0) - -#define LOAD_MSG_0_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m0, m1); \ - b1 = _mm_unpackhi_epi64(m2, m3); \ - } while (0) - -#define LOAD_MSG_0_3(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m4, m5); \ - b1 = _mm_unpacklo_epi64(m6, m7); \ - } while (0) - -#define LOAD_MSG_0_4(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m4, m5); \ - b1 = _mm_unpackhi_epi64(m6, m7); \ - } while (0) - -#define LOAD_MSG_1_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m7, m2); \ - b1 = _mm_unpackhi_epi64(m4, m6); \ - } while (0) - -#define LOAD_MSG_1_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m5, m4); \ - b1 = _mm_alignr_epi8(m3, m7, 8); \ - } while (0) - -#define LOAD_MSG_1_3(b0, b1) \ - do { \ - b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ - b1 = _mm_unpackhi_epi64(m5, m2); \ - } while (0) - -#define LOAD_MSG_1_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m6, m1); \ - b1 = _mm_unpackhi_epi64(m3, m1); \ - } while (0) - -#define LOAD_MSG_2_1(b0, b1) \ - do { \ - b0 = _mm_alignr_epi8(m6, m5, 8); \ - b1 = _mm_unpackhi_epi64(m2, m7); \ - } while (0) - -#define LOAD_MSG_2_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m4, m0); \ - b1 = _mm_blend_epi16(m1, m6, 0xF0); \ - } while (0) - -#define LOAD_MSG_2_3(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m5, m1, 0xF0); \ - b1 = _mm_unpackhi_epi64(m3, m4); \ - } while (0) - -#define LOAD_MSG_2_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m7, m3); \ - b1 = _mm_alignr_epi8(m2, m0, 8); \ - } while (0) - -#define LOAD_MSG_3_1(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m3, m1); \ - b1 = _mm_unpackhi_epi64(m6, m5); \ - } while (0) - -#define LOAD_MSG_3_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m4, m0); \ - b1 = _mm_unpacklo_epi64(m6, m7); \ - } while (0) - -#define 
LOAD_MSG_3_3(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m1, m2, 0xF0); \ - b1 = _mm_blend_epi16(m2, m7, 0xF0); \ - } while (0) - -#define LOAD_MSG_3_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m3, m5); \ - b1 = _mm_unpacklo_epi64(m0, m4); \ - } while (0) - -#define LOAD_MSG_4_1(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m4, m2); \ - b1 = _mm_unpacklo_epi64(m1, m5); \ - } while (0) - -#define LOAD_MSG_4_2(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m0, m3, 0xF0); \ - b1 = _mm_blend_epi16(m2, m7, 0xF0); \ - } while (0) - -#define LOAD_MSG_4_3(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m7, m5, 0xF0); \ - b1 = _mm_blend_epi16(m3, m1, 0xF0); \ - } while (0) - -#define LOAD_MSG_4_4(b0, b1) \ - do { \ - b0 = _mm_alignr_epi8(m6, m0, 8); \ - b1 = _mm_blend_epi16(m4, m6, 0xF0); \ - } while (0) - -#define LOAD_MSG_5_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m1, m3); \ - b1 = _mm_unpacklo_epi64(m0, m4); \ - } while (0) - -#define LOAD_MSG_5_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m6, m5); \ - b1 = _mm_unpackhi_epi64(m5, m1); \ - } while (0) - -#define LOAD_MSG_5_3(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m2, m3, 0xF0); \ - b1 = _mm_unpackhi_epi64(m7, m0); \ - } while (0) - -#define LOAD_MSG_5_4(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m6, m2); \ - b1 = _mm_blend_epi16(m7, m4, 0xF0); \ - } while (0) - -#define LOAD_MSG_6_1(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m6, m0, 0xF0); \ - b1 = _mm_unpacklo_epi64(m7, m2); \ - } while (0) - -#define LOAD_MSG_6_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m2, m7); \ - b1 = _mm_alignr_epi8(m5, m6, 8); \ - } while (0) - -#define LOAD_MSG_6_3(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m0, m3); \ - b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \ - } while (0) - -#define LOAD_MSG_6_4(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m3, m1); \ - b1 = _mm_blend_epi16(m1, m5, 0xF0); \ - } while (0) - -#define LOAD_MSG_7_1(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m6, m3); \ - b1 = _mm_blend_epi16(m6, m1, 0xF0); \ - } while (0) - -#define LOAD_MSG_7_2(b0, b1) \ - do { \ - b0 = _mm_alignr_epi8(m7, m5, 8); \ - b1 = _mm_unpackhi_epi64(m0, m4); \ - } while (0) - -#define LOAD_MSG_7_3(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m2, m7); \ - b1 = _mm_unpacklo_epi64(m4, m1); \ - } while (0) - -#define LOAD_MSG_7_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m0, m2); \ - b1 = _mm_unpacklo_epi64(m3, m5); \ - } while (0) - -#define LOAD_MSG_8_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m3, m7); \ - b1 = _mm_alignr_epi8(m0, m5, 8); \ - } while (0) - -#define LOAD_MSG_8_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m7, m4); \ - b1 = _mm_alignr_epi8(m4, m1, 8); \ - } while (0) - -#define LOAD_MSG_8_3(b0, b1) \ - do { \ - b0 = m6; \ - b1 = _mm_alignr_epi8(m5, m0, 8); \ - } while (0) - -#define LOAD_MSG_8_4(b0, b1) \ - do { \ - b0 = _mm_blend_epi16(m1, m3, 0xF0); \ - b1 = m2; \ - } while (0) - -#define LOAD_MSG_9_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m5, m4); \ - b1 = _mm_unpackhi_epi64(m3, m0); \ - } while (0) - -#define LOAD_MSG_9_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m1, m2); \ - b1 = _mm_blend_epi16(m3, m2, 0xF0); \ - } while (0) - -#define LOAD_MSG_9_3(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m7, m4); \ - b1 = _mm_unpackhi_epi64(m1, m6); \ - } while (0) - -#define LOAD_MSG_9_4(b0, b1) \ - do { \ - b0 = _mm_alignr_epi8(m7, m5, 8); \ - b1 = _mm_unpacklo_epi64(m6, m0); \ - } while (0) - -#define LOAD_MSG_10_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m0, m1); \ - b1 = _mm_unpacklo_epi64(m2, m3); \ - } while (0) - -#define 
LOAD_MSG_10_2(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m0, m1); \ - b1 = _mm_unpackhi_epi64(m2, m3); \ - } while (0) - -#define LOAD_MSG_10_3(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m4, m5); \ - b1 = _mm_unpacklo_epi64(m6, m7); \ - } while (0) - -#define LOAD_MSG_10_4(b0, b1) \ - do { \ - b0 = _mm_unpackhi_epi64(m4, m5); \ - b1 = _mm_unpackhi_epi64(m6, m7); \ - } while (0) - -#define LOAD_MSG_11_1(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m7, m2); \ - b1 = _mm_unpackhi_epi64(m4, m6); \ - } while (0) - -#define LOAD_MSG_11_2(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m5, m4); \ - b1 = _mm_alignr_epi8(m3, m7, 8); \ - } while (0) - -#define LOAD_MSG_11_3(b0, b1) \ - do { \ - b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ - b1 = _mm_unpackhi_epi64(m5, m2); \ - } while (0) - -#define LOAD_MSG_11_4(b0, b1) \ - do { \ - b0 = _mm_unpacklo_epi64(m6, m1); \ - b1 = _mm_unpackhi_epi64(m3, m1); \ - } while (0) +#pragma once + +/* + BLAKE2 reference source code package - optimized C implementations + Copyright 2012, Samuel Neves <sneves@dei.uc.pt>. You may use this under the + terms of the CC0, the OpenSSL Licence, or the Apache Public License 2.0, at + your option. The terms of these licenses can be found at: + - CC0 1.0 Universal : http://creativecommons.org/publicdomain/zero/1.0 + - OpenSSL license : https://www.openssl.org/source/license.html + - Apache 2.0 : http://www.apache.org/licenses/LICENSE-2.0 + More information about the BLAKE2 hash function can be found at + https://blake2.net. +*/ + +#define LOAD_MSG_0_1(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m0, m1); \ + b1 = _mm_unpacklo_epi64(m2, m3); \ + } while (0) + +#define LOAD_MSG_0_2(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m0, m1); \ + b1 = _mm_unpackhi_epi64(m2, m3); \ + } while (0) + +#define LOAD_MSG_0_3(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m4, m5); \ + b1 = _mm_unpacklo_epi64(m6, m7); \ + } while (0) + +#define LOAD_MSG_0_4(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m4, m5); \ + b1 = _mm_unpackhi_epi64(m6, m7); \ + } while (0) + +#define LOAD_MSG_1_1(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m7, m2); \ + b1 = _mm_unpackhi_epi64(m4, m6); \ + } while (0) + +#define LOAD_MSG_1_2(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m5, m4); \ + b1 = _mm_alignr_epi8(m3, m7, 8); \ + } while (0) + +#define LOAD_MSG_1_3(b0, b1) \ + do { \ + b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ + b1 = _mm_unpackhi_epi64(m5, m2); \ + } while (0) + +#define LOAD_MSG_1_4(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m6, m1); \ + b1 = _mm_unpackhi_epi64(m3, m1); \ + } while (0) + +#define LOAD_MSG_2_1(b0, b1) \ + do { \ + b0 = _mm_alignr_epi8(m6, m5, 8); \ + b1 = _mm_unpackhi_epi64(m2, m7); \ + } while (0) + +#define LOAD_MSG_2_2(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m4, m0); \ + b1 = _mm_blend_epi16(m1, m6, 0xF0); \ + } while (0) + +#define LOAD_MSG_2_3(b0, b1) \ + do { \ + b0 = _mm_blend_epi16(m5, m1, 0xF0); \ + b1 = _mm_unpackhi_epi64(m3, m4); \ + } while (0) + +#define LOAD_MSG_2_4(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m7, m3); \ + b1 = _mm_alignr_epi8(m2, m0, 8); \ + } while (0) + +#define LOAD_MSG_3_1(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m3, m1); \ + b1 = _mm_unpackhi_epi64(m6, m5); \ + } while (0) + +#define LOAD_MSG_3_2(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m4, m0); \ + b1 = _mm_unpacklo_epi64(m6, m7); \ + } while (0) + +#define LOAD_MSG_3_3(b0, b1) \ + do { \ + b0 = _mm_blend_epi16(m1, m2, 0xF0); \ + b1 = _mm_blend_epi16(m2, m7, 0xF0); \ + } while (0) + +#define LOAD_MSG_3_4(b0, 
b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m3, m5); \ + b1 = _mm_unpacklo_epi64(m0, m4); \ + } while (0) + +#define LOAD_MSG_4_1(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m4, m2); \ + b1 = _mm_unpacklo_epi64(m1, m5); \ + } while (0) + +#define LOAD_MSG_4_2(b0, b1) \ + do { \ + b0 = _mm_blend_epi16(m0, m3, 0xF0); \ + b1 = _mm_blend_epi16(m2, m7, 0xF0); \ + } while (0) + +#define LOAD_MSG_4_3(b0, b1) \ + do { \ + b0 = _mm_blend_epi16(m7, m5, 0xF0); \ + b1 = _mm_blend_epi16(m3, m1, 0xF0); \ + } while (0) + +#define LOAD_MSG_4_4(b0, b1) \ + do { \ + b0 = _mm_alignr_epi8(m6, m0, 8); \ + b1 = _mm_blend_epi16(m4, m6, 0xF0); \ + } while (0) + +#define LOAD_MSG_5_1(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m1, m3); \ + b1 = _mm_unpacklo_epi64(m0, m4); \ + } while (0) + +#define LOAD_MSG_5_2(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m6, m5); \ + b1 = _mm_unpackhi_epi64(m5, m1); \ + } while (0) + +#define LOAD_MSG_5_3(b0, b1) \ + do { \ + b0 = _mm_blend_epi16(m2, m3, 0xF0); \ + b1 = _mm_unpackhi_epi64(m7, m0); \ + } while (0) + +#define LOAD_MSG_5_4(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m6, m2); \ + b1 = _mm_blend_epi16(m7, m4, 0xF0); \ + } while (0) + +#define LOAD_MSG_6_1(b0, b1) \ + do { \ + b0 = _mm_blend_epi16(m6, m0, 0xF0); \ + b1 = _mm_unpacklo_epi64(m7, m2); \ + } while (0) + +#define LOAD_MSG_6_2(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m2, m7); \ + b1 = _mm_alignr_epi8(m5, m6, 8); \ + } while (0) + +#define LOAD_MSG_6_3(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m0, m3); \ + b1 = _mm_shuffle_epi32(m4, _MM_SHUFFLE(1, 0, 3, 2)); \ + } while (0) + +#define LOAD_MSG_6_4(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m3, m1); \ + b1 = _mm_blend_epi16(m1, m5, 0xF0); \ + } while (0) + +#define LOAD_MSG_7_1(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m6, m3); \ + b1 = _mm_blend_epi16(m6, m1, 0xF0); \ + } while (0) + +#define LOAD_MSG_7_2(b0, b1) \ + do { \ + b0 = _mm_alignr_epi8(m7, m5, 8); \ + b1 = _mm_unpackhi_epi64(m0, m4); \ + } while (0) + +#define LOAD_MSG_7_3(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m2, m7); \ + b1 = _mm_unpacklo_epi64(m4, m1); \ + } while (0) + +#define LOAD_MSG_7_4(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m0, m2); \ + b1 = _mm_unpacklo_epi64(m3, m5); \ + } while (0) + +#define LOAD_MSG_8_1(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m3, m7); \ + b1 = _mm_alignr_epi8(m0, m5, 8); \ + } while (0) + +#define LOAD_MSG_8_2(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m7, m4); \ + b1 = _mm_alignr_epi8(m4, m1, 8); \ + } while (0) + +#define LOAD_MSG_8_3(b0, b1) \ + do { \ + b0 = m6; \ + b1 = _mm_alignr_epi8(m5, m0, 8); \ + } while (0) + +#define LOAD_MSG_8_4(b0, b1) \ + do { \ + b0 = _mm_blend_epi16(m1, m3, 0xF0); \ + b1 = m2; \ + } while (0) + +#define LOAD_MSG_9_1(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m5, m4); \ + b1 = _mm_unpackhi_epi64(m3, m0); \ + } while (0) + +#define LOAD_MSG_9_2(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m1, m2); \ + b1 = _mm_blend_epi16(m3, m2, 0xF0); \ + } while (0) + +#define LOAD_MSG_9_3(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m7, m4); \ + b1 = _mm_unpackhi_epi64(m1, m6); \ + } while (0) + +#define LOAD_MSG_9_4(b0, b1) \ + do { \ + b0 = _mm_alignr_epi8(m7, m5, 8); \ + b1 = _mm_unpacklo_epi64(m6, m0); \ + } while (0) + +#define LOAD_MSG_10_1(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m0, m1); \ + b1 = _mm_unpacklo_epi64(m2, m3); \ + } while (0) + +#define LOAD_MSG_10_2(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m0, m1); \ + b1 = _mm_unpackhi_epi64(m2, m3); \ + } while (0) + +#define LOAD_MSG_10_3(b0, b1) \ 
+ do { \ + b0 = _mm_unpacklo_epi64(m4, m5); \ + b1 = _mm_unpacklo_epi64(m6, m7); \ + } while (0) + +#define LOAD_MSG_10_4(b0, b1) \ + do { \ + b0 = _mm_unpackhi_epi64(m4, m5); \ + b1 = _mm_unpackhi_epi64(m6, m7); \ + } while (0) + +#define LOAD_MSG_11_1(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m7, m2); \ + b1 = _mm_unpackhi_epi64(m4, m6); \ + } while (0) + +#define LOAD_MSG_11_2(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m5, m4); \ + b1 = _mm_alignr_epi8(m3, m7, 8); \ + } while (0) + +#define LOAD_MSG_11_3(b0, b1) \ + do { \ + b0 = _mm_shuffle_epi32(m0, _MM_SHUFFLE(1, 0, 3, 2)); \ + b1 = _mm_unpackhi_epi64(m5, m2); \ + } while (0) + +#define LOAD_MSG_11_4(b0, b1) \ + do { \ + b0 = _mm_unpacklo_epi64(m6, m1); \ + b1 = _mm_unpackhi_epi64(m3, m1); \ + } while (0) diff --git a/library/cpp/digest/argonish/internal/blake2b/ya.make b/library/cpp/digest/argonish/internal/blake2b/ya.make index 0aa6806b31..1f6d903166 100644 --- a/library/cpp/digest/argonish/internal/blake2b/ya.make +++ b/library/cpp/digest/argonish/internal/blake2b/ya.make @@ -1,9 +1,9 @@ -LIBRARY() - -OWNER(e-sidorov) - +LIBRARY() + +OWNER(e-sidorov) + PEERDIR( library/cpp/digest/argonish/internal/rotations ) - -END() + +END() diff --git a/library/cpp/digest/argonish/internal/blamka/blamka_avx2.h b/library/cpp/digest/argonish/internal/blamka/blamka_avx2.h index 02c506d6ff..bb701799c4 100644 --- a/library/cpp/digest/argonish/internal/blamka/blamka_avx2.h +++ b/library/cpp/digest/argonish/internal/blamka/blamka_avx2.h @@ -1,136 +1,136 @@ -#pragma once - -#include <immintrin.h> +#pragma once + +#include <immintrin.h> #include <library/cpp/digest/argonish/internal/rotations/rotations_avx2.h> - -namespace NArgonish { - static inline void BlamkaG1AVX2( - __m256i& a0, __m256i& a1, __m256i& b0, __m256i& b1, - __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { - __m256i ml = _mm256_mul_epu32(a0, b0); - ml = _mm256_add_epi64(ml, ml); - a0 = _mm256_add_epi64(a0, _mm256_add_epi64(b0, ml)); - d0 = _mm256_xor_si256(d0, a0); - d0 = Rotr32(d0); - - ml = _mm256_mul_epu32(c0, d0); - ml = _mm256_add_epi64(ml, ml); - c0 = _mm256_add_epi64(c0, _mm256_add_epi64(d0, ml)); - - b0 = _mm256_xor_si256(b0, c0); - b0 = Rotr24(b0); - - ml = _mm256_mul_epu32(a1, b1); - ml = _mm256_add_epi64(ml, ml); - a1 = _mm256_add_epi64(a1, _mm256_add_epi64(b1, ml)); - d1 = _mm256_xor_si256(d1, a1); - d1 = Rotr32(d1); - - ml = _mm256_mul_epu32(c1, d1); - ml = _mm256_add_epi64(ml, ml); - c1 = _mm256_add_epi64(c1, _mm256_add_epi64(d1, ml)); - - b1 = _mm256_xor_si256(b1, c1); - b1 = Rotr24(b1); - } - - static inline void BlamkaG2AVX2( - __m256i& a0, __m256i& a1, __m256i& b0, __m256i& b1, - __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { - __m256i ml = _mm256_mul_epu32(a0, b0); - ml = _mm256_add_epi64(ml, ml); - a0 = _mm256_add_epi64(a0, _mm256_add_epi64(b0, ml)); - d0 = _mm256_xor_si256(d0, a0); - d0 = Rotr16(d0); - - ml = _mm256_mul_epu32(c0, d0); - ml = _mm256_add_epi64(ml, ml); - c0 = _mm256_add_epi64(c0, _mm256_add_epi64(d0, ml)); - b0 = _mm256_xor_si256(b0, c0); - b0 = Rotr63(b0); - - ml = _mm256_mul_epu32(a1, b1); - ml = _mm256_add_epi64(ml, ml); - a1 = _mm256_add_epi64(a1, _mm256_add_epi64(b1, ml)); - d1 = _mm256_xor_si256(d1, a1); - d1 = Rotr16(d1); - - ml = _mm256_mul_epu32(c1, d1); - ml = _mm256_add_epi64(ml, ml); - c1 = _mm256_add_epi64(c1, _mm256_add_epi64(d1, ml)); - b1 = _mm256_xor_si256(b1, c1); - b1 = Rotr63(b1); - } - - /* a = ( v0, v1, v2, v3) */ - /* b = ( v4, v5, v6, v7) */ - /* c = ( v8, v9, v10, v11) */ - /* d = (v12, v13, v14, v15) */ - 
static inline void DiagonalizeAVX21( - __m256i& b0, __m256i& c0, __m256i& d0, __m256i& b1, __m256i& c1, __m256i& d1) { - /* (v4, v5, v6, v7) -> (v5, v6, v7, v4) */ - b0 = _mm256_permute4x64_epi64(b0, _MM_SHUFFLE(0, 3, 2, 1)); - /* (v8, v9, v10, v11) -> (v10, v11, v8, v9) */ - c0 = _mm256_permute4x64_epi64(c0, _MM_SHUFFLE(1, 0, 3, 2)); - /* (v12, v13, v14, v15) -> (v15, v12, v13, v14) */ - d0 = _mm256_permute4x64_epi64(d0, _MM_SHUFFLE(2, 1, 0, 3)); - - b1 = _mm256_permute4x64_epi64(b1, _MM_SHUFFLE(0, 3, 2, 1)); - c1 = _mm256_permute4x64_epi64(c1, _MM_SHUFFLE(1, 0, 3, 2)); - d1 = _mm256_permute4x64_epi64(d1, _MM_SHUFFLE(2, 1, 0, 3)); - } - - static inline void DiagonalizeAVX22( - __m256i& b0, __m256i& b1, __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { - /* (v4, v5, v6, v7) -> (v5, v6, v7, v4) */ - __m256i tmp1 = _mm256_blend_epi32(b0, b1, 0b11001100); /* v4v7 */ - __m256i tmp2 = _mm256_blend_epi32(b0, b1, 0b00110011); /* v6v5 */ - b1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); /* v7v4 */ - b0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); /* v5v6 */ - - /* (v8, v9, v10, v11) -> (v10, v11, v8, v9) */ - tmp1 = c0; - c0 = c1; - c1 = tmp1; - - /* (v12, v13, v14, v15) -> (v15, v12, v13, v14) */ - tmp1 = _mm256_blend_epi32(d0, d1, 0b11001100); /* v12v15 */ - tmp2 = _mm256_blend_epi32(d0, d1, 0b00110011); /* v14v13 */ - d0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); /* v15v12 */ - d1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); /* v13v14 */ - } - - static inline void UndiagonalizeAVX21( - __m256i& b0, __m256i& c0, __m256i& d0, __m256i& b1, __m256i& c1, __m256i& d1) { - /* (v5, v6, v7, v4) -> (v4, v5, v6, v7) */ - b0 = _mm256_permute4x64_epi64(b0, _MM_SHUFFLE(2, 1, 0, 3)); - /* (v10, v11, v8, v9) -> (v8, v9, v10, v11) */ - c0 = _mm256_permute4x64_epi64(c0, _MM_SHUFFLE(1, 0, 3, 2)); - /* (v15, v12, v13, v14) -> (v12, v13, v14, v15) */ - d0 = _mm256_permute4x64_epi64(d0, _MM_SHUFFLE(0, 3, 2, 1)); - - b1 = _mm256_permute4x64_epi64(b1, _MM_SHUFFLE(2, 1, 0, 3)); - c1 = _mm256_permute4x64_epi64(c1, _MM_SHUFFLE(1, 0, 3, 2)); - d1 = _mm256_permute4x64_epi64(d1, _MM_SHUFFLE(0, 3, 2, 1)); - } - - static inline void UndiagonalizeAVX22( - __m256i& b0, __m256i& b1, __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { - /* (v5, v6, v7, v4) -> (v4, v5, v6, v7) */ - __m256i tmp1 = _mm256_blend_epi32(b0, b1, 0b11001100); /* v5v4 */ - __m256i tmp2 = _mm256_blend_epi32(b0, b1, 0b00110011); /* v7v6 */ - b0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); /* v4v5 */ - b1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); /* v6v7 */ - - /* (v10,v11,v8,v9) -> (v8,v9,v10,v11) */ - tmp1 = c0; - c0 = c1; - c1 = tmp1; - - /* (v15,v12,v13,v14) -> (v12,v13,v14,v15) */ - tmp1 = _mm256_blend_epi32(d0, d1, 0b00110011); /* v13v12 */ - tmp2 = _mm256_blend_epi32(d0, d1, 0b11001100); /* v15v14 */ - d0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); - d1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); - } -} + +namespace NArgonish { + static inline void BlamkaG1AVX2( + __m256i& a0, __m256i& a1, __m256i& b0, __m256i& b1, + __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { + __m256i ml = _mm256_mul_epu32(a0, b0); + ml = _mm256_add_epi64(ml, ml); + a0 = _mm256_add_epi64(a0, _mm256_add_epi64(b0, ml)); + d0 = _mm256_xor_si256(d0, a0); + d0 = Rotr32(d0); + + ml = _mm256_mul_epu32(c0, d0); + ml = _mm256_add_epi64(ml, ml); + c0 = _mm256_add_epi64(c0, _mm256_add_epi64(d0, ml)); + + b0 = _mm256_xor_si256(b0, c0); + b0 = Rotr24(b0); + + ml 
= _mm256_mul_epu32(a1, b1); + ml = _mm256_add_epi64(ml, ml); + a1 = _mm256_add_epi64(a1, _mm256_add_epi64(b1, ml)); + d1 = _mm256_xor_si256(d1, a1); + d1 = Rotr32(d1); + + ml = _mm256_mul_epu32(c1, d1); + ml = _mm256_add_epi64(ml, ml); + c1 = _mm256_add_epi64(c1, _mm256_add_epi64(d1, ml)); + + b1 = _mm256_xor_si256(b1, c1); + b1 = Rotr24(b1); + } + + static inline void BlamkaG2AVX2( + __m256i& a0, __m256i& a1, __m256i& b0, __m256i& b1, + __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { + __m256i ml = _mm256_mul_epu32(a0, b0); + ml = _mm256_add_epi64(ml, ml); + a0 = _mm256_add_epi64(a0, _mm256_add_epi64(b0, ml)); + d0 = _mm256_xor_si256(d0, a0); + d0 = Rotr16(d0); + + ml = _mm256_mul_epu32(c0, d0); + ml = _mm256_add_epi64(ml, ml); + c0 = _mm256_add_epi64(c0, _mm256_add_epi64(d0, ml)); + b0 = _mm256_xor_si256(b0, c0); + b0 = Rotr63(b0); + + ml = _mm256_mul_epu32(a1, b1); + ml = _mm256_add_epi64(ml, ml); + a1 = _mm256_add_epi64(a1, _mm256_add_epi64(b1, ml)); + d1 = _mm256_xor_si256(d1, a1); + d1 = Rotr16(d1); + + ml = _mm256_mul_epu32(c1, d1); + ml = _mm256_add_epi64(ml, ml); + c1 = _mm256_add_epi64(c1, _mm256_add_epi64(d1, ml)); + b1 = _mm256_xor_si256(b1, c1); + b1 = Rotr63(b1); + } + + /* a = ( v0, v1, v2, v3) */ + /* b = ( v4, v5, v6, v7) */ + /* c = ( v8, v9, v10, v11) */ + /* d = (v12, v13, v14, v15) */ + static inline void DiagonalizeAVX21( + __m256i& b0, __m256i& c0, __m256i& d0, __m256i& b1, __m256i& c1, __m256i& d1) { + /* (v4, v5, v6, v7) -> (v5, v6, v7, v4) */ + b0 = _mm256_permute4x64_epi64(b0, _MM_SHUFFLE(0, 3, 2, 1)); + /* (v8, v9, v10, v11) -> (v10, v11, v8, v9) */ + c0 = _mm256_permute4x64_epi64(c0, _MM_SHUFFLE(1, 0, 3, 2)); + /* (v12, v13, v14, v15) -> (v15, v12, v13, v14) */ + d0 = _mm256_permute4x64_epi64(d0, _MM_SHUFFLE(2, 1, 0, 3)); + + b1 = _mm256_permute4x64_epi64(b1, _MM_SHUFFLE(0, 3, 2, 1)); + c1 = _mm256_permute4x64_epi64(c1, _MM_SHUFFLE(1, 0, 3, 2)); + d1 = _mm256_permute4x64_epi64(d1, _MM_SHUFFLE(2, 1, 0, 3)); + } + + static inline void DiagonalizeAVX22( + __m256i& b0, __m256i& b1, __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { + /* (v4, v5, v6, v7) -> (v5, v6, v7, v4) */ + __m256i tmp1 = _mm256_blend_epi32(b0, b1, 0b11001100); /* v4v7 */ + __m256i tmp2 = _mm256_blend_epi32(b0, b1, 0b00110011); /* v6v5 */ + b1 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); /* v7v4 */ + b0 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); /* v5v6 */ + + /* (v8, v9, v10, v11) -> (v10, v11, v8, v9) */ + tmp1 = c0; + c0 = c1; + c1 = tmp1; + + /* (v12, v13, v14, v15) -> (v15, v12, v13, v14) */ + tmp1 = _mm256_blend_epi32(d0, d1, 0b11001100); /* v12v15 */ + tmp2 = _mm256_blend_epi32(d0, d1, 0b00110011); /* v14v13 */ + d0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); /* v15v12 */ + d1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); /* v13v14 */ + } + + static inline void UndiagonalizeAVX21( + __m256i& b0, __m256i& c0, __m256i& d0, __m256i& b1, __m256i& c1, __m256i& d1) { + /* (v5, v6, v7, v4) -> (v4, v5, v6, v7) */ + b0 = _mm256_permute4x64_epi64(b0, _MM_SHUFFLE(2, 1, 0, 3)); + /* (v10, v11, v8, v9) -> (v8, v9, v10, v11) */ + c0 = _mm256_permute4x64_epi64(c0, _MM_SHUFFLE(1, 0, 3, 2)); + /* (v15, v12, v13, v14) -> (v12, v13, v14, v15) */ + d0 = _mm256_permute4x64_epi64(d0, _MM_SHUFFLE(0, 3, 2, 1)); + + b1 = _mm256_permute4x64_epi64(b1, _MM_SHUFFLE(2, 1, 0, 3)); + c1 = _mm256_permute4x64_epi64(c1, _MM_SHUFFLE(1, 0, 3, 2)); + d1 = _mm256_permute4x64_epi64(d1, _MM_SHUFFLE(0, 3, 2, 1)); + } + + static inline void UndiagonalizeAVX22( + 
__m256i& b0, __m256i& b1, __m256i& c0, __m256i& c1, __m256i& d0, __m256i& d1) { + /* (v5, v6, v7, v4) -> (v4, v5, v6, v7) */ + __m256i tmp1 = _mm256_blend_epi32(b0, b1, 0b11001100); /* v5v4 */ + __m256i tmp2 = _mm256_blend_epi32(b0, b1, 0b00110011); /* v7v6 */ + b0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); /* v4v5 */ + b1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); /* v6v7 */ + + /* (v10,v11,v8,v9) -> (v8,v9,v10,v11) */ + tmp1 = c0; + c0 = c1; + c1 = tmp1; + + /* (v15,v12,v13,v14) -> (v12,v13,v14,v15) */ + tmp1 = _mm256_blend_epi32(d0, d1, 0b00110011); /* v13v12 */ + tmp2 = _mm256_blend_epi32(d0, d1, 0b11001100); /* v15v14 */ + d0 = _mm256_permute4x64_epi64(tmp1, _MM_SHUFFLE(2, 3, 0, 1)); + d1 = _mm256_permute4x64_epi64(tmp2, _MM_SHUFFLE(2, 3, 0, 1)); + } +} diff --git a/library/cpp/digest/argonish/internal/blamka/blamka_sse2.h b/library/cpp/digest/argonish/internal/blamka/blamka_sse2.h index 1b55651b34..b46fc7624a 100644 --- a/library/cpp/digest/argonish/internal/blamka/blamka_sse2.h +++ b/library/cpp/digest/argonish/internal/blamka/blamka_sse2.h @@ -1,95 +1,95 @@ -#pragma once - +#pragma once + #include <library/cpp/digest/argonish/internal/rotations/rotations_sse2.h> - -namespace NArgonish { - static inline void BlamkaG1SSE2( - __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, - __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i ml = _mm_mul_epu32(a0, b0); - ml = _mm_add_epi64(ml, ml); - a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); - - ml = _mm_mul_epu32(a1, b1); - ml = _mm_add_epi64(ml, ml); - a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); - - d0 = _mm_xor_si128(d0, a0); - d1 = _mm_xor_si128(d1, a1); - - d0 = Rotr32(d0); - d1 = Rotr32(d1); - - ml = _mm_mul_epu32(c0, d0); - ml = _mm_add_epi64(ml, ml); - c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); - - ml = _mm_mul_epu32(c1, d1); - ml = _mm_add_epi64(ml, ml); - c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); - - b0 = _mm_xor_si128(b0, c0); - b1 = _mm_xor_si128(b1, c1); - - b0 = Rotr24(b0); - b1 = Rotr24(b1); - } - - static inline void BlamkaG2SSE2( - __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, - __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i ml = _mm_mul_epu32(a0, b0); - ml = _mm_add_epi64(ml, ml); - a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); - - ml = _mm_mul_epu32(a1, b1); - ml = _mm_add_epi64(ml, ml); - a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); - - d0 = _mm_xor_si128(d0, a0); - d1 = _mm_xor_si128(d1, a1); - - d0 = Rotr16(d0); - d1 = Rotr16(d1); - - ml = _mm_mul_epu32(c0, d0); - ml = _mm_add_epi64(ml, ml); - c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); - - ml = _mm_mul_epu32(c1, d1); - ml = _mm_add_epi64(ml, ml); - c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); - - b0 = _mm_xor_si128(b0, c0); - b1 = _mm_xor_si128(b1, c1); - - b0 = Rotr63(b0); - b1 = Rotr63(b1); - } - - static inline void DiagonalizeSSE2( - __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i tmp0 = d0; - __m128i tmp1 = b0; - d0 = c0; - c0 = c1; - c1 = d0; - d0 = _mm_unpackhi_epi64(d1, _mm_unpacklo_epi64(tmp0, tmp0)); - d1 = _mm_unpackhi_epi64(tmp0, _mm_unpacklo_epi64(d1, d1)); - b0 = _mm_unpackhi_epi64(b0, _mm_unpacklo_epi64(b1, b1)); - b1 = _mm_unpackhi_epi64(b1, _mm_unpacklo_epi64(tmp1, tmp1)); - } - - static inline void UndiagonalizeSSE2( - __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i tmp0 = c0; - c0 = c1; - c1 = tmp0; - tmp0 = b0; - __m128i tmp1 = d0; - b0 = _mm_unpackhi_epi64(b1, 
_mm_unpacklo_epi64(b0, b0)); - b1 = _mm_unpackhi_epi64(tmp0, _mm_unpacklo_epi64(b1, b1)); - d0 = _mm_unpackhi_epi64(d0, _mm_unpacklo_epi64(d1, d1)); - d1 = _mm_unpackhi_epi64(d1, _mm_unpacklo_epi64(tmp1, tmp1)); - } -} + +namespace NArgonish { + static inline void BlamkaG1SSE2( + __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, + __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i ml = _mm_mul_epu32(a0, b0); + ml = _mm_add_epi64(ml, ml); + a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); + + ml = _mm_mul_epu32(a1, b1); + ml = _mm_add_epi64(ml, ml); + a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); + + d0 = _mm_xor_si128(d0, a0); + d1 = _mm_xor_si128(d1, a1); + + d0 = Rotr32(d0); + d1 = Rotr32(d1); + + ml = _mm_mul_epu32(c0, d0); + ml = _mm_add_epi64(ml, ml); + c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); + + ml = _mm_mul_epu32(c1, d1); + ml = _mm_add_epi64(ml, ml); + c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); + + b0 = _mm_xor_si128(b0, c0); + b1 = _mm_xor_si128(b1, c1); + + b0 = Rotr24(b0); + b1 = Rotr24(b1); + } + + static inline void BlamkaG2SSE2( + __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, + __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i ml = _mm_mul_epu32(a0, b0); + ml = _mm_add_epi64(ml, ml); + a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); + + ml = _mm_mul_epu32(a1, b1); + ml = _mm_add_epi64(ml, ml); + a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); + + d0 = _mm_xor_si128(d0, a0); + d1 = _mm_xor_si128(d1, a1); + + d0 = Rotr16(d0); + d1 = Rotr16(d1); + + ml = _mm_mul_epu32(c0, d0); + ml = _mm_add_epi64(ml, ml); + c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); + + ml = _mm_mul_epu32(c1, d1); + ml = _mm_add_epi64(ml, ml); + c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); + + b0 = _mm_xor_si128(b0, c0); + b1 = _mm_xor_si128(b1, c1); + + b0 = Rotr63(b0); + b1 = Rotr63(b1); + } + + static inline void DiagonalizeSSE2( + __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i tmp0 = d0; + __m128i tmp1 = b0; + d0 = c0; + c0 = c1; + c1 = d0; + d0 = _mm_unpackhi_epi64(d1, _mm_unpacklo_epi64(tmp0, tmp0)); + d1 = _mm_unpackhi_epi64(tmp0, _mm_unpacklo_epi64(d1, d1)); + b0 = _mm_unpackhi_epi64(b0, _mm_unpacklo_epi64(b1, b1)); + b1 = _mm_unpackhi_epi64(b1, _mm_unpacklo_epi64(tmp1, tmp1)); + } + + static inline void UndiagonalizeSSE2( + __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i tmp0 = c0; + c0 = c1; + c1 = tmp0; + tmp0 = b0; + __m128i tmp1 = d0; + b0 = _mm_unpackhi_epi64(b1, _mm_unpacklo_epi64(b0, b0)); + b1 = _mm_unpackhi_epi64(tmp0, _mm_unpacklo_epi64(b1, b1)); + d0 = _mm_unpackhi_epi64(d0, _mm_unpacklo_epi64(d1, d1)); + d1 = _mm_unpackhi_epi64(d1, _mm_unpacklo_epi64(tmp1, tmp1)); + } +} diff --git a/library/cpp/digest/argonish/internal/blamka/blamka_ssse3.h b/library/cpp/digest/argonish/internal/blamka/blamka_ssse3.h index 46e8500cd6..a7bd0c9539 100644 --- a/library/cpp/digest/argonish/internal/blamka/blamka_ssse3.h +++ b/library/cpp/digest/argonish/internal/blamka/blamka_ssse3.h @@ -1,103 +1,103 @@ -#pragma once - +#pragma once + #include <library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h> - -namespace NArgonish { - static inline void BlamkaG1SSSE3( - __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, - __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i ml = _mm_mul_epu32(a0, b0); - ml = _mm_add_epi64(ml, ml); - a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); - - ml = _mm_mul_epu32(a1, b1); - ml = _mm_add_epi64(ml, ml); - a1 = _mm_add_epi64(a1, 
_mm_add_epi64(b1, ml)); - - d0 = _mm_xor_si128(d0, a0); - d1 = _mm_xor_si128(d1, a1); - - d0 = Rotr32(d0); - d1 = Rotr32(d1); - - ml = _mm_mul_epu32(c0, d0); - ml = _mm_add_epi64(ml, ml); - c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); - - ml = _mm_mul_epu32(c1, d1); - ml = _mm_add_epi64(ml, ml); - c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); - - b0 = _mm_xor_si128(b0, c0); - b1 = _mm_xor_si128(b1, c1); - - b0 = Rotr24(b0); - b1 = Rotr24(b1); - } - - static inline void BlamkaG2SSSE3( - __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, - __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i ml = _mm_mul_epu32(a0, b0); - ml = _mm_add_epi64(ml, ml); - a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); - - ml = _mm_mul_epu32(a1, b1); - ml = _mm_add_epi64(ml, ml); - a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); - - d0 = _mm_xor_si128(d0, a0); - d1 = _mm_xor_si128(d1, a1); - - d0 = Rotr16(d0); - d1 = Rotr16(d1); - - ml = _mm_mul_epu32(c0, d0); - ml = _mm_add_epi64(ml, ml); - c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); - - ml = _mm_mul_epu32(c1, d1); - ml = _mm_add_epi64(ml, ml); - c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); - - b0 = _mm_xor_si128(b0, c0); - b1 = _mm_xor_si128(b1, c1); - - b0 = Rotr63(b0); - b1 = Rotr63(b1); - } - - static inline void DiagonalizeSSSE3( - __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i t0 = _mm_alignr_epi8(b1, b0, 8); - __m128i t1 = _mm_alignr_epi8(b0, b1, 8); - b0 = t0; - b1 = t1; - - t0 = c0; - c0 = c1; - c1 = t0; - - t0 = _mm_alignr_epi8(d1, d0, 8); - t1 = _mm_alignr_epi8(d0, d1, 8); - d0 = t1; - d1 = t0; - } - - static inline void UndiagonalizeSSSE3( - __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { - __m128i t0 = _mm_alignr_epi8(b0, b1, 8); - __m128i t1 = _mm_alignr_epi8(b1, b0, 8); - b0 = t0; - b1 = t1; - - t0 = c0; - c0 = c1; - c1 = t0; - - t0 = _mm_alignr_epi8(d0, d1, 8); - t1 = _mm_alignr_epi8(d1, d0, 8); - d0 = t1; - d1 = t0; - } -} + +namespace NArgonish { + static inline void BlamkaG1SSSE3( + __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, + __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i ml = _mm_mul_epu32(a0, b0); + ml = _mm_add_epi64(ml, ml); + a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); + + ml = _mm_mul_epu32(a1, b1); + ml = _mm_add_epi64(ml, ml); + a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); + + d0 = _mm_xor_si128(d0, a0); + d1 = _mm_xor_si128(d1, a1); + + d0 = Rotr32(d0); + d1 = Rotr32(d1); + + ml = _mm_mul_epu32(c0, d0); + ml = _mm_add_epi64(ml, ml); + c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); + + ml = _mm_mul_epu32(c1, d1); + ml = _mm_add_epi64(ml, ml); + c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); + + b0 = _mm_xor_si128(b0, c0); + b1 = _mm_xor_si128(b1, c1); + + b0 = Rotr24(b0); + b1 = Rotr24(b1); + } + + static inline void BlamkaG2SSSE3( + __m128i& a0, __m128i& a1, __m128i& b0, __m128i& b1, + __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i ml = _mm_mul_epu32(a0, b0); + ml = _mm_add_epi64(ml, ml); + a0 = _mm_add_epi64(a0, _mm_add_epi64(b0, ml)); + + ml = _mm_mul_epu32(a1, b1); + ml = _mm_add_epi64(ml, ml); + a1 = _mm_add_epi64(a1, _mm_add_epi64(b1, ml)); + + d0 = _mm_xor_si128(d0, a0); + d1 = _mm_xor_si128(d1, a1); + + d0 = Rotr16(d0); + d1 = Rotr16(d1); + + ml = _mm_mul_epu32(c0, d0); + ml = _mm_add_epi64(ml, ml); + c0 = _mm_add_epi64(c0, _mm_add_epi64(d0, ml)); + + ml = _mm_mul_epu32(c1, d1); + ml = _mm_add_epi64(ml, ml); + c1 = _mm_add_epi64(c1, _mm_add_epi64(ml, d1)); + + b0 = _mm_xor_si128(b0, c0); 
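+        /* Note: each _mm_mul_epu32/_mm_add_epi64 pair in G1/G2 computes the
+         * BlaMka primitive per 64-bit lane, fBlaMka(x, y) = x + y + 2 * xL * yL,
+         * where xL and yL are the low 32 bits of x and y (_mm_mul_epu32
+         * multiplies exactly those low halves, and the extra add doubles the
+         * product). It is BLAKE2b's "a = a + b" step with a doubled low-word
+         * product mixed in, as specified for Argon2's permutation. */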
+ b1 = _mm_xor_si128(b1, c1); + + b0 = Rotr63(b0); + b1 = Rotr63(b1); + } + + static inline void DiagonalizeSSSE3( + __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i t0 = _mm_alignr_epi8(b1, b0, 8); + __m128i t1 = _mm_alignr_epi8(b0, b1, 8); + b0 = t0; + b1 = t1; + + t0 = c0; + c0 = c1; + c1 = t0; + + t0 = _mm_alignr_epi8(d1, d0, 8); + t1 = _mm_alignr_epi8(d0, d1, 8); + d0 = t1; + d1 = t0; + } + + static inline void UndiagonalizeSSSE3( + __m128i& b0, __m128i& b1, __m128i& c0, __m128i& c1, __m128i& d0, __m128i& d1) { + __m128i t0 = _mm_alignr_epi8(b0, b1, 8); + __m128i t1 = _mm_alignr_epi8(b1, b0, 8); + b0 = t0; + b1 = t1; + + t0 = c0; + c0 = c1; + c1 = t0; + + t0 = _mm_alignr_epi8(d0, d1, 8); + t1 = _mm_alignr_epi8(d1, d0, 8); + d0 = t1; + d1 = t0; + } +} diff --git a/library/cpp/digest/argonish/internal/blamka/ya.make b/library/cpp/digest/argonish/internal/blamka/ya.make index 0aa6806b31..1f6d903166 100644 --- a/library/cpp/digest/argonish/internal/blamka/ya.make +++ b/library/cpp/digest/argonish/internal/blamka/ya.make @@ -1,9 +1,9 @@ -LIBRARY() - -OWNER(e-sidorov) - +LIBRARY() + +OWNER(e-sidorov) + PEERDIR( library/cpp/digest/argonish/internal/rotations ) - -END() + +END() diff --git a/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.cpp b/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.cpp index c1cf004f58..8d320063f4 100644 --- a/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.cpp +++ b/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.cpp @@ -1,18 +1,18 @@ -// -// Created by Evgeny Sidorov on 12/04/17. -// - -#include "proxy_avx2.h" +// +// Created by Evgeny Sidorov on 12/04/17. +// + +#include "proxy_avx2.h" #include <library/cpp/digest/argonish/internal/argon2/argon2_base.h> #include <library/cpp/digest/argonish/internal/argon2/argon2_avx2.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b_avx2.h> - -#define ZEROUPPER _mm256_zeroupper(); - -namespace NArgonish { - ARGON2_PROXY_CLASS_IMPL(AVX2) - BLAKE2B_PROXY_CLASS_IMPL(AVX2) -} - -#undef ZEROUPPER + +#define ZEROUPPER _mm256_zeroupper(); + +namespace NArgonish { + ARGON2_PROXY_CLASS_IMPL(AVX2) + BLAKE2B_PROXY_CLASS_IMPL(AVX2) +} + +#undef ZEROUPPER diff --git a/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.h b/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.h index eec0094563..fca23250a2 100644 --- a/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.h +++ b/library/cpp/digest/argonish/internal/proxies/avx2/proxy_avx2.h @@ -1,11 +1,11 @@ -#pragma once - -#include <util/generic/yexception.h> +#pragma once + +#include <util/generic/yexception.h> #include <library/cpp/digest/argonish/argon2.h> #include <library/cpp/digest/argonish/blake2b.h> #include <library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h> - -namespace NArgonish { - ARGON2_PROXY_CLASS_DECL(AVX2) - BLAKE2B_PROXY_CLASS_DECL(AVX2) -} + +namespace NArgonish { + ARGON2_PROXY_CLASS_DECL(AVX2) + BLAKE2B_PROXY_CLASS_DECL(AVX2) +} diff --git a/library/cpp/digest/argonish/internal/proxies/avx2/ya.make b/library/cpp/digest/argonish/internal/proxies/avx2/ya.make index 53f814c48d..94ce211e06 100644 --- a/library/cpp/digest/argonish/internal/proxies/avx2/ya.make +++ b/library/cpp/digest/argonish/internal/proxies/avx2/ya.make @@ -1,18 +1,18 @@ -OWNER(e-sidorov) - -LIBRARY() - -NO_UTIL() - -IF (ARCH_X86_64 OR ARCH_I386) - PEERDIR( - 
library/cpp/digest/argonish/internal/proxies/macro - library/cpp/digest/argonish/internal/argon2 - library/cpp/digest/argonish/internal/blake2b - ) - SRC_CPP_AVX2( - proxy_avx2.cpp - ) -ENDIF() - -END() +OWNER(e-sidorov) + +LIBRARY() + +NO_UTIL() + +IF (ARCH_X86_64 OR ARCH_I386) + PEERDIR( + library/cpp/digest/argonish/internal/proxies/macro + library/cpp/digest/argonish/internal/argon2 + library/cpp/digest/argonish/internal/blake2b + ) + SRC_CPP_AVX2( + proxy_avx2.cpp + ) +ENDIF() + +END() diff --git a/library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h b/library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h index 5ed5f53b4f..d9bddf55bd 100644 --- a/library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h +++ b/library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h @@ -1,194 +1,194 @@ -#pragma once - -// -// Created by Evgeny Sidorov on 12/04/17. -// -/** - * ZEROUPPER macro is only used for AVX2 instruction set to clear up the upper half of YMM registers - * It's done to avoid performance penalty when CPU switches to non-AVX2 code (according to Agner) - * and the post at https://software.intel.com/en-us/articles/intel-avx-state-transitions-migrating-sse-code-to-avx - */ - -#define ARGON2_PROXY_CLASS_DECL(IS) \ - class TArgon2Proxy##IS final: public IArgon2Base { \ - public: \ - TArgon2Proxy##IS(EArgon2Type atype, ui32 tcost, ui32 mcost, ui32 threads, \ - const ui8* key = nullptr, ui32 keylen = 0); \ - virtual ~TArgon2Proxy##IS(); \ - \ - virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \ - ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ - virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \ - const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ - virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \ - const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, \ - const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ - virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \ - const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, \ - const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ - virtual size_t GetMemorySize() const override; \ - \ - protected: \ - THolder<IArgon2Base> argon2; \ - }; - -#define ARGON2_INSTANCE_DECL(IS_val, mcost_val, threads_val) \ - if (mcost == mcost_val && threads == threads_val) { \ - argon2 = MakeHolder<TArgon2##IS_val<mcost_val, threads_val>>(atype, tcost, key, keylen); \ - return; \ - } - -#define ARGON2_PROXY_CLASS_IMPL(IS) \ - TArgon2Proxy##IS::TArgon2Proxy##IS(EArgon2Type atype, ui32 tcost, ui32 mcost, ui32 threads, \ - const ui8* key, ui32 keylen) { \ - if ((key == nullptr && keylen > 0) || keylen > ARGON2_SECRET_MAX_LENGTH) \ - ythrow yexception() << "key is null or keylen equals 0 or key is too long"; \ - \ - ARGON2_INSTANCE_DECL(IS, 1, 1) \ +#pragma once + +// +// Created by Evgeny Sidorov on 12/04/17. 
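+// (Illustration of the AVX-SSE transition this header guards against: in a
+// hypothetical caller that mixes instruction sets, the first SSE instruction
+// executed after 256-bit AVX2 code incurs a state-transition penalty unless
+// _mm256_zeroupper() has cleared the upper YMM halves first; the ZEROUPPER
+// macro places that intrinsic at the end of each AVX2 proxy method.)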
+// +/** + * ZEROUPPER macro is only used for AVX2 instruction set to clear up the upper half of YMM registers + * It's done to avoid performance penalty when CPU switches to non-AVX2 code (according to Agner) + * and the post at https://software.intel.com/en-us/articles/intel-avx-state-transitions-migrating-sse-code-to-avx + */ + +#define ARGON2_PROXY_CLASS_DECL(IS) \ + class TArgon2Proxy##IS final: public IArgon2Base { \ + public: \ + TArgon2Proxy##IS(EArgon2Type atype, ui32 tcost, ui32 mcost, ui32 threads, \ + const ui8* key = nullptr, ui32 keylen = 0); \ + virtual ~TArgon2Proxy##IS(); \ + \ + virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \ + ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ + virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \ + const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ + virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \ + const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, \ + const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ + virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \ + const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, \ + const ui8* aad = nullptr, ui32 aadlen = 0) const override; \ + virtual size_t GetMemorySize() const override; \ + \ + protected: \ + THolder<IArgon2Base> argon2; \ + }; + +#define ARGON2_INSTANCE_DECL(IS_val, mcost_val, threads_val) \ + if (mcost == mcost_val && threads == threads_val) { \ + argon2 = MakeHolder<TArgon2##IS_val<mcost_val, threads_val>>(atype, tcost, key, keylen); \ + return; \ + } + +#define ARGON2_PROXY_CLASS_IMPL(IS) \ + TArgon2Proxy##IS::TArgon2Proxy##IS(EArgon2Type atype, ui32 tcost, ui32 mcost, ui32 threads, \ + const ui8* key, ui32 keylen) { \ + if ((key == nullptr && keylen > 0) || keylen > ARGON2_SECRET_MAX_LENGTH) \ + ythrow yexception() << "key is null or keylen equals 0 or key is too long"; \ + \ + ARGON2_INSTANCE_DECL(IS, 1, 1) \ ARGON2_INSTANCE_DECL(IS, 8, 1) \ - ARGON2_INSTANCE_DECL(IS, 16, 1) \ - ARGON2_INSTANCE_DECL(IS, 32, 1) \ - ARGON2_INSTANCE_DECL(IS, 64, 1) \ + ARGON2_INSTANCE_DECL(IS, 16, 1) \ + ARGON2_INSTANCE_DECL(IS, 32, 1) \ + ARGON2_INSTANCE_DECL(IS, 64, 1) \ ARGON2_INSTANCE_DECL(IS, 128, 1) \ ARGON2_INSTANCE_DECL(IS, 256, 1) \ - ARGON2_INSTANCE_DECL(IS, 512, 1) \ - ARGON2_INSTANCE_DECL(IS, 1024, 1) \ - ARGON2_INSTANCE_DECL(IS, 2048, 1) \ - ARGON2_INSTANCE_DECL(IS, 4096, 1) \ - ARGON2_INSTANCE_DECL(IS, 8192, 1) \ - ARGON2_INSTANCE_DECL(IS, 16384, 1) \ - ARGON2_INSTANCE_DECL(IS, 32768, 1) \ - ARGON2_INSTANCE_DECL(IS, 65536, 1) \ - ARGON2_INSTANCE_DECL(IS, 131072, 1) \ - ARGON2_INSTANCE_DECL(IS, 262144, 1) \ - ARGON2_INSTANCE_DECL(IS, 524288, 1) \ - ARGON2_INSTANCE_DECL(IS, 1048576, 1) \ - ARGON2_INSTANCE_DECL(IS, 1, 2) \ - ARGON2_INSTANCE_DECL(IS, 32, 2) \ - ARGON2_INSTANCE_DECL(IS, 64, 2) \ - ARGON2_INSTANCE_DECL(IS, 512, 2) \ - ARGON2_INSTANCE_DECL(IS, 1024, 2) \ - ARGON2_INSTANCE_DECL(IS, 2048, 2) \ - ARGON2_INSTANCE_DECL(IS, 4096, 2) \ - ARGON2_INSTANCE_DECL(IS, 8192, 2) \ - ARGON2_INSTANCE_DECL(IS, 16384, 2) \ - ARGON2_INSTANCE_DECL(IS, 32768, 2) \ - ARGON2_INSTANCE_DECL(IS, 65536, 2) \ - ARGON2_INSTANCE_DECL(IS, 131072, 2) \ - ARGON2_INSTANCE_DECL(IS, 262144, 2) \ - ARGON2_INSTANCE_DECL(IS, 524288, 2) \ - ARGON2_INSTANCE_DECL(IS, 1048576, 2) \ - ARGON2_INSTANCE_DECL(IS, 1, 4) \ - ARGON2_INSTANCE_DECL(IS, 32, 4) \ - ARGON2_INSTANCE_DECL(IS, 64, 
-        ARGON2_INSTANCE_DECL(IS, 512, 4) \
-        ARGON2_INSTANCE_DECL(IS, 1024, 4) \
-        ARGON2_INSTANCE_DECL(IS, 2048, 4) \
-        ARGON2_INSTANCE_DECL(IS, 4096, 4) \
-        ARGON2_INSTANCE_DECL(IS, 8192, 4) \
-        ARGON2_INSTANCE_DECL(IS, 16384, 4) \
-        ARGON2_INSTANCE_DECL(IS, 32768, 4) \
-        ARGON2_INSTANCE_DECL(IS, 65536, 4) \
-        ARGON2_INSTANCE_DECL(IS, 131072, 4) \
-        ARGON2_INSTANCE_DECL(IS, 262144, 4) \
-        ARGON2_INSTANCE_DECL(IS, 524288, 4) \
-        ARGON2_INSTANCE_DECL(IS, 1048576, 4) \
-        \
-        ythrow yexception() << "These parameters are not supported. Please add the corresponding ARGON2_INSTANCE_DECL macro"; \
-    } \
-    \
-    TArgon2Proxy##IS::~TArgon2Proxy##IS() { \
-    } \
-    \
-    void TArgon2Proxy##IS::Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \
-                                ui8* out, ui32 outlen, const ui8* aad, ui32 aadlen) const { \
-        if (saltlen < ARGON2_SALT_MIN_LEN) \
-            ythrow yexception() << "salt is too short"; \
-        if (outlen < ARGON2_MIN_OUTLEN) \
-            ythrow yexception() << "output length is too short"; \
-        \
-        argon2->Hash(pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); \
-        ZEROUPPER \
-    } \
-    \
-    bool TArgon2Proxy##IS::Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \
-                                  const ui8* hash, ui32 hashlen, const ui8* aad, ui32 aadlen) const { \
-        if (saltlen < ARGON2_SALT_MIN_LEN) \
-            ythrow yexception() << "salt is too short"; \
-        if (hashlen < ARGON2_MIN_OUTLEN) \
-            ythrow yexception() << "hash length is too short"; \
-        \
-        const bool result = argon2->Verify(pwd, pwdlen, salt, saltlen, hash, hashlen, aad, aadlen); \
-        ZEROUPPER /* must run before return to take effect */ \
-        return result; \
-    } \
-    \
-    void TArgon2Proxy##IS::HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \
-                                                const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, \
-                                                const ui8* aad, ui32 aadlen) const { \
-        if (saltlen < ARGON2_SALT_MIN_LEN) \
-            ythrow yexception() << "salt is too short"; \
-        if (outlen < ARGON2_MIN_OUTLEN) \
-            ythrow yexception() << "output length is too short"; \
-        \
-        argon2->HashWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); \
-        ZEROUPPER \
-    } \
-    \
-    bool TArgon2Proxy##IS::VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \
-                                                  const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, \
-                                                  const ui8* aad, ui32 aadlen) const { \
-        if (saltlen < ARGON2_SALT_MIN_LEN) \
-            ythrow yexception() << "salt is too short"; \
-        if (hashlen < ARGON2_MIN_OUTLEN) \
-            ythrow yexception() << "hash length is too short"; \
-        \
-        const bool result = argon2->VerifyWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, hash, hashlen, aad, aadlen); \
-        ZEROUPPER /* must run before return to take effect */ \
-        return result; \
-    } \
-    \
-    size_t TArgon2Proxy##IS::GetMemorySize() const { \
-        return argon2->GetMemorySize(); \
-    }
-
-#define BLAKE2B_PROXY_CLASS_DECL(IS) \
-    class TBlake2BProxy##IS final: public IBlake2Base { \
-    public: \
-        TBlake2BProxy##IS(size_t outlen, const void* key = nullptr, size_t keylen = 0); \
-        virtual void Update(ui32 in) override; \
-        virtual void Update(const void* pin, size_t inlen) override; \
-        virtual void Final(void* out, size_t outlen) override; \
-        \
-    protected: \
-        THolder<IBlake2Base> blake2; \
-    };
-
-#define BLAKE2B_PROXY_CLASS_IMPL(IS) \
-    TBlake2BProxy##IS::TBlake2BProxy##IS(size_t outlen, const void* key, size_t keylen) { \
-        if (!outlen || outlen > BLAKE2B_OUTBYTES) \
-            ythrow yexception() << "outlen is zero or too long"; \
-        \
-        if (key == nullptr) { \
-            blake2 = MakeHolder<TBlake2B<EInstructionSet::IS>>(outlen); \
-            return; \
-        } \
-        \
-        if (!keylen || keylen > BLAKE2B_KEYBYTES) /* key is non-null here */ \
-            ythrow yexception() << "keylen is zero or key is too long"; \
-        \
-        blake2 = MakeHolder<TBlake2B<EInstructionSet::IS>>(outlen, key, keylen); \
-    } \
-    \
-    void TBlake2BProxy##IS::Update(ui32 in) { \
-        blake2->Update(in); \
-        ZEROUPPER \
-    } \
-    \
-    void TBlake2BProxy##IS::Update(const void* pin, size_t inlen) { \
-        blake2->Update(pin, inlen); \
-        ZEROUPPER \
-    } \
-    \
-    void TBlake2BProxy##IS::Final(void* out, size_t outlen) { \
-        blake2->Final(out, outlen); \
-        ZEROUPPER \
-    }
+        ARGON2_INSTANCE_DECL(IS, 512, 1) \
+        ARGON2_INSTANCE_DECL(IS, 1024, 1) \
+        ARGON2_INSTANCE_DECL(IS, 2048, 1) \
+        ARGON2_INSTANCE_DECL(IS, 4096, 1) \
+        ARGON2_INSTANCE_DECL(IS, 8192, 1) \
+        ARGON2_INSTANCE_DECL(IS, 16384, 1) \
+        ARGON2_INSTANCE_DECL(IS, 32768, 1) \
+        ARGON2_INSTANCE_DECL(IS, 65536, 1) \
+        ARGON2_INSTANCE_DECL(IS, 131072, 1) \
+        ARGON2_INSTANCE_DECL(IS, 262144, 1) \
+        ARGON2_INSTANCE_DECL(IS, 524288, 1) \
+        ARGON2_INSTANCE_DECL(IS, 1048576, 1) \
+        ARGON2_INSTANCE_DECL(IS, 1, 2) \
+        ARGON2_INSTANCE_DECL(IS, 32, 2) \
+        ARGON2_INSTANCE_DECL(IS, 64, 2) \
+        ARGON2_INSTANCE_DECL(IS, 512, 2) \
+        ARGON2_INSTANCE_DECL(IS, 1024, 2) \
+        ARGON2_INSTANCE_DECL(IS, 2048, 2) \
+        ARGON2_INSTANCE_DECL(IS, 4096, 2) \
+        ARGON2_INSTANCE_DECL(IS, 8192, 2) \
+        ARGON2_INSTANCE_DECL(IS, 16384, 2) \
+        ARGON2_INSTANCE_DECL(IS, 32768, 2) \
+        ARGON2_INSTANCE_DECL(IS, 65536, 2) \
+        ARGON2_INSTANCE_DECL(IS, 131072, 2) \
+        ARGON2_INSTANCE_DECL(IS, 262144, 2) \
+        ARGON2_INSTANCE_DECL(IS, 524288, 2) \
+        ARGON2_INSTANCE_DECL(IS, 1048576, 2) \
+        ARGON2_INSTANCE_DECL(IS, 1, 4) \
+        ARGON2_INSTANCE_DECL(IS, 32, 4) \
+        ARGON2_INSTANCE_DECL(IS, 64, 4) \
+        ARGON2_INSTANCE_DECL(IS, 512, 4) \
+        ARGON2_INSTANCE_DECL(IS, 1024, 4) \
+        ARGON2_INSTANCE_DECL(IS, 2048, 4) \
+        ARGON2_INSTANCE_DECL(IS, 4096, 4) \
+        ARGON2_INSTANCE_DECL(IS, 8192, 4) \
+        ARGON2_INSTANCE_DECL(IS, 16384, 4) \
+        ARGON2_INSTANCE_DECL(IS, 32768, 4) \
+        ARGON2_INSTANCE_DECL(IS, 65536, 4) \
+        ARGON2_INSTANCE_DECL(IS, 131072, 4) \
+        ARGON2_INSTANCE_DECL(IS, 262144, 4) \
+        ARGON2_INSTANCE_DECL(IS, 524288, 4) \
+        ARGON2_INSTANCE_DECL(IS, 1048576, 4) \
+        \
+        ythrow yexception() << "These parameters are not supported. Please add the corresponding ARGON2_INSTANCE_DECL macro"; \
+    } \
+    \
+    TArgon2Proxy##IS::~TArgon2Proxy##IS() { \
+    } \
+    \
+    void TArgon2Proxy##IS::Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \
+                                ui8* out, ui32 outlen, const ui8* aad, ui32 aadlen) const { \
+        if (saltlen < ARGON2_SALT_MIN_LEN) \
+            ythrow yexception() << "salt is too short"; \
+        if (outlen < ARGON2_MIN_OUTLEN) \
+            ythrow yexception() << "output length is too short"; \
+        \
+        argon2->Hash(pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); \
+        ZEROUPPER \
+    } \
+    \
+    bool TArgon2Proxy##IS::Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, \
+                                  const ui8* hash, ui32 hashlen, const ui8* aad, ui32 aadlen) const { \
+        if (saltlen < ARGON2_SALT_MIN_LEN) \
+            ythrow yexception() << "salt is too short"; \
+        if (hashlen < ARGON2_MIN_OUTLEN) \
+            ythrow yexception() << "hash length is too short"; \
+        \
+        const bool result = argon2->Verify(pwd, pwdlen, salt, saltlen, hash, hashlen, aad, aadlen); \
+        ZEROUPPER /* must run before return to take effect */ \
+        return result; \
+    } \
+    \
+    void TArgon2Proxy##IS::HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \
+                                                const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, \
+                                                const ui8* aad, ui32 aadlen) const { \
+        if (saltlen < ARGON2_SALT_MIN_LEN) \
+            ythrow yexception() << "salt is too short"; \
+        if (outlen < ARGON2_MIN_OUTLEN) \
+            ythrow yexception() << "output length is too short"; \
+        \
+        argon2->HashWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); \
+        ZEROUPPER \
+    } \
+    \
+    bool TArgon2Proxy##IS::VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, \
+                                                  const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, \
+                                                  const ui8* aad, ui32 aadlen) const { \
+        if (saltlen < ARGON2_SALT_MIN_LEN) \
+            ythrow yexception() << "salt is too short"; \
+        if (hashlen < ARGON2_MIN_OUTLEN) \
+            ythrow yexception() << "hash length is too short"; \
+        \
+        const bool result = argon2->VerifyWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, hash, hashlen, aad, aadlen); \
+        ZEROUPPER /* must run before return to take effect */ \
+        return result; \
+    } \
+    \
+    size_t TArgon2Proxy##IS::GetMemorySize() const { \
+        return argon2->GetMemorySize(); \
+    }
+
+#define BLAKE2B_PROXY_CLASS_DECL(IS) \
+    class TBlake2BProxy##IS final: public IBlake2Base { \
+    public: \
+        TBlake2BProxy##IS(size_t outlen, const void* key = nullptr, size_t keylen = 0); \
+        virtual void Update(ui32 in) override; \
+        virtual void Update(const void* pin, size_t inlen) override; \
+        virtual void Final(void* out, size_t outlen) override; \
+        \
+    protected: \
+        THolder<IBlake2Base> blake2; \
+    };
+
+#define BLAKE2B_PROXY_CLASS_IMPL(IS) \
+    TBlake2BProxy##IS::TBlake2BProxy##IS(size_t outlen, const void* key, size_t keylen) { \
+        if (!outlen || outlen > BLAKE2B_OUTBYTES) \
+            ythrow yexception() << "outlen is zero or too long"; \
+        \
+        if (key == nullptr) { \
+            blake2 = MakeHolder<TBlake2B<EInstructionSet::IS>>(outlen); \
+            return; \
+        } \
+        \
+        if (!keylen || keylen > BLAKE2B_KEYBYTES) /* key is non-null here */ \
+            ythrow yexception() << "keylen is zero or key is too long"; \
+        \
+        blake2 = MakeHolder<TBlake2B<EInstructionSet::IS>>(outlen, key, keylen); \
+    } \
+    \
+    void TBlake2BProxy##IS::Update(ui32 in) { \
+        blake2->Update(in); \
+        ZEROUPPER \
+    } \
+    \
+    void TBlake2BProxy##IS::Update(const void* pin, size_t inlen) { \
+        blake2->Update(pin, inlen); \
+        ZEROUPPER \
+    } \
+    \
+    void TBlake2BProxy##IS::Final(void* out, size_t outlen) { \
+        blake2->Final(out, outlen); \
+        ZEROUPPER \
+    }
diff --git a/library/cpp/digest/argonish/internal/proxies/macro/ya.make b/library/cpp/digest/argonish/internal/proxies/macro/ya.make
index 5f639d4571..b2b79b2b2a 100644
--- a/library/cpp/digest/argonish/internal/proxies/macro/ya.make
+++ b/library/cpp/digest/argonish/internal/proxies/macro/ya.make
@@ -1,5 +1,5 @@
-LIBRARY()
-
-OWNER(e-sidorov)
-
-END()
+LIBRARY()
+
+OWNER(e-sidorov)
+
+END()
diff --git a/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.cpp b/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.cpp
index 0bc51866fd..55832396be 100644
--- a/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.cpp
+++ b/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.cpp
@@ -1,20 +1,20 @@
-//
-// Created by Evgeny Sidorov on 12/04/17.
-//
-
-#include "proxy_ref.h"
+//
+// Created by Evgeny Sidorov on 12/04/17.
+//
+
+#include "proxy_ref.h"
 #include <library/cpp/digest/argonish/internal/argon2/argon2_base.h>
 #include <library/cpp/digest/argonish/internal/argon2/argon2_ref.h>
 #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h>
 #include <library/cpp/digest/argonish/internal/blake2b/blake2b_ref.h>
-
-#include <stdexcept>
-
-#define ZEROUPPER ;
-
-namespace NArgonish {
-    ARGON2_PROXY_CLASS_IMPL(REF)
-    BLAKE2B_PROXY_CLASS_IMPL(REF)
-}
-
-#undef ZEROUPPER
+
+#include <stdexcept>
+
+#define ZEROUPPER ;
+
+namespace NArgonish {
+    ARGON2_PROXY_CLASS_IMPL(REF)
+    BLAKE2B_PROXY_CLASS_IMPL(REF)
+}
+
+#undef ZEROUPPER
diff --git a/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.h b/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.h
index 821abc50cd..c9217a986c 100644
--- a/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.h
+++ b/library/cpp/digest/argonish/internal/proxies/ref/proxy_ref.h
@@ -1,11 +1,11 @@
-#pragma once
-
-#include <util/generic/yexception.h>
+#pragma once
+
+#include <util/generic/yexception.h>
 #include <library/cpp/digest/argonish/argon2.h>
 #include <library/cpp/digest/argonish/blake2b.h>
 #include <library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h>
-
-namespace NArgonish {
-    ARGON2_PROXY_CLASS_DECL(REF)
-    BLAKE2B_PROXY_CLASS_DECL(REF)
-}
+
+namespace NArgonish {
+    ARGON2_PROXY_CLASS_DECL(REF)
+    BLAKE2B_PROXY_CLASS_DECL(REF)
+}
diff --git a/library/cpp/digest/argonish/internal/proxies/ref/ya.make b/library/cpp/digest/argonish/internal/proxies/ref/ya.make
index 7a15f44611..08ac4bb77d 100644
--- a/library/cpp/digest/argonish/internal/proxies/ref/ya.make
+++ b/library/cpp/digest/argonish/internal/proxies/ref/ya.make
@@ -1,17 +1,17 @@
-OWNER(e-sidorov)
-
-LIBRARY()
-
-NO_UTIL()
-
-PEERDIR(
+OWNER(e-sidorov)
+
+LIBRARY()
+
+NO_UTIL()
+
+PEERDIR(
 library/cpp/digest/argonish/internal/proxies/macro
 library/cpp/digest/argonish/internal/argon2
 library/cpp/digest/argonish/internal/blake2b
-)
-
-SRCS(
-    proxy_ref.cpp
-)
-
-END()
+)
+
+SRCS(
+    proxy_ref.cpp
+)
+
+END()
diff --git a/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.cpp b/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.cpp
index 3e63c9ad62..d56396cee8 100644
--- a/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.cpp
+++ b/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.cpp
@@ -1,18 +1,18 @@
-//
-// Created by Evgeny Sidorov on 12/04/17.
-//
-
-#include "proxy_sse2.h"
+//
+// Created by Evgeny Sidorov on 12/04/17.
+//
+
+#include "proxy_sse2.h"
 #include <library/cpp/digest/argonish/internal/argon2/argon2_base.h>
 #include <library/cpp/digest/argonish/internal/argon2/argon2_sse2.h>
 #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h>
 #include <library/cpp/digest/argonish/internal/blake2b/blake2b_sse2.h>
-
-#define ZEROUPPER ;
-
-namespace NArgonish {
-    ARGON2_PROXY_CLASS_IMPL(SSE2)
-    BLAKE2B_PROXY_CLASS_IMPL(SSE2)
-}
-
-#undef ZEROUPPER
+
+#define ZEROUPPER ;
+
+namespace NArgonish {
+    ARGON2_PROXY_CLASS_IMPL(SSE2)
+    BLAKE2B_PROXY_CLASS_IMPL(SSE2)
+}
+
+#undef ZEROUPPER
diff --git a/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.h b/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.h
index a2b74cd9a7..553b5797a8 100644
--- a/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.h
+++ b/library/cpp/digest/argonish/internal/proxies/sse2/proxy_sse2.h
@@ -1,11 +1,11 @@
-#pragma once
-
-#include <util/generic/yexception.h>
+#pragma once
+
+#include <util/generic/yexception.h>
 #include <library/cpp/digest/argonish/argon2.h>
 #include <library/cpp/digest/argonish/blake2b.h>
 #include <library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h>
-
-namespace NArgonish {
-    ARGON2_PROXY_CLASS_DECL(SSE2)
-    BLAKE2B_PROXY_CLASS_DECL(SSE2)
-}
+
+namespace NArgonish {
+    ARGON2_PROXY_CLASS_DECL(SSE2)
+    BLAKE2B_PROXY_CLASS_DECL(SSE2)
+}
diff --git a/library/cpp/digest/argonish/internal/proxies/sse2/ya.make b/library/cpp/digest/argonish/internal/proxies/sse2/ya.make
index 1c752f0dd5..1529a982fa 100644
--- a/library/cpp/digest/argonish/internal/proxies/sse2/ya.make
+++ b/library/cpp/digest/argonish/internal/proxies/sse2/ya.make
@@ -1,18 +1,18 @@
-OWNER(e-sidorov)
-
-LIBRARY()
-
-NO_UTIL()
-
-IF (ARCH_X86_64 OR ARCH_I386)
-    PEERDIR(
-        library/cpp/digest/argonish/internal/proxies/macro
-        library/cpp/digest/argonish/internal/argon2
-        library/cpp/digest/argonish/internal/blake2b
-    )
-    SRC_CPP_SSE2(
-        proxy_sse2.cpp
-    )
-ENDIF()
-
-END()
+OWNER(e-sidorov)
+
+LIBRARY()
+
+NO_UTIL()
+
+IF (ARCH_X86_64 OR ARCH_I386)
+    PEERDIR(
+        library/cpp/digest/argonish/internal/proxies/macro
+        library/cpp/digest/argonish/internal/argon2
+        library/cpp/digest/argonish/internal/blake2b
+    )
+    SRC_CPP_SSE2(
+        proxy_sse2.cpp
+    )
+ENDIF()
+
+END()
diff --git a/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.cpp b/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.cpp
index b633ad8cbf..fe1b28bf24 100644
--- a/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.cpp
+++ b/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.cpp
@@ -1,18 +1,18 @@
-//
-// Created by Evgeny Sidorov on 12/04/17.
-//
-
-#include "proxy_sse41.h"
+//
+// Created by Evgeny Sidorov on 12/04/17.
+//
+
+#include "proxy_sse41.h"
 #include <library/cpp/digest/argonish/internal/argon2/argon2_base.h>
 #include <library/cpp/digest/argonish/internal/argon2/argon2_sse41.h>
 #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h>
 #include <library/cpp/digest/argonish/internal/blake2b/blake2b_sse41.h>
-
-#define ZEROUPPER ;
-
-namespace NArgonish {
-    ARGON2_PROXY_CLASS_IMPL(SSE41)
-    BLAKE2B_PROXY_CLASS_IMPL(SSE41)
-}
-
-#undef ZEROUPPER
+
+#define ZEROUPPER ;
+
+namespace NArgonish {
+    ARGON2_PROXY_CLASS_IMPL(SSE41)
+    BLAKE2B_PROXY_CLASS_IMPL(SSE41)
+}
+
+#undef ZEROUPPER
diff --git a/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.h b/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.h
index 2a4b6614aa..c56f41750c 100644
--- a/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.h
+++ b/library/cpp/digest/argonish/internal/proxies/sse41/proxy_sse41.h
@@ -1,11 +1,11 @@
-#pragma once
-
-#include <util/generic/yexception.h>
+#pragma once
+
+#include <util/generic/yexception.h>
 #include <library/cpp/digest/argonish/argon2.h>
 #include <library/cpp/digest/argonish/blake2b.h>
 #include <library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h>
-
-namespace NArgonish {
-    ARGON2_PROXY_CLASS_DECL(SSE41)
-    BLAKE2B_PROXY_CLASS_DECL(SSE41)
-}
+
+namespace NArgonish {
+    ARGON2_PROXY_CLASS_DECL(SSE41)
+    BLAKE2B_PROXY_CLASS_DECL(SSE41)
+}
diff --git a/library/cpp/digest/argonish/internal/proxies/sse41/ya.make b/library/cpp/digest/argonish/internal/proxies/sse41/ya.make
index 16a9922016..5da63f0bbf 100644
--- a/library/cpp/digest/argonish/internal/proxies/sse41/ya.make
+++ b/library/cpp/digest/argonish/internal/proxies/sse41/ya.make
@@ -1,18 +1,18 @@
-OWNER(e-sidorov)
-
-LIBRARY()
-
-NO_UTIL()
-
-IF (ARCH_X86_64 OR ARCH_I386)
-    PEERDIR(
-        library/cpp/digest/argonish/internal/proxies/macro
-        library/cpp/digest/argonish/internal/argon2
-        library/cpp/digest/argonish/internal/blake2b
-    )
-    SRC_CPP_SSE41(
-        proxy_sse41.cpp
-    )
-ENDIF()
-
-END()
+OWNER(e-sidorov)
+
+LIBRARY()
+
+NO_UTIL()
+
+IF (ARCH_X86_64 OR ARCH_I386)
+    PEERDIR(
+        library/cpp/digest/argonish/internal/proxies/macro
+        library/cpp/digest/argonish/internal/argon2
+        library/cpp/digest/argonish/internal/blake2b
+    )
+    SRC_CPP_SSE41(
+        proxy_sse41.cpp
+    )
+ENDIF()
+
+END()
diff --git a/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.cpp b/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.cpp
index d77b55737c..24b70e22d3 100644
--- a/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.cpp
+++ b/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.cpp
@@ -1,18 +1,18 @@
-//
-// Created by Evgeny Sidorov on 12/04/17.
-//
-
-#include "proxy_ssse3.h"
+//
+// Created by Evgeny Sidorov on 12/04/17.
+//
+
+#include "proxy_ssse3.h"
 #include <library/cpp/digest/argonish/internal/argon2/argon2_base.h>
 #include <library/cpp/digest/argonish/internal/argon2/argon2_ssse3.h>
 #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h>
 #include <library/cpp/digest/argonish/internal/blake2b/blake2b_ssse3.h>
-
-#define ZEROUPPER ;
-
-namespace NArgonish {
-    ARGON2_PROXY_CLASS_IMPL(SSSE3)
-    BLAKE2B_PROXY_CLASS_IMPL(SSSE3)
-}
-
-#undef ZEROUPPER
+
+#define ZEROUPPER ;
+
+namespace NArgonish {
+    ARGON2_PROXY_CLASS_IMPL(SSSE3)
+    BLAKE2B_PROXY_CLASS_IMPL(SSSE3)
+}
+
+#undef ZEROUPPER
diff --git a/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.h b/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.h
index 994133e88e..93be69e3c6 100644
--- a/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.h
+++ b/library/cpp/digest/argonish/internal/proxies/ssse3/proxy_ssse3.h
@@ -1,11 +1,11 @@
-#pragma once
-
-#include <util/generic/yexception.h>
+#pragma once
+
+#include <util/generic/yexception.h>
 #include <library/cpp/digest/argonish/argon2.h>
 #include <library/cpp/digest/argonish/blake2b.h>
 #include <library/cpp/digest/argonish/internal/proxies/macro/proxy_macros.h>
-
-namespace NArgonish {
-    ARGON2_PROXY_CLASS_DECL(SSSE3)
-    BLAKE2B_PROXY_CLASS_DECL(SSSE3)
-}
+
+namespace NArgonish {
+    ARGON2_PROXY_CLASS_DECL(SSSE3)
+    BLAKE2B_PROXY_CLASS_DECL(SSSE3)
+}
diff --git a/library/cpp/digest/argonish/internal/proxies/ssse3/ya.make b/library/cpp/digest/argonish/internal/proxies/ssse3/ya.make
index 82d5116559..e585a09fca 100644
--- a/library/cpp/digest/argonish/internal/proxies/ssse3/ya.make
+++ b/library/cpp/digest/argonish/internal/proxies/ssse3/ya.make
@@ -1,19 +1,19 @@
-LIBRARY()
-
-OWNER(e-sidorov)
-
-NO_UTIL()
-
-IF (ARCH_X86_64 OR ARCH_I386)
-    PEERDIR(
-        library/cpp/digest/argonish/internal/proxies/macro
-        library/cpp/digest/argonish/internal/argon2
-        library/cpp/digest/argonish/internal/blake2b
-    )
-
-    SRC_CPP_SSSE3(
-        proxy_ssse3.cpp
-    )
-ENDIF()
-
-END()
+LIBRARY()
+
+OWNER(e-sidorov)
+
+NO_UTIL()
+
+IF (ARCH_X86_64 OR ARCH_I386)
+    PEERDIR(
+        library/cpp/digest/argonish/internal/proxies/macro
+        library/cpp/digest/argonish/internal/argon2
+        library/cpp/digest/argonish/internal/blake2b
+    )
+
+    SRC_CPP_SSSE3(
+        proxy_ssse3.cpp
+    )
+ENDIF()
+
+END()
diff --git a/library/cpp/digest/argonish/internal/proxies/ya.make b/library/cpp/digest/argonish/internal/proxies/ya.make
index 62bb1bcc50..f7cceda5f0 100644
--- a/library/cpp/digest/argonish/internal/proxies/ya.make
+++ b/library/cpp/digest/argonish/internal/proxies/ya.make
@@ -1,8 +1,8 @@
-RECURSE(
-    avx2
-    ref
-    sse2
-    sse41
-    ssse3
-    macro
-)
+RECURSE(
+    avx2
+    ref
+    sse2
+    sse41
+    ssse3
+    macro
+)
diff --git a/library/cpp/digest/argonish/internal/rotations/rotations_avx2.h b/library/cpp/digest/argonish/internal/rotations/rotations_avx2.h
index 81cd171f59..6d1910d34c 100644
--- a/library/cpp/digest/argonish/internal/rotations/rotations_avx2.h
+++ b/library/cpp/digest/argonish/internal/rotations/rotations_avx2.h
@@ -1,30 +1,30 @@
-#pragma once
-
-#include <immintrin.h>
-
-namespace NArgonish {
-    static inline void XorValues(__m256i* result, const __m256i* val1, const __m256i* val2) {
-        _mm256_storeu_si256(result, _mm256_xor_si256(
-            _mm256_loadu_si256(val1), _mm256_loadu_si256(val2)));
-    }
-
-    static inline __m256i Rotr32(__m256i x) {
-        return _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1));
-    }
-
-    static inline __m256i Rotr24(__m256i x) {
-        return _mm256_shuffle_epi8(x, _mm256_setr_epi8(
-            3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10,
-            3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10));
-    }
-
-    static inline __m256i Rotr16(__m256i x) {
-        return _mm256_shuffle_epi8(x, _mm256_setr_epi8(
-            2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9,
-            2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9));
-    }
-
-    static inline __m256i Rotr63(__m256i x) {
-        return _mm256_xor_si256(_mm256_srli_epi64(x, 63), _mm256_add_epi64(x, x));
-    }
-}
+#pragma once
+
+#include <immintrin.h>
+
+namespace NArgonish {
+    static inline void XorValues(__m256i* result, const __m256i* val1, const __m256i* val2) {
+        _mm256_storeu_si256(result, _mm256_xor_si256(
+            _mm256_loadu_si256(val1), _mm256_loadu_si256(val2)));
+    }
+
+    static inline __m256i Rotr32(__m256i x) {
+        return _mm256_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1));
+    }
+
+    static inline __m256i Rotr24(__m256i x) {
+        return _mm256_shuffle_epi8(x, _mm256_setr_epi8(
+            3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10,
+            3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10));
+    }
+
+    static inline __m256i Rotr16(__m256i x) {
+        return _mm256_shuffle_epi8(x, _mm256_setr_epi8(
+            2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9,
+            2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9));
+    }
+
+    static inline __m256i Rotr63(__m256i x) {
+        return _mm256_xor_si256(_mm256_srli_epi64(x, 63), _mm256_add_epi64(x, x));
+    }
+}
diff --git a/library/cpp/digest/argonish/internal/rotations/rotations_ref.h b/library/cpp/digest/argonish/internal/rotations/rotations_ref.h
index 6f59e233a5..82ffcae640 100644
--- a/library/cpp/digest/argonish/internal/rotations/rotations_ref.h
+++ b/library/cpp/digest/argonish/internal/rotations/rotations_ref.h
@@ -1,7 +1,7 @@
-#pragma once
-
-namespace NArgonish {
-    static inline ui64 Rotr(const ui64 w, const unsigned c) {
-        return (w >> c) | (w << (64 - c));
-    }
-}
+#pragma once
+
+namespace NArgonish {
+    static inline ui64 Rotr(const ui64 w, const unsigned c) {
+        return (w >> c) | (w << (64 - c));
+    }
+}
diff --git a/library/cpp/digest/argonish/internal/rotations/rotations_sse2.h b/library/cpp/digest/argonish/internal/rotations/rotations_sse2.h
index 55a10a31b0..9af07b67f5 100644
--- a/library/cpp/digest/argonish/internal/rotations/rotations_sse2.h
+++ b/library/cpp/digest/argonish/internal/rotations/rotations_sse2.h
@@ -1,27 +1,27 @@
-#pragma once
-
-#include <emmintrin.h>
-
-namespace NArgonish {
-    static inline void XorValues(__m128i* result, const __m128i* val1, const __m128i* val2) {
-        _mm_storeu_si128(result, _mm_xor_si128(
-            _mm_loadu_si128(val1),
-            _mm_loadu_si128(val2)));
-    }
-
-    static inline __m128i Rotr32(__m128i x) {
-        return _mm_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1));
-    }
-
-    static inline __m128i Rotr24(__m128i x) {
-        return _mm_xor_si128(_mm_srli_epi64(x, 24), _mm_slli_epi64(x, 40));
-    }
-
-    static inline __m128i Rotr16(__m128i x) {
-        return _mm_xor_si128(_mm_srli_epi64(x, 16), _mm_slli_epi64(x, 48));
-    }
-
-    static inline __m128i Rotr63(__m128i x) {
-        return _mm_xor_si128(_mm_srli_epi64(x, 63), _mm_add_epi64(x, x));
-    }
-}
+#pragma once
+
+#include <emmintrin.h>
+
+namespace NArgonish {
+    static inline void XorValues(__m128i* result, const __m128i* val1, const __m128i* val2) {
+        _mm_storeu_si128(result, _mm_xor_si128(
+            _mm_loadu_si128(val1),
+            _mm_loadu_si128(val2)));
+    }
+
+    static inline __m128i Rotr32(__m128i x) {
+        return _mm_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1));
+    }
+
+    static inline __m128i Rotr24(__m128i x) {
+        return _mm_xor_si128(_mm_srli_epi64(x, 24), _mm_slli_epi64(x, 40));
+    }
+
+    static inline __m128i Rotr16(__m128i x) {
+        return _mm_xor_si128(_mm_srli_epi64(x, 16), _mm_slli_epi64(x, 48));
+    }
+
+    static inline __m128i Rotr63(__m128i x) {
+        return _mm_xor_si128(_mm_srli_epi64(x, 63), _mm_add_epi64(x, x));
+    }
+}
diff --git a/library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h b/library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h
index 39c9c5491b..88669dc76a 100644
--- a/library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h
+++ b/library/cpp/digest/argonish/internal/rotations/rotations_ssse3.h
@@ -1,28 +1,28 @@
-#pragma once
-
-#include <emmintrin.h>
-#include <tmmintrin.h>
-
-namespace NArgonish {
-    static inline void XorValues(__m128i* result, __m128i* val1, __m128i* val2) {
-        _mm_storeu_si128(result, _mm_xor_si128(
-            _mm_loadu_si128(val1),
-            _mm_loadu_si128(val2)));
-    }
-
-    static inline __m128i Rotr32(__m128i x) {
-        return _mm_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1));
-    }
-
-    static inline __m128i Rotr24(__m128i x) {
-        return _mm_shuffle_epi8(x, _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10));
-    }
-
-    static inline __m128i Rotr16(__m128i x) {
-        return _mm_shuffle_epi8(x, _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9));
-    }
-
-    static inline __m128i Rotr63(__m128i x) {
-        return _mm_xor_si128(_mm_srli_epi64(x, 63), _mm_add_epi64(x, x));
-    }
-}
+#pragma once
+
+#include <emmintrin.h>
+#include <tmmintrin.h>
+
+namespace NArgonish {
+    static inline void XorValues(__m128i* result, __m128i* val1, __m128i* val2) {
+        _mm_storeu_si128(result, _mm_xor_si128(
+            _mm_loadu_si128(val1),
+            _mm_loadu_si128(val2)));
+    }
+
+    static inline __m128i Rotr32(__m128i x) {
+        return _mm_shuffle_epi32(x, _MM_SHUFFLE(2, 3, 0, 1));
+    }
+
+    static inline __m128i Rotr24(__m128i x) {
+        return _mm_shuffle_epi8(x, _mm_setr_epi8(3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10));
+    }
+
+    static inline __m128i Rotr16(__m128i x) {
+        return _mm_shuffle_epi8(x, _mm_setr_epi8(2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9));
+    }
+
+    static inline __m128i Rotr63(__m128i x) {
+        return _mm_xor_si128(_mm_srli_epi64(x, 63), _mm_add_epi64(x, x));
+    }
+}
diff --git a/library/cpp/digest/argonish/internal/rotations/ya.make b/library/cpp/digest/argonish/internal/rotations/ya.make
index 5f639d4571..b2b79b2b2a 100644
--- a/library/cpp/digest/argonish/internal/rotations/ya.make
+++ b/library/cpp/digest/argonish/internal/rotations/ya.make
@@ -1,5 +1,5 @@
-LIBRARY()
-
-OWNER(e-sidorov)
-
-END()
+LIBRARY()
+
+OWNER(e-sidorov)
+
+END()
diff --git a/library/cpp/digest/argonish/internal/ya.make b/library/cpp/digest/argonish/internal/ya.make
index 4a69395970..35003e964e 100644
--- a/library/cpp/digest/argonish/internal/ya.make
+++ b/library/cpp/digest/argonish/internal/ya.make
@@ -1,7 +1,7 @@
-RECURSE(
-    proxies
-    argon2
-    blake2b
-    blamka
-    rotations
-)
+RECURSE(
+    proxies
+    argon2
+    blake2b
+    blamka
+    rotations
+)
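
Note on the ZEROUPPER convention seen in the proxies above: every REF and SSE translation unit defines ZEROUPPER as a bare semicolon and #undef's it after the macro expansions, while the proxy_macros.h comment says the macro exists so the AVX2 path can clear the upper halves of the YMM registers. The AVX2 proxy source is not part of this diff, so the definition below is an assumption of what it presumably looks like, built on the standard _mm256_zeroupper() intrinsic from immintrin.h:

    #include <immintrin.h>

    // Presumed AVX2 counterpart of "#define ZEROUPPER ;" (the AVX2 proxy is
    // not shown in this diff): VZEROUPPER zeroes bits 255:128 of every YMM
    // register, avoiding the AVX-to-SSE transition penalty the proxy_macros.h
    // comment refers to.
    #define ZEROUPPER _mm256_zeroupper();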
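
The ARGON2_INSTANCE_DECL chain in proxy_macros.h turns the runtime (mcost, threads) pair into a compile-time choice: each whitelisted combination selects a distinct template instantiation, and anything outside the list throws. A minimal self-contained sketch of the same dispatch technique, with hypothetical names (IHasher, THasher, MakeHasher) and std:: types standing in for the repo's THolder and yexception:

    #include <memory>
    #include <stdexcept>

    struct IHasher {
        virtual ~IHasher() = default;
    };

    // One fully specialized implementation per supported configuration;
    // the template parameters play the role of mcost/threads in TArgon2*.
    template <unsigned Mcost, unsigned Threads>
    struct THasher final: IHasher {};

    std::unique_ptr<IHasher> MakeHasher(unsigned mcost, unsigned threads) {
        if (mcost == 1 && threads == 1)
            return std::make_unique<THasher<1, 1>>();
        if (mcost == 8 && threads == 1)
            return std::make_unique<THasher<8, 1>>();
        // ...one branch per whitelisted (mcost, threads) pair, as in ARGON2_INSTANCE_DECL...
        throw std::runtime_error("unsupported (mcost, threads) combination");
    }

Baking the memory cost and thread count into template parameters lets the compiler specialize and unroll the block-filling loops per configuration, which is why the proxy constructor enumerates a fixed whitelist instead of accepting arbitrary values.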
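
The rotation helpers pick the cheapest instruction for each shift amount: Rotr32 is a 32-bit lane shuffle, Rotr24 and Rotr16 use byte shuffles where SSSE3/AVX2's _mm_shuffle_epi8 is available (both amounts are whole bytes), and Rotr63 relies on the identity rotr(x, 63) = rotl(x, 1) = (x >> 63) ^ (x + x), since x + x equals x << 1 and the two terms have no set bits in common. A scalar sketch checking that identity against the generic Rotr from rotations_ref.h:

    #include <cassert>
    #include <cstdint>
    #include <initializer_list>

    // Generic rotate-right, as in rotations_ref.h (valid for 0 < c < 64).
    static inline uint64_t Rotr(uint64_t w, unsigned c) {
        return (w >> c) | (w << (64 - c));
    }

    int main() {
        for (uint64_t x : {0x0123456789ABCDEFull, ~0ull, 1ull, 0x8000000000000000ull}) {
            // (x >> 63) and (x + x) have disjoint set bits, so XOR acts as OR here.
            assert(((x >> 63) ^ (x + x)) == Rotr(x, 63));
        }
        return 0;
    }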