aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/digest/argonish/internal/argon2/argon2_base.h
blob: 2385cc947c89c287742e9146196b39c6130eff53 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
#pragma once

#include <util/generic/yexception.h>
#include <library/cpp/digest/argonish/argon2.h>
#include <library/cpp/digest/argonish/internal/blake2b/blake2b.h>
#include <library/cpp/threading/poor_man_openmp/thread_helper.h>

namespace NArgonish {
    /* Size in bytes of the initial digest produced by InitialHash_. */
    constexpr ui32 ARGON2_PREHASH_DIGEST_LENGTH = 64;
    /* Capacity in bytes of the secret (key) buffer kept inside TArgon2. */
    constexpr ui32 ARGON2_SECRET_MAX_LENGTH = 64;
    /* Initial digest plus two 32-bit counters appended by FillFirstBlocks_ (64 + 4 + 4). */
    constexpr ui32 ARGON2_PREHASH_SEED_LENGTH = 72;

    /* Size in bytes of a single memory block and the same size counted in wider words. */
    constexpr ui32 ARGON2_BLOCK_SIZE = 1024;
    constexpr ui32 ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8;
    constexpr ui32 ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16;
    constexpr ui32 ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32;

    /* Number of 64-bit pseudo-random values consumed from one address block (data-independent mode). */
    constexpr ui32 ARGON2_ADDRESSES_IN_BLOCK = 128;
    /* Number of slices every lane is divided into; lanes are processed slice by slice. */
    constexpr ui32 ARGON2_SYNC_POINTS = 4;

    /* Lower bounds for salt and output lengths; not referenced in this header - presumably enforced by callers. */
    constexpr ui32 ARGON2_SALT_MIN_LEN = 8;
    constexpr ui32 ARGON2_MIN_OUTLEN = 4;

    /*
     * One Argon2 memory block: ARGON2_QWORDS_IN_BLOCK 64-bit words (ARGON2_BLOCK_SIZE bytes).
     * The working memory of the algorithm is a flat array of these blocks.
     */
    struct TBlock {
        ui64 V[ARGON2_QWORDS_IN_BLOCK];
    };

    /*
     * Generic Argon2 driver; the block primitives (FillBlock_, CopyBlock_, XorBlock_)
     * are supplied by derived classes.
     *
     * Template parameters:
     *   instructionSet - selects the TBlake2B implementation used for all hashing
     *   mcost          - requested memory cost in blocks (see MemoryBlocks_ for the rounding applied)
     *   threads        - number of lanes; segments of different lanes are filled in parallel
     */
    template <EInstructionSet instructionSet, ui32 mcost, ui32 threads>
    class TArgon2: public IArgon2Base {
    public:
        /*
         * atype  - Argon2 variant (d, i or id)
         * tcost  - number of passes over the memory
         * key    - optional secret, copied into the object; may be nullptr when keylen is 0
         * keylen - secret length in bytes, at most ARGON2_SECRET_MAX_LENGTH
         *
         * Throws yexception when the key does not fit into the internal buffer
         * or when a non-zero length is given together with a null pointer.
         */
        TArgon2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen)
            : SecretLen_(keylen)
            , Tcost_(tcost)
            , Atype_(atype)
        {
            /* Secret_ is a fixed-size array: refuse anything that would overflow it */
            if (keylen > ARGON2_SECRET_MAX_LENGTH)
                ythrow yexception() << "key length exceeds ARGON2_SECRET_MAX_LENGTH";
            if (keylen != 0 && key == nullptr)
                ythrow yexception() << "key is null but its length is not zero";

            if (SecretLen_)
                memcpy(Secret_, key, keylen);
        }

        /* Wipes the stored secret before the object goes away. */
        virtual ~TArgon2() override {
            if (SecretLen_) {
                SecureZeroMemory_(Secret_, SecretLen_);
                SecretLen_ = 0;
            }
        }

        /*
         * Computes the hash of pwd/salt (and optional aad) into out[0..outlen).
         * The working memory (MemoryBlocks_ blocks) is allocated on the heap for the duration of the call.
         */
        virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen,
                          ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override {
            TArrayHolder<TBlock> buffer(new TBlock[MemoryBlocks_]);
            InternalHash_(buffer.Get(), pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen);
        }

        /*
         * Recomputes the hash with length hashlen and compares it with the supplied one.
         * Returns true when all hashlen bytes match.
         */
        virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen,
                            const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override {
            TArrayHolder<ui8> hashResult(new ui8[hashlen]);
            Hash(pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen);

            return SecureCompare_(hash, hashResult.Get(), hashlen);
        }

        /*
         * Same as Hash, but uses caller-provided working memory of at least GetMemorySize() bytes.
         * Throws yexception when memory is null or too small.
         *
         * NOTE(review): the buffer is reinterpreted as an array of TBlock and its alignment
         * is not checked here - confirm that callers pass suitably aligned memory.
         */
        virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen,
                                          const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen,
                                          const ui8* aad = nullptr, ui32 aadlen = 0) const override {
            if (memory == nullptr || mlen < sizeof(TBlock) * MemoryBlocks_)
                ythrow yexception() << "memory is null or its size is not enough";

            InternalHash_((TBlock*)memory, pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen);
        }

        /* Same as Verify, but the hash is recomputed in caller-provided working memory. */
        virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen,
                                            const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen,
                                            const ui8* aad = nullptr, ui32 aadlen = 0) const override {
            TArrayHolder<ui8> hashResult(new ui8[hashlen]);
            HashWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen);

            return SecureCompare_(hashResult.Get(), hash, hashlen);
        }

        /* Size in bytes of the working memory required by the *WithCustomMemory methods. */
        virtual size_t GetMemorySize() const override {
            return MemoryBlocks_ * sizeof(TBlock);
        }

    protected: /* Constants */
        ui8 Secret_[ARGON2_SECRET_MAX_LENGTH] = {0};
        ui32 SecretLen_ = 0;
        ui32 Tcost_;
        EArgon2Type Atype_;

        static constexpr ui32 Lanes_ = threads;
        /*
         * mcost rounded down to a multiple of Lanes_ * ARGON2_SYNC_POINTS,
         * but never less than 2 * ARGON2_SYNC_POINTS blocks per lane.
         */
        static constexpr ui32 MemoryBlocks_ = (mcost >= 2 * ARGON2_SYNC_POINTS * Lanes_) ? (mcost - mcost % (Lanes_ * ARGON2_SYNC_POINTS)) : 2 * ARGON2_SYNC_POINTS * Lanes_;
        static constexpr ui32 SegmentLength_ = MemoryBlocks_ / (Lanes_ * ARGON2_SYNC_POINTS);
        static constexpr ui32 LaneLength_ = SegmentLength_ * ARGON2_SYNC_POINTS;

    protected: /* Prototypes */
        /*
         * Compression step implemented by derived classes: combines prevBlock and refBlock
         * into nextBlock. withXor is set on passes after the first one (see the callers).
         */
        virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock,
                                TBlock* nextBlock, bool withXor) const = 0;

        virtual void CopyBlock_(TBlock* dst, const TBlock* src) const = 0;
        virtual void XorBlock_(TBlock* dst, const TBlock* src) const = 0;

    protected: /* Static functions */
        /*
         * Compares two buffers of len bytes. Every byte is always inspected,
         * there is no early exit on the first mismatch.
         */
        static bool SecureCompare_(const ui8* buffer1, const ui8* buffer2, ui32 len) {
            bool result = true;
            for (ui32 i = 0; i < len; ++i) {
                result &= (buffer1[i] == buffer2[i]);
            }
            return result;
        }

        /*
         * Zeroes len bytes at src. memset is called through a volatile function pointer,
         * which is intended to keep the compiler from optimizing the wipe away.
         */
        static void SecureZeroMemory_(void* src, size_t len) {
            static void* (*const volatile memset_v)(void*, int, size_t) = &memset;
            memset_v(src, 0, len);
        }

        /*
         * Writes value to mem in host byte order.
         * memcpy is used instead of a cast to ui32*, because mem points into a byte buffer
         * that is not guaranteed to be 4-byte aligned.
         */
        static void Store32_(ui32 value, void* mem) {
            memcpy(mem, &value, sizeof(value));
        }

        /* Hashes BLAKE2B_OUTBYTES bytes of in into BLAKE2B_OUTBYTES bytes of out. */
        static void Blake2BHash64_(ui8 out[BLAKE2B_OUTBYTES], const ui8 in[BLAKE2B_OUTBYTES]) {
            TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES);
            hash.Update(in, BLAKE2B_OUTBYTES);
            hash.Final(out, BLAKE2B_OUTBYTES);
        }

        /*
         * Expands the ARGON2_PREHASH_SEED_LENGTH-byte seed into a full block of ARGON2_BLOCK_SIZE bytes.
         * The first digest covers the output length followed by the seed; each following digest
         * is computed from the previous one. Every step contributes the first half of its digest,
         * except the last one which is written out completely.
         */
        static void ExpandBlockhash_(ui8 expanded[ARGON2_BLOCK_SIZE], const ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]) {
            ui8 out_buffer[BLAKE2B_OUTBYTES];
            ui8 in_buffer[BLAKE2B_OUTBYTES];
            const ui32 HALF_OUT_BYTES = BLAKE2B_OUTBYTES / 2;
            const ui32 HASH_BLOCKS_COUNT = ((ARGON2_BLOCK_SIZE / HALF_OUT_BYTES));

            TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES);
            hash.Update(ARGON2_BLOCK_SIZE);
            hash.Update(blockhash, ARGON2_PREHASH_SEED_LENGTH);
            hash.Final(out_buffer, BLAKE2B_OUTBYTES);

            memcpy(expanded, out_buffer, HALF_OUT_BYTES);

            for (ui32 i = 1; i < HASH_BLOCKS_COUNT - 2; ++i) {
                memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
                Blake2BHash64_(out_buffer, in_buffer);
                memcpy(expanded + (i * HALF_OUT_BYTES), out_buffer, HALF_OUT_BYTES);
            }

            /* the final digest fills the last two half-size slots at once */
            Blake2BHash64_(in_buffer, out_buffer);
            memcpy(expanded + HALF_OUT_BYTES * (HASH_BLOCKS_COUNT - 2), in_buffer, BLAKE2B_OUTBYTES);
        }

        /*
         * Variable-length hash: produces outlen bytes from in[0..inlen).
         * Short outputs are a single Blake2b call over the output length followed by the input;
         * longer outputs are built from a chain of digests, each contributing its first half,
         * with the last digest sized to the remaining number of bytes.
         *
         * NOTE(review): RFC 9106 and the reference implementation take the single-call path
         * for outlen <= 64, while the condition here is a strict '<', so an output of exactly
         * BLAKE2B_OUTBYTES bytes goes through the chained path. Changing this would alter
         * hashes already produced for that output length - confirm the intent before touching it.
         */
        static void Blake2BLong_(ui8* out, ui32 outlen, const ui8* in, ui32 inlen) {
            if (outlen < BLAKE2B_OUTBYTES) {
                TBlake2B<instructionSet> hash(outlen);
                hash.Update(outlen);
                hash.Update(in, inlen);
                hash.Final(out, outlen);
            } else {
                ui8 out_buffer[BLAKE2B_OUTBYTES];
                ui8 in_buffer[BLAKE2B_OUTBYTES];
                ui32 toproduce = outlen - BLAKE2B_OUTBYTES / 2;

                TBlake2B<instructionSet> hash1(BLAKE2B_OUTBYTES);
                hash1.Update(outlen);
                hash1.Update(in, inlen);
                hash1.Final(out_buffer, BLAKE2B_OUTBYTES);

                memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
                out += BLAKE2B_OUTBYTES / 2;

                while (toproduce > BLAKE2B_OUTBYTES) {
                    memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
                    TBlake2B<instructionSet> hash2(BLAKE2B_OUTBYTES);
                    hash2.Update(in_buffer, BLAKE2B_OUTBYTES);
                    hash2.Final(out_buffer, BLAKE2B_OUTBYTES);
                    memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2);
                    out += BLAKE2B_OUTBYTES / 2;
                    toproduce -= BLAKE2B_OUTBYTES / 2;
                }

                memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES);
                {
                    TBlake2B<instructionSet> hash3(toproduce);
                    hash3.Update(in_buffer, BLAKE2B_OUTBYTES);
                    hash3.Final(out_buffer, toproduce);
                    memcpy(out, out_buffer, toproduce);
                }
            }
        }

        /* Fills every byte of the block with the value in. */
        static void InitBlockValue_(TBlock* b, ui8 in) {
            memset(b->V, in, sizeof(b->V));
        }

    protected: /* Functions */
        /* Runs the three stages of the algorithm on the given working memory. */
        void InternalHash_(TBlock* memory, const ui8* pwd, ui32 pwdlen,
                           const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen,
                           const ui8* aad, ui32 aadlen) const {
            /*
             * all parameters checks are in proxy objects
             */

            Initialize_(memory, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen);
            FillMemoryBlocks_(memory);
            Finalize_(memory, out, outlen);
        }

        /*
         * Computes the initial digest over all parameters and inputs and writes
         * ARGON2_PREHASH_DIGEST_LENGTH bytes to blockhash.
         */
        void InitialHash_(ui8 blockhash[ARGON2_PREHASH_DIGEST_LENGTH],
                          ui32 outlen, const ui8* pwd, ui32 pwdlen,
                          const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const {
            TBlake2B<instructionSet> hash(ARGON2_PREHASH_DIGEST_LENGTH);
            /* lanes, but lanes == threads */
            hash.Update(Lanes_);
            /* outlen */
            hash.Update(outlen);
            /* m_cost */
            hash.Update(mcost);
            /* t_cost */
            hash.Update(Tcost_);
            /* version */
            hash.Update(0x00000013);
            /* Argon2 type */
            hash.Update((ui32)Atype_);
            /* pwdlen */
            hash.Update(pwdlen);
            /* pwd */
            hash.Update(pwd, pwdlen);
            /* saltlen */
            hash.Update(saltlen);
            /* salt */
            if (saltlen)
                hash.Update(salt, saltlen);
            /* secret */
            hash.Update(SecretLen_);
            if (SecretLen_)
                hash.Update((void*)Secret_, SecretLen_);
            /* aadlen */
            hash.Update(aadlen);
            if (aadlen)
                hash.Update((void*)aad, aadlen);
            hash.Final(blockhash, ARGON2_PREHASH_DIGEST_LENGTH);
        }

        /*
         * Derives the first two blocks of every lane from the initial digest.
         * blockhash must have room for ARGON2_PREHASH_SEED_LENGTH bytes: the block index and
         * the lane index are stored right after the digest before each expansion.
         */
        void FillFirstBlocks_(TBlock* blocks, ui8* blockhash) const {
            for (ui32 l = 0; l < Lanes_; l++) {
                /* fill the first block of the lane */
                Store32_(l, blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4);
                Store32_(0, blockhash + ARGON2_PREHASH_DIGEST_LENGTH);
                ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_]), blockhash);

                /* fill the second block of the lane */
                Store32_(1, blockhash + ARGON2_PREHASH_DIGEST_LENGTH);
                ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_ + 1]), blockhash);
            }
        }

        /*
         * Performs Tcost_ passes; in each pass the slices are processed one after another
         * and the lanes of a slice are filled in parallel.
         * The 'if' will be optimized out as the number of threads is known at the compile time
         */
        void FillMemoryBlocks_(TBlock* memory) const {
            for (ui32 t = 0; t < Tcost_; ++t) {
                for (ui32 s = 0; s < ARGON2_SYNC_POINTS; ++s) {
                    if (Lanes_ == 1)
                        FillSegment_(memory, t, 0, s);
                    else {
                        NYmp::SetThreadCount(Lanes_);
                        NYmp::ParallelForStaticAutoChunk<ui32>(0, Lanes_, [this, &memory, s, t](int k) {
                            this->FillSegment_(memory, t, k, s);
                        });
                    }
                }
            }
        }

        /* Computes the initial digest and seeds the first two blocks of every lane. */
        void Initialize_(TBlock* memory, ui32 outlen, const ui8* pwd, ui32 pwdlen,
                         const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const {
            ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH];
            InitialHash_(blockhash, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen);
            FillFirstBlocks_(memory, blockhash);
            /* the digest is derived from the password and the secret: do not leave it on the stack */
            SecureZeroMemory_(blockhash, sizeof(blockhash));
        }

        /*
         * Number of blocks that may be referenced when filling block `index` of the given slice.
         * On the first pass only the slices already completed are available, on later passes
         * everything except the current segment; for the same lane the blocks of the current
         * segment filled so far (without the previous one) are added.
         */
        ui32 ComputeReferenceArea_(ui32 pass, ui32 slice, ui32 index, bool sameLane) const {
            ui32 passVal = pass == 0 ? (slice * SegmentLength_) : (LaneLength_ - SegmentLength_);
            return sameLane ? passVal + (index - 1) : passVal + (index == 0 ? -1 : 0);
        }

        /*
         * Maps a 32-bit pseudo-random value to the position (within a lane) of the reference block.
         * The value is squared before scaling, which biases the choice towards recent blocks.
         */
        ui32 IndexAlpha_(ui32 pass, ui32 slice, ui32 index, ui32 pseudoRand, bool sameLane) const {
            ui32 referenceAreaSize = ComputeReferenceArea_(pass, slice, index, sameLane);

            ui64 relativePosition = pseudoRand;
            relativePosition = relativePosition * relativePosition >> 32;
            relativePosition = referenceAreaSize - 1 - (referenceAreaSize * relativePosition >> 32);

            /* after the first pass the reference area starts right behind the current segment */
            ui32 startPosition = 0;
            if (pass != 0)
                startPosition = (slice == ARGON2_SYNC_POINTS - 1) ? 0 : (slice + 1) * SegmentLength_;

            return (ui32)((startPosition + relativePosition) % LaneLength_);
        }

        /* Advances the counter in inputBlock and derives the next block of addresses from it. */
        void NextAddresses_(TBlock* addressBlock, TBlock* inputBlock, const TBlock* zeroBlock) const {
            inputBlock->V[6]++;
            FillBlock_(zeroBlock, inputBlock, addressBlock, false);
            FillBlock_(zeroBlock, addressBlock, addressBlock, false);
        }

        /* XORs the last blocks of all lanes together and hashes the result into out[0..outlen). */
        void Finalize_(const TBlock* memory, ui8* out, ui32 outlen) const {
            TBlock blockhash;
            CopyBlock_(&blockhash, memory + LaneLength_ - 1);

            /* XOR the last blocks */
            for (ui32 l = 1; l < Lanes_; ++l) {
                ui32 lastBlockInLane = l * LaneLength_ + (LaneLength_ - 1);
                XorBlock_(&blockhash, memory + lastBlockInLane);
            }

            Blake2BLong_(out, outlen, (ui8*)blockhash.V, ARGON2_BLOCK_SIZE);
        }

        /*
         * Fills one segment, choosing the addressing mode by the Argon2 variant.
         * Atype_ is a runtime member, so this dispatch is evaluated for every segment.
         * Argon2id uses data-independent addressing for the first half of the first pass
         * and data-dependent addressing afterwards.
         */
        void FillSegment_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const {
            switch (Atype_) {
                case EArgon2Type::Argon2d:
                    FillSegmentD_(memory, pass, lane, slice);
                    return;
                case EArgon2Type::Argon2i:
                    FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2i);
                    return;
                case EArgon2Type::Argon2id:
                    if (pass == 0 && slice < ARGON2_SYNC_POINTS / 2)
                        FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2id);
                    else
                        FillSegmentD_(memory, pass, lane, slice);
                    return;
            }
        }

        /*
         * Data-dependent addressing: the reference block is selected by the first word
         * of the previous block.
         */
        void FillSegmentD_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const {
            /* the first two blocks of every lane are produced by FillFirstBlocks_ */
            ui32 startingIndex = (pass == 0 && slice == 0) ? 2 : 0;
            ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex;
            /* the block preceding the first block of a lane is the last block of that lane */
            ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1;

            for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) {
                /* undo the wrap-around once the lane start has been passed */
                if (currOffset % LaneLength_ == 1) {
                    prevOffset = currOffset - 1;
                }

                ui64 pseudoRand = memory[prevOffset].V[0];
                ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_);
                ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane);

                TBlock* refBlock = memory + LaneLength_ * refLane + refIndex;
                FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0);
            }
        }

        /*
         * Data-independent addressing: reference blocks are selected by values generated
         * from the position (pass, lane, slice), the memory size, the number of passes,
         * the variant given in atp and a running counter.
         */
        void FillSegmentI_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice, EArgon2Type atp) const {
            TBlock addressBlock, inputBlock, zeroBlock;
            InitBlockValue_(&zeroBlock, 0);
            InitBlockValue_(&inputBlock, 0);

            inputBlock.V[0] = pass;
            inputBlock.V[1] = lane;
            inputBlock.V[2] = slice;
            inputBlock.V[3] = MemoryBlocks_;
            inputBlock.V[4] = Tcost_;
            inputBlock.V[5] = (ui64)atp;

            ui32 startingIndex = 0;

            if (pass == 0 && slice == 0) {
                /* the loop below starts at index 2 and would skip the generation at index 0 */
                startingIndex = 2;
                NextAddresses_(&addressBlock, &inputBlock, &zeroBlock);
            }

            ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex;
            /* the block preceding the first block of a lane is the last block of that lane */
            ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1;

            for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) {
                /* undo the wrap-around once the lane start has been passed */
                if (currOffset % LaneLength_ == 1) {
                    prevOffset = currOffset - 1;
                }

                /* a fresh address block is needed every ARGON2_ADDRESSES_IN_BLOCK blocks */
                if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) {
                    NextAddresses_(&addressBlock, &inputBlock, &zeroBlock);
                }

                ui64 pseudoRand = addressBlock.V[i % ARGON2_ADDRESSES_IN_BLOCK];
                ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_);
                ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane);

                TBlock* refBlock = memory + LaneLength_ * refLane + refIndex;
                FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0);
            }
        }
    };
}