diff options
author | AlexSm <alex@ydb.tech> | 2024-03-05 10:40:59 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-05 12:40:59 +0300 |
commit | 1ac13c847b5358faba44dbb638a828e24369467b (patch) | |
tree | 07672b4dd3604ad3dee540a02c6494cb7d10dc3d /contrib/tools/python3/Modules/_blake2/impl/blake2b-round.h | |
parent | ffcca3e7f7958ddc6487b91d3df8c01054bd0638 (diff) | |
download | ydb-1ac13c847b5358faba44dbb638a828e24369467b.tar.gz |
Library import 16 (#2433)
Co-authored-by: robot-piglet <robot-piglet@yandex-team.com>
Co-authored-by: deshevoy <deshevoy@yandex-team.com>
Co-authored-by: robot-contrib <robot-contrib@yandex-team.com>
Co-authored-by: thegeorg <thegeorg@yandex-team.com>
Co-authored-by: robot-ya-builder <robot-ya-builder@yandex-team.com>
Co-authored-by: svidyuk <svidyuk@yandex-team.com>
Co-authored-by: shadchin <shadchin@yandex-team.com>
Co-authored-by: robot-ratatosk <robot-ratatosk@yandex-team.com>
Co-authored-by: innokentii <innokentii@yandex-team.com>
Co-authored-by: arkady-e1ppa <arkady-e1ppa@yandex-team.com>
Co-authored-by: snermolaev <snermolaev@yandex-team.com>
Co-authored-by: dimdim11 <dimdim11@yandex-team.com>
Co-authored-by: kickbutt <kickbutt@yandex-team.com>
Co-authored-by: abdullinsaid <abdullinsaid@yandex-team.com>
Co-authored-by: korsunandrei <korsunandrei@yandex-team.com>
Co-authored-by: petrk <petrk@yandex-team.com>
Co-authored-by: miroslav2 <miroslav2@yandex-team.com>
Co-authored-by: serjflint <serjflint@yandex-team.com>
Co-authored-by: akhropov <akhropov@yandex-team.com>
Co-authored-by: prettyboy <prettyboy@yandex-team.com>
Co-authored-by: ilikepugs <ilikepugs@yandex-team.com>
Co-authored-by: hiddenpath <hiddenpath@yandex-team.com>
Co-authored-by: mikhnenko <mikhnenko@yandex-team.com>
Co-authored-by: spreis <spreis@yandex-team.com>
Co-authored-by: andreyshspb <andreyshspb@yandex-team.com>
Co-authored-by: dimaandreev <dimaandreev@yandex-team.com>
Co-authored-by: rashid <rashid@yandex-team.com>
Co-authored-by: robot-ydb-importer <robot-ydb-importer@yandex-team.com>
Co-authored-by: r-vetrov <r-vetrov@yandex-team.com>
Co-authored-by: ypodlesov <ypodlesov@yandex-team.com>
Co-authored-by: zaverden <zaverden@yandex-team.com>
Co-authored-by: vpozdyayev <vpozdyayev@yandex-team.com>
Co-authored-by: robot-cozmo <robot-cozmo@yandex-team.com>
Co-authored-by: v-korovin <v-korovin@yandex-team.com>
Co-authored-by: arikon <arikon@yandex-team.com>
Co-authored-by: khoden <khoden@yandex-team.com>
Co-authored-by: psydmm <psydmm@yandex-team.com>
Co-authored-by: robot-javacom <robot-javacom@yandex-team.com>
Co-authored-by: dtorilov <dtorilov@yandex-team.com>
Co-authored-by: sennikovmv <sennikovmv@yandex-team.com>
Co-authored-by: hcpp <hcpp@ydb.tech>
Diffstat (limited to 'contrib/tools/python3/Modules/_blake2/impl/blake2b-round.h')
-rw-r--r-- | contrib/tools/python3/Modules/_blake2/impl/blake2b-round.h | 160 |
1 files changed, 160 insertions, 0 deletions
diff --git a/contrib/tools/python3/Modules/_blake2/impl/blake2b-round.h b/contrib/tools/python3/Modules/_blake2/impl/blake2b-round.h new file mode 100644 index 0000000000..5b452c4d63 --- /dev/null +++ b/contrib/tools/python3/Modules/_blake2/impl/blake2b-round.h @@ -0,0 +1,160 @@ +/* + BLAKE2 reference source code package - optimized C implementations + + Written in 2012 by Samuel Neves <sneves@dei.uc.pt> + + To the extent possible under law, the author(s) have dedicated all copyright + and related and neighboring rights to this software to the public domain + worldwide. This software is distributed without any warranty. + + You should have received a copy of the CC0 Public Domain Dedication along with + this software. If not, see <http://creativecommons.org/publicdomain/zero/1.0/>. +*/ +#pragma once +#ifndef __BLAKE2B_ROUND_H__ +#define __BLAKE2B_ROUND_H__ + +#define LOAD(p) _mm_load_si128( (__m128i *)(p) ) +#define STORE(p,r) _mm_store_si128((__m128i *)(p), r) + +#define LOADU(p) _mm_loadu_si128( (__m128i *)(p) ) +#define STOREU(p,r) _mm_storeu_si128((__m128i *)(p), r) + +#define TOF(reg) _mm_castsi128_ps((reg)) +#define TOI(reg) _mm_castps_si128((reg)) + +#define LIKELY(x) __builtin_expect((x),1) + + +/* Microarchitecture-specific macros */ +#ifndef HAVE_XOP +#ifdef HAVE_SSSE3 +#define _mm_roti_epi64(x, c) \ + (-(c) == 32) ? _mm_shuffle_epi32((x), _MM_SHUFFLE(2,3,0,1)) \ + : (-(c) == 24) ? _mm_shuffle_epi8((x), r24) \ + : (-(c) == 16) ? _mm_shuffle_epi8((x), r16) \ + : (-(c) == 63) ? _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_add_epi64((x), (x))) \ + : _mm_xor_si128(_mm_srli_epi64((x), -(c)), _mm_slli_epi64((x), 64-(-(c)))) +#else +#define _mm_roti_epi64(r, c) _mm_xor_si128(_mm_srli_epi64( (r), -(c) ),_mm_slli_epi64( (r), 64-(-(c)) )) +#endif +#else +/* ... */ +#endif + + + +#define G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -32); \ + row4h = _mm_roti_epi64(row4h, -32); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -24); \ + row2h = _mm_roti_epi64(row2h, -24); \ + +#define G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1) \ + row1l = _mm_add_epi64(_mm_add_epi64(row1l, b0), row2l); \ + row1h = _mm_add_epi64(_mm_add_epi64(row1h, b1), row2h); \ + \ + row4l = _mm_xor_si128(row4l, row1l); \ + row4h = _mm_xor_si128(row4h, row1h); \ + \ + row4l = _mm_roti_epi64(row4l, -16); \ + row4h = _mm_roti_epi64(row4h, -16); \ + \ + row3l = _mm_add_epi64(row3l, row4l); \ + row3h = _mm_add_epi64(row3h, row4h); \ + \ + row2l = _mm_xor_si128(row2l, row3l); \ + row2h = _mm_xor_si128(row2h, row3h); \ + \ + row2l = _mm_roti_epi64(row2l, -63); \ + row2h = _mm_roti_epi64(row2h, -63); \ + +#if defined(HAVE_SSSE3) +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2h, row2l, 8); \ + t1 = _mm_alignr_epi8(row2l, row2h, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4h, row4l, 8); \ + t1 = _mm_alignr_epi8(row4l, row4h, 8); \ + row4l = t1; \ + row4h = t0; + +#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = _mm_alignr_epi8(row2l, row2h, 8); \ + t1 = _mm_alignr_epi8(row2h, row2l, 8); \ + row2l = t0; \ + row2h = t1; \ + \ + t0 = row3l; \ + row3l = row3h; \ + row3h = t0; \ + \ + t0 = _mm_alignr_epi8(row4l, row4h, 8); \ + t1 = _mm_alignr_epi8(row4h, row4l, 8); \ + row4l = t1; \ + row4h = t0; +#else + +#define DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = row4l;\ + t1 = row2l;\ + row4l = row3l;\ + row3l = row3h;\ + row3h = row4l;\ + row4l = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t0, t0)); \ + row4h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row4h, row4h)); \ + row2l = _mm_unpackhi_epi64(row2l, _mm_unpacklo_epi64(row2h, row2h)); \ + row2h = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(t1, t1)) + +#define UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h) \ + t0 = row3l;\ + row3l = row3h;\ + row3h = t0;\ + t0 = row2l;\ + t1 = row4l;\ + row2l = _mm_unpackhi_epi64(row2h, _mm_unpacklo_epi64(row2l, row2l)); \ + row2h = _mm_unpackhi_epi64(t0, _mm_unpacklo_epi64(row2h, row2h)); \ + row4l = _mm_unpackhi_epi64(row4l, _mm_unpacklo_epi64(row4h, row4h)); \ + row4h = _mm_unpackhi_epi64(row4h, _mm_unpacklo_epi64(t1, t1)) + +#endif + +#if defined(HAVE_SSE4_1) +#include "blake2b-load-sse41.h" +#else +#include "blake2b-load-sse2.h" +#endif + +#define ROUND(r) \ + LOAD_MSG_ ##r ##_1(b0, b1); \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_2(b0, b1); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + DIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); \ + LOAD_MSG_ ##r ##_3(b0, b1); \ + G1(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + LOAD_MSG_ ##r ##_4(b0, b1); \ + G2(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h,b0,b1); \ + UNDIAGONALIZE(row1l,row2l,row3l,row4l,row1h,row2h,row3h,row4h); + +#endif + |