diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/digest/md5 | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/digest/md5')
-rw-r--r-- | library/cpp/digest/md5/bench/main.cpp | 20 | ||||
-rw-r--r-- | library/cpp/digest/md5/bench/ya.make | 16 | ||||
-rw-r--r-- | library/cpp/digest/md5/md5.cpp | 268 | ||||
-rw-r--r-- | library/cpp/digest/md5/md5.h | 79 | ||||
-rw-r--r-- | library/cpp/digest/md5/md5_medium_ut.cpp | 25 | ||||
-rw-r--r-- | library/cpp/digest/md5/md5_ut.cpp | 66 | ||||
-rw-r--r-- | library/cpp/digest/md5/medium_ut/ya.make | 18 | ||||
-rw-r--r-- | library/cpp/digest/md5/ut/ya.make | 12 | ||||
-rw-r--r-- | library/cpp/digest/md5/ya.make | 17 |
9 files changed, 521 insertions, 0 deletions
diff --git a/library/cpp/digest/md5/bench/main.cpp b/library/cpp/digest/md5/bench/main.cpp new file mode 100644 index 00000000000..8f6386d8a1b --- /dev/null +++ b/library/cpp/digest/md5/bench/main.cpp @@ -0,0 +1,20 @@ +#include <library/cpp/testing/benchmark/bench.h> +#include <library/cpp/digest/md5/md5.h> + +#include <util/generic/xrange.h> + +#define MD5_DEF(N) \ + Y_CPU_BENCHMARK(MD5_##N, iface) { \ + char buf[N]; \ + for (const auto i : xrange(iface.Iterations())) { \ + Y_UNUSED(i); \ + Y_DO_NOT_OPTIMIZE_AWAY(MD5().Update(buf, sizeof(buf))); \ + } \ + } + +MD5_DEF(32) +MD5_DEF(64) +MD5_DEF(128) + +MD5_DEF(1024) +MD5_DEF(2048) diff --git a/library/cpp/digest/md5/bench/ya.make b/library/cpp/digest/md5/bench/ya.make new file mode 100644 index 00000000000..ad78159d8e0 --- /dev/null +++ b/library/cpp/digest/md5/bench/ya.make @@ -0,0 +1,16 @@ +Y_BENCHMARK() + +OWNER( + pg + g:util +) + +PEERDIR( + library/cpp/digest/md5 +) + +SRCS( + main.cpp +) + +END() diff --git a/library/cpp/digest/md5/md5.cpp b/library/cpp/digest/md5/md5.cpp new file mode 100644 index 00000000000..27af7f88d7b --- /dev/null +++ b/library/cpp/digest/md5/md5.cpp @@ -0,0 +1,268 @@ +#include "md5.h" + +#include <contrib/libs/nayuki_md5/md5.h> + +#include <util/generic/strbuf.h> +#include <util/generic/string.h> +#include <util/stream/input.h> +#include <util/stream/file.h> +#include <library/cpp/string_utils/base64/base64.h> +#include <util/string/hex.h> + +#include <cstring> +#include <cstdlib> + +namespace { + + constexpr size_t MD5_HEX_DIGEST_LENGTH = 32; + + struct TMd5Stream: public IOutputStream { + inline TMd5Stream(MD5* md5) + : M_(md5) + { + } + + void DoWrite(const void* buf, size_t len) override { + M_->Update(buf, len); + } + + MD5* M_; + }; +} + +char* MD5::File(const char* filename, char* buf) { + try { + TUnbufferedFileInput fi(filename); + + return Stream(&fi, buf); + } catch (...) 
{ + } + + return nullptr; +} + +TString MD5::File(const TString& filename) { + TString buf; + buf.ReserveAndResize(MD5_HEX_DIGEST_LENGTH); + auto result = MD5::File(filename.data(), buf.begin()); + if (result == nullptr) { + buf.clear(); + } + return buf; +} + +char* MD5::Data(const void* data, size_t len, char* buf) { + MD5 md5; + md5.Update(data, len); + return md5.End(buf); +} + +TString MD5::Data(TArrayRef<ui8> data) { + MD5 md5; + md5.Update(data.data(), data.size()); + + TString buf; + buf.ReserveAndResize(MD5_HEX_DIGEST_LENGTH); + md5.End(buf.begin()); + return buf; +} + +TString MD5::Data(TStringBuf data) { + MD5 md5; + md5.Update(data.data(), data.size()); + + TString buf; + buf.ReserveAndResize(MD5_HEX_DIGEST_LENGTH); + md5.End(buf.begin()); + return buf; +} + +char* MD5::Stream(IInputStream* in, char* buf) { + return MD5().Update(in).End(buf); +} + +MD5& MD5::Update(IInputStream* in) { + TMd5Stream md5(this); + + TransferData(in, &md5); + + return *this; +} + +static inline void MD5Transform(ui32 state[4], const unsigned char block[64]) { + return md5_compress((uint32_t*)state, (const ui8*)block); +} + +/* + * Encodes input (ui32) into output (unsigned char). Assumes len is + * a multiple of 4. + */ + +static void Encode(unsigned char* output, ui32* input, unsigned int len) { + unsigned int i, j; + + for (i = 0, j = 0; j < len; i++, j += 4) { + output[j] = (unsigned char)(input[i] & 0xff); + output[j + 1] = (unsigned char)((input[i] >> 8) & 0xff); + output[j + 2] = (unsigned char)((input[i] >> 16) & 0xff); + output[j + 3] = (unsigned char)((input[i] >> 24) & 0xff); + } +} + +static unsigned char PADDING[64] = { + 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; + +/* MD5 initialization. Begins an MD5 operation, writing a new context. 
*/ + +void MD5::Init() { + Count[0] = Count[1] = 0; + /* Load magic initialization constants. */ + State[0] = 0x67452301; + State[1] = 0xefcdab89; + State[2] = 0x98badcfe; + State[3] = 0x10325476; +} + +/* + * MD5 block update operation. Continues an MD5 message-digest + * operation, processing another message block, and updating the + * context. + */ + +void MD5::UpdatePart(const void* inputPtr, unsigned int inputLen) { + const unsigned char* input = (const unsigned char*)inputPtr; + unsigned int i, index, partLen; + /* Compute number of bytes mod 64 */ + index = (unsigned int)((Count[0] >> 3) & 0x3F); + /* Update number of bits */ + if ((Count[0] += ((ui32)inputLen << 3)) < ((ui32)inputLen << 3)) + Count[1]++; + Count[1] += ((ui32)inputLen >> 29); + partLen = 64 - index; + /* Transform as many times as possible. */ + if (inputLen >= partLen) { + memcpy((void*)&Buffer[index], (const void*)input, partLen); + MD5Transform(State, Buffer); + for (i = partLen; i + 63 < inputLen; i += 64) + MD5Transform(State, &input[i]); + index = 0; + } else + i = 0; + /* Buffer remaining input */ + memcpy((void*)&Buffer[index], (const void*)&input[i], inputLen - i); +} + +/* + * MD5 padding. Adds padding followed by original length. + */ + +void MD5::Pad() { + unsigned char bits[8]; + unsigned int index, padLen; + /* Save number of bits */ + Encode(bits, Count, 8); + /* Pad out to 56 mod 64. */ + index = (unsigned int)((Count[0] >> 3) & 0x3f); + padLen = (index < 56) ? (56 - index) : (120 - index); + Update(PADDING, padLen); + /* Append length (before padding) */ + Update(bits, 8); +} + +/* + * MD5 finalization. Ends an MD5 message-digest operation, writing the + * message digest and zeroizing the context. + */ + +unsigned char* MD5::Final(unsigned char digest[16]) { + /* Do padding. */ + Pad(); + /* Store state in digest */ + Encode(digest, State, 16); + /* Zeroize sensitive information. 
*/ + memset((void*)this, 0, sizeof(*this)); + + return digest; +} + +char* MD5::End(char* buf) { + unsigned char digest[16]; + static const char hex[] = "0123456789abcdef"; + if (!buf) + buf = (char*)malloc(33); + if (!buf) + return nullptr; + Final(digest); + int i = 0; + for (; i < 16; i++) { + buf[i + i] = hex[digest[i] >> 4]; + buf[i + i + 1] = hex[digest[i] & 0x0f]; + } + buf[i + i] = '\0'; + return buf; +} + +char* MD5::End_b64(char* buf) { + unsigned char digest[16]; + if (!buf) + buf = (char*)malloc(25); + if (!buf) + return nullptr; + Final(digest); + Base64Encode(buf, digest, 16); + buf[24] = '\0'; + return buf; +} + +ui64 MD5::EndHalfMix() { + unsigned char digest[16]; + Final(digest); + ui64 res = 0; + for (int i = 3; i >= 0; i--) { + res |= (ui64)(digest[0 + i] ^ digest[8 + i]) << ((3 - i) << 3); + res |= (ui64)(digest[4 + i] ^ digest[12 + i]) << ((7 - i) << 3); + } + return res; +} + +TString MD5::Calc(const TStringBuf& data) { + TString result; + result.resize(32); + + Data((const unsigned char*)data.data(), data.size(), result.begin()); + + return result; +} + +TString MD5::CalcRaw(const TStringBuf& data) { + TString result; + result.resize(16); + MD5 md5; + md5.Update(data.data(), data.size()); + md5.Final(reinterpret_cast<unsigned char*>(result.begin())); + return result; +} + +ui64 MD5::CalcHalfMix(const char* data, size_t len) { + MD5 md5; + md5.Update(data, len); + return md5.EndHalfMix(); +} + +ui64 MD5::CalcHalfMix(const TStringBuf& data) { + return CalcHalfMix(data.data(), data.size()); +} + +bool MD5::IsMD5(const TStringBuf& data) { + if (data.size() != 32) { + return false; + } + for (const char *p = data.data(), *e = data.data() + data.size(); p != e; ++p) { + if (Char2DigitTable[(unsigned char)*p] == '\xff') { + return false; + } + } + return true; +} diff --git a/library/cpp/digest/md5/md5.h b/library/cpp/digest/md5/md5.h new file mode 100644 index 00000000000..1568567e3c0 --- /dev/null +++ b/library/cpp/digest/md5/md5.h @@ -0,0 +1,79 
@@ +#pragma once + +#include <util/system/defaults.h> +#include <util/generic/string.h> +#include <util/generic/array_ref.h> +#include <util/generic/strbuf.h> + +class IInputStream; + +class MD5 { +public: + MD5() { + Init(); + } + + void Init(); + + inline MD5& Update(const void* data, size_t len) { + const char* buf = (const char*)data; + + while (len) { + // NOTE: we don't want buffSz to be near Max<unsigned int>() + // because otherwise integer overflow might happen in UpdatePart + const unsigned int buffSz = Min(size_t(Max<unsigned int>() / 2), len); + + UpdatePart(buf, buffSz); + buf += buffSz; + len -= buffSz; + } + return *this; + } + + inline MD5& Update(const TStringBuf& data) { + return Update(data.data(), data.size()); + } + + void Pad(); + unsigned char* Final(unsigned char[16]); + + // buf must be char[33]; + char* End(char* buf); + + // buf must be char[25]; + char* End_b64(char* buf); + + // 8-byte xor-based mix + ui64 EndHalfMix(); + + MD5& Update(IInputStream* in); + + /* + * Return hex-encoded md5 checksum for given file. + * + * Return nullptr / empty string if the file does not exist. 
+ */ + static char* File(const char* filename, char* buf); + static TString File(const TString& filename); + + static char* Data(const void* data, size_t len, char* buf); + static TString Data(TArrayRef<ui8> data); + static TString Data(TStringBuf data); + static char* Stream(IInputStream* in, char* buf); + + static TString Calc(const TStringBuf& data); // 32-byte hex-encoded + static TString CalcRaw(const TStringBuf& data); // 16-byte raw + + static ui64 CalcHalfMix(const TStringBuf& data); + static ui64 CalcHalfMix(const char* data, size_t len); + + static bool IsMD5(const TStringBuf& data); + +private: + void UpdatePart(const void* data, unsigned int len); + +private: + ui32 State[4]; /* state (ABCD) */ + ui32 Count[2]; /* number of bits, modulo 2^64 (lsb first) */ + unsigned char Buffer[64]; /* input buffer */ +}; diff --git a/library/cpp/digest/md5/md5_medium_ut.cpp b/library/cpp/digest/md5/md5_medium_ut.cpp new file mode 100644 index 00000000000..a940c5cb66c --- /dev/null +++ b/library/cpp/digest/md5/md5_medium_ut.cpp @@ -0,0 +1,25 @@ +#include "md5.h" + +#include <library/cpp/testing/unittest/registar.h> + +Y_UNIT_TEST_SUITE(TMD5MediumTest) { + Y_UNIT_TEST(TestOverflow) { + if (sizeof(size_t) > sizeof(unsigned int)) { + const size_t maxUi32 = (size_t)Max<unsigned int>(); + TArrayHolder<char> buf(new char[maxUi32]); + + memset(buf.Get(), 0, maxUi32); + + MD5 r; + for (int i = 0; i < 5; ++i) { + r.Update(buf.Get(), maxUi32); + } + + char rs[33]; + TString s(r.End(rs)); + s.to_lower(); + + UNIT_ASSERT_VALUES_EQUAL(s, "34a5a7ed4f0221310084e21a1e599659"); + } + } +} diff --git a/library/cpp/digest/md5/md5_ut.cpp b/library/cpp/digest/md5/md5_ut.cpp new file mode 100644 index 00000000000..fecccca9ba9 --- /dev/null +++ b/library/cpp/digest/md5/md5_ut.cpp @@ -0,0 +1,66 @@ +#include "md5.h" + +#include <library/cpp/testing/unittest/registar.h> + +#include <util/system/fs.h> +#include <util/stream/file.h> + +Y_UNIT_TEST_SUITE(TMD5Test) { + Y_UNIT_TEST(TestMD5) { + // 
echo -n 'qwertyuiopqwertyuiopasdfghjklasdfghjkl' | md5sum + constexpr const char* b = "qwertyuiopqwertyuiopasdfghjklasdfghjkl"; + + MD5 r; + r.Update((const unsigned char*)b, 15); + r.Update((const unsigned char*)b + 15, strlen(b) - 15); + + char rs[33]; + TString s(r.End(rs)); + s.to_lower(); + + UNIT_ASSERT_EQUAL(s, TStringBuf("3ac00dd696b966fd74deee3c35a59d8f")); + + TString result = r.Calc(TStringBuf(b)); + result.to_lower(); + UNIT_ASSERT_EQUAL(result, TStringBuf("3ac00dd696b966fd74deee3c35a59d8f")); + } + + Y_UNIT_TEST(TestFile) { + TString s = NUnitTest::RandomString(1000000, 1); + const TString tmpFile = "tmp"; + + { + TFixedBufferFileOutput fo(tmpFile); + fo.Write(s.data(), s.size()); + } + + char fileBuf[100]; + char memBuf[100]; + TString fileHash = MD5::File(tmpFile.data(), fileBuf); + TString memoryHash = MD5::Data((const unsigned char*)s.data(), s.size(), memBuf); + + UNIT_ASSERT_EQUAL(fileHash, memoryHash); + + fileHash = MD5::File(tmpFile); + UNIT_ASSERT_EQUAL(fileHash, memoryHash); + + NFs::Remove(tmpFile); + fileHash = MD5::File(tmpFile); + UNIT_ASSERT_EQUAL(fileHash.size(), 0); + } + + Y_UNIT_TEST(TestIsMD5) { + UNIT_ASSERT_EQUAL(false, MD5::IsMD5(TStringBuf())); + UNIT_ASSERT_EQUAL(false, MD5::IsMD5(TStringBuf("4136ebb0e4c45d21e2b09294c75cfa0"))); // length 31 + UNIT_ASSERT_EQUAL(false, MD5::IsMD5(TStringBuf("4136ebb0e4c45d21e2b09294c75cfa000"))); // length 33 + UNIT_ASSERT_EQUAL(false, MD5::IsMD5(TStringBuf("4136ebb0e4c45d21e2b09294c75cfa0g"))); // wrong character 'g' + UNIT_ASSERT_EQUAL(true, MD5::IsMD5(TStringBuf("4136EBB0E4C45D21E2B09294C75CFA08"))); + UNIT_ASSERT_EQUAL(true, MD5::IsMD5(TStringBuf("4136ebb0E4C45D21e2b09294C75CfA08"))); + UNIT_ASSERT_EQUAL(true, MD5::IsMD5(TStringBuf("4136ebb0e4c45d21e2b09294c75cfa08"))); + } + + Y_UNIT_TEST(TestMd5HalfMix) { + UNIT_ASSERT_EQUAL(MD5::CalcHalfMix(""), 7203772011789518145ul); + UNIT_ASSERT_EQUAL(MD5::CalcHalfMix("qwertyuiopqwertyuiopasdfghjklasdfghjkl"), 11753545595885642730ul); + } +} diff 
--git a/library/cpp/digest/md5/medium_ut/ya.make b/library/cpp/digest/md5/medium_ut/ya.make new file mode 100644 index 00000000000..418c57f086e --- /dev/null +++ b/library/cpp/digest/md5/medium_ut/ya.make @@ -0,0 +1,18 @@ +UNITTEST_FOR(library/cpp/digest/md5) + +SIZE(MEDIUM) + +TIMEOUT(120) + +OWNER( + pg + g:util +) + +SRCS( + md5_medium_ut.cpp +) + +REQUIREMENTS(ram:10) + +END() diff --git a/library/cpp/digest/md5/ut/ya.make b/library/cpp/digest/md5/ut/ya.make new file mode 100644 index 00000000000..ad1eddbff2a --- /dev/null +++ b/library/cpp/digest/md5/ut/ya.make @@ -0,0 +1,12 @@ +UNITTEST_FOR(library/cpp/digest/md5) + +OWNER( + pg + g:util +) + +SRCS( + md5_ut.cpp +) + +END() diff --git a/library/cpp/digest/md5/ya.make b/library/cpp/digest/md5/ya.make new file mode 100644 index 00000000000..c09ec1c326f --- /dev/null +++ b/library/cpp/digest/md5/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +OWNER( + pg + g:util +) + +SRCS( + md5.cpp +) + +PEERDIR( + contrib/libs/nayuki_md5 + library/cpp/string_utils/base64 +) + +END() |