aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/digest/md5
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/digest/md5
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/digest/md5')
-rw-r--r--library/cpp/digest/md5/bench/main.cpp20
-rw-r--r--library/cpp/digest/md5/bench/ya.make16
-rw-r--r--library/cpp/digest/md5/md5.cpp268
-rw-r--r--library/cpp/digest/md5/md5.h79
-rw-r--r--library/cpp/digest/md5/md5_medium_ut.cpp25
-rw-r--r--library/cpp/digest/md5/md5_ut.cpp66
-rw-r--r--library/cpp/digest/md5/medium_ut/ya.make18
-rw-r--r--library/cpp/digest/md5/ut/ya.make12
-rw-r--r--library/cpp/digest/md5/ya.make17
9 files changed, 521 insertions, 0 deletions
diff --git a/library/cpp/digest/md5/bench/main.cpp b/library/cpp/digest/md5/bench/main.cpp
new file mode 100644
index 00000000000..8f6386d8a1b
--- /dev/null
+++ b/library/cpp/digest/md5/bench/main.cpp
@@ -0,0 +1,20 @@
+#include <library/cpp/testing/benchmark/bench.h>
+#include <library/cpp/digest/md5/md5.h>
+
+#include <util/generic/xrange.h>
+
+// Defines a CPU benchmark named MD5_<N>. Each iteration constructs a fresh
+// MD5 object and feeds it one N-byte buffer through Update(); the digest is
+// never finalized.
+//
+// The buffer is zero-initialized so that the hashed bytes are well defined
+// rather than indeterminate stack contents.
+//
+// NOTE(review): on a fresh context MD5::UpdatePart only copies input shorter
+// than 64 bytes into the internal block buffer, so MD5_32 does not reach the
+// compression function at all.
+#define MD5_DEF(N)                                                  \
+    Y_CPU_BENCHMARK(MD5_##N, iface) {                               \
+        char buf[N] = {};                                           \
+        for (const auto i : xrange(iface.Iterations())) {           \
+            Y_UNUSED(i);                                            \
+            Y_DO_NOT_OPTIMIZE_AWAY(MD5().Update(buf, sizeof(buf))); \
+        }                                                           \
+    }
+
+MD5_DEF(32)
+MD5_DEF(64)
+MD5_DEF(128)
+
+MD5_DEF(1024)
+MD5_DEF(2048)
diff --git a/library/cpp/digest/md5/bench/ya.make b/library/cpp/digest/md5/bench/ya.make
new file mode 100644
index 00000000000..ad78159d8e0
--- /dev/null
+++ b/library/cpp/digest/md5/bench/ya.make
@@ -0,0 +1,16 @@
+# Benchmark target for library/cpp/digest/md5; the benchmarks live in main.cpp.
+Y_BENCHMARK()
+
+OWNER(
+ pg
+ g:util
+)
+
+# The library under measurement.
+PEERDIR(
+ library/cpp/digest/md5
+)
+
+SRCS(
+ main.cpp
+)
+
+END()
diff --git a/library/cpp/digest/md5/md5.cpp b/library/cpp/digest/md5/md5.cpp
new file mode 100644
index 00000000000..27af7f88d7b
--- /dev/null
+++ b/library/cpp/digest/md5/md5.cpp
@@ -0,0 +1,268 @@
+#include "md5.h"
+
+#include <contrib/libs/nayuki_md5/md5.h>
+
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+#include <util/stream/input.h>
+#include <util/stream/file.h>
+#include <library/cpp/string_utils/base64/base64.h>
+#include <util/string/hex.h>
+
+#include <cstring>
+#include <cstdlib>
+
+namespace {
+
+    // Number of characters in a hex-encoded MD5 digest, not counting the
+    // terminating '\0' that MD5::End() also writes.
+    constexpr size_t MD5_HEX_DIGEST_LENGTH = 32;
+
+    // Output-stream adapter: every chunk written to the stream is forwarded
+    // to MD5::Update() of the context given at construction.
+    // The context is borrowed, not owned.
+    struct TMd5Stream: public IOutputStream {
+        // explicit: an MD5* must not silently convert into a stream object.
+        explicit TMd5Stream(MD5* md5)
+            : M_(md5)
+        {
+        }
+
+        // Feeds len bytes starting at buf into the wrapped MD5 context.
+        void DoWrite(const void* buf, size_t len) override {
+            M_->Update(buf, len);
+        }
+
+        MD5* M_;
+    };
+}
+
+// Hashes the contents of the file `filename` and writes the hex digest
+// through `buf` (see MD5::End for the buffer contract).
+// Returns the digest buffer, or nullptr if opening or reading the file
+// throws; the exception is deliberately swallowed.
+char* MD5::File(const char* filename, char* buf) {
+    char* digest = nullptr;
+
+    try {
+        TUnbufferedFileInput input(filename);
+
+        digest = Stream(&input, buf);
+    } catch (...) {
+        // Best effort: any failure is reported as nullptr.
+    }
+
+    return digest;
+}
+
+// Convenience overload: returns the hex digest of the file as a TString,
+// or an empty string when the pointer-based overload reports failure.
+TString MD5::File(const TString& filename) {
+    TString hex;
+    hex.ReserveAndResize(MD5_HEX_DIGEST_LENGTH);
+
+    if (MD5::File(filename.data(), hex.begin()) == nullptr) {
+        hex.clear();
+    }
+
+    return hex;
+}
+
+// One-shot helper: hashes `len` bytes at `data` and writes the hex digest
+// through `buf` (see MD5::End for the buffer contract). Returns what
+// MD5::End returns.
+char* MD5::Data(const void* data, size_t len, char* buf) {
+    return MD5().Update(data, len).End(buf);
+}
+
+// Returns the hex digest of the bytes referenced by `data`.
+TString MD5::Data(TArrayRef<ui8> data) {
+    TString hex;
+    hex.ReserveAndResize(MD5_HEX_DIGEST_LENGTH);
+
+    // Delegate to the pointer-based overload, which hashes and hex-encodes.
+    Data(data.data(), data.size(), hex.begin());
+    return hex;
+}
+
+// Returns the hex digest of the characters referenced by `data`.
+TString MD5::Data(TStringBuf data) {
+    TString hex;
+    hex.ReserveAndResize(MD5_HEX_DIGEST_LENGTH);
+
+    // Delegate to the pointer-based overload, which hashes and hex-encodes.
+    Data(data.data(), data.size(), hex.begin());
+    return hex;
+}
+
+// Hashes everything readable from `in` and writes the hex digest through
+// `buf` (see MD5::End for the buffer contract).
+char* MD5::Stream(IInputStream* in, char* buf) {
+    MD5 hasher;
+    hasher.Update(in);
+    return hasher.End(buf);
+}
+
+// Passes the input stream to TransferData() with an adapter that forwards
+// every written chunk to this context. Returns *this for chaining.
+MD5& MD5::Update(IInputStream* in) {
+    TMd5Stream sink(this);
+    TransferData(in, &sink);
+    return *this;
+}
+
+// Forwards the four-word state and one 64-byte block to md5_compress()
+// from contrib/libs/nayuki_md5.
+static inline void MD5Transform(ui32 state[4], const unsigned char block[64]) {
+    md5_compress(reinterpret_cast<uint32_t*>(state), reinterpret_cast<const ui8*>(block));
+}
+
+/*
+ * Serializes 32-bit words into bytes, least significant byte first.
+ *
+ * output - destination buffer, at least len bytes long.
+ * input  - source words; len / 4 of them are read. Never modified, hence
+ *          const-qualified.
+ * len    - number of bytes to produce; assumed to be a multiple of 4.
+ */
+
+static void Encode(unsigned char* output, const ui32* input, unsigned int len) {
+    for (unsigned int word = 0, pos = 0; pos < len; ++word, pos += 4) {
+        const ui32 value = input[word];
+
+        output[pos] = (unsigned char)(value & 0xff);
+        output[pos + 1] = (unsigned char)((value >> 8) & 0xff);
+        output[pos + 2] = (unsigned char)((value >> 16) & 0xff);
+        output[pos + 3] = (unsigned char)((value >> 24) & 0xff);
+    }
+}
+
+/*
+ * Padding source used by MD5::Pad(): a single 0x80 byte followed by zeros.
+ * The table is only ever read, so it is const; the elements after the first
+ * are zero-initialized by aggregate initialization.
+ */
+static const unsigned char PADDING[64] = {0x80};
+
+/* Starts a new MD5 operation: resets the bit counter and the chaining state. */
+
+void MD5::Init() {
+    /* No input has been consumed yet. */
+    Count[0] = 0;
+    Count[1] = 0;
+
+    /* Magic initialization constants for the four state words. */
+    State[0] = 0x67452301;
+    State[1] = 0xefcdab89;
+    State[2] = 0x98badcfe;
+    State[3] = 0x10325476;
+}
+
+/*
+ * MD5 block update operation. Continues an MD5 message-digest
+ * operation, processing another message block, and updating the
+ * context.
+ *
+ * inputPtr - bytes to absorb.
+ * inputLen - number of bytes at inputPtr. The inline MD5::Update() in md5.h
+ *            splits large inputs so that this never exceeds
+ *            Max<unsigned int>() / 2.
+ */
+
+void MD5::UpdatePart(const void* inputPtr, unsigned int inputLen) {
+ const unsigned char* input = (const unsigned char*)inputPtr;
+ unsigned int i, index, partLen;
+ /* Compute number of bytes mod 64: Count[0] is the low word of the bit
+ * count, so >> 3 converts bits to bytes; the result is the fill level of
+ * Buffer. */
+ index = (unsigned int)((Count[0] >> 3) & 0x3F);
+ /* Update number of bits. The low word is advanced first; if the unsigned
+ * sum wrapped around it is smaller than the addend, and the carry goes
+ * into the high word. */
+ if ((Count[0] += ((ui32)inputLen << 3)) < ((ui32)inputLen << 3))
+ Count[1]++;
+ /* The top three bits of inputLen were shifted out of the low word above;
+ * they belong to the high word. */
+ Count[1] += ((ui32)inputLen >> 29);
+ /* Free space left in Buffer. */
+ partLen = 64 - index;
+ /* Transform as many times as possible. */
+ if (inputLen >= partLen) {
+ /* Complete the partially filled buffer and process it... */
+ memcpy((void*)&Buffer[index], (const void*)input, partLen);
+ MD5Transform(State, Buffer);
+ /* ...then process whole 64-byte blocks straight from the input. */
+ for (i = partLen; i + 63 < inputLen; i += 64)
+ MD5Transform(State, &input[i]);
+ index = 0;
+ } else
+ /* Not enough data to fill the buffer: everything is buffered below. */
+ i = 0;
+ /* Buffer remaining input */
+ memcpy((void*)&Buffer[index], (const void*)&input[i], inputLen - i);
+}
+
+/*
+ * MD5 padding: appends PADDING bytes until the message length is 56 mod 64,
+ * then appends the 8-byte encoding of the original bit count.
+ */
+
+void MD5::Pad() {
+    /* Capture the bit count first: the Update() calls below advance it. */
+    unsigned char lengthBytes[8];
+    Encode(lengthBytes, Count, 8);
+
+    /* Bytes currently used in the last 64-byte block. */
+    const unsigned int used = (unsigned int)((Count[0] >> 3) & 0x3f);
+
+    /* Pad out to 56 mod 64; this is always at least one byte. */
+    unsigned int padLen;
+    if (used < 56) {
+        padLen = 56 - used;
+    } else {
+        padLen = 120 - used;
+    }
+    Update(PADDING, padLen);
+
+    /* Append length (before padding). */
+    Update(lengthBytes, 8);
+}
+
+/*
+ * MD5 finalization. Ends an MD5 message-digest operation, writing the
+ * message digest and zeroizing the context.
+ *
+ * digest receives the 16 raw digest bytes and is also the return value.
+ * Afterwards every byte of the object is zero, so Init() must be called
+ * before the object is used for another message.
+ */
+
+unsigned char* MD5::Final(unsigned char digest[16]) {
+    /* Append padding and the message length. */
+    Pad();
+
+    /* Serialize the four state words into the caller's buffer. */
+    Encode(digest, State, 16);
+
+    /* Zeroize sensitive information: state, bit count and buffered input. */
+    memset(static_cast<void*>(this), 0, sizeof(MD5));
+
+    return digest;
+}
+
+// Finalizes the digest and writes it as 32 lowercase hex characters plus a
+// terminating '\0'.
+//
+// buf - destination of at least 33 bytes, or nullptr to have a 33-byte
+//       buffer obtained from malloc().
+// Returns the buffer written to, or nullptr if malloc() fails; in that case
+// the context is left unfinalized.
+char* MD5::End(char* buf) {
+    static const char hexDigits[] = "0123456789abcdef";
+
+    if (buf == nullptr) {
+        buf = (char*)malloc(33);
+        if (buf == nullptr) {
+            return nullptr;
+        }
+    }
+
+    unsigned char digest[16];
+    Final(digest);
+
+    char* out = buf;
+    for (const unsigned char byte : digest) {
+        *out++ = hexDigits[byte >> 4];
+        *out++ = hexDigits[byte & 0x0f];
+    }
+    *out = '\0';
+
+    return buf;
+}
+
+// Finalizes the digest and writes it base64-encoded, with '\0' stored at
+// index 24.
+//
+// buf - destination of at least 25 bytes, or nullptr to have a 25-byte
+//       buffer obtained from malloc().
+// Returns the buffer written to, or nullptr if malloc() fails; in that case
+// the context is left unfinalized.
+char* MD5::End_b64(char* buf) {
+    if (buf == nullptr) {
+        buf = (char*)malloc(25);
+        if (buf == nullptr) {
+            return nullptr;
+        }
+    }
+
+    unsigned char digest[16];
+    Final(digest);
+
+    Base64Encode(buf, digest, 16);
+    buf[24] = '\0';
+
+    return buf;
+}
+
+// Finalizes the digest and folds its 16 bytes into 64 bits by xor-ing the
+// first half of the digest with the second half.
+// Bytes 0..3 ^ 8..11 form the low 32 bits and bytes 4..7 ^ 12..15 the high
+// 32 bits; within each group the lowest-indexed byte is most significant.
+ui64 MD5::EndHalfMix() {
+    unsigned char digest[16];
+    Final(digest);
+
+    ui64 mixed = 0;
+    for (int i = 0; i < 4; ++i) {
+        const ui64 low = (ui64)(digest[i] ^ digest[8 + i]);
+        const ui64 high = (ui64)(digest[4 + i] ^ digest[12 + i]);
+
+        mixed |= low << (8 * (3 - i));
+        mixed |= high << (8 * (7 - i));
+    }
+    return mixed;
+}
+
+// Returns the 32-character hex digest of `data`.
+TString MD5::Calc(const TStringBuf& data) {
+    TString hex;
+    hex.resize(MD5_HEX_DIGEST_LENGTH);
+
+    // The pointer-based overload hashes the bytes and fills `hex` in place.
+    Data(data.data(), data.size(), hex.begin());
+
+    return hex;
+}
+
+// Returns the 16 raw (not hex-encoded) digest bytes of `data`.
+TString MD5::CalcRaw(const TStringBuf& data) {
+    TString raw;
+    raw.resize(16);
+
+    unsigned char* out = reinterpret_cast<unsigned char*>(raw.begin());
+    MD5().Update(data.data(), data.size()).Final(out);
+
+    return raw;
+}
+
+// One-shot helper: hashes `len` bytes at `data` and returns the 64-bit
+// xor-mix produced by EndHalfMix().
+ui64 MD5::CalcHalfMix(const char* data, size_t len) {
+    return MD5().Update(data, len).EndHalfMix();
+}
+
+// Same as the pointer-based overload, for a string view.
+ui64 MD5::CalcHalfMix(const TStringBuf& data) {
+    return MD5().Update(data).EndHalfMix();
+}
+
+// Returns true when `data` is exactly 32 characters long and no character
+// maps to '\xff' in Char2DigitTable.
+// NOTE(review): '\xff' is presumably the table's "not a hex digit" marker;
+// the unit tests accept both upper- and lower-case digits and reject 'g'.
+bool MD5::IsMD5(const TStringBuf& data) {
+    if (data.size() != 32) {
+        return false;
+    }
+
+    for (const char c : data) {
+        // Index through unsigned char so bytes >= 0x80 cannot go negative.
+        if (Char2DigitTable[static_cast<unsigned char>(c)] == '\xff') {
+            return false;
+        }
+    }
+
+    return true;
+}
diff --git a/library/cpp/digest/md5/md5.h b/library/cpp/digest/md5/md5.h
new file mode 100644
index 00000000000..1568567e3c0
--- /dev/null
+++ b/library/cpp/digest/md5/md5.h
@@ -0,0 +1,79 @@
+#pragma once
+
+#include <util/system/defaults.h>
+#include <util/generic/string.h>
+#include <util/generic/array_ref.h>
+#include <util/generic/strbuf.h>
+
+class IInputStream;
+
+// Incremental MD5 hasher.
+//
+// Construct (or call Init()), feed data through the Update() overloads, then
+// finish with one of Final() / End() / End_b64() / EndHalfMix(). Finishing
+// zeroes the whole object, so Init() must be called before it is reused.
+// The static helpers cover the common one-shot cases.
+class MD5 {
+public:
+    MD5() {
+        Init();
+    }
+
+    // Resets the context to the start of a new message.
+    void Init();
+
+    // Absorbs `len` bytes starting at `data`. Returns *this for chaining.
+    MD5& Update(const void* data, size_t len) {
+        const char* cursor = static_cast<const char*>(data);
+
+        // NOTE: a chunk must not come close to Max<unsigned int>(),
+        // otherwise integer overflow might happen in UpdatePart.
+        const size_t maxChunk = size_t(Max<unsigned int>() / 2);
+
+        for (size_t left = len; left != 0;) {
+            const unsigned int chunk = Min(maxChunk, left);
+
+            UpdatePart(cursor, chunk);
+            cursor += chunk;
+            left -= chunk;
+        }
+        return *this;
+    }
+
+    // Absorbs the characters referenced by `data`.
+    MD5& Update(const TStringBuf& data) {
+        return Update(data.data(), data.size());
+    }
+
+    // Appends the MD5 padding and the message length.
+    void Pad();
+
+    // Finishes the digest, stores its 16 raw bytes in the given array and
+    // returns it. Zeroes the context.
+    unsigned char* Final(unsigned char[16]);
+
+    // Finishes the digest as a hex string.
+    // buf must be char[33]; if it is nullptr a buffer is obtained with
+    // malloc(33) instead.
+    char* End(char* buf);
+
+    // Finishes the digest as a base64 string.
+    // buf must be char[25]; if it is nullptr a buffer is obtained with
+    // malloc(25) instead.
+    char* End_b64(char* buf);
+
+    // 8-byte xor-based mix
+    ui64 EndHalfMix();
+
+    // Absorbs everything that can be read from `in`.
+    MD5& Update(IInputStream* in);
+
+    /*
+     * Return hex-encoded md5 checksum for given file.
+     *
+     * Return nullptr / empty string if the file cannot be opened or read
+     * (for example, when it does not exist).
+     */
+    static char* File(const char* filename, char* buf);
+    static TString File(const TString& filename);
+
+    // Hex-encoded md5 checksum of an in-memory buffer or of a stream.
+    static char* Data(const void* data, size_t len, char* buf);
+    static TString Data(TArrayRef<ui8> data);
+    static TString Data(TStringBuf data);
+    static char* Stream(IInputStream* in, char* buf);
+
+    static TString Calc(const TStringBuf& data);    // 32-byte hex-encoded
+    static TString CalcRaw(const TStringBuf& data); // 16-byte raw
+
+    // One-shot variants of EndHalfMix().
+    static ui64 CalcHalfMix(const TStringBuf& data);
+    static ui64 CalcHalfMix(const char* data, size_t len);
+
+    // True if `data` is 32 characters long and passes the Char2DigitTable
+    // check applied to each character.
+    static bool IsMD5(const TStringBuf& data);
+
+private:
+    // Absorbs one chunk; `len` is limited by Update() to avoid overflow.
+    void UpdatePart(const void* data, unsigned int len);
+
+private:
+    ui32 State[4];            /* state (ABCD) */
+    ui32 Count[2];            /* number of bits, modulo 2^64 (lsb first) */
+    unsigned char Buffer[64]; /* input buffer */
+};
diff --git a/library/cpp/digest/md5/md5_medium_ut.cpp b/library/cpp/digest/md5/md5_medium_ut.cpp
new file mode 100644
index 00000000000..a940c5cb66c
--- /dev/null
+++ b/library/cpp/digest/md5/md5_medium_ut.cpp
@@ -0,0 +1,25 @@
+#include "md5.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+Y_UNIT_TEST_SUITE(TMD5MediumTest) {
+    // Hashes five consecutive chunks of Max<unsigned int>() zero bytes each
+    // and compares the result with a precomputed digest. Runs only where
+    // size_t is wider than unsigned int.
+    Y_UNIT_TEST(TestOverflow) {
+        if (sizeof(size_t) > sizeof(unsigned int)) {
+            const size_t chunkSize = (size_t)Max<unsigned int>();
+            constexpr int chunkCount = 5;
+
+            TArrayHolder<char> zeros(new char[chunkSize]);
+            memset(zeros.Get(), 0, chunkSize);
+
+            MD5 hasher;
+            for (int round = 0; round < chunkCount; ++round) {
+                hasher.Update(zeros.Get(), chunkSize);
+            }
+
+            char hexBuf[33];
+            TString s(hasher.End(hexBuf));
+            s.to_lower();
+
+            UNIT_ASSERT_VALUES_EQUAL(s, "34a5a7ed4f0221310084e21a1e599659");
+        }
+    }
+}
diff --git a/library/cpp/digest/md5/md5_ut.cpp b/library/cpp/digest/md5/md5_ut.cpp
new file mode 100644
index 00000000000..fecccca9ba9
--- /dev/null
+++ b/library/cpp/digest/md5/md5_ut.cpp
@@ -0,0 +1,66 @@
+#include "md5.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/system/fs.h>
+#include <util/stream/file.h>
+
+Y_UNIT_TEST_SUITE(TMD5Test) {
+    // Incremental hashing in two pieces and one-shot Calc() must both give
+    // the reference digest.
+    Y_UNIT_TEST(TestMD5) {
+        // echo -n 'qwertyuiopqwertyuiopasdfghjklasdfghjkl' | md5sum
+        constexpr const char* text = "qwertyuiopqwertyuiopasdfghjklasdfghjkl";
+        constexpr size_t firstPart = 15;
+
+        MD5 hasher;
+        hasher.Update(text, firstPart);
+        hasher.Update(text + firstPart, strlen(text) - firstPart);
+
+        char hexBuf[33];
+        TString s(hasher.End(hexBuf));
+        s.to_lower();
+
+        UNIT_ASSERT_EQUAL(s, TStringBuf("3ac00dd696b966fd74deee3c35a59d8f"));
+
+        TString result = MD5::Calc(TStringBuf(text));
+        result.to_lower();
+        UNIT_ASSERT_EQUAL(result, TStringBuf("3ac00dd696b966fd74deee3c35a59d8f"));
+    }
+
+    // Hashing a file must match hashing the same bytes in memory; a missing
+    // file yields an empty string.
+    Y_UNIT_TEST(TestFile) {
+        const TString content = NUnitTest::RandomString(1000000, 1);
+        const TString tmpFile = "tmp";
+
+        {
+            // Scoped so the file is closed before it is hashed.
+            TFixedBufferFileOutput out(tmpFile);
+            out.Write(content.data(), content.size());
+        }
+
+        char fileBuf[100];
+        char memBuf[100];
+        TString fileHash = MD5::File(tmpFile.data(), fileBuf);
+        TString memoryHash = MD5::Data(content.data(), content.size(), memBuf);
+
+        UNIT_ASSERT_EQUAL(fileHash, memoryHash);
+
+        fileHash = MD5::File(tmpFile);
+        UNIT_ASSERT_EQUAL(fileHash, memoryHash);
+
+        NFs::Remove(tmpFile);
+        fileHash = MD5::File(tmpFile);
+        UNIT_ASSERT_EQUAL(fileHash.size(), 0);
+    }
+
+    // IsMD5 accepts exactly 32 hex digits in either case.
+    Y_UNIT_TEST(TestIsMD5) {
+        // Rejected: empty, too short, too long, non-hex character.
+        UNIT_ASSERT_EQUAL(false, MD5::IsMD5(TStringBuf()));
+        UNIT_ASSERT_EQUAL(false, MD5::IsMD5(TStringBuf("4136ebb0e4c45d21e2b09294c75cfa0")));   // length 31
+        UNIT_ASSERT_EQUAL(false, MD5::IsMD5(TStringBuf("4136ebb0e4c45d21e2b09294c75cfa000"))); // length 33
+        UNIT_ASSERT_EQUAL(false, MD5::IsMD5(TStringBuf("4136ebb0e4c45d21e2b09294c75cfa0g")));  // wrong character 'g'
+
+        // Accepted: upper case, mixed case, lower case.
+        UNIT_ASSERT_EQUAL(true, MD5::IsMD5(TStringBuf("4136EBB0E4C45D21E2B09294C75CFA08")));
+        UNIT_ASSERT_EQUAL(true, MD5::IsMD5(TStringBuf("4136ebb0E4C45D21e2b09294C75CfA08")));
+        UNIT_ASSERT_EQUAL(true, MD5::IsMD5(TStringBuf("4136ebb0e4c45d21e2b09294c75cfa08")));
+    }
+
+    // Pins the 64-bit xor-mix for the empty string and for a sample string.
+    Y_UNIT_TEST(TestMd5HalfMix) {
+        UNIT_ASSERT_EQUAL(MD5::CalcHalfMix(""), 7203772011789518145ul);
+        UNIT_ASSERT_EQUAL(MD5::CalcHalfMix("qwertyuiopqwertyuiopasdfghjklasdfghjkl"), 11753545595885642730ul);
+    }
+}
diff --git a/library/cpp/digest/md5/medium_ut/ya.make b/library/cpp/digest/md5/medium_ut/ya.make
new file mode 100644
index 00000000000..418c57f086e
--- /dev/null
+++ b/library/cpp/digest/md5/medium_ut/ya.make
@@ -0,0 +1,18 @@
+# Medium-size unit tests for library/cpp/digest/md5 (md5_medium_ut.cpp).
+UNITTEST_FOR(library/cpp/digest/md5)
+
+SIZE(MEDIUM)
+
+TIMEOUT(120)
+
+OWNER(
+ pg
+ g:util
+)
+
+SRCS(
+ md5_medium_ut.cpp
+)
+
+# NOTE(review): presumably raised because TestOverflow allocates a buffer of
+# Max<unsigned int>() bytes; confirm the unit of ram:10.
+REQUIREMENTS(ram:10)
+
+END()
diff --git a/library/cpp/digest/md5/ut/ya.make b/library/cpp/digest/md5/ut/ya.make
new file mode 100644
index 00000000000..ad1eddbff2a
--- /dev/null
+++ b/library/cpp/digest/md5/ut/ya.make
@@ -0,0 +1,12 @@
+# Unit tests for library/cpp/digest/md5 (md5_ut.cpp).
+UNITTEST_FOR(library/cpp/digest/md5)
+
+OWNER(
+ pg
+ g:util
+)
+
+SRCS(
+ md5_ut.cpp
+)
+
+END()
diff --git a/library/cpp/digest/md5/ya.make b/library/cpp/digest/md5/ya.make
new file mode 100644
index 00000000000..c09ec1c326f
--- /dev/null
+++ b/library/cpp/digest/md5/ya.make
@@ -0,0 +1,17 @@
+# MD5 digest library, built from md5.cpp.
+LIBRARY()
+
+OWNER(
+ pg
+ g:util
+)
+
+SRCS(
+ md5.cpp
+)
+
+# md5.cpp includes contrib/libs/nayuki_md5/md5.h (md5_compress) and
+# library/cpp/string_utils/base64/base64.h (Base64Encode).
+PEERDIR(
+ contrib/libs/nayuki_md5
+ library/cpp/string_utils/base64
+)
+
+END()