diff options
author | conterouz <conterouz@yandex-team.com> | 2022-12-16 10:04:06 +0300 |
---|---|---|
committer | conterouz <conterouz@yandex-team.com> | 2022-12-16 10:04:06 +0300 |
commit | 64fdfff412a1c555a909da6fd654cba5628e61e8 (patch) | |
tree | 93f211a4cfb841fef1595737a37233d9860c09a0 | |
parent | fa371e5210e9cdda3fd6fc33af87a2458b69ab84 (diff) | |
download | ydb-64fdfff412a1c555a909da6fd654cba5628e61e8.tar.gz |
Добавил в библиотеку метод для создания Base64 строки url-friendly
-rw-r--r-- | library/cpp/string_utils/base64/base64.cpp | 36 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/base64.h | 32 | ||||
-rw-r--r-- | library/cpp/string_utils/base64/base64_ut.cpp | 48 |
3 files changed, 100 insertions, 16 deletions
diff --git a/library/cpp/string_utils/base64/base64.cpp b/library/cpp/string_utils/base64/base64.cpp index 05c201f0de..610136fd1a 100644 --- a/library/cpp/string_utils/base64/base64.cpp +++ b/library/cpp/string_utils/base64/base64.cpp @@ -111,7 +111,7 @@ static inline unsigned char GetBase64EncodedIndex3(unsigned char octet2) { return (octet2 & 0x3f); } -template <bool urlVersion> +template <bool urlVersion, bool usePadding = true> static inline char* Base64EncodeImpl(char* outstr, const unsigned char* instr, size_t len) { const char* const base64_etab = (urlVersion ? base64_etab_url : base64_etab_std); const char pad = (urlVersion ? ',' : '='); @@ -132,9 +132,13 @@ static inline char* Base64EncodeImpl(char* outstr, const unsigned char* instr, s *outstr++ = base64_etab[GetBase64EncodedIndex2(instr[idx + 1], '\0')]; } else { *outstr++ = base64_etab[GetBase64EncodedIndex1(instr[idx], '\0')]; + if (usePadding) { + *outstr++ = pad; + } + } + if (usePadding) { *outstr++ = pad; } - *outstr++ = pad; } *outstr = 0; @@ -149,6 +153,10 @@ char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len) { return Base64EncodeImpl<true>(outstr, instr, len); } +char* Base64EncodeUrlNoPadding(char* outstr, const unsigned char* instr, size_t len) { + return Base64EncodeImpl<true, false>(outstr, instr, len); +} + inline void uudecode_1(char* dst, unsigned char* src) { dst[0] = char((base64_bkw[src[0]] << 2) | (base64_bkw[src[1]] >> 4)); dst[1] = char((base64_bkw[src[1]] << 4) | (base64_bkw[src[2]] >> 2)); @@ -245,13 +253,27 @@ size_t Base64Decode(void* dst, const char* b, const char* e) { return outLen; } -TString Base64DecodeUneven(const TStringBuf s) { - if (s.length() % 4 == 0) { - return Base64Decode(s); +size_t Base64DecodeUneven(void* dst, const TStringBuf s) { + const size_t tailSize = s.length() % 4; + if (tailSize == 0) { + return Base64Decode(dst, s.begin(), s.end()); } - // padding to 4 - return Base64Decode(TString(s) + TString(4 - (s.length() % 4), '=')); + 
// split s into an evenly sized part and a tail and decode them in two steps, to avoid memory allocation + char tail[4] = {'=', '=', '=', '='}; + memcpy(tail, s.end() - tailSize, tailSize); + size_t decodedEven = s.length() > 4 ? Base64Decode(dst, s.begin(), s.end() - tailSize) : 0; + // a tail of size 1 is invalid: a single base64 character cannot encode a whole 8-bit byte, so it is skipped + size_t decodedTail = tailSize != 1 ? Base64Decode(static_cast<char*>(dst) + decodedEven, tail, tail + 4) : 0; + return decodedEven + decodedTail; +} + +TString Base64DecodeUneven(const TStringBuf s) { + TString ret; + ret.ReserveAndResize(Base64DecodeBufSize(s.size())); + size_t size = Base64DecodeUneven(const_cast<char*>(ret.data()), s); + ret.resize(size); + return ret; } char* Base64Encode(char* outstr, const unsigned char* instr, size_t len) { diff --git a/library/cpp/string_utils/base64/base64.h b/library/cpp/string_utils/base64/base64.h index f778a6425a..3823fc1b8d 100644 --- a/library/cpp/string_utils/base64/base64.h +++ b/library/cpp/string_utils/base64/base64.h @@ -24,7 +24,7 @@ constexpr size_t Base64DecodeBufSize(const size_t len) noexcept { size_t Base64Decode(void* dst, const char* b, const char* e); inline TStringBuf Base64Decode(const TStringBuf src, void* dst) { - return TStringBuf((const char*)dst, Base64Decode(dst, src.begin(), src.end())); + return TStringBuf(static_cast<const char*>(dst), Base64Decode(dst, src.begin(), src.end())); } inline void Base64Decode(const TStringBuf src, TString& dst) { @@ -64,7 +64,7 @@ size_t Base64StrictDecode(void* dst, const char* b, const char* e); /// @return Returns dst wrapped into TStringBuf. 
/// inline TStringBuf Base64StrictDecode(const TStringBuf src, void* dst) { - return TStringBuf((const char*)dst, Base64StrictDecode(dst, src.begin(), src.end())); + return TStringBuf(static_cast<const char*>(dst), Base64StrictDecode(dst, src.begin(), src.end())); } /// @@ -90,6 +90,7 @@ inline TString Base64StrictDecode(const TStringBuf src) { /// Works with strings which length is not divisible by 4. TString Base64DecodeUneven(const TStringBuf s); +size_t Base64DecodeUneven(void* dst, const TStringBuf s); //encode constexpr size_t Base64EncodeBufSize(const size_t len) noexcept { @@ -99,12 +100,26 @@ constexpr size_t Base64EncodeBufSize(const size_t len) noexcept { char* Base64Encode(char* outstr, const unsigned char* instr, size_t len); char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len); -inline TStringBuf Base64Encode(const TStringBuf src, void* tmp) { - return TStringBuf((const char*)tmp, Base64Encode((char*)tmp, (const unsigned char*)src.data(), src.size())); +/// Make a base64 string which stays unchanged after applying the 'urlencode' function, +/// as it doesn't contain characters which cannot be used in urls +/// @param outstr a pointer to allocated memory for writing result. +/// @param instr a pointer to the buffer to encode +/// @param len size of instr buffer +/// +/// @return Returns pointer to last symbol in outstr buffer. 
+/// +char* Base64EncodeUrlNoPadding(char* outstr, const unsigned char* instr, size_t len); + +inline TStringBuf Base64Encode(const TStringBuf src, void* output) { + return TStringBuf(static_cast<const char*>(output), Base64Encode(static_cast<char*>(output), reinterpret_cast<const unsigned char*>(src.data()), src.size())); +} + +inline TStringBuf Base64EncodeUrl(const TStringBuf src, void* output) { + return TStringBuf(static_cast<const char*>(output), Base64EncodeUrl(static_cast<char*>(output), reinterpret_cast<const unsigned char*>(src.data()), src.size())); } -inline TStringBuf Base64EncodeUrl(const TStringBuf src, void* tmp) { - return TStringBuf((const char*)tmp, Base64EncodeUrl((char*)tmp, (const unsigned char*)src.data(), src.size())); +inline TStringBuf Base64EncodeUrlNoPadding(const TStringBuf src, void* output) { + return TStringBuf(static_cast<const char*>(output), Base64EncodeUrlNoPadding(static_cast<char*>(output), reinterpret_cast<const unsigned char*>(src.data()), src.size())); } inline void Base64Encode(const TStringBuf src, TString& dst) { @@ -117,6 +132,11 @@ inline void Base64EncodeUrl(const TStringBuf src, TString& dst) { dst.resize(Base64EncodeUrl(src, dst.begin()).size()); } +inline void Base64EncodeUrlNoPadding(const TStringBuf src, TString& dst) { + dst.ReserveAndResize(Base64EncodeBufSize(src.size())); + dst.resize(Base64EncodeUrlNoPadding(src, dst.begin()).size()); +} + inline TString Base64Encode(const TStringBuf s) { TString ret; Base64Encode(s, ret); diff --git a/library/cpp/string_utils/base64/base64_ut.cpp b/library/cpp/string_utils/base64/base64_ut.cpp index bcc1e65879..cb6ca7def2 100644 --- a/library/cpp/string_utils/base64/base64_ut.cpp +++ b/library/cpp/string_utils/base64/base64_ut.cpp @@ -163,7 +163,7 @@ void Out<NB64Etalon::TImpls::EImpl>(IOutputStream& o, typename TTypeTraits<NB64E } } -static void TestEncodeDecodeIntoString(const TString& plain, const TString& encoded, const TString& encodedUrl) { +static void 
TestEncodeDecodeIntoString(const TString& plain, const TString& encoded, const TString& encodedUrl, const TString& encodedUrlNoPadding) { TString a, b; Base64Encode(plain, a); @@ -177,6 +177,12 @@ static void TestEncodeDecodeIntoString(const TString& plain, const TString& enco Base64Decode(a, b); UNIT_ASSERT_VALUES_EQUAL(b, plain); + + Base64EncodeUrlNoPadding(plain, a); + UNIT_ASSERT_VALUES_EQUAL(a, encodedUrlNoPadding); + + TString c = Base64DecodeUneven(a); + UNIT_ASSERT_VALUES_EQUAL(c, plain); } static void TestEncodeStrictDecodeIntoString(const TString& plain, const TString& encoded, const TString& encodedUrl) { @@ -225,8 +231,16 @@ Y_UNIT_TEST_SUITE(TBase64) { "oqOkpaanqKmqq6ytrq-wsbKztLW2t7i5uru8vb6_wMHCw8TFxsfIyc" "rLzM3Oz9DR0tPU1dbX2Nna29zd3t_g4eLj5OXm5-jp6uvs7e7v8PHy" "8_T19vf4-fr7_P3-_w,,"; + const TString base64UrlWithoutPadding = + "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJy" + "gpKissLS4vMDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9Q" + "UVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eH" + "l6e3x9fn-AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6Ch" + "oqOkpaanqKmqq6ytrq-wsbKztLW2t7i5uru8vb6_wMHCw8TFxsfIyc" + "rLzM3Oz9DR0tPU1dbX2Nna29zd3t_g4eLj5OXm5-jp6uvs7e7v8PHy" + "8_T19vf4-fr7_P3-_w"; - TestEncodeDecodeIntoString(str, base64, base64Url); + TestEncodeDecodeIntoString(str, base64, base64Url, base64UrlWithoutPadding); TestEncodeStrictDecodeIntoString(str, base64, base64Url); } @@ -235,8 +249,9 @@ Y_UNIT_TEST_SUITE(TBase64) { const TString base64 = "aHR0cDovL3lhbmRleC5ydToxMjM0L3JlcXVlc3Q/cGFyYW09dmFsdWUmbGxsPWZmZiNmcmFnbWVudA=="; const TString base64Url = "aHR0cDovL3lhbmRleC5ydToxMjM0L3JlcXVlc3Q_cGFyYW09dmFsdWUmbGxsPWZmZiNmcmFnbWVudA,,"; + const TString base64UrlWithoutPadding = "aHR0cDovL3lhbmRleC5ydToxMjM0L3JlcXVlc3Q_cGFyYW09dmFsdWUmbGxsPWZmZiNmcmFnbWVudA"; - TestEncodeDecodeIntoString(str, base64, base64Url); + TestEncodeDecodeIntoString(str, base64, base64Url, base64UrlWithoutPadding); TestEncodeStrictDecodeIntoString(str, base64, 
base64Url); } } @@ -293,8 +308,11 @@ Y_UNIT_TEST_SUITE(TBase64) { } TString output; TString encoded = Base64Encode(input); + TString encodedUrl = TString::Uninitialized(Base64EncodeBufSize(input.length())); + Base64EncodeUrlNoPadding(input, encodedUrl); UNIT_ASSERT_VALUES_EQUAL(Base64Decode(encoded), input); UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode(encoded), input); + UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven(encodedUrl), input); } Y_UNIT_TEST(TestAllPossibleOctets) { @@ -459,6 +477,12 @@ Y_UNIT_TEST_SUITE(TBase64) { UNIT_ASSERT_VALUES_EQUAL(x, xDec); } + Y_UNIT_TEST(TestDecodeURLEncodedWithoutPadding) { + const auto x = "1"; + const auto xDec = Base64DecodeUneven("MQ"); + UNIT_ASSERT_VALUES_EQUAL(x, xDec); + } + Y_UNIT_TEST(TestDecodeNoPaddingLongString) { const auto x = "How do I convert between big-endian and little-endian values in C++?a"; const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9h"); @@ -494,4 +518,22 @@ Y_UNIT_TEST_SUITE(TBase64) { const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9hYQ,,"); UNIT_ASSERT_VALUES_EQUAL(x, xDec); } + + Y_UNIT_TEST(TestDecodeUnevenDst) { + const auto x = "How do I convert between big-endian and little-endian values in C++?aa"; + TString b64 = "SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9hYQ"; + TVector<char> buf(Base64DecodeBufSize(b64.Size()), '\0'); + Base64DecodeUneven(buf.begin(), b64); + TString res(buf.data()); + UNIT_ASSERT_VALUES_EQUAL(x, res); + } + + Y_UNIT_TEST(TestDecodeUnevenDst2) { + const auto x = "How do I convert between big-endian and little-endian values in C++?"; + TString b64 = "SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz8"; + TVector<char> buf(Base64DecodeBufSize(b64.Size()), '\0'); + Base64DecodeUneven(buf.begin(), b64); + TString res(buf.data()); + 
UNIT_ASSERT_VALUES_EQUAL(x, res); + } } |