aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author conterouz <conterouz@yandex-team.com> 2022-12-16 10:04:06 +0300
committer conterouz <conterouz@yandex-team.com> 2022-12-16 10:04:06 +0300
commit 64fdfff412a1c555a909da6fd654cba5628e61e8 (patch)
tree 93f211a4cfb841fef1595737a37233d9860c09a0
parent fa371e5210e9cdda3fd6fc33af87a2458b69ab84 (diff)
download ydb-64fdfff412a1c555a909da6fd654cba5628e61e8.tar.gz
Добавил в библиотеку метод для создания Base64 строки url-friendly
-rw-r--r-- library/cpp/string_utils/base64/base64.cpp 36
-rw-r--r-- library/cpp/string_utils/base64/base64.h 32
-rw-r--r-- library/cpp/string_utils/base64/base64_ut.cpp 48
3 files changed, 100 insertions, 16 deletions
diff --git a/library/cpp/string_utils/base64/base64.cpp b/library/cpp/string_utils/base64/base64.cpp
index 05c201f0de..610136fd1a 100644
--- a/library/cpp/string_utils/base64/base64.cpp
+++ b/library/cpp/string_utils/base64/base64.cpp
@@ -111,7 +111,7 @@ static inline unsigned char GetBase64EncodedIndex3(unsigned char octet2) {
return (octet2 & 0x3f);
}
-template <bool urlVersion>
+template <bool urlVersion, bool usePadding = true>
static inline char* Base64EncodeImpl(char* outstr, const unsigned char* instr, size_t len) {
const char* const base64_etab = (urlVersion ? base64_etab_url : base64_etab_std);
const char pad = (urlVersion ? ',' : '=');
@@ -132,9 +132,13 @@ static inline char* Base64EncodeImpl(char* outstr, const unsigned char* instr, s
*outstr++ = base64_etab[GetBase64EncodedIndex2(instr[idx + 1], '\0')];
} else {
*outstr++ = base64_etab[GetBase64EncodedIndex1(instr[idx], '\0')];
+ if (usePadding) {
+ *outstr++ = pad;
+ }
+ }
+ if (usePadding) {
*outstr++ = pad;
}
- *outstr++ = pad;
}
*outstr = 0;
@@ -149,6 +153,10 @@ char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len) {
return Base64EncodeImpl<true>(outstr, instr, len);
}
+char* Base64EncodeUrlNoPadding(char* outstr, const unsigned char* instr, size_t len) {
+ return Base64EncodeImpl<true, false>(outstr, instr, len);
+}
+
inline void uudecode_1(char* dst, unsigned char* src) {
dst[0] = char((base64_bkw[src[0]] << 2) | (base64_bkw[src[1]] >> 4));
dst[1] = char((base64_bkw[src[1]] << 4) | (base64_bkw[src[2]] >> 2));
@@ -245,13 +253,27 @@ size_t Base64Decode(void* dst, const char* b, const char* e) {
return outLen;
}
-TString Base64DecodeUneven(const TStringBuf s) {
- if (s.length() % 4 == 0) {
- return Base64Decode(s);
+size_t Base64DecodeUneven(void* dst, const TStringBuf s) {
+ const size_t tailSize = s.length() % 4;
+ if (tailSize == 0) {
+ return Base64Decode(dst, s.begin(), s.end());
}
- // padding to 4
- return Base64Decode(TString(s) + TString(4 - (s.length() % 4), '='));
+ // split s into a part whose length is a multiple of 4 and a tail, and decode them in two steps to avoid a memory allocation
+ char tail[4] = {'=', '=', '=', '='};
+ memcpy(tail, s.end() - tailSize, tailSize);
+ size_t decodedEven = s.length() > 4 ? Base64Decode(dst, s.begin(), s.end() - tailSize) : 0;
+ // a tail of size 1 is never valid (a single base64 symbol carries only 6 bits, less than one 8-bit byte), so it is skipped
+ size_t decodedTail = tailSize != 1 ? Base64Decode(static_cast<char*>(dst) + decodedEven, tail, tail + 4) : 0;
+ return decodedEven + decodedTail;
+}
+
+TString Base64DecodeUneven(const TStringBuf s) {
+ TString ret;
+ ret.ReserveAndResize(Base64DecodeBufSize(s.size()));
+ size_t size = Base64DecodeUneven(const_cast<char*>(ret.data()), s);
+ ret.resize(size);
+ return ret;
}
char* Base64Encode(char* outstr, const unsigned char* instr, size_t len) {
diff --git a/library/cpp/string_utils/base64/base64.h b/library/cpp/string_utils/base64/base64.h
index f778a6425a..3823fc1b8d 100644
--- a/library/cpp/string_utils/base64/base64.h
+++ b/library/cpp/string_utils/base64/base64.h
@@ -24,7 +24,7 @@ constexpr size_t Base64DecodeBufSize(const size_t len) noexcept {
size_t Base64Decode(void* dst, const char* b, const char* e);
inline TStringBuf Base64Decode(const TStringBuf src, void* dst) {
- return TStringBuf((const char*)dst, Base64Decode(dst, src.begin(), src.end()));
+ return TStringBuf(static_cast<const char*>(dst), Base64Decode(dst, src.begin(), src.end()));
}
inline void Base64Decode(const TStringBuf src, TString& dst) {
@@ -64,7 +64,7 @@ size_t Base64StrictDecode(void* dst, const char* b, const char* e);
/// @return Returns dst wrapped into TStringBuf.
///
inline TStringBuf Base64StrictDecode(const TStringBuf src, void* dst) {
- return TStringBuf((const char*)dst, Base64StrictDecode(dst, src.begin(), src.end()));
+ return TStringBuf(static_cast<const char*>(dst), Base64StrictDecode(dst, src.begin(), src.end()));
}
///
@@ -90,6 +90,7 @@ inline TString Base64StrictDecode(const TStringBuf src) {
/// Works with strings which length is not divisible by 4.
TString Base64DecodeUneven(const TStringBuf s);
+size_t Base64DecodeUneven(void* dst, const TStringBuf s);
//encode
constexpr size_t Base64EncodeBufSize(const size_t len) noexcept {
@@ -99,12 +100,26 @@ constexpr size_t Base64EncodeBufSize(const size_t len) noexcept {
char* Base64Encode(char* outstr, const unsigned char* instr, size_t len);
char* Base64EncodeUrl(char* outstr, const unsigned char* instr, size_t len);
-inline TStringBuf Base64Encode(const TStringBuf src, void* tmp) {
- return TStringBuf((const char*)tmp, Base64Encode((char*)tmp, (const unsigned char*)src.data(), src.size()));
+/// Makes a base64 string that stays unchanged after applying the 'urlencode' function,
+/// as it contains no characters that cannot be used in URLs (and no padding).
+/// @param outstr a pointer to allocated memory for writing result.
+/// @param instr a pointer to the buffer to encode
+/// @param len size of instr buffer
+///
+/// @return Returns pointer to the end of the encoded data in outstr (one past the last encoded symbol).
+///
+char* Base64EncodeUrlNoPadding(char* outstr, const unsigned char* instr, size_t len);
+
+inline TStringBuf Base64Encode(const TStringBuf src, void* output) {
+ return TStringBuf(static_cast<const char*>(output), Base64Encode(static_cast<char*>(output), reinterpret_cast<const unsigned char*>(src.data()), src.size()));
+}
+
+inline TStringBuf Base64EncodeUrl(const TStringBuf src, void* output) {
+ return TStringBuf(static_cast<const char*>(output), Base64EncodeUrl(static_cast<char*>(output), reinterpret_cast<const unsigned char*>(src.data()), src.size()));
}
-inline TStringBuf Base64EncodeUrl(const TStringBuf src, void* tmp) {
- return TStringBuf((const char*)tmp, Base64EncodeUrl((char*)tmp, (const unsigned char*)src.data(), src.size()));
+inline TStringBuf Base64EncodeUrlNoPadding(const TStringBuf src, void* output) {
+ return TStringBuf(static_cast<const char*>(output), Base64EncodeUrlNoPadding(static_cast<char*>(output), reinterpret_cast<const unsigned char*>(src.data()), src.size()));
}
inline void Base64Encode(const TStringBuf src, TString& dst) {
@@ -117,6 +132,11 @@ inline void Base64EncodeUrl(const TStringBuf src, TString& dst) {
dst.resize(Base64EncodeUrl(src, dst.begin()).size());
}
+inline void Base64EncodeUrlNoPadding(const TStringBuf src, TString& dst) {
+ dst.ReserveAndResize(Base64EncodeBufSize(src.size()));
+ dst.resize(Base64EncodeUrlNoPadding(src, dst.begin()).size());
+}
+
inline TString Base64Encode(const TStringBuf s) {
TString ret;
Base64Encode(s, ret);
diff --git a/library/cpp/string_utils/base64/base64_ut.cpp b/library/cpp/string_utils/base64/base64_ut.cpp
index bcc1e65879..cb6ca7def2 100644
--- a/library/cpp/string_utils/base64/base64_ut.cpp
+++ b/library/cpp/string_utils/base64/base64_ut.cpp
@@ -163,7 +163,7 @@ void Out<NB64Etalon::TImpls::EImpl>(IOutputStream& o, typename TTypeTraits<NB64E
}
}
-static void TestEncodeDecodeIntoString(const TString& plain, const TString& encoded, const TString& encodedUrl) {
+static void TestEncodeDecodeIntoString(const TString& plain, const TString& encoded, const TString& encodedUrl, const TString& encodedUrlNoPadding) {
TString a, b;
Base64Encode(plain, a);
@@ -177,6 +177,12 @@ static void TestEncodeDecodeIntoString(const TString& plain, const TString& enco
Base64Decode(a, b);
UNIT_ASSERT_VALUES_EQUAL(b, plain);
+
+ Base64EncodeUrlNoPadding(plain, a);
+ UNIT_ASSERT_VALUES_EQUAL(a, encodedUrlNoPadding);
+
+ TString c = Base64DecodeUneven(a);
+ UNIT_ASSERT_VALUES_EQUAL(c, plain);
}
static void TestEncodeStrictDecodeIntoString(const TString& plain, const TString& encoded, const TString& encodedUrl) {
@@ -225,8 +231,16 @@ Y_UNIT_TEST_SUITE(TBase64) {
"oqOkpaanqKmqq6ytrq-wsbKztLW2t7i5uru8vb6_wMHCw8TFxsfIyc"
"rLzM3Oz9DR0tPU1dbX2Nna29zd3t_g4eLj5OXm5-jp6uvs7e7v8PHy"
"8_T19vf4-fr7_P3-_w,,";
+ const TString base64UrlWithoutPadding =
+ "AAECAwQFBgcICQoLDA0ODxAREhMUFRYXGBkaGxwdHh8gISIjJCUmJy"
+ "gpKissLS4vMDEyMzQ1Njc4OTo7PD0-P0BBQkNERUZHSElKS0xNTk9Q"
+ "UVJTVFVWV1hZWltcXV5fYGFiY2RlZmdoaWprbG1ub3BxcnN0dXZ3eH"
+ "l6e3x9fn-AgYKDhIWGh4iJiouMjY6PkJGSk5SVlpeYmZqbnJ2en6Ch"
+ "oqOkpaanqKmqq6ytrq-wsbKztLW2t7i5uru8vb6_wMHCw8TFxsfIyc"
+ "rLzM3Oz9DR0tPU1dbX2Nna29zd3t_g4eLj5OXm5-jp6uvs7e7v8PHy"
+ "8_T19vf4-fr7_P3-_w";
- TestEncodeDecodeIntoString(str, base64, base64Url);
+ TestEncodeDecodeIntoString(str, base64, base64Url, base64UrlWithoutPadding);
TestEncodeStrictDecodeIntoString(str, base64, base64Url);
}
@@ -235,8 +249,9 @@ Y_UNIT_TEST_SUITE(TBase64) {
const TString base64 = "aHR0cDovL3lhbmRleC5ydToxMjM0L3JlcXVlc3Q/cGFyYW09dmFsdWUmbGxsPWZmZiNmcmFnbWVudA==";
const TString base64Url = "aHR0cDovL3lhbmRleC5ydToxMjM0L3JlcXVlc3Q_cGFyYW09dmFsdWUmbGxsPWZmZiNmcmFnbWVudA,,";
+ const TString base64UrlWithoutPadding = "aHR0cDovL3lhbmRleC5ydToxMjM0L3JlcXVlc3Q_cGFyYW09dmFsdWUmbGxsPWZmZiNmcmFnbWVudA";
- TestEncodeDecodeIntoString(str, base64, base64Url);
+ TestEncodeDecodeIntoString(str, base64, base64Url, base64UrlWithoutPadding);
TestEncodeStrictDecodeIntoString(str, base64, base64Url);
}
}
@@ -293,8 +308,11 @@ Y_UNIT_TEST_SUITE(TBase64) {
}
TString output;
TString encoded = Base64Encode(input);
+ TString encodedUrl = TString::Uninitialized(Base64EncodeBufSize(input.length()));
+ Base64EncodeUrlNoPadding(input, encodedUrl);
UNIT_ASSERT_VALUES_EQUAL(Base64Decode(encoded), input);
UNIT_ASSERT_VALUES_EQUAL(Base64StrictDecode(encoded), input);
+ UNIT_ASSERT_VALUES_EQUAL(Base64DecodeUneven(encodedUrl), input);
}
Y_UNIT_TEST(TestAllPossibleOctets) {
@@ -459,6 +477,12 @@ Y_UNIT_TEST_SUITE(TBase64) {
UNIT_ASSERT_VALUES_EQUAL(x, xDec);
}
+ Y_UNIT_TEST(TestDecodeURLEncodedWithoutPadding) {
+ const auto x = "1";
+ const auto xDec = Base64DecodeUneven("MQ");
+ UNIT_ASSERT_VALUES_EQUAL(x, xDec);
+ }
+
Y_UNIT_TEST(TestDecodeNoPaddingLongString) {
const auto x = "How do I convert between big-endian and little-endian values in C++?a";
const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9h");
@@ -494,4 +518,22 @@ Y_UNIT_TEST_SUITE(TBase64) {
const auto xDec = Base64Decode("SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9hYQ,,");
UNIT_ASSERT_VALUES_EQUAL(x, xDec);
}
+
+ Y_UNIT_TEST(TestDecodeUnevenDst) {
+ const auto x = "How do I convert between big-endian and little-endian values in C++?aa";
+ TString b64 = "SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz9hYQ";
+ TVector<char> buf(Base64DecodeBufSize(b64.Size()), '\0');
+ Base64DecodeUneven(buf.begin(), b64);
+ TString res(buf.data());
+ UNIT_ASSERT_VALUES_EQUAL(x, res);
+ }
+
+ Y_UNIT_TEST(TestDecodeUnevenDst2) {
+ const auto x = "How do I convert between big-endian and little-endian values in C++?";
+ TString b64 = "SG93IGRvIEkgY29udmVydCBiZXR3ZWVuIGJpZy1lbmRpYW4gYW5kIGxpdHRsZS1lbmRpYW4gdmFsdWVzIGluIEMrKz8";
+ TVector<char> buf(Base64DecodeBufSize(b64.Size()), '\0');
+ Base64DecodeUneven(buf.begin(), b64);
+ TString res(buf.data());
+ UNIT_ASSERT_VALUES_EQUAL(x, res);
+ }
}