intermediate changes

ref:cde9a383711a11544ce7e107a78147fb96cc4029
author: Devtools Arcadia <arcadia-devtools@yandex-team.ru> 2022-02-07 18:08:42 +0300
committer: Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> 2022-02-07 18:08:42 +0300
commit: 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree: e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/string_utils/quote
download: ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
5 files changed, 721 insertions, 0 deletions
diff --git a/library/cpp/string_utils/quote/quote.cpp b/library/cpp/string_utils/quote/quote.cpp
new file mode 100644
index 0000000000..e523350b80
--- /dev/null
+++ b/library/cpp/string_utils/quote/quote.cpp
@@ -0,0 +1,311 @@
+#include "quote.h"
+
+#include <util/memory/tempbuf.h>
+#include <util/string/ascii.h>
+#include <util/string/cstriter.h>
+
+#include <cctype>
+
+/* GETXC: fold hex digit x[0] into accumulator c, then ++x; no validation here. (x[0] & 0xdf) clears bit 0x20, upper-casing 'a'..'f'. */
+#define GETXC                                                           \
+    do {                                                                \
+        c *= 16;                                                        \
+        c += (x[0] >= 'A' ? ((x[0] & 0xdf) - 'A') + 10 : (x[0] - '0')); \
+        ++x;                                                            \
+    } while (0)
+/* GETSBXC: the same digit fold, but advances x with x.Skip(1) instead of ++x. */
+#define GETSBXC                                                         \
+    do {                                                                \
+        c *= 16;                                                        \
+        c += (x[0] >= 'A' ? ((x[0] & 0xdf) - 'A') + 10 : (x[0] - '0')); \
+        x.Skip(1);                                                      \
+    } while (0)
+
+
+namespace { // file-local decoders for the two hex digits that follow a '%'
+    class TFromHexZeroTerm { // performs no length check of its own
+    public:
+        static inline char x2c(const char*& x) { // x points just past the '%'
+            if (!IsAsciiHex((ui8)x[0]) || !IsAsciiHex((ui8)x[1]))
+                return '%'; // malformed escape: yield '%' literally and leave x where it is
+            ui8 c = 0;
+            // both digits were validated above; each GETXC folds one digit into c and advances x
+            GETXC;
+            GETXC;
+            return c;
+        }
+
+        static inline char x2c(TStringBuf& x) { // same contract, consuming from a TStringBuf
+            if (!IsAsciiHex((ui8)x[0]) || !IsAsciiHex((ui8)x[1]))
+                return '%'; // malformed escape: x is not advanced
+            ui8 c = 0;
+            // each GETSBXC folds one digit into c and advances x with Skip(1)
+            GETSBXC;
+            GETSBXC;
+            return c;
+        }
+    };
+
+    class TFromHexLenLimited { // bounds-checked front end for input that ends at End
+    public:
+        TFromHexLenLimited(const char* end)
+            : End(end)
+        {
+        }
+
+        inline char x2c(const char*& x) {
+            if (x + 2 > End) // fewer than two bytes remain, so no full "XX" pair can follow
+                return '%';
+            return TFromHexZeroTerm::x2c(x);
+        }
+
+    private:
+        const char* End; // upper bound for reads; the caller in this file passes from + len
+    };
+}
+
+static inline char d2x(unsigned x) { // hex digit for x: '0'..'9', then 'A' onwards for x >= 10
+    return (x >= 10) ? (char)('A' + (x - 10)) : (char)('0' + x);
+}
+
+static inline const char* FixZero(const char* s) noexcept { // null-safe view: nullptr becomes ""
+    return (s == nullptr) ? "" : s;
+}
+
+// we escape:
+// '\"', '|', '(', ')',
+// '%',  '&', '+', ',',
+// '#',  '<', '=', '>',
+// '[',  '\\',']', '?',
+//  ':', '{', '}', '\'', '`',
+// all below ' ' (0x20) and above '~' (0x7E).
+// ' ' itself is not marked here; Escape() converts it to '+'
+static const bool chars_to_url_escape[256] = {
+    //  0  1  2  3   4  5  6  7   8  9  A  B   C  D  E  F
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //0
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //1
+    0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, //2
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, //3
+
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //4
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, //5
+    1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //6
+    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, //7
+
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //8
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //9
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //A
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //B
+
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //C
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //D
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //E
+    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //F
+};
+
+template <class It1, class It2, class It3>
+static inline It1 Escape(It1 to, It2 from, It3 end, const bool* escape_map = chars_to_url_escape) {
+    // Writes the escaped form of [from, end) to `to`, terminates it with 0, returns the terminator position.
+    for (; from != end; ++from) {
+        const unsigned char ch = (unsigned char)*from;
+        if (!escape_map[ch]) {
+            *to++ = (ch == ' ' ? '+' : *from);
+            continue;
+        }
+        *to++ = '%';
+        *to++ = d2x(ch >> 4);
+        *to++ = d2x(ch & 0xF);
+    }
+
+    *to = 0;
+
+    return to;
+}
+
+template <class It1, class It2, class It3, class FromHex>
+static inline It1 Unescape(It1 to, It2 from, It3 end, FromHex fromHex) {
+    // Decodes [from, end) into `to`: "%XX" via fromHex.x2c, '+' to ' ', everything else verbatim.
+    // Terminates the output with 0 and returns the terminator position.
+    (void)fromHex;
+
+    while (from != end) {
+        if (*from == '%') {
+            ++from; // x2c expects the position just past the '%'
+            *to++ = fromHex.x2c(from);
+        } else if (*from == '+') {
+            *to++ = ' ';
+            ++from;
+        } else {
+            *to++ = *from++;
+        }
+    }
+
+    *to = 0;
+    return to;
+}
+
+// CGIEscape returns a pointer to the end of the result string (its terminating '\0'),
+// so a single long buffer can be populated by several CGIEscape calls in a row.
+// A null `from` is treated as an empty string (see FixZero).
+char* CGIEscape(char* to, const char* from) {
+    return Escape(to, FixZero(from), TCStringEndIterator());
+}
+
+char* CGIEscape(char* to, const char* from, size_t len) { // escapes exactly len bytes starting at from
+    return Escape(to, from, from + len);
+}
+
+void CGIEscape(TString& url) { // replaces url with its escaped form
+    TTempBuf tempBuf(CgiEscapeBufLen(url.size()));
+    char* to = tempBuf.Data();
+    // escape into the scratch buffer first, then assign [to, end-of-result) back to url
+    url.AssignNoAlias(to, CGIEscape(to, url.data(), url.size()));
+}
+
+TString CGIEscapeRet(const TStringBuf url) { // returns the escaped copy of url
+    TString to;
+    to.ReserveAndResize(CgiEscapeBufLen(url.size())); // worst-case size up front
+    to.resize(CGIEscape(to.begin(), url.data(), url.size()) - to.data()); // shrink to the bytes actually written
+    return to;
+}
+
+TString& AppendCgiEscaped(const TStringBuf value, TString& to) { // appends the escaped value to `to` and returns `to`
+    const size_t origLength = to.length();
+    to.ReserveAndResize(origLength + CgiEscapeBufLen(value.size())); // worst-case room after the existing text
+    to.resize(CGIEscape(to.begin() + origLength, value.data(), value.size()) - to.data()); // trim the unused tail
+    return to;
+}
+
+// More general version of CGIEscape: on top of the CGIEscape table it also escapes the
+// RFC 3986 reserved characters, except those listed in `safe` (public default: "/").
+// A null `safe` is treated as an empty list. As in CGIEscape, ' ' is emitted as '+'.
+// Returns a pointer to the terminating '\0' of the result.
+
+template <class It1, class It2, class It3>
+static inline It1 Quote(It1 to, It2 from, It3 end, const char* safe) {
+    bool escape_map[256];
+    static_assert(sizeof(escape_map) == sizeof(chars_to_url_escape), "escape tables must match");
+    memcpy(escape_map, chars_to_url_escape, sizeof(escape_map)); // byte size, not element count
+    // RFC 3986 (URI: Generic Syntax) reserved characters are escaped unless declared safe.
+    const char* reserved = ":/?#[]@!$&\'()*+,;=";
+    for (const char* p = reserved; *p; ++p) {
+        escape_map[(unsigned char)*p] = 1;
+    }
+    // Caller-approved characters override both tables; FixZero guards against safe == nullptr.
+    for (const char* p = FixZero(safe); *p; ++p) {
+        escape_map[(unsigned char)*p] = 0;
+    }
+
+    return Escape(to, from, end, escape_map);
+}
+
+char* Quote(char* to, const char* from, const char* safe) { // '\0'-terminated input; a null from is treated as ""
+    return Quote(to, FixZero(from), TCStringEndIterator(), safe);
+}
+
+char* Quote(char* to, const TStringBuf s, const char* safe) { // length-delimited input: all s.size() bytes are processed
+    return Quote(to, s.data(), s.data() + s.size(), safe);
+}
+
+void Quote(TString& url, const char* safe) { // replaces url with its quoted form
+    TTempBuf tempBuf(CgiEscapeBufLen(url.size()));
+    char* to = tempBuf.Data();
+    // quote into the scratch buffer, then assign the result range back to url
+    url.AssignNoAlias(to, Quote(to, url, safe));
+}
+
+char* CGIUnescape(char* to, const char* from) { // '\0'-terminated input; a null from is treated as ""
+    return Unescape(to, FixZero(from), TCStringEndIterator(), TFromHexZeroTerm());
+}
+
+char* CGIUnescape(char* to, const char* from, size_t len) { // decodes exactly len bytes; hex reads are bounded by from + len
+    return Unescape(to, from, from + len, TFromHexLenLimited(from + len));
+}
+
+void CGIUnescape(TString& url) {
+    if (url.empty()) {
+        return;
+    }
+    if (!url.IsDetached()) { // buffer is shared: decode into a fresh string instead
+        url = CGIUnescapeRet(url);
+        return;
+    }
+    char* const buf = url.begin(); // refcount == 1: decode in place
+    const char* const bufEnd = CGIUnescape(buf, buf, url.size());
+    url.resize(bufEnd - buf);
+}
+
+TString CGIUnescapeRet(const TStringBuf from) { // returns the decoded copy of from
+    TString to;
+    to.ReserveAndResize(CgiUnescapeBufLen(from.size())); // decoded text is never longer than the input
+    to.resize(CGIUnescape(to.begin(), from.data(), from.size()) - to.data()); // shrink to the bytes actually written
+    return to;
+}
+
+char* UrlUnescape(char* to, TStringBuf from) {
+    // Decodes "%XX" sequences only; every other byte (including '+') is copied as is.
+    while (!from.empty()) {
+        const char cur = from[0];
+        from.Skip(1);
+        const bool hasHexRoom = ('%' == cur) && (from.length() >= 2);
+        *to++ = hasHexRoom ? TFromHexZeroTerm::x2c(from) : cur;
+    }
+
+    *to = 0;
+
+    return to;
+}
+
+void UrlUnescape(TString& url) {
+    if (url.empty()) {
+        return;
+    }
+    if (!url.IsDetached()) { // buffer is shared: decode into a fresh string instead
+        url = UrlUnescapeRet(url);
+        return;
+    }
+    char* const buf = url.begin(); // refcount == 1: decode in place
+    const char* const bufEnd = UrlUnescape(buf, url);
+    url.resize(bufEnd - buf);
+}
+
+TString UrlUnescapeRet(const TStringBuf from) { // returns the decoded copy of from
+    TString to;
+    to.ReserveAndResize(CgiUnescapeBufLen(from.size())); // decoded text is never longer than the input
+    to.resize(UrlUnescape(to.begin(), from) - to.data()); // shrink to the bytes actually written
+    return to;
+}
+
+char* UrlEscape(char* to, const char* from, bool forceEscape) {
+    for (from = FixZero(from); *from; ++from) {
+        const unsigned char ch = (unsigned char)*from;
+        bool mustEscape = ch <= ' ' || ch > '~';
+        if (!mustEscape && ch == '%') {
+            // a '%' is kept only when it already starts a "%XX" sequence and escaping is not forced
+            mustEscape = forceEscape || !(from[1] && IsAsciiHex(from[1]) && from[2] && IsAsciiHex(from[2]));
+        }
+        if (mustEscape) {
+            *to++ = '%';
+            *to++ = d2x(ch >> 4);
+            *to++ = d2x(ch & 0xF);
+        } else {
+            *to++ = *from;
+        }
+    }
+
+    *to = 0;
+    return to;
+}
+
+void UrlEscape(TString& url, bool forceEscape) { // replaces url with its escaped form
+    TTempBuf tempBuf(CgiEscapeBufLen(url.size())); // scratch space sized for the worst case
+    char* to = tempBuf.Data();
+    url.AssignNoAlias(to, UrlEscape(to, url.data(), forceEscape)); // the char* overload stops at the first '\0' in url
+}
+
+TString UrlEscapeRet(const TStringBuf from, bool forceEscape) { // returns the escaped copy of from
+    TString to;
+    to.ReserveAndResize(CgiEscapeBufLen(from.size())); // worst case: "%XX" per input byte, plus '\0'
+    to.resize(UrlEscape(to.begin(), TString(from).c_str(), forceEscape) - to.data()); // temp copy supplies the '\0' UrlEscape scans for
+    return to;
+}
diff --git a/library/cpp/string_utils/quote/quote.h b/library/cpp/string_utils/quote/quote.h
new file mode 100644
index 0000000000..3b7221154e
--- /dev/null
+++ b/library/cpp/string_utils/quote/quote.h
@@ -0,0 +1,72 @@
+#pragma once
+
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+
+//CGIEscape*:
+// ' ' converted to '+',
+// Some punctuation and chars outside [32, 126] range are converted to %xx
+// Use function CgiEscapeBufLen to determine number of characters needed for 'char* to' parameter.
+// Returns pointer to the end of the result string
+char* CGIEscape(char* to, const char* from);
+char* CGIEscape(char* to, const char* from, size_t len);
+inline char* CGIEscape(char* to, const TStringBuf from) { // TStringBuf convenience overload of the (from, len) form
+    return CGIEscape(to, from.data(), from.size());
+}
+void CGIEscape(TString& url);
+TString CGIEscapeRet(const TStringBuf url);
+TString& AppendCgiEscaped(const TStringBuf value, TString& to);
+
+inline TStringBuf CgiEscapeBuf(char* to, const TStringBuf from) { // escapes into to; the returned view ends before the '\0'
+    return TStringBuf(to, CGIEscape(to, from.data(), from.size()));
+}
+inline TStringBuf CgiEscape(void* tmp, const TStringBuf s) { // CgiEscapeBuf for an untyped scratch buffer
+    return CgiEscapeBuf(static_cast<char*>(tmp), s);
+}
+
+//CgiUnescape*:
+// Decodes '%xx' to bytes, '+' to space.
+// Use function CgiUnescapeBufLen to determine number of characters needed for 'char* to' parameter.
+// If pointer returned, then this is pointer to the end of the result string.
+char* CGIUnescape(char* to, const char* from);
+char* CGIUnescape(char* to, const char* from, size_t len);
+void CGIUnescape(TString& url);
+TString CGIUnescapeRet(const TStringBuf from);
+
+inline TStringBuf CgiUnescapeBuf(char* to, const TStringBuf from) { // decodes into to; the returned view ends before the '\0'
+    return TStringBuf(to, CGIUnescape(to, from.data(), from.size()));
+}
+inline TStringBuf CgiUnescape(void* tmp, const TStringBuf s) { // CgiUnescapeBuf for an untyped scratch buffer
+    return CgiUnescapeBuf(static_cast<char*>(tmp), s);
+}
+
+//Quote:
+// Like CGIEscape, but also escapes RFC 3986 reserved characters; characters listed in 'safe' are never escaped.
+char* Quote(char* to, const char* from, const char* safe = "/");
+char* Quote(char* to, const TStringBuf s, const char* safe = "/");
+void Quote(TString& url, const char* safe = "/");
+
+//UrlEscape:
+// Can't be used for cgi parameters ('&' character is not escaped)!
+// Escapes '%' when it is not followed by two hex digits (or always, if forceEscape is set to true),
+// plus the space character and all chars outside the [32, 126] range.
+// Can't handle '\0'-chars in TString.
+char* UrlEscape(char* to, const char* from, bool forceEscape = false);
+void UrlEscape(TString& url, bool forceEscape = false);
+TString UrlEscapeRet(const TStringBuf from, bool forceEscape = false);
+
+//UrlUnescape:
+// '+' is NOT converted to space!
+// %xx converted to bytes, other characters are copied unchanged.
+char* UrlUnescape(char* to, TStringBuf from);
+void UrlUnescape(TString& url);
+TString UrlUnescapeRet(const TStringBuf from);
+
+//*BufLen: how many characters you should allocate for 'char* to' buffers.
+constexpr size_t CgiEscapeBufLen(const size_t len) noexcept {
+    return 3 * len + 1; // worst case: every input byte becomes "%XX", plus the terminating '\0'
+}
+
+constexpr size_t CgiUnescapeBufLen(const size_t len) noexcept {
+    return len + 1; // the input length, plus one for the terminating '\0'
+}
diff --git a/library/cpp/string_utils/quote/quote_ut.cpp b/library/cpp/string_utils/quote/quote_ut.cpp
new file mode 100644
index 0000000000..6c552b279e
--- /dev/null
+++ b/library/cpp/string_utils/quote/quote_ut.cpp
@@ -0,0 +1,319 @@
+#include "quote.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+Y_UNIT_TEST_SUITE(TCGIEscapeTest) {
+    Y_UNIT_TEST(ReturnsEndOfTo) { // the returned pointer addresses the terminating '\0'
+        char r[10];
+        const char* returned = CGIEscape(r, "123");
+        UNIT_ASSERT_VALUES_EQUAL(r + strlen("123"), returned);
+        UNIT_ASSERT_VALUES_EQUAL('\0', *returned);
+    }
+
+    Y_UNIT_TEST(NotZeroTerminated) { // explicit-length overload on input without a trailing '\0'
+        char r[] = {'1', '2', '3', '4'};
+        char buf[sizeof(r) * 3 + 2];
+
+        TString ret(buf, CGIEscape(buf, r, sizeof(r)));
+
+        UNIT_ASSERT_EQUAL(ret, "1234");
+    }
+
+    Y_UNIT_TEST(StringBuf) { // '!', '@', '$', '^', '*' pass through unchanged; ' ' becomes '+'
+        char tmp[100];
+
+        UNIT_ASSERT_VALUES_EQUAL(CgiEscape(tmp, "!@#$%^&*(){}[]\" "), TStringBuf("!@%23$%25^%26*%28%29%7B%7D%5B%5D%22+"));
+    }
+
+    Y_UNIT_TEST(StrokaRet) { // same input through the TString-returning variant
+        UNIT_ASSERT_VALUES_EQUAL(CGIEscapeRet("!@#$%^&*(){}[]\" "), TString("!@%23$%25^%26*%28%29%7B%7D%5B%5D%22+"));
+    }
+
+    Y_UNIT_TEST(StrokaAppendRet) { // append to an empty string, then twice to a string that already has content
+        TString param;
+        AppendCgiEscaped("!@#$%^&*(){}[]\" ", param);
+        UNIT_ASSERT_VALUES_EQUAL(param, TString("!@%23$%25^%26*%28%29%7B%7D%5B%5D%22+"));
+
+        TString param2 = "&param=";
+        AppendCgiEscaped("!@#$%^&*(){}[]\" ", param2);
+        UNIT_ASSERT_VALUES_EQUAL(param2,
+            TString("&param=!@%23$%25^%26*%28%29%7B%7D%5B%5D%22+"));
+
+        param2.append("&param_param=");
+        AppendCgiEscaped("!@#$%^&*(){}[]\" ", param2);
+        UNIT_ASSERT_VALUES_EQUAL(param2,
+            TString("&param=!@%23$%25^%26*%28%29%7B%7D%5B%5D%22+&param_param=!@%23$%25^%26*%28%29%7B%7D%5B%5D%22+"));
+    }
+
+}
+
+Y_UNIT_TEST_SUITE(TCGIUnescapeTest) {
+    Y_UNIT_TEST(StringBuf) { // round trip of the escaped punctuation through the TStringBuf helper
+        char tmp[100];
+
+        UNIT_ASSERT_VALUES_EQUAL(CgiUnescape(tmp, "!@%23$%25^%26*%28%29"), TStringBuf("!@#$%^&*()"));
+    }
+
+    Y_UNIT_TEST(TestValidZeroTerm) { // well-formed input, '\0'-terminated overload; hex digits in either case
+        char r[10];
+
+        CGIUnescape(r, "1234");
+        UNIT_ASSERT_VALUES_EQUAL(r, "1234");
+
+        CGIUnescape(r, "%3d");
+        UNIT_ASSERT_VALUES_EQUAL(r, "=");
+
+        CGIUnescape(r, "12%3D34");
+        UNIT_ASSERT_VALUES_EQUAL(r, "12=34");
+    }
+
+    Y_UNIT_TEST(TestInvalidZeroTerm) { // truncated or non-hex escapes are copied through unchanged
+        char r[10];
+
+        CGIUnescape(r, "%");
+        UNIT_ASSERT_VALUES_EQUAL(r, "%");
+
+        CGIUnescape(r, "%3");
+        UNIT_ASSERT_VALUES_EQUAL(r, "%3");
+
+        CGIUnescape(r, "%3g");
+        UNIT_ASSERT_VALUES_EQUAL(r, "%3g");
+
+        CGIUnescape(r, "12%3g34");
+        UNIT_ASSERT_VALUES_EQUAL(r, "12%3g34");
+
+        CGIUnescape(r, "%3u123");
+        UNIT_ASSERT_VALUES_EQUAL(r, "%3u123");
+    }
+
+    Y_UNIT_TEST(TestValidNotZeroTerm) { // explicit-length overload: bytes past len must be ignored
+        char r[10];
+
+        CGIUnescape(r, "123456789", 4);
+        UNIT_ASSERT_VALUES_EQUAL(r, "1234");
+
+        CGIUnescape(r, "%3d1234", 3);
+        UNIT_ASSERT_VALUES_EQUAL(r, "=");
+
+        CGIUnescape(r, "12%3D345678", 7);
+        UNIT_ASSERT_VALUES_EQUAL(r, "12=34");
+    }
+
+    Y_UNIT_TEST(TestInvalidNotZeroTerm) { // an escape cut off by len must not read hex digits beyond it
+        char r[10];
+
+        CGIUnescape(r, "%3d", 1);
+        UNIT_ASSERT_VALUES_EQUAL(r, "%");
+
+        CGIUnescape(r, "%3d", 2);
+        UNIT_ASSERT_VALUES_EQUAL(r, "%3");
+
+        CGIUnescape(r, "%3g1234", 3);
+        UNIT_ASSERT_VALUES_EQUAL(r, "%3g");
+
+        CGIUnescape(r, "12%3g345678", 7);
+        UNIT_ASSERT_VALUES_EQUAL(r, "12%3g34");
+
+        CGIUnescape(r, "%3u1234", 2);
+        UNIT_ASSERT_VALUES_EQUAL(r, "%3");
+
+        CGIUnescape(r, "%3u1234", 3);
+        UNIT_ASSERT_VALUES_EQUAL(r, "%3u");
+
+        CGIUnescape(r, "%3u1234", 4);
+        UNIT_ASSERT_VALUES_EQUAL(r, "%3u1");
+    }
+
+    Y_UNIT_TEST(StrokaOutParameterInplace) { // s has no other owner here, so the IsDetached() branch is presumably taken
+        TString s;
+
+        s = "hello%3dworld";
+        CGIUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "hello=world");
+
+        s = "+%23+";
+        CGIUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, " # ");
+
+        s = "hello%3u";
+        CGIUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "hello%3u");
+
+        s = "0123456789012345";
+        CGIUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "0123456789012345");
+
+        s = "";
+        CGIUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "");
+    }
+
+    Y_UNIT_TEST(StrokaOutParameterNotInplace) { // NOTE(review): sCopy = s presumably shares the buffer to force the copying branch - confirm
+        TString s, sCopy;
+
+        s = "hello%3dworld";
+        sCopy = s;
+        CGIUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "hello=world");
+
+        s = "+%23+";
+        sCopy = s;
+        CGIUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, " # ");
+
+        s = "hello%3u";
+        sCopy = s;
+        CGIUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "hello%3u");
+
+        s = "0123456789012345";
+        sCopy = s;
+        CGIUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "0123456789012345");
+
+        s = "";
+        sCopy = s;
+        CGIUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "");
+    }
+}
+
+Y_UNIT_TEST_SUITE(TUrlEscapeTest) {
+    Y_UNIT_TEST(EscapeEscaped) { // without forceEscape an existing "%3d" is left as is
+        TString s;
+
+        s = "hello%3dworld";
+        UNIT_ASSERT_VALUES_EQUAL(UrlEscapeRet(s), "hello%3dworld");
+        UrlEscape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "hello%3dworld");
+    }
+
+    Y_UNIT_TEST(EscapeUnescape) { // so a following unescape decodes it to '='
+        TString s;
+
+        s = "hello%3dworld";
+        UrlEscape(s);
+        UrlUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "hello=world");
+    }
+
+    Y_UNIT_TEST(EscapeUnescapeRet) { // same check through the value-returning variants
+        TString s;
+
+        s = "hello%3dworld";
+        UNIT_ASSERT_VALUES_EQUAL(UrlUnescapeRet(UrlEscapeRet(s)), "hello=world");
+    }
+
+    Y_UNIT_TEST(EscapeEscapedForce) { // forceEscape turns the '%' itself into "%25"
+        TString s;
+
+        s = "hello%3dworld";
+        UNIT_ASSERT_VALUES_EQUAL(UrlEscapeRet(s, true), "hello%253dworld");
+        UrlEscape(s, true);
+        UNIT_ASSERT_VALUES_EQUAL(s, "hello%253dworld");
+    }
+
+    Y_UNIT_TEST(EscapeUnescapeForce) { // forced escape followed by unescape restores the original text
+        TString s;
+
+        s = "hello%3dworld";
+        UrlEscape(s, true);
+        UrlUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "hello%3dworld");
+    }
+
+    Y_UNIT_TEST(EscapeUnescapeForceRet) { // same check through the value-returning variants
+        TString s;
+
+        s = "hello%3dworld";
+        UNIT_ASSERT_VALUES_EQUAL(UrlUnescapeRet(UrlEscapeRet(s, true)), "hello%3dworld");
+    }
+}
+
+Y_UNIT_TEST_SUITE(TUrlUnescapeTest) {
+    Y_UNIT_TEST(StrokaOutParameterInplace) { // unlike CGIUnescape, '+' must stay '+'
+        TString s;
+
+        s = "hello%3dworld";
+        UrlUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "hello=world");
+
+        s = "+%23+";
+        UrlUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "+#+");
+
+        s = "hello%3u";
+        UrlUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "hello%3u");
+
+        s = "0123456789012345";
+        UrlUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "0123456789012345");
+
+        s = "";
+        UrlUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "");
+    }
+
+    Y_UNIT_TEST(StrokaOutParameterNotInplace) { // NOTE(review): sCopy = s presumably shares the buffer to force the copying branch - confirm
+        TString s, sCopy;
+
+        s = "hello%3dworld";
+        sCopy = s;
+        UrlUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "hello=world");
+
+        s = "+%23+";
+        sCopy = s;
+        UrlUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "+#+");
+
+        s = "hello%3u";
+        sCopy = s;
+        UrlUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "hello%3u");
+
+        s = "0123456789012345";
+        sCopy = s;
+        UrlUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "0123456789012345");
+
+        s = "";
+        sCopy = s;
+        UrlUnescape(s);
+        UNIT_ASSERT_VALUES_EQUAL(s, "");
+    }
+}
+
+Y_UNIT_TEST_SUITE(TQuoteTest) {
+    Y_UNIT_TEST(ReturnsEndOfTo) { // the returned pointer addresses the terminating '\0'
+        char r[10];
+        const char* returned = Quote(r, "123");
+        UNIT_ASSERT_VALUES_EQUAL(r + strlen("123"), returned);
+        UNIT_ASSERT_VALUES_EQUAL('\0', *returned);
+    }
+
+    Y_UNIT_TEST(SlashIsSafeByDefault) { // default safe = "/": ';' and ',' are escaped, '/' is kept
+        char r[100];
+        Quote(r, "/path;tail/path,tail/");
+        UNIT_ASSERT_VALUES_EQUAL("/path%3Btail/path%2Ctail/", r);
+        TString s("/path;tail/path,tail/");
+        Quote(s);
+        UNIT_ASSERT_VALUES_EQUAL("/path%3Btail/path%2Ctail/", s.c_str());
+    }
+
+    Y_UNIT_TEST(SafeColons) { // safe = ";,": now '/' is escaped while ';' and ',' are kept
+        char r[100];
+        Quote(r, "/path;tail/path,tail/", ";,");
+        UNIT_ASSERT_VALUES_EQUAL("%2Fpath;tail%2Fpath,tail%2F", r);
+        TString s("/path;tail/path,tail/");
+        Quote(s, ";,");
+        UNIT_ASSERT_VALUES_EQUAL("%2Fpath;tail%2Fpath,tail%2F", s.c_str());
+    }
+
+    Y_UNIT_TEST(StringBuf) { // a bare literal would decay to const char* and stop at '\0', so build the TStringBuf explicitly
+        char r[100];
+        char* end = Quote(r, TStringBuf("abc\0/path", 9), ""); // 9 bytes: the embedded '\0' is part of the input
+        UNIT_ASSERT_VALUES_EQUAL(TStringBuf("abc%00%2Fpath"), TStringBuf(r, end));
+    }
+}
diff --git a/library/cpp/string_utils/quote/ut/ya.make b/library/cpp/string_utils/quote/ut/ya.make
new file mode 100644
index 0000000000..eca955144f
--- /dev/null
+++ b/library/cpp/string_utils/quote/ut/ya.make
@@ -0,0 +1,9 @@
+UNITTEST_FOR(library/cpp/string_utils/quote)
+
+OWNER(vladon)
+
+SRCS(
+    quote_ut.cpp
+)
+
+END()
diff --git a/library/cpp/string_utils/quote/ya.make b/library/cpp/string_utils/quote/ya.make
new file mode 100644
index 0000000000..55bb3cf939
--- /dev/null
+++ b/library/cpp/string_utils/quote/ya.make
@@ -0,0 +1,10 @@
+LIBRARY()
+
+OWNER(g:util)
+
+SRCS(
+    quote.cpp
+    quote.h
+)
+
+END()
author	Devtools Arcadia <arcadia-devtools@yandex-team.ru>	2022-02-07 18:08:42 +0300
committer	Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>	2022-02-07 18:08:42 +0300
commit	1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
tree	e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/string_utils/quote
download	ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz