diff options
| author | styopkin <[email protected]> | 2026-02-24 23:25:06 +0300 |
|---|---|---|
| committer | styopkin <[email protected]> | 2026-02-24 23:52:05 +0300 |
| commit | c71699717685cbba8eb4ee1b6e54fbc178e556da (patch) | |
| tree | 6ca03f102b90e5eb1aef21c874368db9de7c489e /library/cpp/string_utils | |
| parent | 901c3b42a699af9eb1b689b5d3d10250abb8daa0 (diff) | |
faster escape
2% int'ов сидят в Escape. pepe watafa.
<https://nda.ya.ru/t/AJC4zGq27VFEkb>
Тут, играясь с блоками, получилось на реальном cgi из прода \~33% срезать. Мелочь, а приятно. На самом деле хотелось бы узнать, как правильно делать "некрасивую, но ускоряющую" гадость в library/cpp. Все тесты работают, но тем не менее =)
До: <https://nda.ya.ru/t/Iv6Y0jQf7VFEkc>
После: <https://nda.ya.ru/t/KO_Rgvi87VFEke>
Тут, к сожалению, не на прогрузе, но уже видно.
commit_hash:671028887e7d9320c8e9a393a0abb203207eb883
Diffstat (limited to 'library/cpp/string_utils')
| -rw-r--r-- | library/cpp/string_utils/quote/bench/main.cpp | 124 | ||||
| -rw-r--r-- | library/cpp/string_utils/quote/bench/ya.make | 22 | ||||
| -rw-r--r-- | library/cpp/string_utils/quote/quote.cpp | 65 | ||||
| -rw-r--r-- | library/cpp/string_utils/quote/ut/quote_ut.cpp (renamed from library/cpp/string_utils/quote/quote_ut.cpp) | 2 | ||||
| -rw-r--r-- | library/cpp/string_utils/quote/ut/ya.make | 1 | ||||
| -rw-r--r-- | library/cpp/string_utils/quote/ya.make | 1 |
6 files changed, 200 insertions, 15 deletions
diff --git a/library/cpp/string_utils/quote/bench/main.cpp b/library/cpp/string_utils/quote/bench/main.cpp new file mode 100644 index 00000000000..6763691c52c --- /dev/null +++ b/library/cpp/string_utils/quote/bench/main.cpp @@ -0,0 +1,124 @@ +#include <library/cpp/string_utils/quote/quote.cpp> +#include <library/cpp/testing/benchmark/bench.h> + +#include <library/cpp/resource/resource.h> + +#include <util/string/vector.h> + +Y_CPU_BENCHMARK(OldEscapeSmall, iface) { + const auto n = iface.Iterations(); + TString r = "1234"; + char buf[20]; + for (size_t i = 0; i < n; ++i) { + CGIEscape(buf, r.c_str()); + Y_FAKE_READ(buf); + } +} + +Y_CPU_BENCHMARK(NewEscapeSmall, iface) { + const auto n = iface.Iterations(); + TString r = "1234"; + char buf[20]; + for (size_t i = 0; i < n; ++i) { + CGIEscape(buf, r.begin(), r.size()); + Y_FAKE_READ(buf); + } +} + +Y_CPU_BENCHMARK(OldEscapeMedium, iface) { + const auto n = iface.Iterations(); + TString kekw = "!@#$%^&*(){}[]\" ¶m=!@#$%^&*(){}[]\" ¶m_param=!@#$%^&*(){}[]\" "; + char buf[300]; + for (size_t i = 0; i < n; ++i) { + CGIEscape(buf, kekw.c_str()); + Y_FAKE_READ(buf); + } +} + +Y_CPU_BENCHMARK(NewEscapeMedium, iface) { + const auto n = iface.Iterations(); + TString kekw = "!@#$%^&*(){}[]\" ¶m=!@#$%^&*(){}[]\" ¶m_param=!@#$%^&*(){}[]\" "; + char buf[300]; + for (size_t i = 0; i < n; ++i) { + CGIEscape(buf, kekw.begin(), kekw.size()); + Y_FAKE_READ(buf); + } +} + +Y_CPU_BENCHMARK(OldEscapeBig, iface) { + const auto n = iface.Iterations(); + + TString kekw = NResource::Find("/test_files/long_cgi.txt"); + char buf[200'000]; + for (size_t i = 0; i < n; ++i) { + CGIEscape(buf, kekw.c_str()); + Y_FAKE_READ(buf); + } +} + +Y_CPU_BENCHMARK(NewEscapeBig, iface) { + const auto n = iface.Iterations(); + + TString kekw = NResource::Find("/test_files/long_cgi.txt"); + char buf[200'000]; + for (size_t i = 0; i < n; ++i) { + CGIEscape(buf, kekw.begin(), kekw.size()); + Y_FAKE_READ(buf); + } +} + +Y_CPU_BENCHMARK(OldEscapeArray, iface) { + 
const auto n = iface.Iterations(); + + TString kek = NResource::Find("/test_files/cgi_array.txt"); + TVector<TString> inputs = SplitString(kek, "\n"); + char buf[350'000]; + for (size_t i = 0; i < n; ++i) { + TString& kekw = inputs[i % inputs.size()]; + + CGIEscape(buf, kekw.c_str()); + Y_FAKE_READ(buf); + } +} + +Y_CPU_BENCHMARK(NewEscapeArray, iface) { + const auto n = iface.Iterations(); + + TString kek = NResource::Find("/test_files/cgi_array.txt"); + TVector<TString> inputs = SplitString(kek, "\n"); + char buf[350'000]; + for (size_t i = 0; i < n; ++i) { + TString& kekw = inputs[i % inputs.size()]; + + CGIEscape(buf, kekw.begin(), kekw.size()); + Y_FAKE_READ(buf); + } +} + +Y_CPU_BENCHMARK(OldEscapeHugeArray, iface) { + const auto n = iface.Iterations(); + + TString kek = NResource::Find("/test_files/cgi_huge_array.txt"); + TVector<TString> inputs = SplitString(kek, "\n"); + char buf[350'000]; + for (size_t i = 0; i < n; ++i) { + TString& kekw = inputs[i % inputs.size()]; + + CGIEscape(buf, kekw.c_str()); + Y_FAKE_READ(buf); + } +} + +Y_CPU_BENCHMARK(NewEscapeHugeArray, iface) { + const auto n = iface.Iterations(); + + TString kek = NResource::Find("/test_files/cgi_huge_array.txt"); + TVector<TString> inputs = SplitString(kek, "\n"); + char buf[350'000]; + for (size_t i = 0; i < n; ++i) { + TString& kekw = inputs[i % inputs.size()]; + + CGIEscape(buf, kekw.begin(), kekw.size()); + Y_FAKE_READ(buf); + } +} diff --git a/library/cpp/string_utils/quote/bench/ya.make b/library/cpp/string_utils/quote/bench/ya.make new file mode 100644 index 00000000000..dc6a8e7e29d --- /dev/null +++ b/library/cpp/string_utils/quote/bench/ya.make @@ -0,0 +1,22 @@ + +Y_BENCHMARK() + +SRCS( + main.cpp +) + +PEERDIR( + library/cpp/string_utils/quote +) + +FROM_SANDBOX(FILE 11167534572 RENAME RESOURCE OUT_NOAUTO cgi_huge_array.txt) +FROM_SANDBOX(FILE 11167537234 RENAME RESOURCE OUT_NOAUTO cgi_array.txt) +FROM_SANDBOX(FILE 11167578180 RENAME RESOURCE OUT_NOAUTO long_cgi.txt) + +RESOURCE( 
+ cgi_huge_array.txt /test_files/cgi_huge_array.txt + cgi_array.txt /test_files/cgi_array.txt + long_cgi.txt /test_files/long_cgi.txt +) + +END() diff --git a/library/cpp/string_utils/quote/quote.cpp b/library/cpp/string_utils/quote/quote.cpp index 9559132ae6f..91e31da1102 100644 --- a/library/cpp/string_utils/quote/quote.cpp +++ b/library/cpp/string_utils/quote/quote.cpp @@ -62,10 +62,6 @@ namespace { }; } -static inline char d2x(unsigned x) { - return (char)((x < 10) ? ('0' + x) : ('A' + x - 10)); -} - static inline const char* FixZero(const char* s) noexcept { return s ? s : ""; } @@ -101,22 +97,65 @@ static const bool chars_to_url_escape[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //F }; +static const char d2x[16] = { + '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F' +}; + +template <class It1> +static inline void EscapeHelper(It1& to, unsigned char c, const bool* escape_map) { + if (!escape_map[c]) { + *to++ = (c == ' ') ? '+' : c; + } else { + *to++ = '%'; + *to++ = d2x[c >> 4]; + *to++ = d2x[c & 0xF]; + } +} + template <class It1, class It2, class It3> static inline It1 Escape(It1 to, It2 from, It3 end, const bool* escape_map = chars_to_url_escape) { while (from != end) { - if (escape_map[(unsigned char)*from]) { - *to++ = '%'; - *to++ = d2x((unsigned char)*from >> 4); - *to++ = d2x((unsigned char)*from & 0xF); + EscapeHelper(to, *from++, escape_map); + } + + *to = 0; + + return to; +} + +static inline char* Escape(char* to, const char* from, const char* end, const bool* escape_map = chars_to_url_escape) { + constexpr size_t BLOCK = 16; + + while (from + BLOCK <= end) { + uint32_t escape_count = 0; + for (size_t i = 0; i < BLOCK; ++i) { + escape_count |= escape_map[(unsigned char)from[i]]; + } + + if (escape_count == 0) { + unsigned char src[BLOCK]; + for (size_t i = 0; i < BLOCK; ++i) { + src[i] = from[i]; + } + for (size_t i = 0; i < BLOCK; ++i) { + unsigned char c = src[i]; + to[i] = (c == ' ') ? 
'+' : c; + } + to += BLOCK; } else { - *to++ = (*from == ' ' ? '+' : *from); + for (size_t i = 0; i < BLOCK; ++i) { + unsigned char c = from[i]; + EscapeHelper(to, c, escape_map); + } } + from += BLOCK; + } - ++from; + while (from != end) { + EscapeHelper(to, *from++, escape_map); } *to = 0; - return to; } @@ -282,8 +321,8 @@ char* UrlEscape(char* to, TStringBuf src, bool forceEscape) { if (escapePercent || (unsigned char)*from <= ' ' || (unsigned char)*from > '~') { *to++ = '%'; - *to++ = d2x((unsigned char)*from >> 4); - *to++ = d2x((unsigned char)*from & 0xF); + *to++ = d2x[(unsigned char)*from >> 4]; + *to++ = d2x[(unsigned char)*from & 0xF]; } else *to++ = *from; } diff --git a/library/cpp/string_utils/quote/quote_ut.cpp b/library/cpp/string_utils/quote/ut/quote_ut.cpp index 9d6f56ffbb0..b0773ebe996 100644 --- a/library/cpp/string_utils/quote/quote_ut.cpp +++ b/library/cpp/string_utils/quote/ut/quote_ut.cpp @@ -1,4 +1,4 @@ -#include "quote.h" +#include <library/cpp/string_utils/quote/quote.h> #include <library/cpp/testing/unittest/registar.h> diff --git a/library/cpp/string_utils/quote/ut/ya.make b/library/cpp/string_utils/quote/ut/ya.make index 1aea753bc6b..ad951b1ed1c 100644 --- a/library/cpp/string_utils/quote/ut/ya.make +++ b/library/cpp/string_utils/quote/ut/ya.make @@ -1,6 +1,5 @@ UNITTEST_FOR(library/cpp/string_utils/quote) - SRCS( quote_ut.cpp ) diff --git a/library/cpp/string_utils/quote/ya.make b/library/cpp/string_utils/quote/ya.make index 109592c235e..f98c4ac6d23 100644 --- a/library/cpp/string_utils/quote/ya.make +++ b/library/cpp/string_utils/quote/ya.make @@ -8,5 +8,6 @@ SRCS( END() RECURSE_FOR_TESTS( + bench ut ) |
