summary refs log tree commit diff stats
path: root/library/cpp/string_utils
diff options
context:
space:
mode:
author: styopkin <[email protected]> 2026-02-24 23:25:06 +0300
committer: styopkin <[email protected]> 2026-02-24 23:52:05 +0300
commit: c71699717685cbba8eb4ee1b6e54fbc178e556da (patch)
tree: 6ca03f102b90e5eb1aef21c874368db9de7c489e /library/cpp/string_utils
parent: 901c3b42a699af9eb1b689b5d3d10250abb8daa0 (diff)
faster escape
2% int'ов сидят в Escape. pepe watafa. <https://nda.ya.ru/t/AJC4zGq27VFEkb> Тут, играясь с блоками, получилось на реальном cgi из прода ~33% срезать. Мелочь, а приятно. На самом деле, хотелось бы узнать, как правильно делать "некрасивую, но ускоряющую" гадость в library/cpp. Все тесты работают, но тем не менее =) До: <https://nda.ya.ru/t/Iv6Y0jQf7VFEkc> После: <https://nda.ya.ru/t/KO_Rgvi87VFEke> Тут, к сожалению, не на прогрузе, но уже видно. commit_hash:671028887e7d9320c8e9a393a0abb203207eb883
Diffstat (limited to 'library/cpp/string_utils')
-rw-r--r-- library/cpp/string_utils/quote/bench/main.cpp 124
-rw-r--r-- library/cpp/string_utils/quote/bench/ya.make 22
-rw-r--r-- library/cpp/string_utils/quote/quote.cpp 65
-rw-r--r-- library/cpp/string_utils/quote/ut/quote_ut.cpp (renamed from library/cpp/string_utils/quote/quote_ut.cpp) 2
-rw-r--r-- library/cpp/string_utils/quote/ut/ya.make 1
-rw-r--r-- library/cpp/string_utils/quote/ya.make 1
6 files changed, 200 insertions, 15 deletions
diff --git a/library/cpp/string_utils/quote/bench/main.cpp b/library/cpp/string_utils/quote/bench/main.cpp
new file mode 100644
index 00000000000..6763691c52c
--- /dev/null
+++ b/library/cpp/string_utils/quote/bench/main.cpp
@@ -0,0 +1,124 @@
+#include <library/cpp/string_utils/quote/quote.cpp>
+#include <library/cpp/testing/benchmark/bench.h>
+
+#include <library/cpp/resource/resource.h>
+
+#include <util/string/vector.h>
+
+Y_CPU_BENCHMARK(OldEscapeSmall, iface) {
+ const auto n = iface.Iterations();
+ TString r = "1234";
+ char buf[20];
+ for (size_t i = 0; i < n; ++i) {
+ CGIEscape(buf, r.c_str());
+ Y_FAKE_READ(buf);
+ }
+}
+
+Y_CPU_BENCHMARK(NewEscapeSmall, iface) {
+ const auto n = iface.Iterations();
+ TString r = "1234";
+ char buf[20];
+ for (size_t i = 0; i < n; ++i) {
+ CGIEscape(buf, r.begin(), r.size());
+ Y_FAKE_READ(buf);
+ }
+}
+
+Y_CPU_BENCHMARK(OldEscapeMedium, iface) {
+ const auto n = iface.Iterations();
+ TString kekw = "!@#$%^&*(){}[]\" &param=!@#$%^&*(){}[]\" &param_param=!@#$%^&*(){}[]\" ";
+ char buf[300];
+ for (size_t i = 0; i < n; ++i) {
+ CGIEscape(buf, kekw.c_str());
+ Y_FAKE_READ(buf);
+ }
+}
+
+Y_CPU_BENCHMARK(NewEscapeMedium, iface) {
+ const auto n = iface.Iterations();
+ TString kekw = "!@#$%^&*(){}[]\" &param=!@#$%^&*(){}[]\" &param_param=!@#$%^&*(){}[]\" ";
+ char buf[300];
+ for (size_t i = 0; i < n; ++i) {
+ CGIEscape(buf, kekw.begin(), kekw.size());
+ Y_FAKE_READ(buf);
+ }
+}
+
+Y_CPU_BENCHMARK(OldEscapeBig, iface) {
+ const auto n = iface.Iterations();
+
+ TString kekw = NResource::Find("/test_files/long_cgi.txt");
+ char buf[200'000];
+ for (size_t i = 0; i < n; ++i) {
+ CGIEscape(buf, kekw.c_str());
+ Y_FAKE_READ(buf);
+ }
+}
+
+Y_CPU_BENCHMARK(NewEscapeBig, iface) {
+ const auto n = iface.Iterations();
+
+ TString kekw = NResource::Find("/test_files/long_cgi.txt");
+ char buf[200'000];
+ for (size_t i = 0; i < n; ++i) {
+ CGIEscape(buf, kekw.begin(), kekw.size());
+ Y_FAKE_READ(buf);
+ }
+}
+
+Y_CPU_BENCHMARK(OldEscapeArray, iface) {
+ const auto n = iface.Iterations();
+
+ TString kek = NResource::Find("/test_files/cgi_array.txt");
+ TVector<TString> inputs = SplitString(kek, "\n");
+ char buf[350'000];
+ for (size_t i = 0; i < n; ++i) {
+ TString& kekw = inputs[i % inputs.size()];
+
+ CGIEscape(buf, kekw.c_str());
+ Y_FAKE_READ(buf);
+ }
+}
+
+Y_CPU_BENCHMARK(NewEscapeArray, iface) {
+ const auto n = iface.Iterations();
+
+ TString kek = NResource::Find("/test_files/cgi_array.txt");
+ TVector<TString> inputs = SplitString(kek, "\n");
+ char buf[350'000];
+ for (size_t i = 0; i < n; ++i) {
+ TString& kekw = inputs[i % inputs.size()];
+
+ CGIEscape(buf, kekw.begin(), kekw.size());
+ Y_FAKE_READ(buf);
+ }
+}
+
+Y_CPU_BENCHMARK(OldEscapeHugeArray, iface) {
+ const auto n = iface.Iterations();
+
+ TString kek = NResource::Find("/test_files/cgi_huge_array.txt");
+ TVector<TString> inputs = SplitString(kek, "\n");
+ char buf[350'000];
+ for (size_t i = 0; i < n; ++i) {
+ TString& kekw = inputs[i % inputs.size()];
+
+ CGIEscape(buf, kekw.c_str());
+ Y_FAKE_READ(buf);
+ }
+}
+
+Y_CPU_BENCHMARK(NewEscapeHugeArray, iface) {
+ const auto n = iface.Iterations();
+
+ TString kek = NResource::Find("/test_files/cgi_huge_array.txt");
+ TVector<TString> inputs = SplitString(kek, "\n");
+ char buf[350'000];
+ for (size_t i = 0; i < n; ++i) {
+ TString& kekw = inputs[i % inputs.size()];
+
+ CGIEscape(buf, kekw.begin(), kekw.size());
+ Y_FAKE_READ(buf);
+ }
+}
diff --git a/library/cpp/string_utils/quote/bench/ya.make b/library/cpp/string_utils/quote/bench/ya.make
new file mode 100644
index 00000000000..dc6a8e7e29d
--- /dev/null
+++ b/library/cpp/string_utils/quote/bench/ya.make
@@ -0,0 +1,22 @@
+
+Y_BENCHMARK()
+
+SRCS(
+ main.cpp
+)
+
+PEERDIR(
+ library/cpp/string_utils/quote
+)
+
+FROM_SANDBOX(FILE 11167534572 RENAME RESOURCE OUT_NOAUTO cgi_huge_array.txt)
+FROM_SANDBOX(FILE 11167537234 RENAME RESOURCE OUT_NOAUTO cgi_array.txt)
+FROM_SANDBOX(FILE 11167578180 RENAME RESOURCE OUT_NOAUTO long_cgi.txt)
+
+RESOURCE(
+ cgi_huge_array.txt /test_files/cgi_huge_array.txt
+ cgi_array.txt /test_files/cgi_array.txt
+ long_cgi.txt /test_files/long_cgi.txt
+)
+
+END()
diff --git a/library/cpp/string_utils/quote/quote.cpp b/library/cpp/string_utils/quote/quote.cpp
index 9559132ae6f..91e31da1102 100644
--- a/library/cpp/string_utils/quote/quote.cpp
+++ b/library/cpp/string_utils/quote/quote.cpp
@@ -62,10 +62,6 @@ namespace {
};
}
-static inline char d2x(unsigned x) {
- return (char)((x < 10) ? ('0' + x) : ('A' + x - 10));
-}
-
static inline const char* FixZero(const char* s) noexcept {
return s ? s : "";
}
@@ -101,22 +97,65 @@ static const bool chars_to_url_escape[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //F
};
+static const char d2x[16] = {
+ '0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'
+};
+
+template <class It1>
+static inline void EscapeHelper(It1& to, unsigned char c, const bool* escape_map) {
+ if (!escape_map[c]) {
+ *to++ = (c == ' ') ? '+' : c;
+ } else {
+ *to++ = '%';
+ *to++ = d2x[c >> 4];
+ *to++ = d2x[c & 0xF];
+ }
+}
+
template <class It1, class It2, class It3>
static inline It1 Escape(It1 to, It2 from, It3 end, const bool* escape_map = chars_to_url_escape) {
while (from != end) {
- if (escape_map[(unsigned char)*from]) {
- *to++ = '%';
- *to++ = d2x((unsigned char)*from >> 4);
- *to++ = d2x((unsigned char)*from & 0xF);
+ EscapeHelper(to, *from++, escape_map);
+ }
+
+ *to = 0;
+
+ return to;
+}
+
+static inline char* Escape(char* to, const char* from, const char* end, const bool* escape_map = chars_to_url_escape) {
+ constexpr size_t BLOCK = 16;
+
+ while (from + BLOCK <= end) {
+ uint32_t escape_count = 0;
+ for (size_t i = 0; i < BLOCK; ++i) {
+ escape_count |= escape_map[(unsigned char)from[i]];
+ }
+
+ if (escape_count == 0) {
+ unsigned char src[BLOCK];
+ for (size_t i = 0; i < BLOCK; ++i) {
+ src[i] = from[i];
+ }
+ for (size_t i = 0; i < BLOCK; ++i) {
+ unsigned char c = src[i];
+ to[i] = (c == ' ') ? '+' : c;
+ }
+ to += BLOCK;
} else {
- *to++ = (*from == ' ' ? '+' : *from);
+ for (size_t i = 0; i < BLOCK; ++i) {
+ unsigned char c = from[i];
+ EscapeHelper(to, c, escape_map);
+ }
}
+ from += BLOCK;
+ }
- ++from;
+ while (from != end) {
+ EscapeHelper(to, *from++, escape_map);
}
*to = 0;
-
return to;
}
@@ -282,8 +321,8 @@ char* UrlEscape(char* to, TStringBuf src, bool forceEscape) {
if (escapePercent || (unsigned char)*from <= ' ' || (unsigned char)*from > '~') {
*to++ = '%';
- *to++ = d2x((unsigned char)*from >> 4);
- *to++ = d2x((unsigned char)*from & 0xF);
+ *to++ = d2x[(unsigned char)*from >> 4];
+ *to++ = d2x[(unsigned char)*from & 0xF];
} else
*to++ = *from;
}
diff --git a/library/cpp/string_utils/quote/quote_ut.cpp b/library/cpp/string_utils/quote/ut/quote_ut.cpp
index 9d6f56ffbb0..b0773ebe996 100644
--- a/library/cpp/string_utils/quote/quote_ut.cpp
+++ b/library/cpp/string_utils/quote/ut/quote_ut.cpp
@@ -1,4 +1,4 @@
-#include "quote.h"
+#include <library/cpp/string_utils/quote/quote.h>
#include <library/cpp/testing/unittest/registar.h>
diff --git a/library/cpp/string_utils/quote/ut/ya.make b/library/cpp/string_utils/quote/ut/ya.make
index 1aea753bc6b..ad951b1ed1c 100644
--- a/library/cpp/string_utils/quote/ut/ya.make
+++ b/library/cpp/string_utils/quote/ut/ya.make
@@ -1,6 +1,5 @@
UNITTEST_FOR(library/cpp/string_utils/quote)
-
SRCS(
quote_ut.cpp
)
diff --git a/library/cpp/string_utils/quote/ya.make b/library/cpp/string_utils/quote/ya.make
index 109592c235e..f98c4ac6d23 100644
--- a/library/cpp/string_utils/quote/ya.make
+++ b/library/cpp/string_utils/quote/ya.make
@@ -8,5 +8,6 @@ SRCS(
END()
RECURSE_FOR_TESTS(
+ bench
ut
)