aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorsobols <sobols@yandex-team.com>2022-10-10 22:48:50 +0300
committersobols <sobols@yandex-team.com>2022-10-10 22:48:50 +0300
commit230d573d6775493133032c2762f00f857a9336ce (patch)
tree06736a793655cdb36fa71dd41220e94e93d0d5b2
parent79bf94c9cbcfde5575a1bb13f6261c994492d418 (diff)
downloadydb-230d573d6775493133032c2762f00f857a9336ce.tar.gz
[relaxed_escaper] Speed up
-rw-r--r--library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h34
1 files changed, 19 insertions, 15 deletions
diff --git a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h
index d7ea7c1259..1ff4fb3311 100644
--- a/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h
+++ b/library/cpp/string_utils/relaxed_escaper/relaxed_escaper.h
@@ -41,14 +41,14 @@ namespace NEscJ {
}
struct TEscapeUtil {
- static const size_t ESCAPE_C_BUFFER_SIZE = 6;
+ static constexpr size_t ESCAPE_C_BUFFER_SIZE = 6;
- template <bool asunicode>
+ template <bool asunicode, bool hasCustomSafeUnsafe>
static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) {
// (1) Printable characters go as-is, except backslash and double quote.
// (2) Characters \r, \n, \t and \0 ... \7 replaced by their simple escape characters (if possible).
// (3) Otherwise, character is encoded using hexadecimal escape sequence (if possible), or octal.
- if (safe.find(c) != TStringBuf::npos) {
+ if (hasCustomSafeUnsafe && safe.find(c) != TStringBuf::npos) {
r[0] = c;
return 1;
}
@@ -60,7 +60,7 @@ namespace NEscJ {
r[0] = '\\';
r[1] = '\\';
return 2;
- } else if (IsPrintable(c) && unsafe.find(c) == TStringBuf::npos) {
+ } else if (IsPrintable(c) && (!hasCustomSafeUnsafe || unsafe.find(c) == TStringBuf::npos)) {
r[0] = c;
return 1;
} else if (c == '\b') {
@@ -109,46 +109,50 @@ namespace NEscJ {
return 4;
}
}
-
- static inline size_t EscapeJ(ui8 c, ui8 next, char r[ESCAPE_C_BUFFER_SIZE], TStringBuf safe, TStringBuf unsafe) {
- return EscapeJ<false>(c, next, r, safe, unsafe);
- }
};
inline size_t SuggestBuffer(size_t len) {
return len * TEscapeUtil::ESCAPE_C_BUFFER_SIZE;
}
- template <bool tounicode>
- inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
+ template <bool tounicode, bool hasCustomSafeUnsafe>
+ inline size_t EscapeJImpl(const char* str, size_t len, char* out, TStringBuf safe, TStringBuf unsafe) {
char* out0 = out;
char buffer[TEscapeUtil::ESCAPE_C_BUFFER_SIZE];
size_t i, j;
for (i = 0, j = 0; i < len; ++i) {
- size_t rlen = TEscapeUtil::EscapeJ<tounicode>(str[i], (i + 1 < len ? str[i + 1] : 0), buffer, safe, unsafe);
+ size_t rlen = TEscapeUtil::EscapeJ<tounicode, hasCustomSafeUnsafe>(str[i], (i + 1 < len ? str[i + 1] : 0), buffer, safe, unsafe);
if (rlen > 1) {
- strncpy(out, str + j, i - j);
+ memcpy(out, str + j, i - j);
out += i - j;
j = i + 1;
- strncpy(out, buffer, rlen);
+ memcpy(out, buffer, rlen);
out += rlen;
}
}
if (j > 0) {
- strncpy(out, str + j, len - j);
+ memcpy(out, str + j, len - j);
out += len - j;
} else {
- strncpy(out, str, len);
+ memcpy(out, str, len);
out += len;
}
return out - out0;
}
+ template <bool tounicode>
+ inline size_t EscapeJ(const char* str, size_t len, char* out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
+ if (Y_LIKELY(safe.empty() && unsafe.empty())) {
+ return EscapeJImpl<tounicode, false>(str, len, out, safe, unsafe);
+ }
+ return EscapeJImpl<tounicode, true>(str, len, out, safe, unsafe);
+ }
+
template <bool quote, bool tounicode>
inline void EscapeJ(TStringBuf in, IOutputStream& out, TStringBuf safe = TStringBuf(), TStringBuf unsafe = TStringBuf()) {
TTempBuf b(SuggestBuffer(in.size()) + 2);