diff options
| author | babenko <[email protected]> | 2026-06-12 23:53:40 +0300 |
|---|---|---|
| committer | babenko <[email protected]> | 2026-06-13 00:17:12 +0300 |
| commit | 42e4b751702f065de932ef765bc6948c5e7d1e4b (patch) | |
| tree | bb8a1f8aa302a60fe7fc7eeb478cece2b2db6e35 /library/cpp/yt/string | |
| parent | bf1639fd14cbc553114b0b296d799fda6b1f97c8 (diff) | |
Speed up NYT::Format
Profile-driven optimizations of the `Format` hot path, benchmarked against a representative master debug log (structured `"Key: %v"` messages dominated by GUIDs, strings, integers, bools and durations). The median improvement is ~15-20% across the workload, measured on a dedicated host.
Changes:
- `string_builder`: use `resize_uninitialized` in `DoReserve` to avoid zero-filling the buffer on every `Format` call.
- `format`: replace the per-argument `memchr` (`spec.Contains('n')`) with an inline scan, force-inline `RunFormatterAt`, and add a `FormatString` fast path for the common plain `%v` / empty spec.
- `guid`: rewrite `WriteGuidToBuffer` using a `clz`-derived digit count and a back-to-front fill instead of the per-magnitude branch cascade (cut from ~26% to ~12% of a GUID-heavy line). Validated against an `%x` reference over 2M random GUIDs plus edge cases.
Also adds `library/cpp/yt/string/benchmark` to track `Format` performance.
### Benchmarks
Median ns/op (lower is better), measured on a pinned core of a dedicated Xeon E5-2650 v2 with 9 × 1 s repetitions. See `library/cpp/yt/string/benchmark`.
| Benchmark | What it formats | Before (ns/op) | After (ns/op) | Time change |
| --- | --- | ---: | ---: | ---: |
| `ManyMixedArgs` | ~18 args: GUIDs, strings, duration, ints | 1030 | 833 | -19% |
| `StringAndTwoGuids` | literal prefix + two GUIDs | 233 | 185 | -21% |
| `IntAndGuid` | one int + one GUID | 205 | 179 | -13% |
| `ManyInts` | six integers | 389 | 340 | -13% |
| `Guid` | a single GUID | 156 | 131 | -16% |
| `String` | a single string | 139 | 104 | -25% |
| `Int` | a single integer | 142 | 120 | -15% |
| `NoArgs` | a literal with no arguments | 88.8 | 85.7 | -3% |
commit_hash:ce9957a06c3ff28b2889aa65fbbddf4ca444f9fe
Diffstat (limited to 'library/cpp/yt/string')
| Mode | File | Lines changed |
| --- | --- | ---: |
| -rw-r--r-- | library/cpp/yt/string/format-inl.h | 22 |
| -rw-r--r-- | library/cpp/yt/string/string_builder-inl.h | 4 |
| -rw-r--r-- | library/cpp/yt/string/ya.make | 6 |

3 files changed, 27 insertions, 5 deletions
```diff
diff --git a/library/cpp/yt/string/format-inl.h b/library/cpp/yt/string/format-inl.h
index 89b55650b79..d17d2783dbe 100644
--- a/library/cpp/yt/string/format-inl.h
+++ b/library/cpp/yt/string/format-inl.h
@@ -427,7 +427,9 @@ auto MakeLazyMultiValueFormatter(TStringBuf format, TArgs&&... args)
 template <class TStringBuilder>
 void FormatString(TStringBuilder* builder, TStringBuf value, TStringBuf spec)
 {
-    if (!spec) {
+    // Fast path: plain "%v" (the overwhelmingly common case) and empty spec
+    // both mean "emit the string verbatim". Skip alignment/flag parsing.
+    if (spec.empty() || (spec.size() == 1 && spec.front() == NDetail::GenericSpecSymbol)) {
         builder->AppendString(value);
         return;
     }
@@ -1021,8 +1023,22 @@ concept CFormatter = CInvocable<T, void(size_t, TStringBuilderBase*, TStringBuf)
 
 ////////////////////////////////////////////////////////////////////////////////
 
+// NB: |spec| is tiny (usually a single char), so an inline scan beats
+// the AVX2 |memchr| that |TStringBuf::Contains| dispatches to.
+Y_FORCE_INLINE bool SpecContains(TStringBuf spec, char symbol)
+{
+    for (char c : spec) {
+        if (c == symbol) {
+            return true;
+        }
+    }
+    return false;
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
 template <CFormatter TFormatter>
-void RunFormatterAt(
+Y_FORCE_INLINE void RunFormatterAt(
     const TFormatter& formatter,
     size_t index,
     TStringBuilderBase* builder,
@@ -1031,7 +1047,7 @@ void RunFormatterAt(
     bool doubleQuotes)
 {
     // 'n' means 'nothing'; skip the argument.
-    if (!spec.Contains('n')) {
+    if (!SpecContains(spec, 'n')) {
         if (singleQuotes) {
             builder->AppendChar('\'');
         }
diff --git a/library/cpp/yt/string/string_builder-inl.h b/library/cpp/yt/string/string_builder-inl.h
index 9a0e34ca58d..ad1958d1e85 100644
--- a/library/cpp/yt/string/string_builder-inl.h
+++ b/library/cpp/yt/string/string_builder-inl.h
@@ -103,9 +103,9 @@ inline void TStringBuilder::DoReset()
 
 inline void TStringBuilder::DoReserve(size_t newLength)
 {
-    Buffer_.resize(newLength);
+    ResizeUninitialized(Buffer_, newLength);
     auto capacity = Buffer_.capacity();
-    Buffer_.resize(capacity);
+    ResizeUninitialized(Buffer_, capacity);
     Begin_ = &*Buffer_.begin();
     End_ = Begin_ + capacity;
 }
diff --git a/library/cpp/yt/string/ya.make b/library/cpp/yt/string/ya.make
index d22a6bf2b7f..bea7df0b1bf 100644
--- a/library/cpp/yt/string/ya.make
+++ b/library/cpp/yt/string/ya.make
@@ -32,3 +32,9 @@ END()
 RECURSE_FOR_TESTS(
     unittests
 )
+
+IF (NOT OPENSOURCE)
+    RECURSE(
+        benchmark
+    )
+ENDIF()
```
