aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/yt/string/format-inl.h
diff options
context:
space:
mode:
authorAlexander Smirnov <alex@ydb.tech>2024-11-20 11:14:58 +0000
committerAlexander Smirnov <alex@ydb.tech>2024-11-20 11:14:58 +0000
commit31773f157bf8164364649b5f470f52dece0a4317 (patch)
tree33d0f7eef45303ab68cf08ab381ce5e5e36c5240 /library/cpp/yt/string/format-inl.h
parent2c7938962d8689e175574fc1e817c05049f27905 (diff)
parenteff600952d5dfe17942f38f510a8ac2b203bb3a5 (diff)
downloadydb-31773f157bf8164364649b5f470f52dece0a4317.tar.gz
Merge branch 'rightlib' into mergelibs-241120-1113
Diffstat (limited to 'library/cpp/yt/string/format-inl.h')
-rw-r--r--library/cpp/yt/string/format-inl.h236
1 files changed, 194 insertions, 42 deletions
diff --git a/library/cpp/yt/string/format-inl.h b/library/cpp/yt/string/format-inl.h
index 7f1f725966..e90d68bfe1 100644
--- a/library/cpp/yt/string/format-inl.h
+++ b/library/cpp/yt/string/format-inl.h
@@ -871,12 +871,43 @@ concept CFormatter = CInvocable<T, void(size_t, TStringBuilderBase*, TStringBuf)
////////////////////////////////////////////////////////////////////////////////
template <CFormatter TFormatter>
-void RunFormatter(
+void RunFormatterAt(
+ const TFormatter& formatter,
+ size_t index,
+ TStringBuilderBase* builder,
+ TStringBuf spec,
+ bool singleQuotes,
+ bool doubleQuotes)
+{
+ // 'n' means 'nothing'; skip the argument.
+ if (!spec.Contains('n')) {
+ if (singleQuotes) {
+ builder->AppendChar('\'');
+ }
+ if (doubleQuotes) {
+ builder->AppendChar('"');
+ }
+
+ formatter(index, builder, spec);
+
+ if (singleQuotes) {
+ builder->AppendChar('\'');
+ }
+ if (doubleQuotes) {
+ builder->AppendChar('"');
+ }
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <CFormatter TFormatter>
+void RunFormatterFullScan(
TStringBuilderBase* builder,
TStringBuf format,
- const TFormatter& formatter)
+ const TFormatter& formatter,
+ int argIndex = 0)
{
- size_t argIndex = 0;
auto current = std::begin(format);
auto end = std::end(format);
while (true) {
@@ -912,27 +943,13 @@ void RunFormatter(
bool singleQuotes = false;
bool doubleQuotes = false;
+ static constexpr TStringBuf conversionSpecifiers =
+ "diuoxXfFeEgGaAcspn";
+
while (
argFormatEnd != end &&
- *argFormatEnd != GenericSpecSymbol && // value in generic format
- *argFormatEnd != 'd' && // others are standard specifiers supported by printf
- *argFormatEnd != 'i' &&
- *argFormatEnd != 'u' &&
- *argFormatEnd != 'o' &&
- *argFormatEnd != 'x' &&
- *argFormatEnd != 'X' &&
- *argFormatEnd != 'f' &&
- *argFormatEnd != 'F' &&
- *argFormatEnd != 'e' &&
- *argFormatEnd != 'E' &&
- *argFormatEnd != 'g' &&
- *argFormatEnd != 'G' &&
- *argFormatEnd != 'a' &&
- *argFormatEnd != 'A' &&
- *argFormatEnd != 'c' &&
- *argFormatEnd != 's' &&
- *argFormatEnd != 'p' &&
- *argFormatEnd != 'n')
+ *argFormatEnd != GenericSpecSymbol && // value in generic format
+ !conversionSpecifiers.Contains(*argFormatEnd)) // others are standard specifiers supported by printf
{
switch (*argFormatEnd) {
case 'q':
@@ -952,27 +969,162 @@ void RunFormatter(
++argFormatEnd;
}
- // 'n' means 'nothing'; skip the argument.
- if (*argFormatBegin != 'n') {
- // Format argument.
- TStringBuf argFormat(argFormatBegin, argFormatEnd);
- if (singleQuotes) {
- builder->AppendChar('\'');
- }
- if (doubleQuotes) {
- builder->AppendChar('"');
- }
- formatter(argIndex++, builder, argFormat);
- if (singleQuotes) {
- builder->AppendChar('\'');
- }
- if (doubleQuotes) {
- builder->AppendChar('"');
+ RunFormatterAt(
+ formatter,
+ argIndex++,
+ builder,
+ TStringBuf{argFormatBegin, argFormatEnd},
+ singleQuotes,
+ doubleQuotes);
+
+ current = argFormatEnd;
+ }
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+template <CFormatter TFormatter, class... TArgs>
+void RunFormatter(
+ TStringBuilderBase* builder,
+ TBasicFormatString<TArgs...> formatString,
+ const TFormatter& formatter)
+{
+ auto isValidLocations = [] (const auto& t) {
+ return std::get<0>(t) != std::get<1>(t);
+ };
+ // Generally marker is simply "%v" e.g. 2 symbols.
+ // We assume it is used to insert something for roughly 5 symbols
+ // of size.
+ builder->Preallocate(std::size(formatString.Get()) + sizeof...(TArgs) * (5 - 2));
+
+ // Empty marker positions -- fallback to the normal impl.
+ if constexpr (sizeof...(TArgs) != 0) {
+ if (!isValidLocations(formatString.Markers[0])) {
+ RunFormatterFullScan(builder, formatString.Get(), formatter);
+ return;
+ }
+ } else {
+ if (formatString.Escapes[0] == -2) {
+ RunFormatterFullScan(builder, formatString.Get(), formatter);
+ return;
+ }
+ }
+
+ int escapesFound = 0;
+ int currentPos = 0;
+
+ auto beginIt = formatString.Get().begin();
+ auto size = formatString.Get().size();
+
+ const auto& [markers, escapes] = std::tie(formatString.Markers, formatString.Escapes);
+
+ auto appendVerbatim = [&] (int offsetToEnd) {
+ builder->AppendString(TStringBuf{beginIt + currentPos, beginIt + offsetToEnd});
+ };
+
+ auto processEscape = [&] () mutable {
+ // OpenMP doesn't support structured bindings :(.
+ auto escapePos = formatString.Escapes[escapesFound];
+
+ // Append everything that's present until %%.
+ appendVerbatim(escapePos);
+
+ // Append '%'.
+ builder->AppendChar('%');
+
+ // Advance position to first '%' pos + 2.
+ currentPos = escapePos + 2;
+ };
+
+ int argIndex = 0;
+
+ while(argIndex < std::ssize(markers)) {
+ auto [markerStart, markerEnd] = markers[argIndex];
+
+ if (
+ escapes[escapesFound] != -1 &&
+ escapes[escapesFound] - currentPos < markerStart - currentPos)
+ {
+ // Escape sequence is closer.
+ processEscape();
+ ++escapesFound;
+ } else {
+ // Normal marker is closer.
+
+ // Append everything that's present until marker start.
+ appendVerbatim(markerStart);
+
+ // Parsing format string.
+
+ // We skip '%' here since spec does not contain it.
+ auto spec = TStringBuf{beginIt + markerStart + 1, beginIt + markerEnd};
+ bool singleQuotes = false;
+ bool doubleQuotes = false;
+ for (auto c : spec) {
+ if (c == 'q') {
+ singleQuotes = true;
+ }
+ if (c == 'Q') {
+ doubleQuotes = true;
+ }
}
+ RunFormatterAt(
+ formatter,
+ argIndex,
+ builder,
+ spec,
+ singleQuotes,
+ doubleQuotes);
+
+ // Advance position past marker's end.
+ currentPos = markerEnd;
+ ++argIndex;
+ continue;
}
- current = argFormatEnd;
+ // Check if the number of escapes we have found has exceeded the recorded limit
+ // e.g. we have to manually scan the rest of the formatString.
+ if (escapesFound == std::ssize(escapes)) {
+ break;
+ }
}
+
+ // Process remaining escapes.
+ while (escapesFound < std::ssize(escapes)) {
+ if (escapes[escapesFound] == -1) {
+ break;
+ }
+
+ processEscape();
+ ++escapesFound;
+ }
+
+ // We either ran out of markers or reached the limit of allowed
+ // escape sequences.
+
+ // Happy path: no limit on escape sequences.
+ if (escapesFound != std::ssize(escapes)) {
+ // Append whatever's left until the end.
+ appendVerbatim(size);
+ return;
+ }
+
+ // Sad path: we have to fully parse remainder of format.
+ RunFormatterFullScan(builder, TStringBuf{beginIt + currentPos, beginIt + size}, formatter, argIndex);
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
+// For benchmarking purposes.
+template <class... TArgs>
+TString FormatOld(TFormatString<TArgs...> format, TArgs&&... args)
+{
+ TStringBuilder builder;
+ if constexpr ((CFormattable<TArgs> && ...)) {
+ NYT::NDetail::TValueFormatter<0, TArgs...> formatter(args...);
+ NYT::NDetail::RunFormatterFullScan(&builder, format.Get(), formatter);
+ }
+ return builder.Flush();
}
} // namespace NDetail
@@ -991,7 +1143,7 @@ void Format(TStringBuilderBase* builder, TFormatString<TArgs...> format, TArgs&&
// a second error.
if constexpr ((CFormattable<TArgs> && ...)) {
NYT::NDetail::TValueFormatter<0, TArgs...> formatter(args...);
- NYT::NDetail::RunFormatter(builder, format.Get(), formatter);
+ NYT::NDetail::RunFormatter(builder, format, formatter);
}
}
@@ -1012,7 +1164,7 @@ void FormatVector(
const TVector& vec)
{
NYT::NDetail::TRangeFormatter<typename TVector::value_type> formatter(vec);
- NYT::NDetail::RunFormatter(builder, format, formatter);
+ NYT::NDetail::RunFormatterFullScan(builder, format, formatter);
}
template <class TVector>
@@ -1022,7 +1174,7 @@ void FormatVector(
const TVector& vec)
{
NYT::NDetail::TRangeFormatter<typename TVector::value_type> formatter(vec);
- NYT::NDetail::RunFormatter(builder, format, formatter);
+ NYT::NDetail::RunFormatterFullScan(builder, format, formatter);
}
template <size_t Length, class TVector>