diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/yt/string | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/yt/string')
-rw-r--r-- | library/cpp/yt/string/enum-inl.h | 118 | ||||
-rw-r--r-- | library/cpp/yt/string/enum.cpp | 44 | ||||
-rw-r--r-- | library/cpp/yt/string/enum.h | 31 | ||||
-rw-r--r-- | library/cpp/yt/string/format-inl.h | 744 | ||||
-rw-r--r-- | library/cpp/yt/string/format.h | 114 | ||||
-rw-r--r-- | library/cpp/yt/string/guid.cpp | 22 | ||||
-rw-r--r-- | library/cpp/yt/string/guid.h | 14 | ||||
-rw-r--r-- | library/cpp/yt/string/string.cpp | 272 | ||||
-rw-r--r-- | library/cpp/yt/string/string.h | 221 | ||||
-rw-r--r-- | library/cpp/yt/string/string_builder-inl.h | 129 | ||||
-rw-r--r-- | library/cpp/yt/string/string_builder.h | 116 | ||||
-rw-r--r-- | library/cpp/yt/string/unittests/enum_ut.cpp | 61 | ||||
-rw-r--r-- | library/cpp/yt/string/unittests/format_ut.cpp | 149 | ||||
-rw-r--r-- | library/cpp/yt/string/unittests/guid_ut.cpp | 58 | ||||
-rw-r--r-- | library/cpp/yt/string/unittests/string_ut.cpp | 52 | ||||
-rw-r--r-- | library/cpp/yt/string/unittests/ya.make | 17 | ||||
-rw-r--r-- | library/cpp/yt/string/ya.make | 30 |
17 files changed, 2192 insertions, 0 deletions
diff --git a/library/cpp/yt/string/enum-inl.h b/library/cpp/yt/string/enum-inl.h new file mode 100644 index 0000000000..ab8acff71b --- /dev/null +++ b/library/cpp/yt/string/enum-inl.h @@ -0,0 +1,118 @@ +#ifndef ENUM_INL_H_ +#error "Direct inclusion of this file is not allowed, include enum.h" +// For the sake of sane code completion. +#include "enum.h" +#endif + +#include <util/string/printf.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +namespace NDetail { + +[[noreturn]] +void ThrowMalformedEnumValueException( + TStringBuf typeName, + TStringBuf value); + +void FormatUnknownEnumValue( + TStringBuilderBase* builder, + TStringBuf name, + i64 value); + +} // namespace NDetail + +template <class T> +std::optional<T> TryParseEnum(TStringBuf value) +{ + static_assert(TEnumTraits<T>::IsEnum); + + auto tryFromString = [] (TStringBuf value) -> std::optional<T> { + T result; + if (auto ok = TEnumTraits<T>::FindValueByLiteral(DecodeEnumValue(value), &result)) { + return result; + } + return {}; + }; + + if constexpr (TEnumTraits<T>::IsBitEnum) { + T result{}; + TStringBuf token; + while (value.NextTok('|', token)) { + if (auto scalar = tryFromString(StripString(token))) { + result |= *scalar; + } else { + return {}; + } + } + return result; + } else { + return tryFromString(value); + } +} + +template <class T> +T ParseEnum(TStringBuf value) +{ + if (auto optionalResult = TryParseEnum<T>(value)) { + return *optionalResult; + } + NDetail::ThrowMalformedEnumValueException(TEnumTraits<T>::GetTypeName(), value); +} + +template <class T> +void FormatEnum(TStringBuilderBase* builder, T value, bool lowerCase) +{ + static_assert(TEnumTraits<T>::IsEnum); + + auto formatScalarValue = [builder, lowerCase] (T value) { + auto* literal = TEnumTraits<T>::FindLiteralByValue(value); + if (!literal) { + YT_VERIFY(!TEnumTraits<T>::IsBitEnum); + NDetail::FormatUnknownEnumValue( + builder, + TEnumTraits<T>::GetTypeName(), + 
static_cast<typename TEnumTraits<T>::TUnderlying>(value)); + return; + } + + if (lowerCase) { + CamelCaseToUnderscoreCase(builder, *literal); + } else { + builder->AppendString(*literal); + } + }; + + if constexpr (TEnumTraits<T>::IsBitEnum) { + if (auto* literal = TEnumTraits<T>::FindLiteralByValue(value)) { + formatScalarValue(value); + return; + } + auto first = true; + for (auto scalarValue : TEnumTraits<T>::GetDomainValues()) { + if (Any(value & scalarValue)) { + if (!first) { + builder->AppendString(TStringBuf(" | ")); + } + first = false; + formatScalarValue(scalarValue); + } + } + } else { + formatScalarValue(value); + } +} + +template <class T> +TString FormatEnum(T value, typename TEnumTraits<T>::TType*) +{ + TStringBuilder builder; + FormatEnum(&builder, value, /*lowerCase*/ true); + return builder.Flush(); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/string/enum.cpp b/library/cpp/yt/string/enum.cpp new file mode 100644 index 0000000000..7cb8e5c6b6 --- /dev/null +++ b/library/cpp/yt/string/enum.cpp @@ -0,0 +1,44 @@ +#include "enum.h" + +#include "format.h" + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +TString DecodeEnumValue(TStringBuf value) +{ + auto camelValue = UnderscoreCaseToCamelCase(value); + auto underscoreValue = CamelCaseToUnderscoreCase(camelValue); + if (value != underscoreValue) { + throw TSimpleException(Format("Enum value %Qv is not in a proper underscore case; did you mean %Qv?", + value, + underscoreValue)); + } + return camelValue; +} + +TString EncodeEnumValue(TStringBuf value) +{ + return CamelCaseToUnderscoreCase(value); +} + +namespace NDetail { + +void ThrowMalformedEnumValueException(TStringBuf typeName, TStringBuf value) +{ + throw TSimpleException(Format("Error parsing %v value %Qv", + typeName, + value)); +} + +void FormatUnknownEnumValue(TStringBuilderBase* builder, TStringBuf 
name, i64 value) +{ + builder->AppendFormat("%v(%v)", name, value); +} + +} // namespace NDetail + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/string/enum.h b/library/cpp/yt/string/enum.h new file mode 100644 index 0000000000..10dc02610f --- /dev/null +++ b/library/cpp/yt/string/enum.h @@ -0,0 +1,31 @@ +#pragma once + +#include "string.h" + +#include <library/cpp/yt/misc/enum.h> + +#include <optional> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +TString DecodeEnumValue(TStringBuf value); +TString EncodeEnumValue(TStringBuf value); + +template <class T> +T ParseEnum(TStringBuf value); + +template <class T> +void FormatEnum(TStringBuilderBase* builder, T value, bool lowerCase); + +template <class T> +TString FormatEnum(T value, typename TEnumTraits<T>::TType* = nullptr); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT + +#define ENUM_INL_H_ +#include "enum-inl.h" +#undef ENUM_INL_H_ diff --git a/library/cpp/yt/string/format-inl.h b/library/cpp/yt/string/format-inl.h new file mode 100644 index 0000000000..5484d4a216 --- /dev/null +++ b/library/cpp/yt/string/format-inl.h @@ -0,0 +1,744 @@ +#ifndef FORMAT_INL_H_ +#error "Direct inclusion of this file is not allowed, include format.h" +// For the sake of sane code completion. 
+#include "format.h" +#endif + +#include "enum.h" +#include "string.h" + +#include <library/cpp/yt/assert/assert.h> + +#include <library/cpp/yt/small_containers/compact_vector.h> + +#include <library/cpp/yt/misc/enum.h> + +#include <cctype> +#include <optional> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +static const char GenericSpecSymbol = 'v'; + +inline bool IsQuotationSpecSymbol(char symbol) +{ + return symbol == 'Q' || symbol == 'q'; +} + +// TStringBuf +inline void FormatValue(TStringBuilderBase* builder, TStringBuf value, TStringBuf format) +{ + if (!format) { + builder->AppendString(value); + return; + } + + // Parse alignment. + bool alignLeft = false; + const char* current = format.begin(); + if (*current == '-') { + alignLeft = true; + ++current; + } + + bool hasAlign = false; + int alignSize = 0; + // |format| is a view and need not be null-terminated, so bound the digit scan explicitly. + while (current < format.end() && *current >= '0' && *current <= '9') { + hasAlign = true; + alignSize = 10 * alignSize + (*current - '0'); + if (alignSize > 1000000) { + builder->AppendString(TStringBuf("<alignment overflow>")); + return; + } + ++current; + } + + int padding = 0; + bool padLeft = false; + bool padRight = false; + if (hasAlign) { + padding = alignSize - value.size(); + if (padding < 0) { + padding = 0; + } + padLeft = !alignLeft; + padRight = alignLeft; + } + + bool singleQuotes = false; + bool doubleQuotes = false; + while (current < format.end()) { + if (*current == 'q') { + singleQuotes = true; + } else if (*current == 'Q') { + doubleQuotes = true; + } + ++current; + } + + if (padLeft) { + builder->AppendChar(' ', padding); + } + + if (singleQuotes || doubleQuotes) { + for (const char* valueCurrent = value.begin(); valueCurrent < value.end(); ++valueCurrent) { + char ch = *valueCurrent; + if (ch == '\n') { + builder->AppendString("\\n"); + } else if (ch == '\t') { + builder->AppendString("\\t"); + } else if (ch < PrintableASCIILow || ch > PrintableASCIIHigh) { + builder->AppendString("\\x"); + 
builder->AppendChar(Int2Hex[static_cast<ui8>(ch) >> 4]); + builder->AppendChar(Int2Hex[static_cast<ui8>(ch) & 0xf]); + } else if ((singleQuotes && ch == '\'') || (doubleQuotes && ch == '\"')) { + builder->AppendChar('\\'); + builder->AppendChar(ch); + } else { + builder->AppendChar(ch); + } + } + } else { + builder->AppendString(value); + } + + if (padRight) { + builder->AppendChar(' ', padding); + } +} + +// TString +inline void FormatValue(TStringBuilderBase* builder, const TString& value, TStringBuf format) +{ + FormatValue(builder, TStringBuf(value), format); +} + +// const char* +inline void FormatValue(TStringBuilderBase* builder, const char* value, TStringBuf format) +{ + FormatValue(builder, TStringBuf(value), format); +} + +// char +inline void FormatValue(TStringBuilderBase* builder, char value, TStringBuf format) +{ + FormatValue(builder, TStringBuf(&value, 1), format); +} + +// bool +inline void FormatValue(TStringBuilderBase* builder, bool value, TStringBuf format) +{ + // Parse custom flags. + bool lowercase = false; + const char* current = format.begin(); + while (current != format.end()) { + if (*current == 'l') { + ++current; + lowercase = true; + } else if (IsQuotationSpecSymbol(*current)) { + ++current; + } else + break; + } + + auto str = lowercase + ? (value ? TStringBuf("true") : TStringBuf("false")) + : (value ? 
TStringBuf("True") : TStringBuf("False")); + + builder->AppendString(str); +} + +// Fallback to ToString +struct TToStringFallbackValueFormatterTag +{ }; + +template <class TValue, class = void> +struct TValueFormatter +{ + static TToStringFallbackValueFormatterTag Do(TStringBuilderBase* builder, const TValue& value, TStringBuf format) + { + using ::ToString; + FormatValue(builder, ToString(value), format); + return {}; + } +}; + +// Enum +template <class TEnum> +struct TValueFormatter<TEnum, typename std::enable_if<TEnumTraits<TEnum>::IsEnum>::type> +{ + static void Do(TStringBuilderBase* builder, TEnum value, TStringBuf format) + { + // Parse custom flags. + bool lowercase = false; + const char* current = format.begin(); + while (current != format.end()) { + if (*current == 'l') { + ++current; + lowercase = true; + } else if (IsQuotationSpecSymbol(*current)) { + ++current; + } else { + break; + } + } + + FormatEnum(builder, value, lowercase); + } +}; + +template <class TRange, class TFormatter> +typename TFormattableView<TRange, TFormatter>::TBegin TFormattableView<TRange, TFormatter>::begin() const +{ + return RangeBegin; +} + +template <class TRange, class TFormatter> +typename TFormattableView<TRange, TFormatter>::TEnd TFormattableView<TRange, TFormatter>::end() const +{ + return RangeEnd; +} + +template <class TRange, class TFormatter> +TFormattableView<TRange, TFormatter> MakeFormattableView( + const TRange& range, + TFormatter&& formatter) +{ + return TFormattableView<TRange, std::decay_t<TFormatter>>{range.begin(), range.end(), std::forward<TFormatter>(formatter)}; +} + +template <class TRange, class TFormatter> +TFormattableView<TRange, TFormatter> MakeShrunkFormattableView( + const TRange& range, + TFormatter&& formatter, + size_t limit) +{ + return TFormattableView<TRange, std::decay_t<TFormatter>>{range.begin(), range.end(), std::forward<TFormatter>(formatter), limit}; +} + +template <class TRange, class TFormatter> +void 
FormatRange(TStringBuilderBase* builder, const TRange& range, const TFormatter& formatter, size_t limit = std::numeric_limits<size_t>::max()) +{ + builder->AppendChar('['); + size_t index = 0; + for (const auto& item : range) { + if (index > 0) { + builder->AppendString(DefaultJoinToStringDelimiter); + } + if (index == limit) { + builder->AppendString(DefaultRangeEllipsisFormat); + break; + } + formatter(builder, item); + ++index; + } + builder->AppendChar(']'); +} + +template <class TRange, class TFormatter> +void FormatKeyValueRange(TStringBuilderBase* builder, const TRange& range, const TFormatter& formatter, size_t limit = std::numeric_limits<size_t>::max()) +{ + builder->AppendChar('{'); + size_t index = 0; + for (const auto& item : range) { + if (index > 0) { + builder->AppendString(DefaultJoinToStringDelimiter); + } + if (index == limit) { + builder->AppendString(DefaultRangeEllipsisFormat); + break; + } + formatter(builder, item.first); + builder->AppendString(DefaultKeyValueDelimiter); + formatter(builder, item.second); + ++index; + } + builder->AppendChar('}'); +} + +// TFormattableView +template <class TRange, class TFormatter> +struct TValueFormatter<TFormattableView<TRange, TFormatter>> +{ + static void Do(TStringBuilderBase* builder, const TFormattableView<TRange, TFormatter>& range, TStringBuf /*format*/) + { + FormatRange(builder, range, range.Formatter, range.Limit); + } +}; + +template <class TFormatter> +TFormatterWrapper<TFormatter> MakeFormatterWrapper( + TFormatter&& formatter) +{ + return TFormatterWrapper<TFormatter>{ + .Formatter = std::move(formatter) + }; +} + +// TFormatterWrapper +template <class TFormatter> +struct TValueFormatter<TFormatterWrapper<TFormatter>> +{ + static void Do(TStringBuilderBase* builder, const TFormatterWrapper<TFormatter>& wrapper, TStringBuf /*format*/) + { + wrapper.Formatter(builder); + } +}; + +// std::vector +template <class T, class TAllocator> +struct TValueFormatter<std::vector<T, TAllocator>> +{ + static 
void Do(TStringBuilderBase* builder, const std::vector<T, TAllocator>& collection, TStringBuf /*format*/) + { + FormatRange(builder, collection, TDefaultFormatter()); + } +}; + +// TCompactVector +template <class T, unsigned N> +struct TValueFormatter<TCompactVector<T, N>> +{ + static void Do(TStringBuilderBase* builder, const TCompactVector<T, N>& collection, TStringBuf /*format*/) + { + FormatRange(builder, collection, TDefaultFormatter()); + } +}; + +// std::set +template <class T> +struct TValueFormatter<std::set<T>> +{ + static void Do(TStringBuilderBase* builder, const std::set<T>& collection, TStringBuf /*format*/) + { + FormatRange(builder, collection, TDefaultFormatter()); + } +}; + +// std::map +template <class K, class V> +struct TValueFormatter<std::map<K, V>> +{ + static void Do(TStringBuilderBase* builder, const std::map<K, V>& collection, TStringBuf /*format*/) + { + FormatKeyValueRange(builder, collection, TDefaultFormatter()); + } +}; + +// std::multimap +template <class K, class V> +struct TValueFormatter<std::multimap<K, V>> +{ + static void Do(TStringBuilderBase* builder, const std::multimap<K, V>& collection, TStringBuf /*format*/) + { + FormatKeyValueRange(builder, collection, TDefaultFormatter()); + } +}; + +// THashSet +template <class T> +struct TValueFormatter<THashSet<T>> +{ + static void Do(TStringBuilderBase* builder, const THashSet<T>& collection, TStringBuf /*format*/) + { + FormatRange(builder, collection, TDefaultFormatter()); + } +}; + +// THashMultiSet +template <class T> +struct TValueFormatter<THashMultiSet<T>> +{ + static void Do(TStringBuilderBase* builder, const THashMultiSet<T>& collection, TStringBuf /*format*/) + { + FormatRange(builder, collection, TDefaultFormatter()); + } +}; + +// THashMap +template <class K, class V> +struct TValueFormatter<THashMap<K, V>> +{ + static void Do(TStringBuilderBase* builder, const THashMap<K, V>& collection, TStringBuf /*format*/) + { + FormatKeyValueRange(builder, collection, 
TDefaultFormatter()); + } +}; + +// THashMultiMap +template <class K, class V> +struct TValueFormatter<THashMultiMap<K, V>> +{ + static void Do(TStringBuilderBase* builder, const THashMultiMap<K, V>& collection, TStringBuf /*format*/) + { + FormatKeyValueRange(builder, collection, TDefaultFormatter()); + } +}; + +// TEnumIndexedVector +template <class E, class T> +struct TValueFormatter<TEnumIndexedVector<E, T>> +{ + static void Do(TStringBuilderBase* builder, const TEnumIndexedVector<E, T>& collection, TStringBuf format) + { + builder->AppendChar('{'); + bool firstItem = true; + for (const auto& index : TEnumTraits<E>::GetDomainValues()) { + if (!firstItem) { + builder->AppendString(DefaultJoinToStringDelimiter); + } + FormatValue(builder, index, format); + builder->AppendString(": "); + FormatValue(builder, collection[index], format); + firstItem = false; + } + builder->AppendChar('}'); + } +}; + +// std::pair +template <class T1, class T2> +struct TValueFormatter<std::pair<T1, T2>> +{ + static void Do(TStringBuilderBase* builder, const std::pair<T1, T2>& value, TStringBuf format) + { + builder->AppendChar('{'); + FormatValue(builder, value.first, format); + builder->AppendString(TStringBuf(", ")); + FormatValue(builder, value.second, format); + builder->AppendChar('}'); + } +}; + +// std::optional +inline void FormatValue(TStringBuilderBase* builder, std::nullopt_t, TStringBuf /*format*/) +{ + builder->AppendString(TStringBuf("<null>")); +} + +template <class T> +struct TValueFormatter<std::optional<T>> +{ + static void Do(TStringBuilderBase* builder, const std::optional<T>& value, TStringBuf format) + { + if (value) { + FormatValue(builder, *value, format); + } else { + FormatValue(builder, std::nullopt, format); + } + } +}; + +template <class TValue> +auto FormatValue(TStringBuilderBase* builder, const TValue& value, TStringBuf format) -> + decltype(TValueFormatter<TValue>::Do(builder, value, format)) +{ + return TValueFormatter<TValue>::Do(builder, value, 
format); +} + +template <class TValue> +void FormatValueViaSprintf( + TStringBuilderBase* builder, + TValue value, + TStringBuf format, + TStringBuf genericSpec) +{ + constexpr int MaxFormatSize = 64; + constexpr int SmallResultSize = 64; + + auto copyFormat = [] (char* destination, const char* source, int length) { + int position = 0; + for (int index = 0; index < length; ++index) { + if (IsQuotationSpecSymbol(source[index])) { + continue; + } + destination[position] = source[index]; + ++position; + } + return destination + position; + }; + + char formatBuf[MaxFormatSize]; + YT_VERIFY(format.length() >= 1 && format.length() <= MaxFormatSize - 2); // one for %, one for \0 + formatBuf[0] = '%'; + if (format[format.length() - 1] == GenericSpecSymbol) { + char* formatEnd = copyFormat(formatBuf + 1, format.begin(), format.length() - 1); + ::memcpy(formatEnd, genericSpec.begin(), genericSpec.length()); + formatEnd[genericSpec.length()] = '\0'; + } else { + char* formatEnd = copyFormat(formatBuf + 1, format.begin(), format.length()); + *formatEnd = '\0'; + } + + char* result = builder->Preallocate(SmallResultSize); + size_t resultSize = ::snprintf(result, SmallResultSize, formatBuf, value); + if (resultSize >= SmallResultSize) { + result = builder->Preallocate(resultSize + 1); + YT_VERIFY(::snprintf(result, resultSize + 1, formatBuf, value) == static_cast<int>(resultSize)); + } + builder->Advance(resultSize); +} + +template <class TValue> +char* WriteIntToBufferBackwards(char* buffer, TValue value); + +template <class TValue> +void FormatValueViaHelper(TStringBuilderBase* builder, TValue value, TStringBuf format, TStringBuf genericSpec) +{ + if (format == TStringBuf("v")) { + const int MaxResultSize = 64; + char buffer[MaxResultSize]; + char* end = buffer + MaxResultSize; + char* start = WriteIntToBufferBackwards(end, value); + builder->AppendString(TStringBuf(start, end)); + } else { + FormatValueViaSprintf(builder, value, format, genericSpec); + } +} + +#define 
XX(valueType, castType, genericSpec) \ + inline void FormatValue(TStringBuilderBase* builder, valueType value, TStringBuf format) \ + { \ + FormatValueViaHelper(builder, static_cast<castType>(value), format, genericSpec); \ + } + +XX(i8, int, TStringBuf("d")) +XX(ui8, unsigned int, TStringBuf("u")) +XX(i16, int, TStringBuf("d")) +XX(ui16, unsigned int, TStringBuf("u")) +XX(i32, int, TStringBuf("d")) +XX(ui32, unsigned int, TStringBuf("u")) +XX(long, long, TStringBuf("ld")) +XX(unsigned long, unsigned long, TStringBuf("lu")) + +#undef XX + +#define XX(valueType, castType, genericSpec) \ + inline void FormatValue(TStringBuilderBase* builder, valueType value, TStringBuf format) \ + { \ + FormatValueViaSprintf(builder, static_cast<castType>(value), format, genericSpec); \ + } + +XX(double, double, TStringBuf("lf")) +XX(float, float, TStringBuf("f")) + +#undef XX + +// Pointer +template <class T> +void FormatValue(TStringBuilderBase* builder, T* value, TStringBuf format) +{ + FormatValueViaSprintf(builder, value, format, TStringBuf("p")); +} + +// TDuration (specialize for performance reasons) +inline void FormatValue(TStringBuilderBase* builder, TDuration value, TStringBuf /*format*/) +{ + builder->AppendFormat("%vus", value.MicroSeconds()); +} + +// TInstant (specialize for TFormatTraits) +inline void FormatValue(TStringBuilderBase* builder, TInstant value, TStringBuf format) +{ + // TODO(babenko): optimize + builder->AppendFormat("%v", ToString(value), format); +} + +//////////////////////////////////////////////////////////////////////////////// + +template <class TArgFormatter> +void FormatImpl( + TStringBuilderBase* builder, + TStringBuf format, + const TArgFormatter& argFormatter) +{ + size_t argIndex = 0; + auto current = format.begin(); + while (true) { + // Scan verbatim part until stop symbol. 
+ auto verbatimBegin = current; + auto verbatimEnd = verbatimBegin; + while (verbatimEnd != format.end() && *verbatimEnd != '%') { + ++verbatimEnd; + } + + // Copy verbatim part, if any. + size_t verbatimSize = verbatimEnd - verbatimBegin; + if (verbatimSize > 0) { + builder->AppendString(TStringBuf(verbatimBegin, verbatimSize)); + } + + // Handle stop symbol. + current = verbatimEnd; + if (current == format.end()) { + break; + } + + YT_ASSERT(*current == '%'); + ++current; + + // A lone trailing '%' leaves |current| at the end of the view; do not dereference it. + if (current != format.end() && *current == '%') { + // Verbatim %. + builder->AppendChar('%'); + ++current; + } else { + // Scan format part until stop symbol. + auto argFormatBegin = current; + auto argFormatEnd = argFormatBegin; + bool singleQuotes = false; + bool doubleQuotes = false; + + while ( + argFormatEnd != format.end() && + *argFormatEnd != GenericSpecSymbol && // value in generic format + *argFormatEnd != 'd' && // others are standard specifiers supported by printf + *argFormatEnd != 'i' && + *argFormatEnd != 'u' && + *argFormatEnd != 'o' && + *argFormatEnd != 'x' && + *argFormatEnd != 'X' && + *argFormatEnd != 'f' && + *argFormatEnd != 'F' && + *argFormatEnd != 'e' && + *argFormatEnd != 'E' && + *argFormatEnd != 'g' && + *argFormatEnd != 'G' && + *argFormatEnd != 'a' && + *argFormatEnd != 'A' && + *argFormatEnd != 'c' && + *argFormatEnd != 's' && + *argFormatEnd != 'p' && + *argFormatEnd != 'n') + { + if (*argFormatEnd == 'q') { + singleQuotes = true; + } else if (*argFormatEnd == 'Q') { + doubleQuotes = true; + } + ++argFormatEnd; + } + + // Handle end of format string. + if (argFormatEnd != format.end()) { + ++argFormatEnd; + } + + // 'n' means 'nothing'; skip the argument. + // An empty spec (format ended right after '%') still consumes an argument, as before. + if (argFormatBegin == format.end() || *argFormatBegin != 'n') { + // Format argument. 
+ TStringBuf argFormat(argFormatBegin, argFormatEnd); + if (singleQuotes) { + builder->AppendChar('\''); + } + if (doubleQuotes) { + builder->AppendChar('"'); + } + argFormatter(argIndex++, builder, argFormat); + if (singleQuotes) { + builder->AppendChar('\''); + } + if (doubleQuotes) { + builder->AppendChar('"'); + } + } + + current = argFormatEnd; + } + } +} + +//////////////////////////////////////////////////////////////////////////////// + +template <class T> +struct TFormatTraits +{ + static constexpr bool HasCustomFormatValue = !std::is_same_v< + decltype(FormatValue( + static_cast<TStringBuilderBase*>(nullptr), + *static_cast<const T*>(nullptr), + TStringBuf())), + TToStringFallbackValueFormatterTag>; +}; + +//////////////////////////////////////////////////////////////////////////////// + +template <size_t IndexBase, class... TArgs> +struct TArgFormatterImpl; + +template <size_t IndexBase> +struct TArgFormatterImpl<IndexBase> +{ + void operator() (size_t /*index*/, TStringBuilderBase* builder, TStringBuf /*format*/) const + { + builder->AppendString(TStringBuf("<missing argument>")); + } +}; + +template <size_t IndexBase, class THeadArg, class... TTailArgs> +struct TArgFormatterImpl<IndexBase, THeadArg, TTailArgs...> +{ + explicit TArgFormatterImpl(const THeadArg& headArg, const TTailArgs&... tailArgs) + : HeadArg(headArg) + , TailFormatter(tailArgs...) + { } + + const THeadArg& HeadArg; + TArgFormatterImpl<IndexBase + 1, TTailArgs...> TailFormatter; + + void operator() (size_t index, TStringBuilderBase* builder, TStringBuf format) const + { + YT_ASSERT(index >= IndexBase); + if (index == IndexBase) { + FormatValue(builder, HeadArg, format); + } else { + TailFormatter(index, builder, format); + } + } +}; + +//////////////////////////////////////////////////////////////////////////////// + +template <size_t Length, class... TArgs> +void Format( + TStringBuilderBase* builder, + const char (&format)[Length], + TArgs&&... 
args) +{ + Format(builder, TStringBuf(format, Length - 1), std::forward<TArgs>(args)...); +} + +template <class... TArgs> +void Format( + TStringBuilderBase* builder, + TStringBuf format, + TArgs&&... args) +{ + TArgFormatterImpl<0, TArgs...> argFormatter(args...); + FormatImpl(builder, format, argFormatter); +} + +template <size_t Length, class... TArgs> +TString Format( + const char (&format)[Length], + TArgs&&... args) +{ + TStringBuilder builder; + Format(&builder, format, std::forward<TArgs>(args)...); + return builder.Flush(); +} + +template <class... TArgs> +TString Format( + TStringBuf format, + TArgs&&... args) +{ + TStringBuilder builder; + Format(&builder, format, std::forward<TArgs>(args)...); + return builder.Flush(); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/string/format.h b/library/cpp/yt/string/format.h new file mode 100644 index 0000000000..9708fe5906 --- /dev/null +++ b/library/cpp/yt/string/format.h @@ -0,0 +1,114 @@ +#pragma once + +#include "string_builder.h" + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +/* + * Format: a type-safe and fast formatting utility. + * + * Basically works as a type-safe analogue of |sprintf| and is expected to + * be backwards-compatible with the latter. + * + * Like Go's |Sprintf|, supports the ultimate format specifier |v| + * causing arguments to be emitted in default format. + * This is the default and preferred way of formatting things, + * which should be used in newer code. + * + * |Format| may currently invoke |sprintf| internally for emitting numeric and some other + * types. You can always write your own optimized implementation, if you wish :) + * + * In addition to the usual |sprintf|, supports a number of non-standard flags: + * + * |q| Causes the argument to be surrounded with single quotes (|'|). + * Applies to all types. 
+ * + * |Q| Causes the argument to be surrounded with double quotes (|"|). + * Applies to all types. + * + * |l| The argument is emitted in "lowercase" style. + * Only applies to enums and bools. + * + * The following argument types are supported: + * + * Strings (including |const char*|, |TStringBuf|, and |TString|) and chars: + * Emitted as is. Fast. + * + * Numerics and pointers: + * Emitted using |sprintf|. Maybe not that fast. + * + * |bool|: + * Emitted either as |True| and |False| or |true| and |false| (if lowercase mode is ON). + * + * Enums: + * Emitted in either camel (|SomeName|) or in lowercase-with-underscores style + * (|some_name|, if lowercase mode is ON). + * + * Nullables: + * |std::nullopt| is emitted as |<null>|. + * + * All others: + * Emitted as strings by calling |ToString|. + * + */ + +template <size_t Length, class... TArgs> +void Format(TStringBuilderBase* builder, const char (&format)[Length], TArgs&&... args); +template <class... TArgs> +void Format(TStringBuilderBase* builder, TStringBuf format, TArgs&&... args); + +template <size_t Length, class... TArgs> +TString Format(const char (&format)[Length], TArgs&&... args); +template <class... TArgs> +TString Format(TStringBuf format, TArgs&&... args); + +//////////////////////////////////////////////////////////////////////////////// + +template <class TRange, class TFormatter> +struct TFormattableView +{ + using TBegin = std::decay_t<decltype(std::declval<const TRange>().begin())>; + using TEnd = std::decay_t<decltype(std::declval<const TRange>().end())>; + + TBegin RangeBegin; + TEnd RangeEnd; + TFormatter Formatter; + size_t Limit = std::numeric_limits<size_t>::max(); + + TBegin begin() const; + TEnd end() const; +}; + +//! Annotates a given #range with #formatter to be applied to each item. 
+template <class TRange, class TFormatter> +TFormattableView<TRange, TFormatter> MakeFormattableView( + const TRange& range, + TFormatter&& formatter); + +template <class TRange, class TFormatter> +TFormattableView<TRange, TFormatter> MakeShrunkFormattableView( + const TRange& range, + TFormatter&& formatter, + size_t limit); + +//////////////////////////////////////////////////////////////////////////////// + +template <class TFormatter> +struct TFormatterWrapper +{ + TFormatter Formatter; +}; + +template <class TFormatter> +TFormatterWrapper<TFormatter> MakeFormatterWrapper( + TFormatter&& formatter); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT + +#define FORMAT_INL_H_ +#include "format-inl.h" +#undef FORMAT_INL_H_ diff --git a/library/cpp/yt/string/guid.cpp b/library/cpp/yt/string/guid.cpp new file mode 100644 index 0000000000..6c133a9778 --- /dev/null +++ b/library/cpp/yt/string/guid.cpp @@ -0,0 +1,22 @@ +#include "guid.h" + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +void FormatValue(TStringBuilderBase* builder, TGuid value, TStringBuf /*format*/) +{ + char* begin = builder->Preallocate(MaxGuidStringSize); + char* end = WriteGuidToBuffer(begin, value); + builder->Advance(end - begin); +} + +TString ToString(TGuid guid) +{ + return ToStringViaBuilder(guid); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT + diff --git a/library/cpp/yt/string/guid.h b/library/cpp/yt/string/guid.h new file mode 100644 index 0000000000..75edbce5db --- /dev/null +++ b/library/cpp/yt/string/guid.h @@ -0,0 +1,14 @@ +#include <library/cpp/yt/misc/guid.h> + +#include "format.h" + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +void FormatValue(TStringBuilderBase* builder, TGuid value, TStringBuf /*format*/); +TString ToString(TGuid guid); + 
+//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/string/string.cpp b/library/cpp/yt/string/string.cpp new file mode 100644 index 0000000000..7440ac3fdd --- /dev/null +++ b/library/cpp/yt/string/string.cpp @@ -0,0 +1,272 @@ +#include "string.h" +#include "format.h" + +#include <library/cpp/yt/assert/assert.h> + +#include <util/generic/hash.h> + +#include <util/string/ascii.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +void UnderscoreCaseToCamelCase(TStringBuilderBase* builder, TStringBuf str) +{ + bool first = true; + bool upper = true; + for (char c : str) { + if (c == '_') { + upper = true; + } else { + if (upper) { + if (!std::isalpha(c) && !first) { + builder->AppendChar('_'); + } + c = std::toupper(c); + } + builder->AppendChar(c); + upper = false; + } + first = false; + } +} + +TString UnderscoreCaseToCamelCase(TStringBuf str) +{ + TStringBuilder builder; + UnderscoreCaseToCamelCase(&builder, str); + return builder.Flush(); +} + +void CamelCaseToUnderscoreCase(TStringBuilderBase* builder, TStringBuf str) +{ + bool first = true; + for (char c : str) { + if (std::isupper(c) && std::isalpha(c)) { + if (!first) { + builder->AppendChar('_'); + } + c = std::tolower(c); + } + builder->AppendChar(c); + first = false; + } +} + +TString CamelCaseToUnderscoreCase(TStringBuf str) +{ + TStringBuilder builder; + CamelCaseToUnderscoreCase(&builder, str); + return builder.Flush(); +} + +//////////////////////////////////////////////////////////////////////////////// + +TString TrimLeadingWhitespaces(const TString& str) +{ + for (int i = 0; i < static_cast<int>(str.size()); ++i) { + if (str[i] != ' ') { + return str.substr(i); + } + } + return ""; +} + +TString Trim(const TString& str, const TString& whitespaces) +{ + size_t end = str.size(); + while (end > 0) { + size_t i = end - 1; + bool isWhitespace = false; + for (auto c : 
whitespaces) { + if (str[i] == c) { + isWhitespace = true; + break; + } + } + if (!isWhitespace) { + break; + } + --end; + } + + if (end == 0) { + return ""; + } + + size_t begin = str.find_first_not_of(whitespaces); + YT_VERIFY(begin != TString::npos); + YT_VERIFY(begin < end); + return str.substr(begin, end - begin); +} + +//////////////////////////////////////////////////////////////////////////////// + +namespace { + +ui16 DecimalDigits2[100] = { + 12336, 12592, 12848, 13104, 13360, 13616, 13872, 14128, 14384, 14640, + 12337, 12593, 12849, 13105, 13361, 13617, 13873, 14129, 14385, 14641, + 12338, 12594, 12850, 13106, 13362, 13618, 13874, 14130, 14386, 14642, + 12339, 12595, 12851, 13107, 13363, 13619, 13875, 14131, 14387, 14643, + 12340, 12596, 12852, 13108, 13364, 13620, 13876, 14132, 14388, 14644, + 12341, 12597, 12853, 13109, 13365, 13621, 13877, 14133, 14389, 14645, + 12342, 12598, 12854, 13110, 13366, 13622, 13878, 14134, 14390, 14646, + 12343, 12599, 12855, 13111, 13367, 13623, 13879, 14135, 14391, 14647, + 12344, 12600, 12856, 13112, 13368, 13624, 13880, 14136, 14392, 14648, + 12345, 12601, 12857, 13113, 13369, 13625, 13881, 14137, 14393, 14649 +}; + +template <class T> +char* WriteSignedIntToBufferBackwardsImpl(char* ptr, T value, TStringBuf min) +{ + if (value == 0) { + --ptr; + *ptr = '0'; + return ptr; + } + + // The negative value handling code below works incorrectly for min values. 
+ if (value == std::numeric_limits<T>::min()) { + ptr -= min.length(); + ::memcpy(ptr, min.begin(), min.length()); + return ptr; + } + + bool negative = false; + if (value < 0) { + negative = true; + value = -value; + } + + while (value >= 10) { + i64 rem = value % 100; + i64 quot = value / 100; + ptr -= 2; + ::memcpy(ptr, &DecimalDigits2[rem], 2); + value = quot; + } + + if (value > 0) { + --ptr; + *ptr = ('0' + value); + } + + if (negative) { + --ptr; + *ptr = '-'; + } + + return ptr; +} + +template <class T> +char* WriteUnsignedIntToBufferBackwardsImpl(char* ptr, T value) +{ + if (value == 0) { + --ptr; + *ptr = '0'; + return ptr; + } + + while (value >= 10) { + i64 rem = value % 100; + i64 quot = value / 100; + ptr -= 2; + ::memcpy(ptr, &DecimalDigits2[rem], 2); + value = quot; + } + + if (value > 0) { + --ptr; + *ptr = ('0' + value); + } + + return ptr; +} + +} // namespace + +template <> +char* WriteIntToBufferBackwards(char* ptr, i32 value) +{ + return WriteSignedIntToBufferBackwardsImpl(ptr, value, TStringBuf("-2147483647")); +} + +template <> +char* WriteIntToBufferBackwards(char* ptr, i64 value) +{ + return WriteSignedIntToBufferBackwardsImpl(ptr, value, TStringBuf("-9223372036854775808")); +} + +template <> +char* WriteIntToBufferBackwards(char* ptr, ui32 value) +{ + return WriteUnsignedIntToBufferBackwardsImpl(ptr, value); +} + +template <> +char* WriteIntToBufferBackwards(char* ptr, ui64 value) +{ + return WriteUnsignedIntToBufferBackwardsImpl(ptr, value); +} + +//////////////////////////////////////////////////////////////////////////////// + +size_t TCaseInsensitiveStringHasher::operator()(TStringBuf arg) const +{ + auto compute = [&] (char* buffer) { + for (size_t index = 0; index < arg.length(); ++index) { + buffer[index] = AsciiToLower(arg[index]); + } + return ComputeHash(TStringBuf(buffer, arg.length())); + }; + const size_t SmallSize = 256; + if (arg.length() <= SmallSize) { + std::array<char, SmallSize> stackBuffer; + return 
compute(stackBuffer.data()); + } else { + std::unique_ptr<char[]> heapBuffer(new char[arg.length()]); + return compute(heapBuffer.get()); + } +} + +bool TCaseInsensitiveStringEqualityComparer::operator()(TStringBuf lhs, TStringBuf rhs) const +{ + return AsciiEqualsIgnoreCase(lhs, rhs); +} + +//////////////////////////////////////////////////////////////////////////////// + +bool TryParseBool(TStringBuf value, bool* result) +{ + if (value == "true" || value == "1") { + *result = true; + return true; + } else if (value == "false" || value == "0") { + *result = false; + return true; + } else { + return false; + } +} + +bool ParseBool(TStringBuf value) +{ + bool result; + if (!TryParseBool(value, &result)) { + throw TSimpleException(Format("Error parsing boolean value %Qv", + value)); + } + return result; +} + +TStringBuf FormatBool(bool value) +{ + return value ? TStringBuf("true") : TStringBuf("false"); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/string/string.h b/library/cpp/yt/string/string.h new file mode 100644 index 0000000000..ae6c99caab --- /dev/null +++ b/library/cpp/yt/string/string.h @@ -0,0 +1,221 @@ +#pragma once + +#include "string_builder.h" + +#include <library/cpp/yt/exception/exception.h> + +#include <util/datetime/base.h> + +#include <util/generic/string.h> + +#include <util/string/strip.h> + +#include <vector> +#include <set> +#include <map> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +//! Formatters enable customizable way to turn an object into a string. +//! This default implementation uses |FormatValue|. 
+struct TDefaultFormatter +{ + template <class T> + void operator()(TStringBuilderBase* builder, const T& obj) const + { + FormatValue(builder, obj, TStringBuf("v")); + } +}; + +static constexpr TStringBuf DefaultJoinToStringDelimiter = ", "; +static constexpr TStringBuf DefaultKeyValueDelimiter = ": "; +static constexpr TStringBuf DefaultRangeEllipsisFormat = "..."; + +// ASCII characters from 0x20 = ' ' to 0x7e = '~' are printable. +static constexpr char PrintableASCIILow = 0x20; +static constexpr char PrintableASCIIHigh = 0x7e; +static constexpr TStringBuf Int2Hex = "0123456789abcdef"; + +//! Joins a range of items into a string intermixing them with the delimiter. +/*! + * \param builder String builder where the output goes. + * \param begin Iterator pointing to the first item (inclusive). + * \param end Iterator pointing to the last item (not inclusive). + * \param formatter Formatter to apply to the items. + * \param delimiter A delimiter to be inserted between items: ", " by default. + * \return The resulting combined string. + */ +template <class TIterator, class TFormatter> +void JoinToString( + TStringBuilderBase* builder, + const TIterator& begin, + const TIterator& end, + const TFormatter& formatter, + TStringBuf delimiter = DefaultJoinToStringDelimiter) +{ + for (auto current = begin; current != end; ++current) { + if (current != begin) { + builder->AppendString(delimiter); + } + formatter(builder, *current); + } +} + +template <class TIterator, class TFormatter> +TString JoinToString( + const TIterator& begin, + const TIterator& end, + const TFormatter& formatter, + TStringBuf delimiter = DefaultJoinToStringDelimiter) +{ + TStringBuilder builder; + JoinToString(&builder, begin, end, formatter, delimiter); + return builder.Flush(); +} + +//! A handy shortcut with default formatter. 
+template <class TIterator> +TString JoinToString( + const TIterator& begin, + const TIterator& end, + TStringBuf delimiter = DefaultJoinToStringDelimiter) +{ + return JoinToString(begin, end, TDefaultFormatter(), delimiter); +} + +//! Joins a collection of given items into a string intermixing them with the delimiter. +/*! + * \param collection A collection containing the items to be joined. + * \param formatter Formatter to apply to the items. + * \param delimiter A delimiter to be inserted between items; ", " by default. + */ +template <class TCollection, class TFormatter> +TString JoinToString( + const TCollection& collection, + const TFormatter& formatter, + TStringBuf delimiter = DefaultJoinToStringDelimiter) +{ + using std::begin; + using std::end; + return JoinToString(begin(collection), end(collection), formatter, delimiter); +} + +//! A handy shortcut with the default formatter. +template <class TCollection> +TString JoinToString( + const TCollection& collection, + TStringBuf delimiter = DefaultJoinToStringDelimiter) +{ + return JoinToString(collection, TDefaultFormatter(), delimiter); +} + +//! Concatenates a bunch of TStringBuf-like instances into TString. +template <class... Ts> +TString ConcatToString(Ts... args) +{ + size_t length = 0; + ((length += args.length()), ...); + + TString result; + result.reserve(length); + (result.append(args), ...); + + return result; +} + +//! Converts a range of items into strings. +template <class TIter, class TFormatter> +std::vector<TString> ConvertToStrings( + const TIter& begin, + const TIter& end, + const TFormatter& formatter, + size_t maxSize = std::numeric_limits<size_t>::max()) +{ + std::vector<TString> result; + for (auto it = begin; it != end; ++it) { + TStringBuilder builder; + formatter(&builder, *it); + result.push_back(builder.Flush()); + if (result.size() == maxSize) { + break; + } + } + return result; +} + +//! A handy shortcut with the default formatter. 
+template <class TIter> +std::vector<TString> ConvertToStrings( + const TIter& begin, + const TIter& end, + size_t maxSize = std::numeric_limits<size_t>::max()) +{ + return ConvertToStrings(begin, end, TDefaultFormatter(), maxSize); +} + +//! Converts a given collection of items into strings. +/*! + * \param collection A collection containing the items to be converted. + * \param formatter Formatter to apply to the items. + * \param maxSize Size limit for the resulting vector. + */ +template <class TCollection, class TFormatter> +std::vector<TString> ConvertToStrings( + const TCollection& collection, + const TFormatter& formatter, + size_t maxSize = std::numeric_limits<size_t>::max()) +{ + using std::begin; + using std::end; + return ConvertToStrings(begin(collection), end(collection), formatter, maxSize); +} + +//! A handy shortcut with default formatter. +template <class TCollection> +std::vector<TString> ConvertToStrings( + const TCollection& collection, + size_t maxSize = std::numeric_limits<size_t>::max()) +{ + return ConvertToStrings(collection, TDefaultFormatter(), maxSize); +} + +//////////////////////////////////////////////////////////////////////////////// + +void UnderscoreCaseToCamelCase(TStringBuilderBase* builder, TStringBuf str); +TString UnderscoreCaseToCamelCase(TStringBuf str); + +void CamelCaseToUnderscoreCase(TStringBuilderBase* builder, TStringBuf str); +TString CamelCaseToUnderscoreCase(TStringBuf str); + +TString TrimLeadingWhitespaces(const TString& str); +TString Trim(const TString& str, const TString& whitespaces); + +//////////////////////////////////////////////////////////////////////////////// + +//! Implemented for |[u]i(32|64)|. 
+template <class T> +char* WriteIntToBufferBackwards(char* ptr, T value); + +//////////////////////////////////////////////////////////////////////////////// + +struct TCaseInsensitiveStringHasher +{ + size_t operator()(TStringBuf arg) const; +}; + +struct TCaseInsensitiveStringEqualityComparer +{ + bool operator()(TStringBuf lhs, TStringBuf rhs) const; +}; + +//////////////////////////////////////////////////////////////////////////////// + +bool TryParseBool(TStringBuf value, bool* result); +bool ParseBool(TStringBuf value); +TStringBuf FormatBool(bool value); + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/string/string_builder-inl.h b/library/cpp/yt/string/string_builder-inl.h new file mode 100644 index 0000000000..151fcabf7f --- /dev/null +++ b/library/cpp/yt/string/string_builder-inl.h @@ -0,0 +1,129 @@ +#ifndef STRING_BUILDER_INL_H_ +#error "Direct inclusion of this file is not allowed, include string.h" +// For the sake of sane code completion. +#include "string_builder.h" +#endif + +#include <library/cpp/yt/assert/assert.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +inline char* TStringBuilderBase::Preallocate(size_t size) +{ + if (Y_UNLIKELY(End_ - Current_ < static_cast<ssize_t>(size))) { + size_t length = GetLength(); + auto newLength = std::max(length + size, MinBufferLength); + DoPreallocate(newLength); + Current_ = Begin_ + length; + } + return Current_; +} + +inline size_t TStringBuilderBase::GetLength() const +{ + return Current_ ? 
Current_ - Begin_ : 0; +} + +inline TStringBuf TStringBuilderBase::GetBuffer() const +{ + return TStringBuf(Begin_, Current_); +} + +inline void TStringBuilderBase::Advance(size_t size) +{ + Current_ += size; + YT_ASSERT(Current_ <= End_); +} + +inline void TStringBuilderBase::AppendChar(char ch) +{ + *Preallocate(1) = ch; + Advance(1); +} + +inline void TStringBuilderBase::AppendChar(char ch, int n) +{ + YT_ASSERT(n >= 0); + if (Y_LIKELY(0 != n)) { + char* dst = Preallocate(n); + ::memset(dst, ch, n); + Advance(n); + } +} + +inline void TStringBuilderBase::AppendString(TStringBuf str) +{ + if (Y_LIKELY(str)) { + char* dst = Preallocate(str.length()); + ::memcpy(dst, str.begin(), str.length()); + Advance(str.length()); + } +} + +inline void TStringBuilderBase::AppendString(const char* str) +{ + AppendString(TStringBuf(str)); +} + +inline void TStringBuilderBase::Reset() +{ + Begin_ = Current_ = End_ = nullptr; + DoReset(); +} + +template <class... TArgs> +void TStringBuilderBase::AppendFormat(TStringBuf format, TArgs&& ... args) +{ + Format(this, format, std::forward<TArgs>(args)...); +} + +template <size_t Length, class... TArgs> +void TStringBuilderBase::AppendFormat(const char (&format)[Length], TArgs&& ... 
args) +{ + Format(this, format, std::forward<TArgs>(args)...); +} + +//////////////////////////////////////////////////////////////////////////////// + +inline TString TStringBuilder::Flush() +{ + Buffer_.resize(GetLength()); + auto result = std::move(Buffer_); + Reset(); + return result; +} + +inline void TStringBuilder::DoReset() +{ + Buffer_ = {}; +} + +inline void TStringBuilder::DoPreallocate(size_t newLength) +{ + Buffer_.ReserveAndResize(newLength); + auto capacity = Buffer_.capacity(); + Buffer_.ReserveAndResize(capacity); + Begin_ = &*Buffer_.begin(); + End_ = Begin_ + capacity; +} + +//////////////////////////////////////////////////////////////////////////////// + +inline void FormatValue(TStringBuilderBase* builder, const TStringBuilder& value, TStringBuf /*format*/) +{ + builder->AppendString(value.GetBuffer()); +} + +template <class T> +TString ToStringViaBuilder(const T& value, TStringBuf spec) +{ + TStringBuilder builder; + FormatValue(&builder, value, spec); + return builder.Flush(); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT diff --git a/library/cpp/yt/string/string_builder.h b/library/cpp/yt/string/string_builder.h new file mode 100644 index 0000000000..0e13e70904 --- /dev/null +++ b/library/cpp/yt/string/string_builder.h @@ -0,0 +1,116 @@ +#pragma once + +#include <util/generic/string.h> + +namespace NYT { + +//////////////////////////////////////////////////////////////////////////////// + +// Forward declarations. +class TStringBuilderBase; +class TStringBuilder; +class TDelimitedStringBuilderWrapper; + +template <size_t Length, class... TArgs> +void Format(TStringBuilderBase* builder, const char (&format)[Length], TArgs&&... args); +template <class... TArgs> +void Format(TStringBuilderBase* builder, TStringBuf format, TArgs&&... args); + +//////////////////////////////////////////////////////////////////////////////// + +//! 
A simple helper for constructing strings by a sequence of appends. +class TStringBuilderBase +{ +public: + virtual ~TStringBuilderBase() = default; + + char* Preallocate(size_t size); + + size_t GetLength() const; + + TStringBuf GetBuffer() const; + + void Advance(size_t size); + + void AppendChar(char ch); + void AppendChar(char ch, int n); + + void AppendString(TStringBuf str); + void AppendString(const char* str); + + template <size_t Length, class... TArgs> + void AppendFormat(const char (&format)[Length], TArgs&&... args); + template <class... TArgs> + void AppendFormat(TStringBuf format, TArgs&&... args); + + void Reset(); + +protected: + char* Begin_ = nullptr; + char* Current_ = nullptr; + char* End_ = nullptr; + + virtual void DoReset() = 0; + virtual void DoPreallocate(size_t newLength) = 0; + + // -64 must account for any reasonable overhead in dynamic string allocation. + static constexpr size_t MinBufferLength = 1024 - 64; +}; + +//////////////////////////////////////////////////////////////////////////////// + +class TStringBuilder + : public TStringBuilderBase +{ +public: + TString Flush(); + +protected: + TString Buffer_; + + void DoReset() override; + void DoPreallocate(size_t size) override; +}; + +//////////////////////////////////////////////////////////////////////////////// + +template <class T> +TString ToStringViaBuilder(const T& value, TStringBuf spec = TStringBuf("v")); + +//////////////////////////////////////////////////////////////////////////////// + +//! Appends a certain delimiter starting from the second call. 
+class TDelimitedStringBuilderWrapper + : private TNonCopyable +{ +public: + TDelimitedStringBuilderWrapper( + TStringBuilderBase* builder, + TStringBuf delimiter = TStringBuf(", ")) + : Builder_(builder) + , Delimiter_(delimiter) + { } + + TStringBuilderBase* operator->() + { + if (!FirstCall_) { + Builder_->AppendString(Delimiter_); + } + FirstCall_ = false; + return Builder_; + } + +private: + TStringBuilderBase* const Builder_; + const TStringBuf Delimiter_; + + bool FirstCall_ = true; +}; + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace NYT + +#define STRING_BUILDER_INL_H_ +#include "string_builder-inl.h" +#undef STRING_BUILDER_INL_H_ diff --git a/library/cpp/yt/string/unittests/enum_ut.cpp b/library/cpp/yt/string/unittests/enum_ut.cpp new file mode 100644 index 0000000000..b8076fd8ee --- /dev/null +++ b/library/cpp/yt/string/unittests/enum_ut.cpp @@ -0,0 +1,61 @@ +#include <library/cpp/testing/gtest/gtest.h> + +#include <library/cpp/yt/string/enum.h> +#include <library/cpp/yt/string/format.h> + +#include <limits> + +namespace NYT { +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +// Some compile-time sanity checks. 
+DEFINE_ENUM(ESample, (One)(Two)); +static_assert(TFormatTraits<ESample>::HasCustomFormatValue); +static_assert(TFormatTraits<TEnumIndexedVector<ESample, int>>::HasCustomFormatValue); + +DEFINE_ENUM(EColor, + (Red) + (BlackAndWhite) +); + +DEFINE_BIT_ENUM(ELangs, + ((None) (0x00)) + ((Cpp) (0x01)) + ((Go) (0x02)) + ((Rust) (0x04)) + ((Python) (0x08)) + ((JavaScript) (0x10)) +) + +TEST(TFormatTest, Enum) +{ + EXPECT_EQ("Red", Format("%v", EColor::Red)); + EXPECT_EQ("red", Format("%lv", EColor::Red)); + + EXPECT_EQ("BlackAndWhite", Format("%v", EColor::BlackAndWhite)); + EXPECT_EQ("black_and_white", Format("%lv", EColor::BlackAndWhite)); + + EXPECT_EQ("EColor(100)", Format("%v", EColor(100))); + + EXPECT_EQ("JavaScript", Format("%v", ELangs::JavaScript)); + EXPECT_EQ("java_script", Format("%lv", ELangs::JavaScript)); + + EXPECT_EQ("None", Format("%v", ELangs::None)); + EXPECT_EQ("none", Format("%lv", ELangs::None)); + + EXPECT_EQ("Cpp | Go", Format("%v", ELangs::Cpp | ELangs::Go)); + EXPECT_EQ("cpp | go", Format("%lv", ELangs::Cpp | ELangs::Go)); + + auto four = ELangs::Cpp | ELangs::Go | ELangs::Python | ELangs::JavaScript; + EXPECT_EQ("Cpp | Go | Python | JavaScript", Format("%v", four)); + EXPECT_EQ("cpp | go | python | java_script", Format("%lv", four)); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace +} // namespace NYT + + diff --git a/library/cpp/yt/string/unittests/format_ut.cpp b/library/cpp/yt/string/unittests/format_ut.cpp new file mode 100644 index 0000000000..ee069bb2c0 --- /dev/null +++ b/library/cpp/yt/string/unittests/format_ut.cpp @@ -0,0 +1,149 @@ +#include <library/cpp/testing/gtest/gtest.h> + +#include <library/cpp/yt/string/format.h> + +#include <library/cpp/yt/small_containers/compact_vector.h> + +#include <limits> + +namespace NYT { +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +// Some compile-time sanity checks. 
+static_assert(TFormatTraits<int>::HasCustomFormatValue); +static_assert(TFormatTraits<double>::HasCustomFormatValue); +static_assert(TFormatTraits<void*>::HasCustomFormatValue); +static_assert(TFormatTraits<const char*>::HasCustomFormatValue); +static_assert(TFormatTraits<TStringBuf>::HasCustomFormatValue); +static_assert(TFormatTraits<TString>::HasCustomFormatValue); +static_assert(TFormatTraits<std::vector<int>>::HasCustomFormatValue); + +// N.B. TCompactVector<int, 1> is not buildable on Windows +static_assert(TFormatTraits<TCompactVector<int, 2>>::HasCustomFormatValue); +static_assert(TFormatTraits<std::set<int>>::HasCustomFormatValue); +static_assert(TFormatTraits<std::map<int, int>>::HasCustomFormatValue); +static_assert(TFormatTraits<std::multimap<int, int>>::HasCustomFormatValue); +static_assert(TFormatTraits<THashSet<int>>::HasCustomFormatValue); +static_assert(TFormatTraits<THashMap<int, int>>::HasCustomFormatValue); +static_assert(TFormatTraits<THashMultiSet<int>>::HasCustomFormatValue); +static_assert(TFormatTraits<std::pair<int, int>>::HasCustomFormatValue); +static_assert(TFormatTraits<std::optional<int>>::HasCustomFormatValue); +static_assert(TFormatTraits<TDuration>::HasCustomFormatValue); +static_assert(TFormatTraits<TInstant>::HasCustomFormatValue); + +struct TUnformattable +{ }; +static_assert(!TFormatTraits<TUnformattable>::HasCustomFormatValue); + +//////////////////////////////////////////////////////////////////////////////// + +TEST(TFormatTest, Nothing) +{ + EXPECT_EQ("abc", Format("a%nb%nc", 1, 2)); +} + +TEST(TFormatTest, Verbatim) +{ + EXPECT_EQ("", Format("")); + EXPECT_EQ("test", Format("test")); + EXPECT_EQ("%", Format("%%")); + EXPECT_EQ("%hello%world%", Format("%%hello%%world%%")); +} + +TEST(TFormatTest, MultipleArgs) +{ + EXPECT_EQ("2+2=4", Format("%v+%v=%v", 2, 2, 4)); +} + +TEST(TFormatTest, Strings) +{ + EXPECT_EQ("test", Format("%s", "test")); + EXPECT_EQ("test", Format("%s", TStringBuf("test"))); + EXPECT_EQ("test", 
Format("%s", TString("test"))); + + EXPECT_EQ(" abc", Format("%6s", TString("abc"))); + EXPECT_EQ("abc ", Format("%-6s", TString("abc"))); + EXPECT_EQ(" abc", Format("%10v", TString("abc"))); + EXPECT_EQ("abc ", Format("%-10v", TString("abc"))); + EXPECT_EQ("abc", Format("%2s", TString("abc"))); + EXPECT_EQ("abc", Format("%-2s", TString("abc"))); + EXPECT_EQ("abc", Format("%0s", TString("abc"))); + EXPECT_EQ("abc", Format("%-0s", TString("abc"))); + EXPECT_EQ(100, std::ssize(Format("%100v", "abc"))); +} + +TEST(TFormatTest, Integers) +{ + EXPECT_EQ("123", Format("%d", 123)); + EXPECT_EQ("123", Format("%v", 123)); + + EXPECT_EQ("042", Format("%03d", 42)); + EXPECT_EQ("42", Format("%01d", 42)); + + EXPECT_EQ("2147483647", Format("%d", std::numeric_limits<i32>::max())); + EXPECT_EQ("-2147483648", Format("%d", std::numeric_limits<i32>::min())); + + EXPECT_EQ("0", Format("%u", 0U)); + EXPECT_EQ("0", Format("%v", 0U)); + EXPECT_EQ("4294967295", Format("%u", std::numeric_limits<ui32>::max())); + EXPECT_EQ("4294967295", Format("%v", std::numeric_limits<ui32>::max())); + + EXPECT_EQ("9223372036854775807", Format("%" PRId64, std::numeric_limits<i64>::max())); + EXPECT_EQ("9223372036854775807", Format("%v", std::numeric_limits<i64>::max())); + EXPECT_EQ("-9223372036854775808", Format("%" PRId64, std::numeric_limits<i64>::min())); + EXPECT_EQ("-9223372036854775808", Format("%v", std::numeric_limits<i64>::min())); + + EXPECT_EQ("0", Format("%" PRIu64, 0ULL)); + EXPECT_EQ("0", Format("%v", 0ULL)); + EXPECT_EQ("18446744073709551615", Format("%" PRIu64, std::numeric_limits<ui64>::max())); + EXPECT_EQ("18446744073709551615", Format("%v", std::numeric_limits<ui64>::max())); +} + +TEST(TFormatTest, Floats) +{ + EXPECT_EQ("3.14", Format("%.2f", 3.1415F)); + EXPECT_EQ("3.14", Format("%.2v", 3.1415F)); + EXPECT_EQ("3.14", Format("%.2lf", 3.1415)); + EXPECT_EQ("3.14", Format("%.2v", 3.1415)); + EXPECT_EQ(TString(std::to_string(std::numeric_limits<double>::max())), + Format("%lF", 
std::numeric_limits<double>::max())); +} + +TEST(TFormatTest, Bool) +{ + EXPECT_EQ("True", Format("%v", true)); + EXPECT_EQ("False", Format("%v", false)); + EXPECT_EQ("true", Format("%lv", true)); + EXPECT_EQ("false", Format("%lv", false)); +} + +TEST(TFormatTest, Quotes) +{ + EXPECT_EQ("\"True\"", Format("%Qv", true)); + EXPECT_EQ("'False'", Format("%qv", false)); + EXPECT_EQ("'\\\'\"'", Format("%qv", "\'\"")); + EXPECT_EQ("\"\\x01\"", Format("%Qv", "\x1")); + EXPECT_EQ("'\\x1b'", Format("%qv", '\x1b')); +} + +TEST(TFormatTest, Nullable) +{ + EXPECT_EQ("1", Format("%v", std::make_optional<int>(1))); + EXPECT_EQ("<null>", Format("%v", std::nullopt)); + EXPECT_EQ("<null>", Format("%v", std::optional<int>())); + EXPECT_EQ("3.14", Format("%.2f", std::optional<double>(3.1415))); +} + +TEST(TFormatTest, Pointers) +{ + // No idea if pointer format is standardized, check against Sprintf. + auto p = reinterpret_cast<void*>(123); + EXPECT_EQ(Sprintf("%p", reinterpret_cast<void*>(123)), Format("%p", p)); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace +} // namespace NYT diff --git a/library/cpp/yt/string/unittests/guid_ut.cpp b/library/cpp/yt/string/unittests/guid_ut.cpp new file mode 100644 index 0000000000..4b5eebea16 --- /dev/null +++ b/library/cpp/yt/string/unittests/guid_ut.cpp @@ -0,0 +1,58 @@ +#include <library/cpp/testing/gtest/gtest.h> + +#include <library/cpp/yt/string/guid.h> +#include <library/cpp/yt/string/format.h> + +#include <util/string/hex.h> + +namespace NYT { +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +static_assert(TFormatTraits<TGuid>::HasCustomFormatValue); + +TString CanonicalToString(TGuid value) +{ + return Sprintf("%x-%x-%x-%x", + value.Parts32[3], + value.Parts32[2], + value.Parts32[1], + value.Parts32[0]); +} + +const ui32 TrickyValues[] = { + 0, 0x1, 0x12, 0x123, 0x1234, 0x12345, 0x123456, 0x1234567, 0x12345678 +}; + +TEST(TGuidTest, 
FormatAllTricky) +{ + for (ui32 a : TrickyValues) { + for (ui32 b : TrickyValues) { + for (ui32 c : TrickyValues) { + for (ui32 d : TrickyValues) { + auto value = TGuid(a, b, c, d); + EXPECT_EQ(CanonicalToString(value), ToString(value)); + } + } + } + } +} + +TEST(TGuidTest, FormatAllSymbols) +{ + const auto Value = TGuid::FromString("12345678-abcdef01-12345678-abcdef01"); + EXPECT_EQ(CanonicalToString(Value), ToString(Value)); +} + +TEST(TGuidTest, ByteOrder) +{ + auto guid = TGuid::FromStringHex32("12345678ABCDEF0112345678ABCDEF01"); + TString bytes{reinterpret_cast<const char*>(&(guid.Parts32[0])), 16}; + EXPECT_EQ(HexEncode(bytes), "01EFCDAB7856341201EFCDAB78563412"); +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace +} // namespace NYT diff --git a/library/cpp/yt/string/unittests/string_ut.cpp b/library/cpp/yt/string/unittests/string_ut.cpp new file mode 100644 index 0000000000..3e12312af0 --- /dev/null +++ b/library/cpp/yt/string/unittests/string_ut.cpp @@ -0,0 +1,52 @@ +#include <library/cpp/testing/gtest/gtest.h> + +#include <library/cpp/yt/string/string.h> + +namespace NYT { +namespace { + +//////////////////////////////////////////////////////////////////////////////// + +struct TTestCase +{ + const char* UnderCase; + const char* CamelCase; +}; + +static std::vector<TTestCase> TestCases { + { "kenny", "Kenny" }, + { "south_park", "SouthPark" }, + { "a", "A" }, + { "a_b_c", "ABC" }, + { "reed_solomon_6_3", "ReedSolomon_6_3" }, + { "lrc_12_2_2", "Lrc_12_2_2" }, + { "0", "0" }, + { "0_1_2", "0_1_2" }, + { "int64", "Int64" } +}; + +//////////////////////////////////////////////////////////////////////////////// + +TEST(TStringTest, UnderscoreCaseToCamelCase) +{ + for (const auto& testCase : TestCases) { + auto result = UnderscoreCaseToCamelCase(testCase.UnderCase); + EXPECT_STREQ(testCase.CamelCase, result.c_str()) + << "Original: \"" << testCase.UnderCase << '"'; + } +} + +TEST(TStringTest, 
CamelCaseToUnderscoreCase) +{ + for (const auto& testCase : TestCases) { + auto result = CamelCaseToUnderscoreCase(testCase.CamelCase); + EXPECT_STREQ(testCase.UnderCase, result.c_str()) + << "Original: \"" << testCase.CamelCase << '"'; + } +} + +//////////////////////////////////////////////////////////////////////////////// + +} // namespace +} // namespace NYT + diff --git a/library/cpp/yt/string/unittests/ya.make b/library/cpp/yt/string/unittests/ya.make new file mode 100644 index 0000000000..9d539758d1 --- /dev/null +++ b/library/cpp/yt/string/unittests/ya.make @@ -0,0 +1,17 @@ +GTEST(unittester-library-string-helpers) + +OWNER(g:yt) + +SRCS( + enum_ut.cpp + format_ut.cpp + guid_ut.cpp + string_ut.cpp +) + +PEERDIR( + library/cpp/yt/string + library/cpp/testing/gtest +) + +END() diff --git a/library/cpp/yt/string/ya.make b/library/cpp/yt/string/ya.make new file mode 100644 index 0000000000..83efd5eb2f --- /dev/null +++ b/library/cpp/yt/string/ya.make @@ -0,0 +1,30 @@ +LIBRARY() + +SRCS( + enum.cpp + guid.cpp + string.cpp +) + +PEERDIR( + library/cpp/yt/assert + library/cpp/yt/exception + library/cpp/yt/misc +) + +CHECK_DEPENDENT_DIRS( + ALLOW_ONLY ALL + build + contrib + library + util + library/cpp/yt/assert + library/cpp/yt/misc + library/cpp/yt/small_containers +) + +END() + +RECURSE_FOR_TESTS( + unittests +) |