diff options
author | yazevnul <yazevnul@yandex-team.ru> | 2022-02-10 16:46:46 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:46 +0300 |
commit | 8cbc307de0221f84c80c42dcbe07d40727537e2c (patch) | |
tree | 625d5a673015d1df891e051033e9fcde5c7be4e5 /util/string/subst.cpp | |
parent | 30d1ef3941e0dc835be7609de5ebee66958f215a (diff) | |
download | ydb-8cbc307de0221f84c80c42dcbe07d40727537e2c.tar.gz |
Restoring authorship annotation for <yazevnul@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'util/string/subst.cpp')
-rw-r--r-- | util/string/subst.cpp | 360 |
1 files changed, 180 insertions, 180 deletions
diff --git a/util/string/subst.cpp b/util/string/subst.cpp index b2df328dc1..8daef0708b 100644 --- a/util/string/subst.cpp +++ b/util/string/subst.cpp @@ -1,200 +1,200 @@ #include "subst.h" - -#include <util/generic/strbuf.h> -#include <util/generic/string.h> -#include <util/system/compiler.h> - -#include <string> -#include <type_traits> - -// a bit of template magic (to be fast and unreadable) -template <class TStringType, class TTo, bool Main> -static Y_FORCE_INLINE void MoveBlock(typename TStringType::value_type* ptr, size_t& srcPos, size_t& dstPos, const size_t off, const TTo to, const size_t toSize) { - const size_t unchangedSize = off - srcPos; - if (dstPos < srcPos) { - for (size_t i = 0; i < unchangedSize; ++i) { - ptr[dstPos++] = ptr[srcPos++]; - } - } else { - dstPos += unchangedSize; - srcPos += unchangedSize; - } - - if (Main) { - for (size_t i = 0; i < toSize; ++i) { - ptr[dstPos++] = to[i]; - } - } -} - -template <typename T, typename U> -static bool IsIntersect(const T& a, const U& b) noexcept { - if (b.data() < a.data()) { - return IsIntersect(b, a); - } - - return !a.empty() && !b.empty() && - ((a.data() <= b.data() && b.data() < a.data() + a.size()) || - (a.data() < b.data() + b.size() && b.data() + b.size() <= a.data() + a.size())); -} - -/** - * Replaces all occurences of substring @c from in string @c s to string @c to. - * Uses two separate implementations (inplace for shrink and append for grow case) - * See IGNIETFERRO-394 - **/ + +#include <util/generic/strbuf.h> +#include <util/generic/string.h> +#include <util/system/compiler.h> + +#include <string> +#include <type_traits> + +// a bit of template magic (to be fast and unreadable) +template <class TStringType, class TTo, bool Main> +static Y_FORCE_INLINE void MoveBlock(typename TStringType::value_type* ptr, size_t& srcPos, size_t& dstPos, const size_t off, const TTo to, const size_t toSize) { + const size_t unchangedSize = off - srcPos; + if (dstPos < srcPos) { + for (size_t i = 0; i < unchangedSize; ++i) { + ptr[dstPos++] = ptr[srcPos++]; + } + } else { + dstPos += unchangedSize; + srcPos += unchangedSize; + } + + if (Main) { + for (size_t i = 0; i < toSize; ++i) { + ptr[dstPos++] = to[i]; + } + } +} + +template <typename T, typename U> +static bool IsIntersect(const T& a, const U& b) noexcept { + if (b.data() < a.data()) { + return IsIntersect(b, a); + } + + return !a.empty() && !b.empty() && + ((a.data() <= b.data() && b.data() < a.data() + a.size()) || + (a.data() < b.data() + b.size() && b.data() + b.size() <= a.data() + a.size())); +} + +/** + * Replaces all occurences of substring @c from in string @c s to string @c to. + * Uses two separate implementations (inplace for shrink and append for grow case) + * See IGNIETFERRO-394 + **/ template <class TStringType, typename TStringViewType = TBasicStringBuf<typename TStringType::value_type>> -static inline size_t SubstGlobalImpl(TStringType& s, const TStringViewType from, const TStringViewType to, size_t fromPos = 0) { +static inline size_t SubstGlobalImpl(TStringType& s, const TStringViewType from, const TStringViewType to, size_t fromPos = 0) { if (from.empty()) { - return 0; - } - - Y_ASSERT(!IsIntersect(s, from)); - Y_ASSERT(!IsIntersect(s, to)); - - const size_t fromSize = from.size(); - const size_t toSize = to.size(); - size_t replacementsCount = 0; - size_t off = fromPos; - size_t srcPos = 0; - - if (toSize > fromSize) { - // string will grow: append to another string - TStringType result; - for (; (off = TStringViewType(s).find(from, off)) != TStringType::npos; off += fromSize) { - if (!replacementsCount) { - // first replacement occured, we can prepare result string - result.reserve(s.size() + s.size() / 3); - } - result.append(s.begin() + srcPos, s.begin() + off); + return 0; + } + + Y_ASSERT(!IsIntersect(s, from)); + Y_ASSERT(!IsIntersect(s, to)); + + const size_t fromSize = from.size(); + const size_t toSize = to.size(); + size_t replacementsCount = 0; + size_t off = fromPos; + size_t srcPos = 0; + + if (toSize > fromSize) { + // string will grow: append to another string + TStringType result; + for (; (off = TStringViewType(s).find(from, off)) != TStringType::npos; off += fromSize) { + if (!replacementsCount) { + // first replacement occured, we can prepare result string + result.reserve(s.size() + s.size() / 3); + } + result.append(s.begin() + srcPos, s.begin() + off); result.append(to.data(), to.size()); - srcPos = off + fromSize; - ++replacementsCount; - } - if (replacementsCount) { - // append tail - result.append(s.begin() + srcPos, s.end()); - s = std::move(result); - } - return replacementsCount; - } - - // string will not grow: use inplace algo - size_t dstPos = 0; + srcPos = off + fromSize; + ++replacementsCount; + } + if (replacementsCount) { + // append tail + result.append(s.begin() + srcPos, s.end()); + s = std::move(result); + } + return replacementsCount; + } + + // string will not grow: use inplace algo + size_t dstPos = 0; typename TStringType::value_type* ptr = &*s.begin(); - for (; (off = TStringViewType(s).find(from, off)) != TStringType::npos; off += fromSize) { - Y_ASSERT(dstPos <= srcPos); - MoveBlock<TStringType, TStringViewType, true>(ptr, srcPos, dstPos, off, to, toSize); - srcPos = off + fromSize; - ++replacementsCount; - } - - if (replacementsCount) { - // append tail - MoveBlock<TStringType, TStringViewType, false>(ptr, srcPos, dstPos, s.size(), to, toSize); - s.resize(dstPos); - } - return replacementsCount; -} - -/// Replaces all occurences of the 'from' symbol in a string to the 'to' symbol. -template <class TStringType> -inline size_t SubstCharGlobalImpl(TStringType& s, typename TStringType::value_type from, typename TStringType::value_type to, size_t fromPos = 0) { - if (fromPos >= s.size()) { - return 0; - } - - size_t result = 0; - fromPos = s.find(from, fromPos); - - // s.begin() might cause memory copying, so call it only if needed - if (fromPos != TStringType::npos) { + for (; (off = TStringViewType(s).find(from, off)) != TStringType::npos; off += fromSize) { + Y_ASSERT(dstPos <= srcPos); + MoveBlock<TStringType, TStringViewType, true>(ptr, srcPos, dstPos, off, to, toSize); + srcPos = off + fromSize; + ++replacementsCount; + } + + if (replacementsCount) { + // append tail + MoveBlock<TStringType, TStringViewType, false>(ptr, srcPos, dstPos, s.size(), to, toSize); + s.resize(dstPos); + } + return replacementsCount; +} + +/// Replaces all occurences of the 'from' symbol in a string to the 'to' symbol. +template <class TStringType> +inline size_t SubstCharGlobalImpl(TStringType& s, typename TStringType::value_type from, typename TStringType::value_type to, size_t fromPos = 0) { + if (fromPos >= s.size()) { + return 0; + } + + size_t result = 0; + fromPos = s.find(from, fromPos); + + // s.begin() might cause memory copying, so call it only if needed + if (fromPos != TStringType::npos) { auto* it = &*s.begin() + fromPos; - *it = to; - ++result; - // at this point string is copied and it's safe to use constant s.end() to iterate + *it = to; + ++result; + // at this point string is copied and it's safe to use constant s.end() to iterate const auto* const sEnd = &*s.end(); - // unrolled loop goes first because it is more likely that `it` will be properly aligned - for (const auto* const end = sEnd - (sEnd - it) % 4; it < end;) { - if (*it == from) { - *it = to; - ++result; - } - ++it; - if (*it == from) { - *it = to; - ++result; - } - ++it; - if (*it == from) { - *it = to; - ++result; - } - ++it; - if (*it == from) { - *it = to; - ++result; - } - ++it; - } - for (; it < sEnd; ++it) { - if (*it == from) { - *it = to; - ++result; - } - } - } - - return result; -} - -/* Standard says that `char16_t` is a distinct type and has same size, signedness and alignment as - * `std::uint_least16_t`, so we check if `char16_t` has same signedness and size as `wchar16` to be - * sure that we can make safe casts between values of these types and pointers. - */ -static_assert(sizeof(wchar16) == sizeof(char16_t), ""); + // unrolled loop goes first because it is more likely that `it` will be properly aligned + for (const auto* const end = sEnd - (sEnd - it) % 4; it < end;) { + if (*it == from) { + *it = to; + ++result; + } + ++it; + if (*it == from) { + *it = to; + ++result; + } + ++it; + if (*it == from) { + *it = to; + ++result; + } + ++it; + if (*it == from) { + *it = to; + ++result; + } + ++it; + } + for (; it < sEnd; ++it) { + if (*it == from) { + *it = to; + ++result; + } + } + } + + return result; +} + +/* Standard says that `char16_t` is a distinct type and has same size, signedness and alignment as + * `std::uint_least16_t`, so we check if `char16_t` has same signedness and size as `wchar16` to be + * sure that we can make safe casts between values of these types and pointers. + */ +static_assert(sizeof(wchar16) == sizeof(char16_t), ""); static_assert(sizeof(wchar32) == sizeof(char32_t), ""); -static_assert(std::is_unsigned<wchar16>::value == std::is_unsigned<char16_t>::value, ""); +static_assert(std::is_unsigned<wchar16>::value == std::is_unsigned<char16_t>::value, ""); static_assert(std::is_unsigned<wchar32>::value == std::is_unsigned<char32_t>::value, ""); - -size_t SubstGlobal(TString& text, const TStringBuf what, const TStringBuf with, size_t from) { - return SubstGlobalImpl(text, what, with, from); -} - -size_t SubstGlobal(std::string& text, const TStringBuf what, const TStringBuf with, size_t from) { - return SubstGlobalImpl(text, what, with, from); -} - -size_t SubstGlobal(TUtf16String& text, const TWtringBuf what, const TWtringBuf with, size_t from) { - return SubstGlobalImpl(text, what, with, from); -} - + +size_t SubstGlobal(TString& text, const TStringBuf what, const TStringBuf with, size_t from) { + return SubstGlobalImpl(text, what, with, from); +} + +size_t SubstGlobal(std::string& text, const TStringBuf what, const TStringBuf with, size_t from) { + return SubstGlobalImpl(text, what, with, from); +} + +size_t SubstGlobal(TUtf16String& text, const TWtringBuf what, const TWtringBuf with, size_t from) { + return SubstGlobalImpl(text, what, with, from); +} + size_t SubstGlobal(TUtf32String& text, const TUtf32StringBuf what, const TUtf32StringBuf with, size_t from) { return SubstGlobalImpl(text, what, with, from); } -size_t SubstGlobal(std::u16string& text, const TWtringBuf what, const TWtringBuf with, size_t from) { +size_t SubstGlobal(std::u16string& text, const TWtringBuf what, const TWtringBuf with, size_t from) { return SubstGlobalImpl(text, std::u16string_view(reinterpret_cast<const char16_t*>(what.data()), what.size()), std::u16string_view(reinterpret_cast<const char16_t*>(with.data()), with.size()), from); -} - -size_t SubstGlobal(TString& text, char what, char with, size_t from) { - return SubstCharGlobalImpl(text, what, with, from); -} - -size_t SubstGlobal(std::string& text, char what, char with, size_t from) { - return SubstCharGlobalImpl(text, what, with, from); -} - -size_t SubstGlobal(TUtf16String& text, wchar16 what, wchar16 with, size_t from) { - return SubstCharGlobalImpl(text, (char16_t)what, (char16_t)with, from); -} - -size_t SubstGlobal(std::u16string& text, wchar16 what, wchar16 with, size_t from) { - return SubstCharGlobalImpl(text, (char16_t)what, (char16_t)with, from); -} +} + +size_t SubstGlobal(TString& text, char what, char with, size_t from) { + return SubstCharGlobalImpl(text, what, with, from); +} + +size_t SubstGlobal(std::string& text, char what, char with, size_t from) { + return SubstCharGlobalImpl(text, what, with, from); +} + +size_t SubstGlobal(TUtf16String& text, wchar16 what, wchar16 with, size_t from) { + return SubstCharGlobalImpl(text, (char16_t)what, (char16_t)with, from); +} + +size_t SubstGlobal(std::u16string& text, wchar16 what, wchar16 with, size_t from) { + return SubstCharGlobalImpl(text, (char16_t)what, (char16_t)with, from); +} size_t SubstGlobal(TUtf32String& text, wchar32 what, wchar32 with, size_t from) { return SubstCharGlobalImpl(text, (char32_t)what, (char32_t)with, from); |