summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorbabenko <[email protected]>2025-12-24 23:29:35 +0300
committerbabenko <[email protected]>2025-12-24 23:45:58 +0300
commit4dbf62fd2f8cc5ece53cc1446561cf71476bdd12 (patch)
tree9aab73c043bf8f6dc177b06f69dab4336dadfcba
parentdba8986f6b1a5fc7c4f230bee510113995a48970 (diff)
Explicitly use TCowString in TYsonString
Для ревьюеров: изменения вне `library/cpp/yt` убирают `using namespace NYT` из хедера. Эта конструкция приводила к клешу имен глобального неймспейса и `NYT` и ошибкам сборки. commit_hash:f598da488a6dd8671af9f1f02870ab5612ae46eb
-rw-r--r--library/cpp/containers/cow_string/cow_string.cpp280
-rw-r--r--library/cpp/containers/cow_string/cow_string.h1047
-rw-r--r--library/cpp/containers/cow_string/cow_string_ut.cpp1268
-rw-r--r--library/cpp/containers/cow_string/output.cpp46
-rw-r--r--library/cpp/containers/cow_string/reverse.cpp32
-rw-r--r--library/cpp/containers/cow_string/reverse.h16
-rw-r--r--library/cpp/containers/cow_string/str_stl.h67
-rw-r--r--library/cpp/containers/cow_string/subst.cpp182
-rw-r--r--library/cpp/containers/cow_string/subst.h31
-rw-r--r--library/cpp/containers/cow_string/ut/ya.make7
-rw-r--r--library/cpp/containers/cow_string/ut_medium/cow_string_medium_ut.cpp55
-rw-r--r--library/cpp/containers/cow_string/ut_medium/ya.make9
-rw-r--r--library/cpp/containers/cow_string/ya.make15
-rw-r--r--library/cpp/yt/yson_string/string.cpp25
-rw-r--r--library/cpp/yt/yson_string/string.h4
-rw-r--r--library/cpp/yt/yson_string/ya.make1
16 files changed, 3066 insertions, 19 deletions
diff --git a/library/cpp/containers/cow_string/cow_string.cpp b/library/cpp/containers/cow_string/cow_string.cpp
new file mode 100644
index 00000000000..87dc0ad99ec
--- /dev/null
+++ b/library/cpp/containers/cow_string/cow_string.cpp
@@ -0,0 +1,280 @@
+#include "cow_string.h"
+
+#include <util/string/ascii.h>
+#include <util/system/sanitizers.h>
+#include <util/system/sys_alloc.h>
+#include <util/charset/wide.h>
+
+#include <iostream>
+
+// Walks the mutable range [p, pe) symbol by symbol and applies `f` to each symbol.
+// `p` is taken by reference and is left at the position where processing stopped.
+//
+// When `stopOnFirstModification` is true nothing is written: the function returns true as soon
+// as it meets a symbol that `f` would change (with `p` still pointing at that symbol), or false
+// if the whole range is left untouched by `f`.
+// When it is false every changed symbol is rewritten in place and the function returns false.
+template <bool stopOnFirstModification, typename TCharType, typename F>
+static bool ModifySequence(TCharType*& p, const TCharType* const pe, F&& f) {
+    for (; p != pe;) {
+        const auto original = ReadSymbol(p, pe);
+        const auto replacement = f(original);
+        const bool changed = original != replacement;
+
+        if (!changed) {
+            p = SkipSymbol(p, pe);
+            continue;
+        }
+
+        if (stopOnFirstModification) {
+            return true;
+        }
+
+        WriteSymbol(replacement, p); // also moves `p` forward
+    }
+
+    return false;
+}
+
+// Copying counterpart of the in-place overload: reads symbols from [p, pe), applies `f` and
+// writes the results through `out`. Both `p` and `out` are taken by reference.
+//
+// When `stopOnFirstModification` is true, unchanged symbols are copied to `out` until a symbol
+// that `f` would change is found; the function then returns true with `p` pointing at it.
+// When it is false the whole range is transformed into `out` and the function returns false.
+template <bool stopOnFirstModification, typename TCharType, typename F>
+static bool ModifySequence(const TCharType*& p, const TCharType* const pe, TCharType*& out, F&& f) {
+    while (p != pe) {
+        if (stopOnFirstModification) {
+            const auto current = ReadSymbol(p, pe);
+            const auto transformed = f(current);
+
+            if (current != transformed) {
+                return true;
+            }
+
+            p = SkipSymbol(p, pe);
+            WriteSymbol(transformed, out);
+        } else {
+            const auto current = ReadSymbolAndAdvance(p, pe);
+            WriteSymbol(f(current), out);
+        }
+    }
+
+    return false;
+}
+
+// Calls `text.Detach()` and re-bases `p` and `pe` onto the pointer it returns, keeping both the
+// offset of `p` from the start of the string and the distance between `p` and `pe`.
+template <class TStringType>
+static void DetachAndFixPointers(TStringType& text, typename TStringType::value_type*& p, const typename TStringType::value_type*& pe) {
+    const auto offset = p - text.data();
+    const auto remaining = pe - p;
+
+    auto* const detached = text.Detach();
+    p = detached + offset;
+    pe = p + remaining;
+}
+
+// Applies `f` to every symbol of `text` in the range [pos, pos + count), clamped to the string.
+// The string is detached only if `f` actually changes at least one symbol.
+// Returns true if the string was modified, false otherwise.
+template <class TStringType, typename F>
+static bool ModifyStringSymbolwise(TStringType& text, size_t pos, size_t count, F&& f) {
+    // TODO(yazevnul): this is done for consistency with `TUtf16String::to_lower` and friends
+    // at r2914050, maybe worth replacing them with asserts. Also see the same code in `ToTitle`.
+    const size_t size = text.size();
+    if (pos >= size) {
+        pos = size;
+    }
+    const size_t available = size - pos;
+    if (count >= available) {
+        count = available;
+    }
+
+    // The string is refcounted and its `data` method returns a pointer to constant memory.
+    // To keep the code simple we `const_cast` it, but the first write happens only after
+    // `Detach()` has been called and the pointers have been moved to writable memory.
+    auto* cursor = const_cast<typename TStringType::value_type*>(text.data() + pos);
+    const auto* last = text.data() + pos + count;
+
+    // First pass is read-only: look for the first symbol that needs to change.
+    if (!ModifySequence<true>(cursor, last, f)) {
+        return false;
+    }
+
+    // Second pass continues from that symbol on the detached buffer.
+    DetachAndFixPointers(text, cursor, last);
+    ModifySequence<false>(cursor, last, f);
+    return true;
+}
+
+// Writes the characters of `s` to `os` as an unformatted block of `s.size()` characters.
+std::ostream& operator<<(std::ostream& os, const TCowString& s) {
+    os.write(s.data(), s.size());
+    return os;
+}
+
+// Extracts into the underlying standard string of `s`, obtained through `MutRef()`.
+std::istream& operator>>(std::istream& is, TCowString& s) {
+    auto& target = s.MutRef();
+    is >> target;
+    return is;
+}
+
+// Lower-cases the characters selected by (pos, n) with AsciiToLower; the result of Transform
+// is returned unchanged.
+template <>
+bool TBasicCowString<char, std::char_traits<char>>::to_lower(size_t pos, size_t n) {
+    const auto lowerAscii = [](size_t /*index*/, char c) {
+        return AsciiToLower(c);
+    };
+
+    return Transform(lowerAscii, pos, n);
+}
+
+// Upper-cases the characters selected by (pos, n) with AsciiToUpper; the result of Transform
+// is returned unchanged.
+template <>
+bool TBasicCowString<char, std::char_traits<char>>::to_upper(size_t pos, size_t n) {
+    const auto upperAscii = [](size_t /*index*/, char c) {
+        return AsciiToUpper(c);
+    };
+
+    return Transform(upperAscii, pos, n);
+}
+
+// Title-cases the range (pos, n): the first character is upper-cased and the remaining n - 1
+// characters are lower-cased. Returns true if either step changed the string.
+template <>
+bool TBasicCowString<char, std::char_traits<char>>::to_title(size_t pos, size_t n) {
+    if (n == 0) {
+        return false;
+    }
+
+    // Both steps always run; only the results are combined.
+    const bool headChanged = to_upper(pos, 1);
+    const bool tailChanged = to_lower(pos + 1, n - 1);
+    return tailChanged || headChanged;
+}
+
+// Appends the bytes of `s` to this UTF-16 string, converting each `char` to `wchar16` with a
+// plain static_cast.
+template <>
+TUtf16CowString&
+TBasicCowString<wchar16, std::char_traits<wchar16>>::AppendAscii(const ::TStringBuf& s) {
+    const size_t oldSize = size();
+    const size_t extra = s.size();
+    ReserveAndResize(oldSize + extra);
+
+    wchar16* const tail = begin() + oldSize;
+    const char* const src = s.data();
+
+    for (size_t i = 0; i < extra; ++i) {
+        tail[i] = static_cast<wchar16>(src[i]);
+    }
+
+    return *this;
+}
+
+// Decodes the UTF-8 text in `s` and appends the result to this UTF-16 string.
+//
+// Throws yexception if decoding stops before the end of `s`. In that case the string is
+// shrunk back to its original length before the exception is thrown, so a failed append
+// does not leave the uninitialized scratch tail behind.
+template <>
+TUtf16CowString&
+TBasicCowString<wchar16, std::char_traits<wchar16>>::AppendUtf8(const ::TStringBuf& s) {
+    const size_t oldSize = size();
+    // Grow to an upper bound first; ReserveAndResize leaves the new tail uninitialized.
+    ReserveAndResize(oldSize + s.size() * 4);
+    size_t written = 0;
+    const size_t pos = UTF8ToWideImpl(s.data(), s.size(), begin() + oldSize, written);
+    if (pos != s.size()) {
+        // Drop the scratch tail before reporting the error.
+        resize(oldSize);
+        ythrow yexception() << "failed to decode UTF-8 string at pos " << pos << ::NDetail::InStringMsg(s.data(), s.size());
+    }
+    // Trim the over-allocated tail down to what was actually decoded.
+    resize(oldSize + written);
+
+    return *this;
+}
+
+// Lower-cases the symbols in the range (pos, n) via ModifyStringSymbolwise.
+// Returns true if the string was modified.
+template <>
+bool TBasicCowString<wchar16, std::char_traits<wchar16>>::to_lower(size_t pos, size_t n) {
+    return ModifyStringSymbolwise(*this, pos, n, [](const wchar32 symbol) {
+        return ToLower(symbol);
+    });
+}
+
+// Upper-cases the symbols in the range (pos, n) via ModifyStringSymbolwise.
+// Returns true if the string was modified.
+template <>
+bool TBasicCowString<wchar16, std::char_traits<wchar16>>::to_upper(size_t pos, size_t n) {
+    return ModifyStringSymbolwise(*this, pos, n, [](const wchar32 symbol) {
+        return ToUpper(symbol);
+    });
+}
+
+// Title-cases the range [pos, pos + nn) of this UTF-16 string: the first symbol is converted
+// with ToTitle and every following symbol with ToLower. The range is clamped to the string.
+// The string is detached only if something actually has to change.
+// Returns true if the string was modified, false otherwise (including for an empty range).
+template <>
+bool TBasicCowString<wchar16, std::char_traits<wchar16>>::to_title(size_t pos, size_t nn) {
+    if (!*this) {
+        return false;
+    }
+
+    pos = pos < this->size() ? pos : this->size();
+    nn = nn < this->size() - pos ? nn : this->size() - pos;
+
+    // An empty range has no first symbol. Without this guard the code below would read (and
+    // possibly rewrite) the symbol at `p`, move `p` past `pe`, and the `p != pe` loops in
+    // ModifySequence would then never meet their end condition.
+    if (nn == 0) {
+        return false;
+    }
+
+    const auto toLower = [](const wchar32 s) { return ToLower(s); };
+
+    auto* p = const_cast<wchar16*>(this->data() + pos);
+    const auto* pe = this->data() + pos + nn;
+
+    const auto firstSymbol = ReadSymbol(p, pe);
+    if (firstSymbol == ToTitle(firstSymbol)) {
+        // First symbol is already fine; scan the rest read-only before detaching.
+        p = SkipSymbol(p, pe);
+        if (ModifySequence<true>(p, pe, toLower)) {
+            DetachAndFixPointers(*this, p, pe);
+            ModifySequence<false>(p, pe, toLower);
+            return true;
+        }
+    } else {
+        // First symbol must change, so detach right away.
+        DetachAndFixPointers(*this, p, pe);
+        WriteSymbol(ToTitle(ReadSymbol(p, pe)), p); // also moves `p` forward
+        ModifySequence<false>(p, pe, toLower);
+        return true;
+    }
+
+    return false;
+}
+
+// Appends the bytes of `s` to this UTF-32 string, converting each `char` to `wchar32` with a
+// plain static_cast.
+template <>
+TUtf32CowString&
+TBasicCowString<wchar32, std::char_traits<wchar32>>::AppendAscii(const ::TStringBuf& s) {
+    const size_t oldSize = size();
+    const size_t extra = s.size();
+    ReserveAndResize(oldSize + extra);
+
+    wchar32* const tail = begin() + oldSize;
+    const char* const src = s.data();
+
+    for (size_t i = 0; i < extra; ++i) {
+        tail[i] = static_cast<wchar32>(src[i]);
+    }
+
+    return *this;
+}
+
+// Converts the UTF-16 text in `s` with WideToUTF8 and appends the result to this string.
+template <>
+TBasicCowString<char, std::char_traits<char>>&
+TBasicCowString<char, std::char_traits<char>>::AppendUtf16(const ::TWtringBuf& s) {
+    const size_t oldSize = size();
+    const size_t maxExtra = WideToUTF8BufferSize(s.size());
+
+    // Make room for the largest possible output, then trim to what was really produced.
+    ReserveAndResize(oldSize + maxExtra);
+    char* const tail = begin() + oldSize;
+
+    size_t written = 0;
+    WideToUTF8(s.data(), s.size(), tail, written);
+    resize(oldSize + written);
+
+    return *this;
+}
+
+// Decodes the UTF-8 text in `s` and appends the result to this UTF-32 string.
+//
+// Throws yexception if decoding stops before the end of `s`. In that case the string is
+// shrunk back to its original length before the exception is thrown, so a failed append
+// does not leave the uninitialized scratch tail behind.
+template <>
+TUtf32CowString&
+TBasicCowString<wchar32, std::char_traits<wchar32>>::AppendUtf8(const ::TStringBuf& s) {
+    const size_t oldSize = size();
+    // Grow to an upper bound first; ReserveAndResize leaves the new tail uninitialized.
+    ReserveAndResize(oldSize + s.size() * 4);
+    size_t written = 0;
+    const size_t pos = UTF8ToWideImpl(s.data(), s.size(), begin() + oldSize, written);
+    if (pos != s.size()) {
+        // Drop the scratch tail before reporting the error.
+        resize(oldSize);
+        ythrow yexception() << "failed to decode UTF-8 string at pos " << pos << ::NDetail::InStringMsg(s.data(), s.size());
+    }
+    // Trim the over-allocated tail down to what was actually decoded.
+    resize(oldSize + written);
+
+    return *this;
+}
+
+// Converts the UTF-16 text in `s` with UTF16ToUTF32ImplScalar and appends the result to this
+// UTF-32 string.
+template <>
+TUtf32CowString&
+TBasicCowString<wchar32, std::char_traits<wchar32>>::AppendUtf16(const ::TWtringBuf& s) {
+    const size_t oldSize = size();
+    ReserveAndResize(oldSize + s.size() * 2);
+
+    // `cursor` is advanced by the converter; its distance from `tailBegin` is the output length.
+    wchar32* const tailBegin = begin() + oldSize;
+    wchar32* cursor = tailBegin;
+    NDetail::UTF16ToUTF32ImplScalar(s.data(), s.data() + s.size(), cursor);
+
+    const size_t written = cursor - tailBegin;
+    resize(oldSize + written);
+
+    return *this;
+}
+
+// Lower-cases the symbols in the range (pos, n) via ModifyStringSymbolwise.
+// Returns true if the string was modified.
+template <>
+bool TBasicCowString<wchar32, std::char_traits<wchar32>>::to_lower(size_t pos, size_t n) {
+    return ModifyStringSymbolwise(*this, pos, n, [](const wchar32 symbol) {
+        return ToLower(symbol);
+    });
+}
+
+// Upper-cases the symbols in the range (pos, n) via ModifyStringSymbolwise.
+// Returns true if the string was modified.
+template <>
+bool TBasicCowString<wchar32, std::char_traits<wchar32>>::to_upper(size_t pos, size_t n) {
+    return ModifyStringSymbolwise(*this, pos, n, [](const wchar32 symbol) {
+        return ToUpper(symbol);
+    });
+}
+
+// Title-cases the range [pos, pos + n) of this UTF-32 string: the first symbol is converted
+// with ToTitle and every following symbol with ToLower. The range is clamped to the string.
+// The string is detached only if something actually has to change.
+// Returns true if the string was modified, false otherwise (including for an empty range).
+template <>
+bool TBasicCowString<wchar32, std::char_traits<wchar32>>::to_title(size_t pos, size_t n) {
+    if (!*this) {
+        return false;
+    }
+
+    pos = pos < this->size() ? pos : this->size();
+    n = n < this->size() - pos ? n : this->size() - pos;
+
+    // An empty range has no first symbol. Without this guard the code below would read (and
+    // possibly rewrite) `*p`, move `p` past `pe`, and the `p != pe` loops in ModifySequence
+    // would then never meet their end condition.
+    if (n == 0) {
+        return false;
+    }
+
+    const auto toLower = [](const wchar32 s) { return ToLower(s); };
+
+    auto* p = const_cast<wchar32*>(this->data() + pos);
+    const auto* pe = this->data() + pos + n;
+
+    const auto firstSymbol = *p;
+    if (firstSymbol == ToTitle(firstSymbol)) {
+        // First symbol is already fine; scan the rest read-only before detaching.
+        p += 1;
+        if (ModifySequence<true>(p, pe, toLower)) {
+            DetachAndFixPointers(*this, p, pe);
+            ModifySequence<false>(p, pe, toLower);
+            return true;
+        }
+    } else {
+        // First symbol must change, so detach right away.
+        DetachAndFixPointers(*this, p, pe);
+        WriteSymbol(ToTitle(ReadSymbol(p, pe)), p); // also moves `p` forward
+        ModifySequence<false>(p, pe, toLower);
+        return true;
+    }
+
+    return false;
+}
diff --git a/library/cpp/containers/cow_string/cow_string.h b/library/cpp/containers/cow_string/cow_string.h
new file mode 100644
index 00000000000..597d4c2f06b
--- /dev/null
+++ b/library/cpp/containers/cow_string/cow_string.h
@@ -0,0 +1,1047 @@
+#pragma once
+
+#include <util/generic/string.h>
+
+template <typename TCharType, typename TTraits = std::char_traits<TCharType>>
+class TBasicCowString: public TStringBase<TBasicCowString<TCharType, TTraits>, TCharType, TTraits> {
+public:
+ // TODO: Move to private section
+ using TBase = TStringBase<TBasicCowString, TCharType, TTraits>;
+ using TStringType = std::basic_string<TCharType, TTraits>;
+ using TStdStr = TStdString<TStringType>;
+ using TStorage = TIntrusivePtr<TStdStr, TStringPtrOps<TStdStr>>;
+ using reference = TBasicCharRef<TBasicCowString>;
+ using char_type = TCharType; // TODO: DROP
+ using value_type = TCharType;
+ using traits_type = TTraits;
+
+ using iterator = TCharType*;
+ using reverse_iterator = std::reverse_iterator<iterator>;
+ using typename TBase::const_iterator;
+ using typename TBase::const_reference;
+ using typename TBase::const_reverse_iterator;
+
+ struct TUninitialized {
+ explicit TUninitialized(size_t size)
+ : Size(size)
+ {
+ }
+
+ size_t Size;
+ };
+
+ // Returns the maximum size reported by a default-constructed underlying TStringType.
+ // The value is computed once and cached. Const-qualified so that it can also be called
+ // on const strings.
+ size_t max_size() const noexcept {
+     static const size_t res = TStringType().max_size();
+
+     return res;
+ }
+
+protected:
+ TStorage S_;
+
+ template <typename... A>
+ static TStorage Construct(A&&... a) {
+ return {new TStdStr(std::forward<A>(a)...), typename TStorage::TNoIncrement()};
+ }
+
+ static TStorage Construct() noexcept {
+ return TStdStr::NullStr();
+ }
+
+ TStdStr& StdStr() noexcept {
+ return *S_;
+ }
+
+ const TStdStr& StdStr() const noexcept {
+ return *S_;
+ }
+
+ /**
+ * Makes a distinct copy of this string. `IsDetached()` is always true after this call.
+ *
+ * @throw std::length_error
+ */
+ void Clone() {
+     // Build a fresh storage holding a copy of the current contents, then swap it in;
+     // the previous storage is released when `copy` goes out of scope.
+     TStorage copy = Construct(StdStr());
+     copy.Swap(S_);
+ }
+
+ size_t RefCount() const noexcept {
+ return S_.RefCount();
+ }
+
+public:
+ inline const TStringType& ConstRef() const Y_LIFETIME_BOUND {
+ return StdStr();
+ }
+
+ inline TStringType& MutRef() Y_LIFETIME_BOUND {
+ Detach();
+
+ return StdStr();
+ }
+
+ inline const_reference operator[](size_t pos) const noexcept Y_LIFETIME_BOUND {
+ Y_ASSERT(pos <= length());
+
+ return this->data()[pos];
+ }
+
+ inline reference operator[](size_t pos) noexcept Y_LIFETIME_BOUND {
+ Y_ASSERT(pos <= length());
+
+ return reference(*this, pos);
+ }
+
+ using TBase::back;
+
+ // Returns a proxy reference to the last character. Calling it on an empty string trips
+ // the assertion; if execution continues past it, a reference to position 0 is returned.
+ inline reference back() noexcept Y_LIFETIME_BOUND {
+     Y_ASSERT(!this->empty());
+
+     const size_t lastPos = Y_UNLIKELY(this->empty()) ? 0 : length() - 1;
+
+     return reference(*this, lastPos);
+ }
+
+ using TBase::front;
+
+ inline reference front() noexcept Y_LIFETIME_BOUND {
+ Y_ASSERT(!this->empty());
+
+ return reference(*this, 0);
+ }
+
+ inline size_t length() const noexcept {
+ return ConstRef().length();
+ }
+
+ inline const TCharType* data() const noexcept Y_LIFETIME_BOUND {
+ return ConstRef().data();
+ }
+
+ inline const TCharType* c_str() const noexcept Y_LIFETIME_BOUND {
+ return ConstRef().c_str();
+ }
+
+ // ~~~ STL compatible method to obtain data pointer ~~~
+ // Returns a mutable pointer to the first character. Goes through MutRef(), so the string
+ // is detached first.
+ iterator begin() Y_LIFETIME_BOUND {
+     // Use data() instead of &*begin(): for an empty string begin() equals end(), and
+     // dereferencing the end iterator is not allowed.
+     return MutRef().data();
+ }
+
+ // Returns a mutable pointer one past the last character. Goes through MutRef(), so the
+ // string is detached first.
+ iterator end() Y_LIFETIME_BOUND {
+     // Compute the pointer from data() + size() instead of &*end(): dereferencing the end
+     // iterator is not allowed.
+     auto& str = MutRef();
+
+     return str.data() + str.size();
+ }
+
+ reverse_iterator rbegin() Y_LIFETIME_BOUND {
+ return reverse_iterator(end());
+ }
+
+ reverse_iterator rend() Y_LIFETIME_BOUND {
+ return reverse_iterator(begin());
+ }
+
+ const_iterator begin() const noexcept Y_LIFETIME_BOUND {
+ return TBase::begin();
+ }
+ const_iterator cbegin() const noexcept Y_LIFETIME_BOUND {
+ return TBase::cbegin();
+ }
+
+ const_iterator cend() const noexcept Y_LIFETIME_BOUND {
+ return TBase::cend();
+ }
+
+ const_reverse_iterator crbegin() const noexcept Y_LIFETIME_BOUND {
+ return TBase::crbegin();
+ }
+
+ const_reverse_iterator crend() const noexcept Y_LIFETIME_BOUND {
+ return TBase::crend();
+ }
+
+ const_iterator end() const noexcept Y_LIFETIME_BOUND {
+ return TBase::end();
+ }
+
+ const_reverse_iterator rbegin() const noexcept Y_LIFETIME_BOUND {
+ return TBase::rbegin();
+ }
+
+ const_reverse_iterator rend() const noexcept Y_LIFETIME_BOUND {
+ return TBase::rend();
+ }
+
+ // Reports 0 when the storage is the null string, otherwise the capacity of the storage.
+ inline size_t capacity() const noexcept {
+     return S_->IsNull() ? 0 : S_->capacity();
+ }
+
+ // Clones the storage if IsDetached() is false, then returns a writable pointer to the
+ // character data.
+ TCharType* Detach() Y_LIFETIME_BOUND {
+     const bool shared = !IsDetached();
+
+     if (Y_UNLIKELY(shared)) {
+         Clone();
+     }
+
+     return const_cast<TCharType*>(S_->data());
+ }
+
+ bool IsDetached() const {
+ return 1 == RefCount();
+ }
+
+ // ~~~ Size and capacity ~~~
+ TBasicCowString& resize(size_t n, TCharType c = ' ') Y_LIFETIME_BOUND { // remove or append
+ MutRef().resize(n, c);
+
+ return *this;
+ }
+
+ // ~~~ Constructor ~~~ : FAMILY0(,TBasicCowString)
+ TBasicCowString() noexcept
+ : S_(Construct())
+ {
+ }
+
+ inline explicit TBasicCowString(::NDetail::TReserveTag rt)
+ : S_(Construct<>())
+ {
+ reserve(rt.Capacity);
+ }
+
+ inline TBasicCowString(const TBasicCowString& s)
+ : S_(s.S_)
+ {
+ }
+
+ inline TBasicCowString(TBasicCowString&& s) noexcept
+ : S_(Construct())
+ {
+ s.swap(*this);
+ }
+
+ template <typename T, typename A>
+ explicit inline TBasicCowString(const std::basic_string<TCharType, T, A>& s)
+ : TBasicCowString(s.data(), s.size())
+ {
+ }
+
+ template <typename T, typename A>
+ inline TBasicCowString(std::basic_string<TCharType, T, A>&& s)
+ : S_(s.empty() ? Construct() : Construct(std::move(s)))
+ {
+ }
+
+ TBasicCowString(const TBasicCowString& s, size_t pos, size_t n)
+ : S_(n ? Construct(s, pos, n) : Construct())
+ {
+ }
+
+ TBasicCowString(const TCharType* pc)
+ : TBasicCowString(pc, TBase::StrLen(pc))
+ {
+ }
+ TBasicCowString(std::nullptr_t) = delete;
+
+ TBasicCowString(const TCharType* pc, size_t n)
+ : S_(n ? Construct(pc, n) : Construct())
+ {
+ }
+ TBasicCowString(std::nullptr_t, size_t) = delete;
+
+ TBasicCowString(const TCharType* pc, size_t pos, size_t n)
+ : TBasicCowString(pc + pos, n)
+ {
+ }
+
+ explicit TBasicCowString(TExplicitType<TCharType> c)
+ : TBasicCowString(&c.Value(), 1)
+ {
+ }
+ explicit TBasicCowString(const reference& c)
+ : TBasicCowString(&c, 1)
+ {
+ }
+
+ TBasicCowString(size_t n, TCharType c)
+ : S_(Construct(n, c))
+ {
+ }
+
+ /**
+ * Constructs an uninitialized string of size `uninitialized.Size`. The proper
+ * way to use this ctor is via `TBasicCowString::Uninitialized` factory function.
+ *
+ * @throw std::length_error
+ */
+ TBasicCowString(TUninitialized uninitialized)
+ : S_(Construct<>())
+ {
+ ReserveAndResize(uninitialized.Size);
+ }
+
+ TBasicCowString(const TCharType* b, const TCharType* e)
+ : TBasicCowString(b, NonNegativeDistance(b, e))
+ {
+ }
+
+ explicit TBasicCowString(const TBasicStringBuf<TCharType, TTraits> s)
+ : TBasicCowString(s.data(), s.size())
+ {
+ }
+
+ template <typename Traits>
+ explicit inline TBasicCowString(const std::basic_string_view<TCharType, Traits>& s)
+ : TBasicCowString(s.data(), s.size())
+ {
+ }
+
+ /**
+ * WARN:
+ * Certain invocations of this method will result in a link-time error.
+ * You are free to implement the corresponding methods in cow_string.cpp if you need them.
+ */
+ static TBasicCowString FromAscii(const ::TStringBuf& s) {
+ return TBasicCowString().AppendAscii(s);
+ }
+
+ static TBasicCowString FromUtf8(const ::TStringBuf& s) {
+ return TBasicCowString().AppendUtf8(s);
+ }
+
+ static TBasicCowString FromUtf16(const ::TWtringBuf& s) {
+ return TBasicCowString().AppendUtf16(s);
+ }
+
+ static TBasicCowString Uninitialized(size_t n) {
+ return TBasicCowString(TUninitialized(n));
+ }
+
+private:
+ template <typename T>
+ using TJoinParam = std::conditional_t<std::is_same_v<T, TCharType>, TCharType, TBasicStringBuf<TCharType, TTraits>>;
+
+ template <typename... R>
+ static size_t SumLength(const TBasicStringBuf<TCharType, TTraits> s1, const R&... r) noexcept {
+ return s1.size() + SumLength(r...);
+ }
+
+ template <typename... R>
+ static size_t SumLength(const TCharType /*s1*/, const R&... r) noexcept {
+ return 1 + SumLength(r...);
+ }
+
+ static constexpr size_t SumLength() noexcept {
+ return 0;
+ }
+
+ template <typename... R>
+ static void CopyAll(TCharType* p, const TBasicStringBuf<TCharType, TTraits> s, const R&... r) {
+ TTraits::copy(p, s.data(), s.size());
+ CopyAll(p + s.size(), r...);
+ }
+
+ template <typename... R, class TNextCharType, typename = std::enable_if_t<std::is_same<TCharType, TNextCharType>::value>>
+ static void CopyAll(TCharType* p, const TNextCharType s, const R&... r) {
+ p[0] = s;
+ CopyAll(p + 1, r...);
+ }
+
+ static void CopyAll(TCharType*) noexcept {
+ }
+
+ template <typename... R>
+ static inline TBasicCowString JoinImpl(const R&... r) {
+ TBasicCowString s{TUninitialized{SumLength(r...)}};
+
+ TBasicCowString::CopyAll((TCharType*)s.data(), r...);
+
+ return s;
+ }
+
+public:
+ // Empties the string. A detached string clears its own storage in place; otherwise the
+ // storage is swapped for the one returned by Construct().
+ Y_REINITIALIZES_OBJECT inline void clear() noexcept {
+     if (!IsDetached()) {
+         Construct().Swap(S_);
+
+         return;
+     }
+
+     S_->clear();
+ }
+
+ template <typename... R>
+ static inline TBasicCowString Join(const R&... r) {
+ return JoinImpl(TJoinParam<R>(r)...);
+ }
+
+ // ~~~ Assignment ~~~ : FAMILY0(TBasicCowString&, assign);
+ // Makes the string consist of `size` copies of `ch`.
+ TBasicCowString& assign(size_t size, TCharType ch) Y_LIFETIME_BOUND {
+     ReserveAndResize(size);
+
+     TCharType* const first = begin();
+     std::fill(first, first + size, ch);
+
+     return *this;
+ }
+
+ TBasicCowString& assign(const TBasicCowString& s) Y_LIFETIME_BOUND {
+ TBasicCowString(s).swap(*this);
+
+ return *this;
+ }
+
+ TBasicCowString& assign(const TBasicCowString& s, size_t pos, size_t n) Y_LIFETIME_BOUND {
+ return assign(TBasicCowString(s, pos, n));
+ }
+
+ TBasicCowString& assign(const TCharType* pc) Y_LIFETIME_BOUND {
+ return assign(pc, TBase::StrLen(pc));
+ }
+
+ TBasicCowString& assign(TCharType ch) Y_LIFETIME_BOUND {
+ return assign(&ch, 1);
+ }
+
+ // Replaces the contents with the `len` characters starting at `pc`.
+ // A detached string assigns into its own storage; otherwise a new string is built and
+ // swapped in.
+ TBasicCowString& assign(const TCharType* pc, size_t len) Y_LIFETIME_BOUND {
+#if defined(address_sanitizer_enabled) || defined(thread_sanitizer_enabled)
+     pc = (const TCharType*)HidePointerOrigin((void*)pc);
+#endif
+     if (!IsDetached()) {
+         TBasicCowString replacement(pc, len);
+         replacement.swap(*this);
+
+         return *this;
+     }
+
+     MutRef().assign(pc, len);
+
+     return *this;
+ }
+
+ TBasicCowString& assign(const TCharType* first, const TCharType* last) Y_LIFETIME_BOUND {
+ return assign(first, NonNegativeDistance(first, last));
+ }
+
+ TBasicCowString& assign(const TCharType* pc, size_t pos, size_t n) Y_LIFETIME_BOUND {
+ return assign(pc + pos, n);
+ }
+
+ TBasicCowString& assign(const TBasicStringBuf<TCharType, TTraits> s) Y_LIFETIME_BOUND {
+ return assign(s.data(), s.size());
+ }
+
+ TBasicCowString& assign(const TBasicStringBuf<TCharType, TTraits> s, size_t spos, size_t sn = TBase::npos) Y_LIFETIME_BOUND {
+ return assign(s.SubString(spos, sn));
+ }
+
+ inline TBasicCowString& AssignNoAlias(const TCharType* pc, size_t len) Y_LIFETIME_BOUND {
+ return assign(pc, len);
+ }
+
+ inline TBasicCowString& AssignNoAlias(const TCharType* b, const TCharType* e) Y_LIFETIME_BOUND {
+ return AssignNoAlias(b, e - b);
+ }
+
+ TBasicCowString& AssignNoAlias(const TBasicStringBuf<TCharType, TTraits> s) Y_LIFETIME_BOUND {
+ return AssignNoAlias(s.data(), s.size());
+ }
+
+ TBasicCowString& AssignNoAlias(const TBasicStringBuf<TCharType, TTraits> s, size_t spos, size_t sn = TBase::npos) Y_LIFETIME_BOUND {
+ return AssignNoAlias(s.SubString(spos, sn));
+ }
+
+ /**
+ * WARN:
+ * Certain invocations of this method will result in a link-time error.
+ * You are free to implement the corresponding methods in cow_string.cpp if you need them.
+ */
+ // Replaces the contents with `s`: clears the string, then forwards to AppendAscii.
+ // NOTE(review): the return type is plain `auto`, so the reference returned by AppendAscii
+ // decays and this method returns a copy of the string by value, unlike the `assign`
+ // overloads which return `TBasicCowString&` -- confirm this is intended.
+ auto AssignAscii(const ::TStringBuf& s) {
+ clear();
+ return AppendAscii(s);
+ }
+
+ // Replaces the contents with `s`: clears the string, then forwards to AppendUtf8.
+ // NOTE(review): the return type is plain `auto`, so the reference returned by AppendUtf8
+ // decays and this method returns a copy of the string by value, unlike the `assign`
+ // overloads which return `TBasicCowString&` -- confirm this is intended.
+ auto AssignUtf8(const ::TStringBuf& s) {
+ clear();
+ return AppendUtf8(s);
+ }
+
+ // Replaces the contents with `s`: clears the string, then forwards to AppendUtf16.
+ // NOTE(review): the return type is plain `auto`, so the reference returned by AppendUtf16
+ // decays and this method returns a copy of the string by value, unlike the `assign`
+ // overloads which return `TBasicCowString&` -- confirm this is intended.
+ auto AssignUtf16(const ::TWtringBuf& s) {
+ clear();
+ return AppendUtf16(s);
+ }
+
+ TBasicCowString& operator=(const TBasicCowString& s) Y_LIFETIME_BOUND {
+ return assign(s);
+ }
+
+ TBasicCowString& operator=(TBasicCowString&& s) noexcept Y_LIFETIME_BOUND {
+ swap(s);
+ return *this;
+ }
+
+ template <typename T, typename A>
+ TBasicCowString& operator=(std::basic_string<TCharType, T, A>&& s) noexcept Y_LIFETIME_BOUND {
+ TBasicCowString(std::move(s)).swap(*this);
+
+ return *this;
+ }
+
+ TBasicCowString& operator=(const TBasicStringBuf<TCharType, TTraits> s) Y_LIFETIME_BOUND {
+ return assign(s);
+ }
+
+ TBasicCowString& operator=(std::initializer_list<TCharType> il) Y_LIFETIME_BOUND {
+ return assign(il.begin(), il.end());
+ }
+
+ TBasicCowString& operator=(const TCharType* s) Y_LIFETIME_BOUND {
+ return assign(s);
+ }
+ TBasicCowString& operator=(std::nullptr_t) Y_LIFETIME_BOUND = delete;
+
+ TBasicCowString& operator=(TExplicitType<TCharType> ch) Y_LIFETIME_BOUND {
+ return assign(ch);
+ }
+
+ inline void reserve(size_t len) {
+ MutRef().reserve(len);
+ }
+
+ // ~~~ Appending ~~~ : FAMILY0(TBasicCowString&, append);
+ inline TBasicCowString& append(size_t count, TCharType ch) Y_LIFETIME_BOUND {
+ MutRef().append(count, ch);
+
+ return *this;
+ }
+
+ inline TBasicCowString& append(const TBasicCowString& s) Y_LIFETIME_BOUND {
+ MutRef().append(s.ConstRef());
+
+ return *this;
+ }
+
+ inline TBasicCowString& append(const TBasicCowString& s, size_t pos, size_t n) Y_LIFETIME_BOUND {
+ MutRef().append(s.ConstRef(), pos, n);
+
+ return *this;
+ }
+
+ inline TBasicCowString& append(const TCharType* pc) Y_LIFETIME_BOUND {
+ MutRef().append(pc);
+
+ return *this;
+ }
+
+ inline TBasicCowString& append(TCharType c) Y_LIFETIME_BOUND {
+ MutRef().push_back(c);
+
+ return *this;
+ }
+
+ inline TBasicCowString& append(const TCharType* first, const TCharType* last) Y_LIFETIME_BOUND {
+ MutRef().append(first, last);
+
+ return *this;
+ }
+
+ inline TBasicCowString& append(const TCharType* pc, size_t len) Y_LIFETIME_BOUND {
+ MutRef().append(pc, len);
+
+ return *this;
+ }
+
+ inline void ReserveAndResize(size_t len) {
+ ::ResizeUninitialized(MutRef(), len);
+ }
+
+ // Appends `len` characters starting at `pc` with a raw memcpy into the resized tail.
+ // Does nothing when `len` is zero.
+ TBasicCowString& AppendNoAlias(const TCharType* pc, size_t len) Y_LIFETIME_BOUND {
+     if (!len) {
+         return *this;
+     }
+
+     const auto oldSize = this->size();
+     ReserveAndResize(oldSize + len);
+
+     TCharType* const tail = begin() + oldSize;
+     memcpy(tail, pc, len * sizeof(*pc));
+
+     return *this;
+ }
+
+ TBasicCowString& AppendNoAlias(const TBasicStringBuf<TCharType, TTraits> s) Y_LIFETIME_BOUND {
+ return AppendNoAlias(s.data(), s.size());
+ }
+
+ TBasicCowString& AppendNoAlias(const TBasicStringBuf<TCharType, TTraits> s, size_t spos, size_t sn = TBase::npos) Y_LIFETIME_BOUND {
+ return AppendNoAlias(s.SubString(spos, sn));
+ }
+
+ TBasicCowString& append(const TBasicStringBuf<TCharType, TTraits> s) Y_LIFETIME_BOUND {
+ return append(s.data(), s.size());
+ }
+
+ TBasicCowString& append(const TBasicStringBuf<TCharType, TTraits> s, size_t spos, size_t sn = TBase::npos) Y_LIFETIME_BOUND {
+ return append(s.SubString(spos, sn));
+ }
+
+ TBasicCowString& append(const TCharType* pc, size_t pos, size_t n, size_t pc_len = TBase::npos) Y_LIFETIME_BOUND {
+ return append(pc + pos, Min(n, pc_len - pos));
+ }
+
+ /**
+ * WARN:
+ * Certain invocations of this method will result in a link-time error.
+ * You are free to implement the corresponding methods in cow_string.cpp if you need them.
+ */
+ TBasicCowString& AppendAscii(const ::TStringBuf& s) Y_LIFETIME_BOUND;
+
+ TBasicCowString& AppendUtf8(const ::TStringBuf& s) Y_LIFETIME_BOUND;
+
+ TBasicCowString& AppendUtf16(const ::TWtringBuf& s) Y_LIFETIME_BOUND;
+
+ inline void push_back(TCharType c) {
+ // TODO
+ append(c);
+ }
+
+ template <class T>
+ TBasicCowString& operator+=(const T& s) Y_LIFETIME_BOUND {
+ return append(s);
+ }
+
+ // Returns `s` repeated `count` times; a non-positive `count` yields an empty string.
+ template <class T>
+ friend TBasicCowString operator*(const TBasicCowString& s, T count) {
+     static_assert(std::is_integral<T>::value, "Integral type required.");
+
+     TBasicCowString repeated;
+
+     if (!(count > 0)) {
+         return repeated;
+     }
+
+     repeated.reserve(s.length() * count);
+
+     T done = 0;
+     while (done < count) {
+         repeated += s;
+         ++done;
+     }
+
+     return repeated;
+ }
+
+ // Replaces this string with itself repeated `count` times; a non-positive `count` leaves
+ // it empty. The result is built in a temporary and swapped in at the end.
+ template <class T>
+ TBasicCowString& operator*=(T count) Y_LIFETIME_BOUND {
+     static_assert(std::is_integral<T>::value, "Integral type required.");
+
+     TBasicCowString repeated;
+
+     if (count > 0) {
+         repeated.reserve(length() * count);
+
+         T done = 0;
+         while (done < count) {
+             repeated += *this;
+             ++done;
+         }
+     }
+
+     swap(repeated);
+
+     return *this;
+ }
+
+ operator const TStringType&() const noexcept Y_LIFETIME_BOUND {
+ return this->ConstRef();
+ }
+
+ /* We have an operator casting TBasicCowString to `const TStringType&`, but we explicitly
+ * don't support casting it to `TStringType&`, since such a cast requires detaching the
+ * string and therefore modifies the object. Sometimes the compiler might call
+ * `operator TStringType&` implicitly and that might lead to problems. Check IGNIETFERRO-2155 for details.
+ */
+ template <typename T, typename = std::enable_if_t<std::is_same_v<T, TStringType>>>
+ operator T&() & Y_LIFETIME_BOUND requires false {
+ return this->MutRef();
+ }
+
+ /*
+ * Following overloads of "operator+" aim to choose the cheapest implementation depending on
+ * summand types: lvalues, detached rvalues, shared rvalues.
+ *
+ * General idea is to use the detached-rvalue argument (left or right) to store the result
+ * wherever possible. If a buffer in rvalue is large enough this saves a re-allocation. If
+ * both arguments are rvalues we check which one is detached. If both of them are detached then
+ * the left argument is obviously preferable because you won't need to shift the data.
+ *
+ * If an rvalue is shared then it's basically the same as lvalue because you cannot use its
+ * buffer to store the sum. However, we rely on the fact that append() and prepend() are already
+ * optimized for the shared case and detach the string into the buffer large enough to store
+ * the sum (compared to the detach+reallocation). This way, if we have only one rvalue argument
+ * (left or right) then we simply append/prepend into it, without checking if it's detached or
+ * not. This will be checked inside ReserveAndResize anyway.
+ *
+ * If both arguments cannot be used to store the sum (e.g. two lvalues) then we fall back to the
+ * Join function that constructs a resulting string in the new buffer with the minimum overhead:
+ * malloc + memcpy + memcpy.
+ */
+
+ friend TBasicCowString operator+(TBasicCowString&& s1, const TBasicCowString& s2) Y_WARN_UNUSED_RESULT {
+ s1 += s2;
+ return std::move(s1);
+ }
+
+ friend TBasicCowString operator+(const TBasicCowString& s1, TBasicCowString&& s2) Y_WARN_UNUSED_RESULT {
+ s2.prepend(s1);
+ return std::move(s2);
+ }
+
+ // Concatenates two rvalue strings: `s2` is appended to `s1` and `s1` is moved into the result.
+ friend TBasicCowString operator+(TBasicCowString&& s1, TBasicCowString&& s2) Y_WARN_UNUSED_RESULT {
+ // The branch below is compiled out (`#if 0`). It would store the result in `s2` when only
+ // `s2` is detached.
+#if 0
+ if (!s1.IsDetached() && s2.IsDetached()) {
+ s2.prepend(s1);
+ return std::move(s2);
+ }
+#endif
+ s1 += s2;
+ return std::move(s1);
+ }
+
+ friend TBasicCowString operator+(TBasicCowString&& s1, const TBasicStringBuf<TCharType, TTraits> s2) Y_WARN_UNUSED_RESULT {
+ s1 += s2;
+ return std::move(s1);
+ }
+
+ friend TBasicCowString operator+(TBasicCowString&& s1, const TCharType* s2) Y_WARN_UNUSED_RESULT {
+ s1 += s2;
+ return std::move(s1);
+ }
+
+ friend TBasicCowString operator+(TBasicCowString&& s1, TCharType s2) Y_WARN_UNUSED_RESULT {
+ s1 += s2;
+ return std::move(s1);
+ }
+
+ friend TBasicCowString operator+(TExplicitType<TCharType> ch, const TBasicCowString& s) Y_WARN_UNUSED_RESULT {
+ return Join(TCharType(ch), s);
+ }
+
+ friend TBasicCowString operator+(TExplicitType<TCharType> ch, TBasicCowString&& s) Y_WARN_UNUSED_RESULT {
+ s.prepend(ch);
+ return std::move(s);
+ }
+
+ friend TBasicCowString operator+(const TBasicCowString& s1, const TBasicCowString& s2) Y_WARN_UNUSED_RESULT {
+ return Join(s1, s2);
+ }
+
+ friend TBasicCowString operator+(const TBasicCowString& s1, const TBasicStringBuf<TCharType, TTraits> s2) Y_WARN_UNUSED_RESULT {
+ return Join(s1, s2);
+ }
+
+ friend TBasicCowString operator+(const TBasicCowString& s1, const TCharType* s2) Y_WARN_UNUSED_RESULT {
+ return Join(s1, s2);
+ }
+
+ friend TBasicCowString operator+(const TBasicCowString& s1, TCharType s2) Y_WARN_UNUSED_RESULT {
+ return Join(s1, TBasicStringBuf<TCharType, TTraits>(&s2, 1));
+ }
+
+ friend TBasicCowString operator+(const TCharType* s1, TBasicCowString&& s2) Y_WARN_UNUSED_RESULT {
+ s2.prepend(s1);
+ return std::move(s2);
+ }
+
+ friend TBasicCowString operator+(const TBasicStringBuf<TCharType, TTraits> s1, TBasicCowString&& s2) Y_WARN_UNUSED_RESULT {
+ s2.prepend(s1);
+ return std::move(s2);
+ }
+
+ friend TBasicCowString operator+(const TBasicStringBuf<TCharType, TTraits> s1, const TBasicCowString& s2) Y_WARN_UNUSED_RESULT {
+ return Join(s1, s2);
+ }
+
+ friend TBasicCowString operator+(const TCharType* s1, const TBasicCowString& s2) Y_WARN_UNUSED_RESULT {
+ return Join(s1, s2);
+ }
+
+ friend TBasicCowString operator+(std::basic_string<TCharType, TTraits> l, TBasicCowString r) {
+ return std::move(l) + r.ConstRef();
+ }
+
+ friend TBasicCowString operator+(TBasicCowString l, std::basic_string<TCharType, TTraits> r) {
+ return l.ConstRef() + std::move(r);
+ }
+
+ // ~~~ Prepending ~~~ : FAMILY0(TBasicCowString&, prepend);
+ TBasicCowString& prepend(const TBasicCowString& s) Y_LIFETIME_BOUND {
+ MutRef().insert(0, s.ConstRef());
+
+ return *this;
+ }
+
+ TBasicCowString& prepend(const TBasicCowString& s, size_t pos, size_t n) Y_LIFETIME_BOUND {
+ MutRef().insert(0, s.ConstRef(), pos, n);
+
+ return *this;
+ }
+
+ TBasicCowString& prepend(const TCharType* pc) Y_LIFETIME_BOUND {
+ MutRef().insert(0, pc);
+
+ return *this;
+ }
+
+ TBasicCowString& prepend(size_t n, TCharType c) Y_LIFETIME_BOUND {
+ MutRef().insert(size_t(0), n, c);
+
+ return *this;
+ }
+
+ TBasicCowString& prepend(TCharType c) Y_LIFETIME_BOUND {
+ MutRef().insert(size_t(0), 1, c);
+
+ return *this;
+ }
+
+ TBasicCowString& prepend(const TBasicStringBuf<TCharType, TTraits> s, size_t spos = 0, size_t sn = TBase::npos) Y_LIFETIME_BOUND {
+ return insert(0, s, spos, sn);
+ }
+
+ /* ~~~ Insertion ~~~ : FAMILY1(TBasicCowString&, insert, size_t pos);
+ * Overloads taking a size_t position forward straight to MutRef().insert and return *this.
+ * Overloads taking a const_iterator position end up in the size_t overloads after the iterator
+ * has been converted to an offset with off().
+ */
+ TBasicCowString& insert(size_t pos, const TBasicCowString& s) Y_LIFETIME_BOUND {
+ MutRef().insert(pos, s.ConstRef());
+
+ return *this;
+ }
+ // Inserts a part of `s`: `pos1` and `n1` are forwarded unchanged to insert(pos, str, pos1, n1).
+ TBasicCowString& insert(size_t pos, const TBasicCowString& s, size_t pos1, size_t n1) Y_LIFETIME_BOUND {
+ MutRef().insert(pos, s.ConstRef(), pos1, n1);
+
+ return *this;
+ }
+ // Inserts the characters at `pc`; no length is passed, so `pc` is presumably NUL-terminated.
+ TBasicCowString& insert(size_t pos, const TCharType* pc) Y_LIFETIME_BOUND {
+ MutRef().insert(pos, pc);
+
+ return *this;
+ }
+ // Inserts exactly `len` characters starting at `pc`.
+ TBasicCowString& insert(size_t pos, const TCharType* pc, size_t len) Y_LIFETIME_BOUND {
+ MutRef().insert(pos, pc, len);
+
+ return *this;
+ }
+ // Inserts the range [b, e) before `pos`; implemented as insert(offset of pos, b, e - b).
+ TBasicCowString& insert(const_iterator pos, const_iterator b, const_iterator e) Y_LIFETIME_BOUND {
+ return insert(this->off(pos), b, e - b);
+ }
+ // Inserts `n` copies of `c` at offset `pos`.
+ TBasicCowString& insert(size_t pos, size_t n, TCharType c) Y_LIFETIME_BOUND {
+ MutRef().insert(pos, n, c);
+
+ return *this;
+ }
+ // Iterator form of the overload above: the iterator is converted to an offset first.
+ TBasicCowString& insert(const_iterator pos, size_t len, TCharType ch) Y_LIFETIME_BOUND {
+ return this->insert(this->off(pos), len, ch);
+ }
+ // Inserts a single character before `pos` (forwards with a count of 1).
+ TBasicCowString& insert(const_iterator pos, TCharType ch) Y_LIFETIME_BOUND {
+ return this->insert(pos, 1, ch);
+ }
+ // Inserts a string view, or the part of it selected by (spos, sn), at offset `pos`.
+ TBasicCowString& insert(size_t pos, const TBasicStringBuf<TCharType, TTraits> s, size_t spos = 0, size_t sn = TBase::npos) Y_LIFETIME_BOUND {
+ MutRef().insert(pos, s, spos, sn);
+
+ return *this;
+ }
+
+ // ~~~ Removing ~~~
+ /**
+ * Forwards to erase(pos, n) on the mutable underlying string.
+ * When `pos` is not below length() nothing happens and MutRef() is not called at all.
+ *
+ * @return *this
+ */
+ TBasicCowString& remove(size_t pos, size_t n) Y_LIFETIME_BOUND {
+ if (pos >= length()) {
+ return *this;
+ }
+
+ MutRef().erase(pos, n);
+ return *this;
+ }
+
+ /**
+ * Drops everything from offset `pos` to the end (by default, the whole contents).
+ * When `pos` is not below length() nothing happens and MutRef() is not called at all.
+ *
+ * @return *this
+ */
+ TBasicCowString& remove(size_t pos = 0) Y_LIFETIME_BOUND {
+ if (pos >= length()) {
+ return *this;
+ }
+
+ MutRef().erase(pos);
+ return *this;
+ }
+ // Forwards (pos, n) to MutRef().erase; with the defaults (0, npos) the whole range is passed. Unlike remove(), `pos` is not checked against length() here.
+ TBasicCowString& erase(size_t pos = 0, size_t n = TBase::npos) Y_LIFETIME_BOUND {
+ MutRef().erase(pos, n);
+
+ return *this;
+ }
+ // Erases the range [b, e): `b` is converted to an offset with off() and `e - b` is used as the count.
+ TBasicCowString& erase(const_iterator b, const_iterator e) Y_LIFETIME_BOUND {
+ return erase(this->off(b), e - b);
+ }
+ // Erases the single character at `i` (the range [i, i + 1)).
+ TBasicCowString& erase(const_iterator i) Y_LIFETIME_BOUND {
+ return erase(i, i + 1);
+ }
+ // Removes the last character and returns *this. Calling it on an empty string trips the Y_ASSERT below.
+ TBasicCowString& pop_back() Y_LIFETIME_BOUND {
+ Y_ASSERT(!this->empty());
+
+ MutRef().pop_back();
+
+ return *this;
+ }
+
+ /* ~~~ replacement ~~~ : FAMILY2(TBasicCowString&, replace, size_t pos, size_t n);
+ * Every overload replaces the range described by (pos, n) of this string by forwarding to
+ * MutRef().replace, and returns *this. The overloads differ only in how the replacement is given.
+ */
+ TBasicCowString& replace(size_t pos, size_t n, const TBasicCowString& s) Y_LIFETIME_BOUND {
+ MutRef().replace(pos, n, s.ConstRef());
+
+ return *this;
+ }
+ // Replacement is a part of `s`: `pos1` and `n1` are forwarded unchanged.
+ TBasicCowString& replace(size_t pos, size_t n, const TBasicCowString& s, size_t pos1, size_t n1) Y_LIFETIME_BOUND {
+ MutRef().replace(pos, n, s.ConstRef(), pos1, n1);
+
+ return *this;
+ }
+ // Replacement is the characters at `pc`; no length is passed, so `pc` is presumably NUL-terminated.
+ TBasicCowString& replace(size_t pos, size_t n, const TCharType* pc) Y_LIFETIME_BOUND {
+ MutRef().replace(pos, n, pc);
+
+ return *this;
+ }
+ // Replacement is exactly `len` characters starting at `s`.
+ TBasicCowString& replace(size_t pos, size_t n, const TCharType* s, size_t len) Y_LIFETIME_BOUND {
+ MutRef().replace(pos, n, s, len);
+
+ return *this;
+ }
+ // Replacement is `sn - spos` characters starting at `s + spos`, i.e. `sn` is used as an end offset within `s`, not as a count. NOTE(review): the string-view overload below forwards (spos, sn) unchanged - confirm the difference is intended.
+ TBasicCowString& replace(size_t pos, size_t n, const TCharType* s, size_t spos, size_t sn) Y_LIFETIME_BOUND {
+ MutRef().replace(pos, n, s + spos, sn - spos);
+
+ return *this;
+ }
+ // Replaces `n1` characters at `pos` with `n2` copies of `c`.
+ TBasicCowString& replace(size_t pos, size_t n1, size_t n2, TCharType c) Y_LIFETIME_BOUND {
+ MutRef().replace(pos, n1, n2, c);
+
+ return *this;
+ }
+ // Replacement is a string view, or the part of it selected by (spos, sn).
+ TBasicCowString& replace(size_t pos, size_t n, const TBasicStringBuf<TCharType, TTraits> s, size_t spos = 0, size_t sn = TBase::npos) Y_LIFETIME_BOUND {
+ MutRef().replace(pos, n, s, spos, sn);
+
+ return *this;
+ }
+ // Exchanges the contents of the two strings by swapping their S_ members; declared noexcept.
+ void swap(TBasicCowString& s) noexcept {
+ S_.Swap(s.S_);
+ }
+
+ /**
+ * @returns String suitable for debug printing (like Python's `repr()`).
+ * Format of the string is unspecified and may be changed over time.
+ */
+ TBasicCowString Quote() const {
+ extern TBasicCowString EscapeC(const TBasicCowString&); // block-scope declaration only; the definition is not in this header
+ // Current format: the EscapeC()-processed contents wrapped in a pair of double quotes.
+ return TBasicCowString() + '"' + EscapeC(*this) + '"';
+ }
+
+ /**
+ * Modifies the case of the string, depending on the operation.
+ * @return false if no changes have been made.
+ *
+ * @warning when the value_type is char, these methods will not work with non-ASCII letters.
+ */
+ bool to_lower(size_t pos = 0, size_t n = TBase::npos);
+ bool to_upper(size_t pos = 0, size_t n = TBase::npos);
+ bool to_title(size_t pos = 0, size_t n = TBase::npos);
+
+ // The CamelCase accessors Data(), Size() and Empty() are explicitly deleted, so any call to them
+ // on this type fails to compile. All three are declared const so that the deleted
+ // overload is the one selected for const and non-const objects alike.
+ constexpr const TCharType* Data() const noexcept = delete;
+ constexpr size_t Size() const noexcept = delete;
+ Y_PURE_FUNCTION constexpr bool Empty() const noexcept = delete;
+
+public:
+ /**
+ * Calls `f(index, character)` for every position in [pos, pos + n) and writes the result back
+ * whenever it differs from the character currently stored there.
+ *
+ * `pos` is clamped to the string length and `n` to the number of characters left after `pos`.
+ * Detach() is invoked once, right before the first write; if `f` changes nothing it is never called.
+ *
+ * @return false if no changes have been made, true otherwise.
+ */
+ template <typename T>
+ bool Transform(T&& f, size_t pos = 0, size_t n = TBase::npos) {
+ const size_t total = length();
+
+ // Clamp the requested window to the actual contents.
+ pos = pos > total ? total : pos;
+ n = n > total - pos ? total - pos : n;
+
+ const size_t stop = pos + n;
+ bool modified = false;
+
+ for (size_t idx = pos; idx != stop; ++idx) {
+ auto replacement = f(idx, data()[idx]);
+ if (replacement != data()[idx]) {
+ if (!modified) {
+ // First real change: detach before writing through begin().
+ Detach();
+ modified = true;
+ }
+
+ begin()[idx] = replacement;
+ }
+ }
+
+ return modified;
+ }
+};
+// Convenience aliases of TBasicCowString for the character types char, wchar16 and wchar32.
+using TCowString = TBasicCowString<char>;
+using TUtf16CowString = TBasicCowString<wchar16>;
+using TUtf32CowString = TBasicCowString<wchar32>;
+// Standard-library stream insertion/extraction for TCowString; only declared in this header.
+std::ostream& operator<<(std::ostream&, const TCowString&);
+std::istream& operator>>(std::istream&, TCowString&);
+
+// Returns a lower-cased copy of `s`; `s` itself is not modified.
+// The copy has the same character and traits types as the argument, and the case mapping
+// is performed by the member to_lower() on that copy.
+template <typename TCharType, typename TTraits>
+TBasicCowString<TCharType, TTraits> to_lower(const TBasicCowString<TCharType, TTraits>& s) {
+ TBasicCowString<TCharType, TTraits> ret(s);
+ ret.to_lower();
+ return ret;
+}
+
+// Returns an upper-cased copy of `s`; `s` itself is not modified.
+// The copy has the same character and traits types as the argument, and the case mapping
+// is performed by the member to_upper() on that copy.
+template <typename TCharType, typename TTraits>
+TBasicCowString<TCharType, TTraits> to_upper(const TBasicCowString<TCharType, TTraits>& s) {
+ TBasicCowString<TCharType, TTraits> ret(s);
+ ret.to_upper();
+ return ret;
+}
+
+// Returns a title-cased copy of `s`; `s` itself is not modified.
+// The copy has the same character and traits types as the argument, and the case mapping
+// is performed by the member to_title() on that copy.
+template <typename TCharType, typename TTraits>
+TBasicCowString<TCharType, TTraits> to_title(const TBasicCowString<TCharType, TTraits>& s) {
+ TBasicCowString<TCharType, TTraits> ret(s);
+ ret.to_title();
+ return ret;
+}
+
+namespace std {
+ // std::hash specialization for TCowString.
+ template <>
+ struct hash<TCowString> {
+ using argument_type = TCowString;
+ using result_type = size_t;
+
+ // Feeds the string's data() and size() to NHashPrivate::ComputeStringHash.
+ result_type operator()(const argument_type& str) const noexcept {
+ const auto* chars = str.data();
+ const auto count = str.size();
+ return NHashPrivate::ComputeStringHash(chars, count);
+ }
+ };
+} // namespace std
+
+// interop: free-function form of the member MutRef(); returns whatever s.MutRef() returns, by reference.
+template <class TCharType, class TTraits>
+auto& MutRef(TBasicCowString<TCharType, TTraits>& s Y_LIFETIME_BOUND) {
+ return s.MutRef();
+}
+// Free-function form of the member ConstRef(); returns whatever s.ConstRef() returns, by const reference.
+template <class TCharType, class TTraits>
+const auto& ConstRef(const TBasicCowString<TCharType, TTraits>& s Y_LIFETIME_BOUND) noexcept {
+ return s.ConstRef();
+}
+// Resizes `s` to `len` by calling s.ReserveAndResize(len). NOTE(review): the name suggests the added characters are left uninitialized - confirm against ReserveAndResize.
+template <class TCharType, class TTraits>
+void ResizeUninitialized(TBasicCowString<TCharType, TTraits>& s, size_t len) {
+ s.ReserveAndResize(len);
+}
diff --git a/library/cpp/containers/cow_string/cow_string_ut.cpp b/library/cpp/containers/cow_string/cow_string_ut.cpp
new file mode 100644
index 00000000000..6de74b5c4b7
--- /dev/null
+++ b/library/cpp/containers/cow_string/cow_string_ut.cpp
@@ -0,0 +1,1268 @@
+#include <cow_string.h>
+
+#include <library/cpp/containers/cow_string/str_stl.h>
+#include <library/cpp/containers/cow_string/subst.h>
+#include <library/cpp/containers/cow_string/reverse.h>
+
+#include <util/charset/wide.h>
+#include "util/generic/deque.h"
+#include "util/generic/strbuf.h"
+#include "util/generic/string_ut.h"
+#include "util/generic/vector.h"
+#include "util/generic/yexception.h"
+#include <util/stream/output.h>
+#include <util/string/subst.h>
+
+#include <string>
+#include <sstream>
+#include <algorithm>
+#include <stdexcept>
+
+static_assert(sizeof(TCowString) == sizeof(const char*), "expect sizeof(TCowString) == sizeof(const char*)");
+// Checks that TCowString handles embedded '\0' characters: construction with an explicit length, searching, replace, comparison and remove.
+class TStringTestZero: public TTestBase {
+ UNIT_TEST_SUITE(TStringTestZero);
+ UNIT_TEST(TestZero);
+ UNIT_TEST_SUITE_END();
+
+public:
+ void TestZero() {
+ const char data[] = "abc\0def\0"; // 9 bytes: two explicit NULs plus the implicit terminator
+ TCowString s(data, sizeof(data));
+ UNIT_ASSERT(s.size() == sizeof(data));
+ UNIT_ASSERT(s.StartsWith(s));
+ UNIT_ASSERT(s.EndsWith(s));
+ UNIT_ASSERT(s.Contains('\0'));
+ // Searching: "def" starts at offset 4, the first NUL is at offset 3.
+ const char raw_def[] = "def";
+ const char raw_zero[] = "\0";
+ TCowString def(raw_def, sizeof(raw_def) - 1);
+ TCowString zero(raw_zero, sizeof(raw_zero) - 1);
+ UNIT_ASSERT_EQUAL(4, s.find(raw_def));
+ UNIT_ASSERT_EQUAL(4, s.find(def));
+ UNIT_ASSERT_EQUAL(4, s.find_first_of(raw_def));
+ UNIT_ASSERT_EQUAL(3, s.find_first_of(zero));
+ UNIT_ASSERT_EQUAL(7, s.find_first_not_of(def, 4));
+ // A needle that matches up to an embedded NUL but differs after it must not be found.
+ const char nonSubstring[] = "def\0ghi";
+ UNIT_ASSERT_EQUAL(TCowString::npos, s.find(TCowString(nonSubstring, sizeof(nonSubstring))));
+ // Change the last character of a copy, then put the NUL back; the explicit (0, 1) range is what makes "\0" a one-character replacement.
+ TCowString copy = s;
+ copy.replace(copy.size() - 1, 1, "z");
+ UNIT_ASSERT(s != copy);
+ copy.replace(copy.size() - 1, 1, "\0", 0, 1);
+ UNIT_ASSERT(s == copy);
+ // Comparisons against a 5-character prefix and against the raw pointer forms.
+ TCowString prefix(data, 5);
+ UNIT_ASSERT(s.StartsWith(prefix));
+ UNIT_ASSERT(s != prefix);
+ UNIT_ASSERT(s > prefix);
+ UNIT_ASSERT(s > s.data());
+ UNIT_ASSERT(s == TCowString(s.data(), s.size()));
+ UNIT_ASSERT(data < s);
+ // remove(5) cuts the string down to its first five characters.
+ s.remove(5);
+ UNIT_ASSERT(s == prefix);
+ }
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TStringTestZero);
+
+template <typename TStringType, typename TTestData>
+class TStringStdTestImpl {
+ using TChar = typename TStringType::char_type;
+ using TTraits = typename TStringType::traits_type;
+ using TView = std::basic_string_view<TChar, TTraits>;
+
+ TTestData Data_;
+
+protected:
+ void Constructor() { // constructing with a count of (size_t)-1 characters is expected to throw std::length_error
+ UNIT_ASSERT_EXCEPTION(TStringType((size_t)-1, *Data_.a()), std::length_error);
+ }
+ // reserve() checks; the whole body is compiled out with #if 0, so this method currently does nothing.
+ void reserve() {
+#if 0
+ TStringType s;
+ UNIT_ASSERT_EXCEPTION(s.reserve(s.max_size() + 1), std::length_error);
+
+ // Non-shared behaviour - never shrink
+
+ s.reserve(256);
+ const auto* data = s.data();
+
+ UNIT_ASSERT(s.capacity() >= 256);
+
+ s.reserve(128);
+
+ UNIT_ASSERT(s.capacity() >= 256 && s.data() == data);
+
+ s.resize(64, 'x');
+ s.reserve(10);
+
+ UNIT_ASSERT(s.capacity() >= 256 && s.data() == data);
+
+ // Shared behaviour - always reallocate, just as much as requested
+
+ TStringType holder = s;
+
+ UNIT_ASSERT(s.capacity() >= 256);
+
+ s.reserve(128);
+
+ UNIT_ASSERT(s.capacity() >= 128 && s.capacity() < 256 && s.data() != data);
+ UNIT_ASSERT(s.IsDetached());
+
+ s.resize(64, 'x');
+ data = s.data();
+ holder = s;
+
+ s.reserve(10);
+
+ UNIT_ASSERT(s.capacity() >= 64 && s.capacity() < 128 && s.data() != data);
+ UNIT_ASSERT(s.IsDetached());
+#endif
+ }
+ // Copy construction, assignment and swap between strings built from Data_.str1()/str2() and from the long str___1()/str___2() samples, plus storage in a TVector.
+ void short_string() {
+ TStringType const ref_short_str1(Data_.str1()), ref_short_str2(Data_.str2());
+ TStringType short_str1(ref_short_str1), short_str2(ref_short_str2);
+ TStringType const ref_long_str1(Data_.str__________________________________________________1());
+ TStringType const ref_long_str2(Data_.str__________________________________________________2());
+ TStringType long_str1(ref_long_str1), long_str2(ref_long_str2);
+
+ UNIT_ASSERT(short_str1 == ref_short_str1);
+ UNIT_ASSERT(long_str1 == ref_long_str1);
+
+ { // assign long over short
+ TStringType str1(short_str1);
+ str1 = long_str1;
+ UNIT_ASSERT(str1 == ref_long_str1);
+ }
+
+ { // assign short over long
+ TStringType str1(long_str1);
+ str1 = short_str1;
+ UNIT_ASSERT(str1 == ref_short_str1);
+ }
+
+ { // swap short with short; the second swap restores the initial state
+ short_str1.swap(short_str2);
+ UNIT_ASSERT((short_str1 == ref_short_str2) && (short_str2 == ref_short_str1));
+ short_str1.swap(short_str2);
+ }
+
+ { // swap long with long
+ long_str1.swap(long_str2);
+ UNIT_ASSERT((long_str1 == ref_long_str2) && (long_str2 == ref_long_str1));
+ long_str1.swap(long_str2);
+ }
+
+ { // swap short with long
+ short_str1.swap(long_str1);
+ UNIT_ASSERT((short_str1 == ref_long_str1) && (long_str1 == ref_short_str1));
+ short_str1.swap(long_str1);
+ }
+
+ { // same as above with the operands of swap() exchanged
+ long_str1.swap(short_str1);
+ UNIT_ASSERT((short_str1 == ref_long_str1) && (long_str1 == ref_short_str1));
+ long_str1.swap(short_str1);
+ }
+
+ {
+ // This is to test move constructor
+ TVector<TStringType> str_vect;
+
+ str_vect.push_back(short_str1);
+ str_vect.push_back(long_str1);
+ str_vect.push_back(short_str2);
+ str_vect.push_back(long_str2);
+
+ UNIT_ASSERT(str_vect[0] == ref_short_str1);
+ UNIT_ASSERT(str_vect[1] == ref_long_str1);
+ UNIT_ASSERT(str_vect[2] == ref_short_str2);
+ UNIT_ASSERT(str_vect[3] == ref_long_str2);
+ }
+ }
+ // Erasing by iterator range, by single iterator and by (offset, count); also checks clear().
+ void erase() {
+ TChar const* c_str = Data_.Hello_World();
+ TStringType str(c_str);
+ UNIT_ASSERT(str == c_str);
+
+ str.erase(str.begin() + 1, str.end() - 1); // Erase all but first and last.
+
+ size_t i;
+ for (i = 0; i < str.size(); ++i) { // exactly two characters may remain; any further index fails the test
+ switch (i) {
+ case 0:
+ UNIT_ASSERT(str[i] == *Data_.H());
+ break;
+
+ case 1:
+ UNIT_ASSERT(str[i] == *Data_.d());
+ break;
+
+ default:
+ UNIT_ASSERT(false);
+ }
+ }
+
+ str.insert(1, c_str); // put the full sample between the two remaining characters ...
+ str.erase(str.begin()); // Erase first element.
+ str.erase(str.end() - 1); // Erase last element.
+ UNIT_ASSERT(str == c_str); // ... so that dropping them leaves the sample again
+ str.clear(); // Erase all.
+ UNIT_ASSERT(str.empty());
+
+ str = c_str;
+ UNIT_ASSERT(str == c_str);
+
+ str.erase(1, str.size() - 1); // Erase all but first and last.
+ for (i = 0; i < str.size(); i++) {
+ switch (i) {
+ case 0:
+ UNIT_ASSERT(str[i] == *Data_.H());
+ break;
+
+ case 1:
+ UNIT_ASSERT(str[i] == *Data_.d());
+ break;
+
+ default:
+ UNIT_ASSERT(false);
+ }
+ }
+
+ str.erase(1); // erase from offset 1 with the default count
+ UNIT_ASSERT(str == Data_.H());
+ }
+ // data() of a default-constructed string must not return a null pointer.
+ void data() {
+ TStringType xx;
+
+ // ISO-IEC-14882:1998(E), 21.3.6, paragraph 3
+ UNIT_ASSERT(xx.data() != nullptr);
+ }
+ // c_str() of an empty string points at a zero character, and reflects the contents after assignment followed by +=.
+ void c_str() {
+ TStringType low(Data_._2004_01_01());
+ TStringType xx;
+ TStringType yy;
+
+ // ISO-IEC-14882:1998(E), 21.3.6, paragraph 1
+ UNIT_ASSERT(*(yy.c_str()) == 0);
+
+ // Blocks A and B should follow each other.
+ // Block A:
+ xx = Data_._123456();
+ xx += low;
+ UNIT_ASSERT(xx.c_str() == TView(Data_._1234562004_01_01()));
+ // End of block A
+
+ // Block B:
+ xx = Data_._1234();
+ xx += Data_._5();
+ UNIT_ASSERT(xx.c_str() == TView(Data_._12345()));
+ // End of block B
+ }
+ // Reading s[s.size()] on an empty const string must yield 0.
+ void null_char_of_empty() {
+ const TStringType s;
+
+ // NOTE: https://a.yandex-team.ru/arcadia/junk/grechnik/test_string?rev=r12602052
+ i64 i = s[s.size()];
+ UNIT_ASSERT_VALUES_EQUAL(i, 0);
+ }
+ // Reading s[s.size()] on a non-empty const string must yield 0.
+ void null_char() {
+ // ISO/IEC 14882:1998(E), ISO/IEC 14882:2003(E), 21.3.4 ('... the const version')
+ const TStringType s(Data_._123456());
+
+ UNIT_ASSERT(s[s.size()] == 0);
+ }
+ // Writes 0 through the reference returned by s[s.size()] on an empty, non-const string and reads it back.
+ // Allowed since C++17, see http://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#2475
+ void null_char_assignment_to_subscript_of_empty() {
+ TStringType s;
+
+ using reference = typename TStringType::reference;
+ reference trailing_zero = s[s.size()];
+ trailing_zero = 0;
+ UNIT_ASSERT(trailing_zero == 0);
+ }
+ // Writes 0 through the reference returned by s[s.size()] on a non-empty, non-const string and reads it back.
+ // Allowed since C++17, see http://www.open-std.org/jtc1/sc22/wg21/docs/lwg-defects.html#2475
+ void null_char_assignment_to_subscript_of_nonempty() {
+ TStringType s(Data_._123456());
+
+ using reference = typename TStringType::reference;
+ reference trailing_zero = s[s.size()];
+ trailing_zero = 0;
+ UNIT_ASSERT(trailing_zero == 0);
+ }
+ // Writes 0 through *(begin() + size()) on an empty string and reads it back; the reference is volatile-qualified.
+ // Dereferencing string end() is not allowed by C++ standard as of C++20, avoid using in real code.
+ void null_char_assignment_to_end_of_empty() {
+ TStringType s;
+
+ volatile auto& trailing_zero = *(s.begin() + s.size());
+ trailing_zero = 0;
+ UNIT_ASSERT(trailing_zero == 0);
+ }
+ // Writes 0 through *(begin() + size()) on a non-empty string and reads it back; the reference is volatile-qualified.
+ // Dereferencing string end() is not allowed by C++ standard as of C++20, avoid using in real code.
+ void null_char_assignment_to_end_of_nonempty() {
+ TStringType s(Data_._123456());
+
+ volatile auto& trailing_zero = *(s.begin() + s.size());
+ trailing_zero = 0;
+ UNIT_ASSERT(trailing_zero == 0);
+ }
+ // insert() overloads, including insertion of ranges that point into the string's own contents.
+ void insert() {
+ TStringType strorg = Data_.This_is_test_string_for_string_calls();
+ TStringType str;
+
+ // In case of reallocation there is no auto reference problem
+ // so we reserve a big enough TStringType to be sure to test this
+ // particular point.
+
+ str.reserve(100);
+ str = strorg;
+
+ // test self insertion:
+ str.insert(10, str.c_str() + 5, 15);
+ UNIT_ASSERT(str == Data_.This_is_teis_test_string_st_string_for_string_calls());
+
+ str = strorg;
+ str.insert(15, str.c_str() + 5, 25);
+ UNIT_ASSERT(str == Data_.This_is_test_stis_test_string_for_stringring_for_string_calls());
+
+ str = strorg;
+ str.insert(0, str.c_str() + str.size() - 4, 4);
+ UNIT_ASSERT(str == Data_.allsThis_is_test_string_for_string_calls());
+
+ str = strorg;
+ str.insert(0, str.c_str() + str.size() / 2 - 1, str.size() / 2 + 1);
+ UNIT_ASSERT(str == Data_.ng_for_string_callsThis_is_test_string_for_string_calls());
+
+ str = strorg;
+ typename TStringType::iterator b = str.begin();
+ typename TStringType::const_iterator s = str.begin() + str.size() / 2 - 1;
+ typename TStringType::const_iterator e = str.end();
+ str.insert(b, s, e);
+ UNIT_ASSERT(str == Data_.ng_for_string_callsThis_is_test_string_for_string_calls());
+
+#if 0
+ // AV (presumably "access violation"). NOTE(review): this disabled snippet refers to `Data`, while the fixture member is named `Data_`.
+ str = strorg;
+ str.insert(str.begin(), str.begin() + str.size() / 2 - 1, str.end());
+ UNIT_ASSERT(str == Data.ng_for_string_callsThis_is_test_string_for_string_calls());
+#endif
+ // From here on: the non-aliasing overloads, applied to small digit strings.
+ TStringType str0;
+ str0.insert(str0.begin(), 5, *Data_._0());
+ UNIT_ASSERT(str0 == Data_._00000());
+
+ TStringType str1;
+ {
+ typename TStringType::size_type pos = 0, nb = 2;
+ str1.insert(pos, nb, *Data_._1());
+ }
+ UNIT_ASSERT(str1 == Data_._11());
+
+ str0.insert(0, str1);
+ UNIT_ASSERT(str0 == Data_._1100000());
+
+ TStringType str2(Data_._2345());
+ str0.insert(str0.size(), str2, 1, 2);
+ UNIT_ASSERT(str0 == Data_._110000034());
+
+ str1.insert(str1.begin() + 1, 2, *Data_._2());
+ UNIT_ASSERT(str1 == Data_._1221());
+
+ str1.insert(2, Data_._333333(), 3);
+ UNIT_ASSERT(str1 == Data_._1233321());
+
+ str1.insert(4, Data_._4444());
+ UNIT_ASSERT(str1 == Data_._12334444321());
+
+ str1.insert(str1.begin() + 6, *Data_._5());
+ UNIT_ASSERT(str1 == Data_._123344544321());
+ }
+ // resize() to zero (empty and non-empty string), to a shorter length and to a longer, zero-filled length.
+ void resize() {
+ TStringType s;
+
+ s.resize(0);
+
+ UNIT_ASSERT(*s.c_str() == 0);
+
+ s = Data_._1234567();
+
+ s.resize(0);
+ UNIT_ASSERT(*s.c_str() == 0);
+
+ s = Data_._1234567();
+ s.resize(1);
+ UNIT_ASSERT(s.size() == 1);
+ UNIT_ASSERT(*s.c_str() == *Data_._1());
+ UNIT_ASSERT(*(s.c_str() + 1) == 0);
+
+ s = Data_._1234567();
+#if 0
+ s.resize(10);
+#else
+ s.resize(10, 0); // the single-argument form above is disabled; the fill character is passed explicitly
+#endif
+ UNIT_ASSERT(s.size() == 10);
+ UNIT_ASSERT(s[6] == *Data_._7());
+ UNIT_ASSERT(s[7] == 0);
+ UNIT_ASSERT(s[8] == 0);
+ UNIT_ASSERT(s[9] == 0);
+ }
+ // find, find_first_of and find_first_not_of on the sample Data_.one_two_three_one_two_three().
+ void find() {
+ TStringType s(Data_.one_two_three_one_two_three());
+
+ UNIT_ASSERT(s.find(Data_.one()) == 0);
+ UNIT_ASSERT(s.find(*Data_.t()) == 4);
+ UNIT_ASSERT(s.find(*Data_.t(), 5) == 8);
+ // Misses, including a search that starts at npos, must return npos.
+ UNIT_ASSERT(s.find(Data_.four()) == TStringType::npos);
+ UNIT_ASSERT(s.find(Data_.one(), TStringType::npos) == TStringType::npos);
+ UNIT_ASSERT(s.find_first_of(Data_.abcde()) == 2);
+ UNIT_ASSERT(s.find_first_not_of(Data_.enotw_()) == 9);
+ }
+ // capacity() must stay at least size() and below max_size(), both for an empty string and while 18 characters are appended one by one.
+ void capacity() {
+ TStringType s;
+
+ UNIT_ASSERT(s.capacity() < s.max_size());
+ UNIT_ASSERT(s.capacity() >= s.size());
+
+ for (int i = 0; i < 18; ++i) {
+ s += ' ';
+
+ UNIT_ASSERT(s.capacity() > 0);
+ UNIT_ASSERT(s.capacity() < s.max_size());
+ UNIT_ASSERT(s.capacity() >= s.size());
+ }
+ }
+ // assign() from a pointer range and from another string, followed by plain assignment into two function-local statics.
+ void assign() {
+ TStringType s;
+ TChar const* cstr = Data_.test_string_for_assign();
+
+ s.assign(cstr, cstr + 22); // the next assertion expects these 22 characters to be the whole sample
+ UNIT_ASSERT(s == Data_.test_string_for_assign());
+
+ TStringType s2(Data_.other_test_string());
+ s.assign(s2);
+ UNIT_ASSERT(s == s2);
+
+ static TStringType str1;
+ static TStringType str2;
+
+ // short TStringType optim:
+ str1 = Data_._123456();
+ // longer than short TStringType:
+ str2 = Data_._1234567890123456789012345678901234567890();
+
+ UNIT_ASSERT(str1[5] == *Data_._6());
+ UNIT_ASSERT(str2[29] == *Data_._0());
+ }
+ // copy() into a raw buffer: without an offset, with an offset, and with an offset that must throw std::out_of_range.
+ void copy() {
+ TStringType s(Data_.foo());
+ TChar dest[4];
+ dest[0] = dest[1] = dest[2] = dest[3] = 1; // pre-fill with 1 so that cells copy() did not write can be recognised
+ s.copy(dest, 4);
+ int pos = 0;
+ UNIT_ASSERT(dest[pos++] == *Data_.f());
+ UNIT_ASSERT(dest[pos++] == *Data_.o());
+ UNIT_ASSERT(dest[pos++] == *Data_.o());
+ UNIT_ASSERT(dest[pos++] == 1);
+
+ dest[0] = dest[1] = dest[2] = dest[3] = 1;
+ s.copy(dest, 4, 2); // start at offset 2: only one character is expected to be written
+ pos = 0;
+ UNIT_ASSERT(dest[pos++] == *Data_.o());
+ UNIT_ASSERT(dest[pos++] == 1);
+
+ UNIT_ASSERT_EXCEPTION(s.copy(dest, 4, 5), std::out_of_range);
+ }
+ // Walks cbegin()..cend() and compares every character with the source literal. Uses TCowString and char directly rather than TStringType/TChar.
+ void cbegin_cend() {
+ const char helloThere[] = "Hello there";
+ TCowString s = helloThere;
+ size_t index = 0;
+ for (auto it = s.cbegin(); s.cend() != it; ++it, ++index) {
+ UNIT_ASSERT_VALUES_EQUAL(helloThere[index], *it);
+ }
+ }
+ // compare() overloads; each group checks the "equal", "greater" and "less" outcome in that order.
+ void compare() {
+ TStringType str1(Data_.abcdef());
+ TStringType str2;
+ // compare(str) and compare(ptr, len)
+ str2 = Data_.abcdef();
+ UNIT_ASSERT(str1.compare(str2) == 0);
+ UNIT_ASSERT(str1.compare(str2.data(), str2.size()) == 0);
+ str2 = Data_.abcde();
+ UNIT_ASSERT(str1.compare(str2) > 0);
+ UNIT_ASSERT(str1.compare(str2.data(), str2.size()) > 0);
+ str2 = Data_.abcdefg();
+ UNIT_ASSERT(str1.compare(str2) < 0);
+ UNIT_ASSERT(str1.compare(str2.data(), str2.size()) < 0);
+ // compare(ptr) without an explicit length
+ UNIT_ASSERT(str1.compare(Data_.abcdef()) == 0);
+ UNIT_ASSERT(str1.compare(Data_.abcde()) > 0);
+ UNIT_ASSERT(str1.compare(Data_.abcdefg()) < 0);
+ // compare(pos, n, str)
+ str2 = Data_.cde();
+ UNIT_ASSERT(str1.compare(2, 3, str2) == 0);
+ str2 = Data_.cd();
+ UNIT_ASSERT(str1.compare(2, 3, str2) > 0);
+ str2 = Data_.cdef();
+ UNIT_ASSERT(str1.compare(2, 3, str2) < 0);
+ // compare(pos, n, str, pos2, n2)
+ str2 = Data_.abcdef();
+ UNIT_ASSERT(str1.compare(2, 3, str2, 2, 3) == 0);
+ UNIT_ASSERT(str1.compare(2, 3, str2, 2, 2) > 0);
+ UNIT_ASSERT(str1.compare(2, 3, str2, 2, 4) < 0);
+ // compare(pos, n, ptr, len)
+ UNIT_ASSERT(str1.compare(2, 3, Data_.cdefgh(), 3) == 0);
+ UNIT_ASSERT(str1.compare(2, 3, Data_.cdefgh(), 2) > 0);
+ UNIT_ASSERT(str1.compare(2, 3, Data_.cdefgh(), 4) < 0);
+ }
+ // find_last_of with a character pointer, a string, (ptr, pos, n) and (char, pos) arguments.
+ void find_last_of() {
+ // 21.3.6.4
+ TStringType s(Data_.one_two_three_one_two_three());
+
+ UNIT_ASSERT(s.find_last_of(Data_.abcde()) == 26);
+ UNIT_ASSERT(s.find_last_of(TStringType(Data_.abcde())) == 26);
+
+ TStringType test(Data_.aba());
+ // (ptr, pos, n): the search starts at `pos` and moves towards the front.
+ UNIT_ASSERT(test.find_last_of(Data_.a(), 2, 1) == 2);
+ UNIT_ASSERT(test.find_last_of(Data_.a(), 1, 1) == 0);
+ UNIT_ASSERT(test.find_last_of(Data_.a(), 0, 1) == 0);
+ // (char, pos): same expectations as the pointer form above.
+ UNIT_ASSERT(test.find_last_of(*Data_.a(), 2) == 2);
+ UNIT_ASSERT(test.find_last_of(*Data_.a(), 1) == 0);
+ UNIT_ASSERT(test.find_last_of(*Data_.a(), 0) == 0);
+ }
+#if 0 // rfind() test is disabled. NOTE(review): the body refers to `Data`, while the fixture member is named `Data_`.
+ void rfind() {
+ // 21.3.6.2
+ TStringType s(Data.one_two_three_one_two_three());
+
+ UNIT_ASSERT(s.rfind(Data.two()) == 18);
+ UNIT_ASSERT(s.rfind(Data.two(), 0) == TStringType::npos);
+ UNIT_ASSERT(s.rfind(Data.two(), 11) == 4);
+ UNIT_ASSERT(s.rfind(*Data.w()) == 19);
+
+ TStringType test(Data.aba());
+
+ UNIT_ASSERT(test.rfind(Data.a(), 2, 1) == 2);
+ UNIT_ASSERT(test.rfind(Data.a(), 1, 1) == 0);
+ UNIT_ASSERT(test.rfind(Data.a(), 0, 1) == 0);
+
+ UNIT_ASSERT(test.rfind(*Data.a(), 2) == 2);
+ UNIT_ASSERT(test.rfind(*Data.a(), 1) == 0);
+ UNIT_ASSERT(test.rfind(*Data.a(), 0) == 0);
+ }
+#endif
+ void find_last_not_of() { // find_last_not_of with a character pointer, (ptr, pos, n) and (char, pos) arguments
+ // 21.3.6.6
+ TStringType s(Data_.one_two_three_one_two_three());
+
+ UNIT_ASSERT(s.find_last_not_of(Data_.ehortw_()) == 15);
+
+ TStringType test(Data_.aba());
+ // (ptr, pos, n): npos is expected when every character up to `pos` belongs to the set.
+ UNIT_ASSERT(test.find_last_not_of(Data_.a(), 2, 1) == 1);
+ UNIT_ASSERT(test.find_last_not_of(Data_.b(), 2, 1) == 2);
+ UNIT_ASSERT(test.find_last_not_of(Data_.a(), 1, 1) == 1);
+ UNIT_ASSERT(test.find_last_not_of(Data_.b(), 1, 1) == 0);
+ UNIT_ASSERT(test.find_last_not_of(Data_.a(), 0, 1) == TStringType::npos);
+ UNIT_ASSERT(test.find_last_not_of(Data_.b(), 0, 1) == 0);
+ // (char, pos): same expectations as the pointer form above.
+ UNIT_ASSERT(test.find_last_not_of(*Data_.a(), 2) == 1);
+ UNIT_ASSERT(test.find_last_not_of(*Data_.b(), 2) == 2);
+ UNIT_ASSERT(test.find_last_not_of(*Data_.a(), 1) == 1);
+ UNIT_ASSERT(test.find_last_not_of(*Data_.b(), 1) == 0);
+ UNIT_ASSERT(test.find_last_not_of(*Data_.a(), 0) == TStringType::npos);
+ UNIT_ASSERT(test.find_last_not_of(*Data_.b(), 0) == 0);
+ }
+#if 0 // Iterator-based replace() test is disabled. NOTE(review): the body refers to `Data`, while the fixture member is named `Data_`.
+ void replace() {
+ // This test case is for the non template basic_TString::replace method,
+ // this is why we play with the const iterators and reference to guarantee
+ // that the right method is called.
+
+ const TStringType v(Data._78());
+ TStringType s(Data._123456());
+ TStringType const& cs = s;
+
+ typename TStringType::iterator i = s.begin() + 1;
+ s.replace(i, i + 3, v.begin(), v.end());
+ UNIT_ASSERT(s == Data._17856());
+
+ s = Data._123456();
+ i = s.begin() + 1;
+ s.replace(i, i + 1, v.begin(), v.end());
+ UNIT_ASSERT(s == Data._1783456());
+
+ s = Data._123456();
+ i = s.begin() + 1;
+ typename TStringType::const_iterator ci = s.begin() + 1;
+ s.replace(i, i + 3, ci + 3, cs.end());
+ UNIT_ASSERT(s == Data._15656());
+
+ s = Data._123456();
+ i = s.begin() + 1;
+ ci = s.begin() + 1;
+ s.replace(i, i + 3, ci, ci + 2);
+ UNIT_ASSERT(s == Data._12356());
+
+ s = Data._123456();
+ i = s.begin() + 1;
+ ci = s.begin() + 1;
+ s.replace(i, i + 3, ci + 1, cs.end());
+ UNIT_ASSERT(s == Data._1345656());
+
+ s = Data._123456();
+ i = s.begin();
+ ci = s.begin() + 1;
+ s.replace(i, i, ci, ci + 1);
+ UNIT_ASSERT(s == Data._2123456());
+
+ s = Data._123456();
+ s.replace(s.begin() + 4, s.end(), cs.begin(), cs.end());
+ UNIT_ASSERT(s == Data._1234123456());
+
+ // This is the test for the template replace method.
+
+ s = Data._123456();
+ typename TStringType::iterator b = s.begin() + 4;
+ typename TStringType::iterator e = s.end();
+ typename TStringType::const_iterator rb = s.begin();
+ typename TStringType::const_iterator re = s.end();
+ s.replace(b, e, rb, re);
+ UNIT_ASSERT(s == Data._1234123456());
+
+ s = Data._123456();
+ s.replace(s.begin() + 4, s.end(), s.begin(), s.end());
+ UNIT_ASSERT(s == Data._1234123456());
+
+ TStringType strorg(Data.This_is_test_StringT_for_StringT_calls());
+ TStringType str = strorg;
+ str.replace(5, 15, str.c_str(), 10);
+ UNIT_ASSERT(str == Data.This_This_is_tefor_StringT_calls());
+
+ str = strorg;
+ str.replace(5, 5, str.c_str(), 10);
+ UNIT_ASSERT(str == Data.This_This_is_test_StringT_for_StringT_calls());
+
+ #if !defined(STLPORT) || defined(_STLP_MEMBER_TEMPLATES)
+ deque<TChar> cdeque;
+ cdeque.push_back(*Data.I());
+ str.replace(str.begin(), str.begin() + 11, cdeque.begin(), cdeque.end());
+ UNIT_ASSERT(str == Data.Is_test_StringT_for_StringT_calls());
+ #endif
+ }
+#endif
+}; // TStringStdTestImpl
+// Test suite for TCowString: registers tests from the privately inherited TStringTestImpl<TCowString, TTestData<char>> plus the three TCowString-specific tests defined below.
+class TStringTest: public TTestBase, private TStringTestImpl<TCowString, TTestData<char>> {
+public:
+ UNIT_TEST_SUITE(TStringTest);
+ UNIT_TEST(TestMaxSize);
+ UNIT_TEST(TestConstructors);
+ UNIT_TEST(TestReplace);
+ UNIT_TEST(TestRefCount);
+ UNIT_TEST(TestFind);
+ UNIT_TEST(TestContains);
+ UNIT_TEST(TestOperators);
+ UNIT_TEST(TestMulOperators);
+ UNIT_TEST(TestFuncs);
+ UNIT_TEST(TestUtils);
+ UNIT_TEST(TestEmpty);
+ UNIT_TEST(TestJoin);
+ UNIT_TEST(TestCopy);
+ UNIT_TEST(TestStrCpy);
+ UNIT_TEST(TestPrefixSuffix);
+ UNIT_TEST(TestCharRef);
+ UNIT_TEST(TestBack)
+ UNIT_TEST(TestFront)
+ UNIT_TEST(TestIterators);
+ UNIT_TEST(TestReverseIterators);
+ UNIT_TEST(TestAppendUtf16)
+ UNIT_TEST(TestFillingAssign)
+ UNIT_TEST(TestStdStreamApi)
+ // UNIT_TEST(TestOperatorsCI); must fail
+ UNIT_TEST_SUITE_END();
+ // AppendUtf16 on a narrow string: the UTF-16 literal is appended and the result is compared with a narrow literal.
+ void TestAppendUtf16() {
+ TCowString appended = TCowString("А роза упала").AppendUtf16(u" на лапу Азора");
+ UNIT_ASSERT(appended == "А роза упала на лапу Азора");
+ }
+ // assign(count, char) must replace the previous contents with `count` copies of the character.
+ void TestFillingAssign() {
+ TCowString s("abc");
+ s.assign(5, 'a');
+ UNIT_ASSERT_VALUES_EQUAL(s, "aaaaa");
+ }
+ // Round trip through std::stringstream using operator<< and operator>>.
+ void TestStdStreamApi() {
+ const TCowString data = "abracadabra";
+ std::stringstream ss;
+ ss << data;
+
+ UNIT_ASSERT_VALUES_EQUAL(data, ss.str());
+
+ ss << '\n'
+ << data << std::endl;
+ // The stream now holds the word twice, separated by a newline; one extraction is expected to yield a single copy and overwrite "xxx".
+ TCowString read = "xxx";
+ ss >> read;
+ UNIT_ASSERT_VALUES_EQUAL(read, data);
+ }
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TStringTest);
+// Test suite for TUtf16CowString: registers tests from the privately inherited TStringTestImpl<TUtf16CowString, TTestData<wchar16>> plus the UTF-16 specific tests defined below.
+class TWideStringTest: public TTestBase, private TStringTestImpl<TUtf16CowString, TTestData<wchar16>> {
+public:
+ UNIT_TEST_SUITE(TWideStringTest);
+ UNIT_TEST(TestConstructors);
+ UNIT_TEST(TestReplace);
+ UNIT_TEST(TestRefCount);
+ UNIT_TEST(TestFind);
+ UNIT_TEST(TestContains);
+ UNIT_TEST(TestOperators);
+ UNIT_TEST(TestLetOperator)
+ UNIT_TEST(TestMulOperators);
+ UNIT_TEST(TestFuncs);
+ UNIT_TEST(TestUtils);
+ UNIT_TEST(TestEmpty);
+ UNIT_TEST(TestJoin);
+ UNIT_TEST(TestCopy);
+ UNIT_TEST(TestStrCpy);
+ UNIT_TEST(TestPrefixSuffix);
+ UNIT_TEST(TestCharRef);
+ UNIT_TEST(TestBack);
+ UNIT_TEST(TestFront)
+ UNIT_TEST(TestDecodingMethods);
+ UNIT_TEST(TestIterators);
+ UNIT_TEST(TestReverseIterators);
+ UNIT_TEST(TestStringLiterals);
+ UNIT_TEST_SUITE_END();
+
+private:
+ void TestDecodingMethods() { // only the two FromAscii checks are active; everything inside #if 0 is compiled out
+ UNIT_ASSERT(TUtf16CowString::FromAscii("").empty());
+ UNIT_ASSERT(TUtf16CowString::FromAscii("abc") == ASCIIToWide("abc"));
+
+#if 0 // no wide conversions support
+ const char* text = "123kx83abcd ej)#$%ddja&%J&";
+ TUtf16CowString wtext = ASCIIToWide(text);
+
+ UNIT_ASSERT(wtext == TUtf16CowString::FromAscii(text));
+
+ TCowString strtext(text);
+ UNIT_ASSERT(wtext == TUtf16CowString::FromAscii(strtext));
+
+ TStringBuf strbuftext(text);
+ UNIT_ASSERT(wtext == TUtf16CowString::FromAscii(strbuftext));
+
+ UNIT_ASSERT(wtext.substr(5) == TUtf16CowString::FromAscii(text + 5));
+
+ const wchar16 wideCyrillicAlphabet[] = {
+ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
+ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
+ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
+ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
+ 0x00};
+
+ TUtf16CowString strWide(wideCyrillicAlphabet);
+ TCowString strUtf8 = WideToUTF8(strWide);
+
+ UNIT_ASSERT(strWide == TUtf16CowString::FromUtf8(strUtf8.c_str()));
+ UNIT_ASSERT(strWide == TUtf16CowString::FromUtf8(strUtf8));
+ UNIT_ASSERT(strWide == TUtf16CowString::FromUtf8(TStringBuf(strUtf8)));
+
+ // assign
+
+ TUtf16CowString s1;
+ s1.AssignAscii("1234");
+ UNIT_ASSERT(s1 == ASCIIToWide("1234"));
+
+ s1.AssignUtf8(strUtf8);
+ UNIT_ASSERT(s1 == strWide);
+
+ s1.AssignAscii(text);
+ UNIT_ASSERT(s1 == wtext);
+
+ // append
+
+ TUtf16CowString s2;
+ TUtf16CowString testAppend = strWide;
+ s2.AppendUtf8(strUtf8);
+ UNIT_ASSERT(testAppend == s2);
+
+ testAppend += ' ';
+ s2.AppendAscii(" ");
+ UNIT_ASSERT(testAppend == s2);
+
+ testAppend += '_';
+ s2.AppendUtf8("_");
+ UNIT_ASSERT(testAppend == s2);
+
+ testAppend += wtext;
+ s2.AppendAscii(text);
+ UNIT_ASSERT(testAppend == s2);
+
+ testAppend += wtext;
+ s2.AppendUtf8(text);
+ UNIT_ASSERT(testAppend == s2);
+#endif
+ }
+ // operator= from a single wchar16, from a character pointer and from another string.
+ void TestLetOperator() {
+ TUtf16CowString str;
+
+ str = wchar16('X');
+ UNIT_ASSERT(str == TUtf16CowString::FromAscii("X"));
+
+ const TUtf16CowString hello = TUtf16CowString::FromAscii("hello");
+ str = hello.data();
+ UNIT_ASSERT(str == hello);
+
+ str = hello;
+ UNIT_ASSERT(str == hello);
+ }
+ // Construction from u"" literals; results are compared with FromAscii / FromUtf8 of the matching narrow literals.
+ void TestStringLiterals() {
+ TUtf16CowString s1 = u"hello";
+ UNIT_ASSERT_VALUES_EQUAL(s1, TUtf16CowString::FromAscii("hello"));
+
+ TUtf16CowString s2 = u"привет";
+ UNIT_ASSERT_VALUES_EQUAL(s2, TUtf16CowString::FromUtf8("привет"));
+ }
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TWideStringTest);
+
+class TUtf32StringTest: public TTestBase, private TStringTestImpl<TUtf32CowString, TTestData<wchar32>> {
+public:
+ UNIT_TEST_SUITE(TUtf32StringTest);
+ UNIT_TEST(TestConstructors);
+ UNIT_TEST(TestReplace);
+ UNIT_TEST(TestRefCount);
+ UNIT_TEST(TestFind);
+ UNIT_TEST(TestContains);
+ UNIT_TEST(TestOperators);
+ UNIT_TEST(TestLetOperator)
+ UNIT_TEST(TestMulOperators);
+ UNIT_TEST(TestFuncs);
+ UNIT_TEST(TestUtils);
+ UNIT_TEST(TestEmpty);
+ UNIT_TEST(TestJoin);
+ UNIT_TEST(TestCopy);
+ UNIT_TEST(TestStrCpy);
+ UNIT_TEST(TestPrefixSuffix);
+ UNIT_TEST(TestCharRef);
+ UNIT_TEST(TestBack);
+ UNIT_TEST(TestFront)
+ UNIT_TEST(TestDecodingMethods);
+ UNIT_TEST(TestDecodingMethodsMixedStr);
+ UNIT_TEST(TestIterators);
+ UNIT_TEST(TestReverseIterators);
+ UNIT_TEST(TestStringLiterals);
+ UNIT_TEST_SUITE_END();
+
+private:
+ void TestDecodingMethods() { // only the two FromAscii checks are active; everything inside #if 0 is compiled out
+ UNIT_ASSERT(TUtf32CowString::FromAscii("").empty());
+ UNIT_ASSERT(TUtf32CowString::FromAscii("abc") == ASCIIToUTF32("abc"));
+
+#if 0 // no wide conversions support
+ const char* text = "123kx83abcd ej)#$%ddja&%J&";
+ TUtf32CowString wtext = ASCIIToUTF32(text);
+
+ UNIT_ASSERT(wtext == TUtf32CowString::FromAscii(text));
+
+ TCowString strtext(text);
+ UNIT_ASSERT(wtext == TUtf32CowString::FromAscii(strtext));
+
+ TStringBuf strbuftext(text);
+ UNIT_ASSERT(wtext == TUtf32CowString::FromAscii(strbuftext));
+
+ UNIT_ASSERT(wtext.substr(5) == TUtf32CowString::FromAscii(text + 5));
+
+ const wchar32 wideCyrillicAlphabet[] = {
+ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
+ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
+ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
+ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
+ 0x00};
+
+ TUtf32CowString strWide(wideCyrillicAlphabet);
+ TCowString strUtf8 = WideToUTF8(strWide);
+
+ UNIT_ASSERT(strWide == TUtf32CowString::FromUtf8(strUtf8.c_str()));
+ UNIT_ASSERT(strWide == TUtf32CowString::FromUtf8(strUtf8));
+ UNIT_ASSERT(strWide == TUtf32CowString::FromUtf8(TStringBuf(strUtf8)));
+
+ // assign
+
+ TUtf32CowString s1;
+ s1.AssignAscii("1234");
+ UNIT_ASSERT(s1 == ASCIIToUTF32("1234"));
+
+ s1.AssignUtf8(strUtf8);
+ UNIT_ASSERT(s1 == strWide);
+
+ s1.AssignAscii(text);
+ UNIT_ASSERT(s1 == wtext);
+
+ // append
+
+ TUtf32CowString s2;
+ TUtf32CowString testAppend = strWide;
+ s2.AppendUtf8(strUtf8);
+ UNIT_ASSERT(testAppend == s2);
+
+ testAppend += ' ';
+ s2.AppendAscii(" ");
+ UNIT_ASSERT(testAppend == s2);
+
+ testAppend += '_';
+ s2.AppendUtf8("_");
+ UNIT_ASSERT(testAppend == s2);
+
+ testAppend += wtext;
+ s2.AppendAscii(text);
+ UNIT_ASSERT(testAppend == s2);
+
+ testAppend += wtext;
+ s2.AppendUtf8(text);
+
+ UNIT_ASSERT(testAppend == s2);
+#endif
+ }
+
+ void TestDecodingMethodsMixedStr() {
+ UNIT_ASSERT(TUtf32CowString::FromAscii("").empty());
+ UNIT_ASSERT(TUtf32CowString::FromAscii("abc") == ASCIIToUTF32("abc"));
+
+#if 0 // no wide conversions support
+ const char* text = "123kx83abcd ej)#$%ddja&%J&";
+ TUtf32CowString wtext = ASCIIToUTF32(text);
+
+ UNIT_ASSERT(wtext == TUtf32CowString::FromAscii(text));
+
+ TCowString strtext(text);
+ UNIT_ASSERT(wtext == TUtf32CowString::FromAscii(strtext));
+
+ TStringBuf strbuftext(text);
+ UNIT_ASSERT(wtext == TUtf32CowString::FromAscii(strbuftext));
+
+ UNIT_ASSERT(wtext.substr(5) == TUtf32CowString::FromAscii(text + 5));
+
+ const wchar32 cyrilicAndLatinWide[] = {
+ 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
+ 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
+ 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
+ wchar32('z'),
+ 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
+ wchar32('z'),
+ 0x00};
+
+ TUtf32CowString strWide(cyrilicAndLatinWide);
+ TCowString strUtf8 = WideToUTF8(strWide);
+
+ UNIT_ASSERT(strWide == TUtf32CowString::FromUtf8(strUtf8.c_str()));
+ UNIT_ASSERT(strWide == TUtf32CowString::FromUtf8(strUtf8));
+ UNIT_ASSERT(strWide == UTF8ToUTF32<true>(strUtf8));
+ UNIT_ASSERT(strWide == UTF8ToUTF32<false>(strUtf8));
+ UNIT_ASSERT(strWide == TUtf32CowString::FromUtf8(TStringBuf(strUtf8)));
+
+ // assign
+
+ TUtf32CowString s1;
+ s1.AssignAscii("1234");
+ UNIT_ASSERT(s1 == ASCIIToUTF32("1234"));
+
+ s1.AssignUtf8(strUtf8);
+ UNIT_ASSERT(s1 == strWide);
+
+ s1.AssignAscii(text);
+ UNIT_ASSERT(s1 == wtext);
+
+ // append
+
+ TUtf32CowString s2;
+ TUtf32CowString testAppend = strWide;
+ s2.AppendUtf16(UTF8ToWide(strUtf8));
+ UNIT_ASSERT(testAppend == s2);
+
+ testAppend += ' ';
+ s2.AppendAscii(" ");
+ UNIT_ASSERT(testAppend == s2);
+
+ testAppend += '_';
+ s2.AppendUtf8("_");
+ UNIT_ASSERT(testAppend == s2);
+
+ testAppend += wtext;
+ s2.AppendAscii(text);
+ UNIT_ASSERT(testAppend == s2);
+
+ testAppend += wtext;
+ s2.AppendUtf8(text);
+
+ UNIT_ASSERT(testAppend == s2);
+#endif
+ }
+
+ void TestLetOperator() {
+ TUtf32CowString str;
+
+ str = wchar32('X');
+ UNIT_ASSERT(str == TUtf32CowString::FromAscii("X"));
+
+ const TUtf32CowString hello = TUtf32CowString::FromAscii("hello");
+ str = hello.data();
+ UNIT_ASSERT(str == hello);
+
+ str = hello;
+ UNIT_ASSERT(str == hello);
+ }
+
+ void TestStringLiterals() {
+ TUtf32CowString s1 = U"hello";
+ UNIT_ASSERT_VALUES_EQUAL(s1, TUtf32CowString::FromAscii("hello"));
+
+ TUtf32CowString s2 = U"привет";
+ UNIT_ASSERT_VALUES_EQUAL(s2, TUtf32CowString::FromUtf8("привет"));
+ }
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TUtf32StringTest);
+
+// Registers the test methods listed below for TCowString with char test data.
+// The methods themselves are not defined in this class; presumably they are
+// inherited from TStringStdTestImpl -- confirm against its definition.
+class TStringStdTest: public TTestBase, private TStringStdTestImpl<TCowString, TTestData<char>> {
+public:
+ UNIT_TEST_SUITE(TStringStdTest);
+ UNIT_TEST(Constructor);
+ UNIT_TEST(reserve);
+ UNIT_TEST(short_string);
+ UNIT_TEST(erase);
+ UNIT_TEST(data);
+ UNIT_TEST(c_str);
+ UNIT_TEST(null_char_of_empty);
+ UNIT_TEST(null_char);
+ UNIT_TEST(null_char_assignment_to_subscript_of_empty);
+ UNIT_TEST(null_char_assignment_to_subscript_of_nonempty);
+ UNIT_TEST(null_char_assignment_to_end_of_empty);
+ UNIT_TEST(null_char_assignment_to_end_of_nonempty);
+ UNIT_TEST(insert);
+ UNIT_TEST(resize);
+ UNIT_TEST(find);
+ UNIT_TEST(capacity);
+ UNIT_TEST(assign);
+ UNIT_TEST(copy);
+ UNIT_TEST(cbegin_cend);
+ UNIT_TEST(compare);
+ UNIT_TEST(find_last_of);
+ // NOTE(review): rfind and replace are compiled out of the run; the reason is not visible here.
+#if 0
+ UNIT_TEST(rfind);
+ UNIT_TEST(replace);
+#endif
+ UNIT_TEST(find_last_not_of);
+ UNIT_TEST_SUITE_END();
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TStringStdTest);
+
+// Registers the test methods listed below for TUtf16CowString with wchar16 test data.
+// The methods themselves are not defined in this class; presumably they are
+// inherited from TStringStdTestImpl -- confirm against its definition.
+class TWideStringStdTest: public TTestBase, private TStringStdTestImpl<TUtf16CowString, TTestData<wchar16>> {
+public:
+ UNIT_TEST_SUITE(TWideStringStdTest);
+ UNIT_TEST(Constructor);
+ UNIT_TEST(reserve);
+ UNIT_TEST(short_string);
+ UNIT_TEST(erase);
+ UNIT_TEST(data);
+ UNIT_TEST(c_str);
+ UNIT_TEST(null_char_of_empty);
+ UNIT_TEST(null_char);
+ UNIT_TEST(null_char_assignment_to_subscript_of_empty);
+ UNIT_TEST(null_char_assignment_to_subscript_of_nonempty);
+ UNIT_TEST(null_char_assignment_to_end_of_empty);
+ UNIT_TEST(null_char_assignment_to_end_of_nonempty);
+ UNIT_TEST(insert);
+ UNIT_TEST(resize);
+ UNIT_TEST(find);
+ UNIT_TEST(capacity);
+ UNIT_TEST(assign);
+ UNIT_TEST(copy);
+ UNIT_TEST(cbegin_cend);
+ UNIT_TEST(compare);
+ UNIT_TEST(find_last_of);
+ // NOTE(review): rfind and replace are compiled out of the run; the reason is not visible here.
+#if 0
+ UNIT_TEST(rfind);
+ UNIT_TEST(replace);
+#endif
+ UNIT_TEST(find_last_not_of);
+ UNIT_TEST_SUITE_END();
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TWideStringStdTest);
+
+// Checks that a TCowString can copy-initialize standard string types.
+// The `T x = cowString;` form is intentional: it is the conversion under test.
+Y_UNIT_TEST_SUITE(TStringConversionTest) {
+ // TCowString -> std::string
+ Y_UNIT_TEST(ConversionToStdStringTest) {
+ TCowString abra = "cadabra";
+ std::string stdAbra = abra;
+ UNIT_ASSERT_VALUES_EQUAL(stdAbra, "cadabra");
+ }
+
+ // TCowString -> std::string_view
+ Y_UNIT_TEST(ConversionToStdStringViewTest) {
+ TCowString abra = "cadabra";
+ std::string_view stdAbra = abra;
+ UNIT_ASSERT_VALUES_EQUAL(stdAbra, "cadabra");
+ }
+} // Y_UNIT_TEST_SUITE(TStringConversionTest)
+
+// THash<TCowString> must accept several string-like argument types and hash
+// equal contents to equal values.
+Y_UNIT_TEST_SUITE(HashFunctorTests) {
+ Y_UNIT_TEST(TestTransparency) {
+ THash<TCowString> h;
+ // The same one-character text held as const char*, TStringBuf, TCowString and std::string.
+ const char* ptr = "a";
+ const TStringBuf strbuf = ptr;
+ const TCowString str = ptr;
+ const std::string stdStr = ptr;
+ UNIT_ASSERT_VALUES_EQUAL(h(ptr), h(strbuf));
+ UNIT_ASSERT_VALUES_EQUAL(h(ptr), h(str));
+ UNIT_ASSERT_VALUES_EQUAL(h(ptr), h(stdStr));
+ }
+} // Y_UNIT_TEST_SUITE(HashFunctorTests)
+
+// Tests for helpers whose behaviour on out-of-range positions differs from
+// what the test names call "throwing": LegacyErase, LegacyReplace, AppendNoAlias.
+Y_UNIT_TEST_SUITE(StdNonConformant) {
+ // No assertion: the test passes as long as LegacyErase with position 10
+ // on an empty string returns normally.
+ Y_UNIT_TEST(TestEraseNoThrow) {
+ TCowString x;
+
+ LegacyErase(x, 10);
+ }
+
+ Y_UNIT_TEST(TestReplaceNoThrow) {
+ TCowString x;
+
+ // In-range position on an empty string: the replacement text is inserted.
+ LegacyReplace(x, 0, 0, "1");
+
+ UNIT_ASSERT_VALUES_EQUAL(x, "1");
+
+ // Position 10 is past the end of "1": the string is expected to stay unchanged.
+ LegacyReplace(x, 10, 0, "1");
+
+ UNIT_ASSERT_VALUES_EQUAL(x, "1");
+ }
+
+ Y_UNIT_TEST(TestNoAlias) {
+ TCowString s = "x";
+
+ s.AppendNoAlias("abc", 3);
+
+ UNIT_ASSERT_VALUES_EQUAL(s, "xabc");
+ // Rebuilding from c_str() checks the content up to the first zero character as well.
+ UNIT_ASSERT_VALUES_EQUAL(TCowString(s.c_str()), "xabc");
+ }
+} // Y_UNIT_TEST_SUITE(StdNonConformant)
+
+// Interoperability between TCowString and std::string: passing a TCowString
+// where a (const) std::string reference is expected, and converting back.
+Y_UNIT_TEST_SUITE(Interop) {
+ // Appends "y" to a std::string in place.
+ static void Mutate(std::string& s) {
+ s += "y";
+ }
+
+ // Forwards to the std::string& overload through MutRef(s).
+ static void Mutate(TCowString& s) {
+ Mutate(MutRef(s));
+ }
+
+ Y_UNIT_TEST(TestMutate) {
+ TCowString x = "x";
+
+ Mutate(x);
+
+ UNIT_ASSERT_VALUES_EQUAL(x, "xy");
+ }
+
+ // Returns a copy of `s` with "y" appended.
+ static std::string TransformStd(const std::string& s) {
+ return s + "y";
+ }
+
+ // Passes a TCowString to a function taking const std::string& and
+ // converts the returned std::string back to TCowString.
+ static TCowString Transform(const TCowString& s) {
+ return TransformStd(s);
+ }
+
+ Y_UNIT_TEST(TestTransform) {
+ UNIT_ASSERT_VALUES_EQUAL(Transform(TCowString("x")), "xy");
+ }
+
+ // ConstRef applied to a temporary TCowString, used as an operand of operator+.
+ Y_UNIT_TEST(TestTemp) {
+ UNIT_ASSERT_VALUES_EQUAL("x" + ConstRef(TCowString("y")), "xy");
+ }
+
+ // Asserts that the character data seen through `s` lives at address `expected`;
+ // `descr` is attached to the failure message.
+ static void ComparePointers(const std::string& s, const void* expected, TStringBuf descr) {
+ UNIT_ASSERT_VALUES_EQUAL_C(static_cast<const void*>(s.c_str()), expected, descr);
+ }
+
+ Y_UNIT_TEST(TestConstShared) {
+ TCowString s(600, 'a');
+ const void* stringStart = s.c_str();
+ ComparePointers(s, stringStart, "unique");
+ // `shared` is a second string object constructed from `s`; it is not read afterwards.
+ TCowString shared{s};
+ ComparePointers(s, stringStart, "shared"); // converting a TCowString to a `const std::string&` should not cause data cloning
+ }
+} // Y_UNIT_TEST_SUITE(Interop)
+
+// Regression tests for iterator usage on a TCowString that has a copy.
+Y_UNIT_TEST_SUITE(CowPitfalls) {
+ // Builds a TString from the [begin(), end()) range of `string`.
+ // `reverse` selects which of the two iterators is obtained first:
+ // begin() then end() when false, end() then begin() when true.
+ template<class T>
+ static TString CopyStringViaBeginEndIterators(T& string, bool reverse) {
+ decltype(string.begin()) b;
+ decltype(string.end()) e;
+ if (!reverse) {
+ b = string.begin();
+ e = string.end();
+ } else {
+ e = string.end();
+ b = string.begin();
+ }
+ return TString{b, e};
+ }
+
+ Y_UNIT_TEST(IteratorCallOrder) {
+ const TString ref(600, 'a');
+ for (const bool reverse : {false, true}) {
+ TCowString s = {ref.begin(), ref.end()};
+ // sanity check
+ UNIT_ASSERT_VALUES_EQUAL_C(CopyStringViaBeginEndIterators<const TCowString>(s, reverse), TStringBuf(ref), LabeledOutput(reverse));
+ UNIT_ASSERT_VALUES_EQUAL_C(CopyStringViaBeginEndIterators<TCowString>(s, reverse), TStringBuf(ref), LabeledOutput(reverse));
+ // test
+ // NOTE(review): `copy` is never read; presumably it exists so that `s` shares its
+ // buffer and the non-const begin()/end() calls below have to detach -- confirm.
+ TCowString copy = s;
+ UNIT_ASSERT_VALUES_EQUAL_C(CopyStringViaBeginEndIterators<const TCowString>(s, reverse), TStringBuf(ref), LabeledOutput(reverse));
+ UNIT_ASSERT_VALUES_EQUAL_C(CopyStringViaBeginEndIterators<TCowString>(s, reverse), TStringBuf(ref), LabeledOutput(reverse));
+ }
+ }
+
+ // Writes through a range-for over a string that has a copy and checks
+ // that every one of the 200 characters of `str` was overwritten.
+ Y_UNIT_TEST(RangeFor) {
+ TCowString str;
+ str.resize(200);
+ TCowString copy = str;
+ for (auto& c : str) {
+ c = 'x';
+ }
+ UNIT_ASSERT_VALUES_EQUAL(str, TString(200, 'x'));
+ }
+} // Y_UNIT_TEST_SUITE(CowPitfalls)
diff --git a/library/cpp/containers/cow_string/output.cpp b/library/cpp/containers/cow_string/output.cpp
new file mode 100644
index 00000000000..e0b4924ad31
--- /dev/null
+++ b/library/cpp/containers/cow_string/output.cpp
@@ -0,0 +1,46 @@
+#include "cow_string.h"
+
+#include <util/charset/wide.h>
+#include <util/stream/input.h>
+#include <util/string/cast.h>
+
+constexpr size_t MAX_UTF8_BYTES = 4; // UTF-8-encoded code point takes between 1 and 4 bytes
+
+// Converts the `n` wide characters starting at `w` to UTF-8 and writes the result to `o`.
+template <typename TCharType>
+static void WriteString(IOutputStream& o, const TCharType* w, size_t n) {
+ // Worst case every input character expands to MAX_UTF8_BYTES bytes; one extra byte is reserved on top.
+ TTempBuf utf8(n * MAX_UTF8_BYTES + 1);
+ auto* const out = utf8.Data();
+ size_t utf8Size = 0;
+ WideToUTF8(w, n, out, utf8Size);
+ o.Write(out, utf8Size);
+}
+
+// Writes the characters of the string to the stream as they are.
+template <>
+void Out<TCowString>(IOutputStream& o, const TCowString& p) {
+ o.Write(p.data(), p.size());
+}
+
+// UTF-16 string: converted to UTF-8 by WriteString before being written.
+template <>
+void Out<TUtf16CowString>(IOutputStream& o, const TUtf16CowString& w) {
+ WriteString(o, w.c_str(), w.size());
+}
+
+// UTF-32 string: converted to UTF-8 by WriteString before being written.
+template <>
+void Out<TUtf32CowString>(IOutputStream& o, const TUtf32CowString& w) {
+ WriteString(o, w.c_str(), w.size());
+}
+
+// Character references are cast to the plain character type and streamed with operator<<.
+template <>
+void Out<TBasicCharRef<TCowString>>(IOutputStream& o, const TBasicCharRef<TCowString>& c) {
+ o << static_cast<char>(c);
+}
+
+template <>
+void Out<TBasicCharRef<TUtf16CowString>>(IOutputStream& o, const TBasicCharRef<TUtf16CowString>& c) {
+ o << static_cast<wchar16>(c);
+}
+
+template <>
+void Out<TBasicCharRef<TUtf32CowString>>(IOutputStream& o, const TBasicCharRef<TUtf32CowString>& c) {
+ o << static_cast<wchar32>(c);
+}
diff --git a/library/cpp/containers/cow_string/reverse.cpp b/library/cpp/containers/cow_string/reverse.cpp
new file mode 100644
index 00000000000..b5bd10d250a
--- /dev/null
+++ b/library/cpp/containers/cow_string/reverse.cpp
@@ -0,0 +1,32 @@
+#include "reverse.h"
+
+#include <util/generic/vector.h>
+#include <util/charset/wide_specific.h>
+
+#include <algorithm>
+
+// Reverses the characters of `string` in place.
+void ReverseInPlace(TCowString& string) {
+ auto* const first = string.begin();
+ auto* const last = first + string.size();
+ std::reverse(first, last);
+}
+
+// Reverses a UTF-16 string in place symbol by symbol: the code units that
+// W16SymbolSize reports as one symbol keep their relative order.
+void ReverseInPlace(TUtf16CowString& string) {
+ auto* const first = string.begin();
+ const auto length = string.size();
+ auto* const last = first + length;
+
+ // Fill a scratch buffer from its tail towards its head, then copy it back over the string.
+ TVector<wchar16> reversed(length);
+ wchar16* dst = reversed.data() + length;
+ wchar16* src = first;
+ while (src < last) {
+ const size_t unitCount = W16SymbolSize(src, last);
+ dst = std::copy_backward(src, src + unitCount, dst);
+ src += unitCount;
+ }
+ std::copy(reversed.begin(), reversed.end(), first);
+}
+
+// Reverses the UTF-32 characters of `string` in place.
+void ReverseInPlace(TUtf32CowString& string) {
+ auto* const first = string.begin();
+ auto* const last = first + string.size();
+ std::reverse(first, last);
+}
diff --git a/library/cpp/containers/cow_string/reverse.h b/library/cpp/containers/cow_string/reverse.h
new file mode 100644
index 00000000000..d27b0b4fed6
--- /dev/null
+++ b/library/cpp/containers/cow_string/reverse.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include <library/cpp/containers/cow_string/cow_string.h>
+
+void ReverseInPlace(TCowString& string);
+
+/** NB. UTF-16 is variable-length encoding because of the surrogate pairs.
+ * This function takes this into account and treats a surrogate pair as a single symbol.
+ * Ex. if [C D] is a surrogate pair,
+ * A B [C D] E
+ * will become
+ * E [C D] B A
+ */
+void ReverseInPlace(TUtf16CowString& string);
+
+void ReverseInPlace(TUtf32CowString& string);
diff --git a/library/cpp/containers/cow_string/str_stl.h b/library/cpp/containers/cow_string/str_stl.h
new file mode 100644
index 00000000000..d8256a6e10b
--- /dev/null
+++ b/library/cpp/containers/cow_string/str_stl.h
@@ -0,0 +1,67 @@
+#pragma once
+
+#include <util/str_stl.h>
+
+// hash<> specializations for TCowString, TUtf16CowString and TUtf32CowString.
+// Each one adds nothing of its own and inherits from NHashPrivate::TStringHash
+// for the matching character type.
+// NOTE(review): this header includes only <util/str_stl.h>; the string types are
+// presumably declared by whoever includes it -- confirm the intended include order.
+template <>
+struct hash<TCowString>: ::NHashPrivate::TStringHash<char> {
+};
+
+template <>
+struct hash<TUtf16CowString>: ::NHashPrivate::TStringHash<wchar16> {
+};
+
+template <>
+struct hash<TUtf32CowString>: ::NHashPrivate::TStringHash<wchar32> {
+};
+
+// TEqualTo<> specializations: equality is inherited from the comparator of the
+// corresponding string-buffer type, and is_transparent is declared on each.
+template <>
+struct TEqualTo<TCowString>: public TEqualTo<TStringBuf> {
+ using is_transparent = void;
+};
+
+template <>
+struct TEqualTo<TUtf16CowString>: public TEqualTo<TWtringBuf> {
+ using is_transparent = void;
+};
+
+template <>
+struct TEqualTo<TUtf32CowString>: public TEqualTo<TUtf32StringBuf> {
+ using is_transparent = void;
+};
+
+// Equality via strnicmp: the sizes must match and strnicmp over that many
+// characters must report no difference.
+template <>
+struct TCIEqualTo<TCowString> {
+ inline bool operator()(const TCowString& a, const TCowString& b) const {
+ if (a.size() != b.size()) {
+ return false;
+ }
+ return strnicmp(a.data(), b.data(), a.size()) == 0;
+ }
+};
+
+// TLess<> specializations: ordering is inherited from the comparator of the
+// corresponding string-buffer type, and is_transparent is declared on each.
+template <>
+struct TLess<TCowString>: public TLess<TStringBuf> {
+ using is_transparent = void;
+};
+
+template <>
+struct TLess<TUtf16CowString>: public TLess<TWtringBuf> {
+ using is_transparent = void;
+};
+
+template <>
+struct TLess<TUtf32CowString>: public TLess<TUtf32StringBuf> {
+ using is_transparent = void;
+};
+
+// TGreater<> specializations: ordering is inherited from the comparator of the
+// corresponding string-buffer type, and is_transparent is declared on each.
+template <>
+struct TGreater<TCowString>: public TGreater<TStringBuf> {
+ using is_transparent = void;
+};
+
+template <>
+struct TGreater<TUtf16CowString>: public TGreater<TWtringBuf> {
+ using is_transparent = void;
+};
+
+template <>
+struct TGreater<TUtf32CowString>: public TGreater<TUtf32StringBuf> {
+ using is_transparent = void;
+};
diff --git a/library/cpp/containers/cow_string/subst.cpp b/library/cpp/containers/cow_string/subst.cpp
new file mode 100644
index 00000000000..d4e9ff3395d
--- /dev/null
+++ b/library/cpp/containers/cow_string/subst.cpp
@@ -0,0 +1,182 @@
+#include "subst.h"
+
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+#include <util/system/compiler.h>
+
+#include <string>
+#include <type_traits>
+
+// a bit of template magic (to be fast and unreadable)
+//
+// Helper of the in-place branch of SubstGlobalImpl.
+// Advances srcPos and dstPos over the unchanged characters in [srcPos, off).
+// The characters are copied down to dstPos only when dstPos < srcPos; otherwise
+// both positions are simply moved forward and nothing is written.
+// With Main == true the replacement `to` (toSize characters) is then written at dstPos;
+// with Main == false `to` is not touched.
+template <class TStringType, class TTo, bool Main>
+static Y_FORCE_INLINE void MoveBlock(typename TStringType::value_type* ptr, size_t& srcPos, size_t& dstPos, const size_t off, const TTo to, const size_t toSize) {
+ const size_t unchangedSize = off - srcPos;
+ if (dstPos < srcPos) {
+ // forward copy: the destination is always behind the source
+ for (size_t i = 0; i < unchangedSize; ++i) {
+ ptr[dstPos++] = ptr[srcPos++];
+ }
+ } else {
+ dstPos += unchangedSize;
+ srcPos += unchangedSize;
+ }
+
+ if (Main) {
+ for (size_t i = 0; i < toSize; ++i) {
+ ptr[dstPos++] = to[i];
+ }
+ }
+}
+
+// Tells whether the character ranges [data(), data() + size()) of `a` and `b` overlap.
+// An empty range never overlaps anything.
+template <typename T, typename U>
+static bool IsIntersect(const T& a, const U& b) noexcept {
+ const auto* const aBegin = a.data();
+ const auto* const bBegin = b.data();
+ if (bBegin < aBegin) {
+ // Normalize so that the first argument is the one that starts first.
+ return IsIntersect(b, a);
+ }
+ if (a.empty() || b.empty()) {
+ return false;
+ }
+
+ const auto* const aEnd = aBegin + a.size();
+ const auto* const bEnd = bBegin + b.size();
+ const bool bStartsInsideA = aBegin <= bBegin && bBegin < aEnd;
+ const bool bEndsInsideA = aBegin < bEnd && bEnd <= aEnd;
+ return bStartsInsideA || bEndsInsideA;
+}
+
+/**
+ * Replaces all occurrences of substring @c from in string @c s to string @c to.
+ * Uses two separate implementations (inplace for shrink and append for grow case)
+ * See IGNIETFERRO-394
+ *
+ * The search starts at @c fromPos; characters before the first match are kept as they are.
+ * @c from and @c to must not overlap the storage of @c s (checked with Y_ASSERT).
+ *
+ * @return Number of replacements made; 0 when @c from is empty or never found.
+ **/
+template <class TStringType, typename TStringViewType = TBasicStringBuf<typename TStringType::value_type>>
+static inline size_t SubstGlobalImpl(TStringType& s, const TStringViewType from, const TStringViewType to, size_t fromPos = 0) {
+ if (from.empty()) {
+ return 0;
+ }
+
+ Y_ASSERT(!IsIntersect(s, from));
+ Y_ASSERT(!IsIntersect(s, to));
+
+ const size_t fromSize = from.size();
+ const size_t toSize = to.size();
+ size_t replacementsCount = 0;
+ // `off` is the search cursor; `srcPos` marks the start of the not-yet-copied part of `s`
+ size_t off = fromPos;
+ size_t srcPos = 0;
+
+ if (toSize > fromSize) {
+ // string will grow: append to another string
+ TStringType result;
+ for (; (off = TStringViewType(s).find(from, off)) != TStringType::npos; off += fromSize) {
+ if (!replacementsCount) {
+ // first replacement occurred, we can prepare result string
+ result.reserve(s.size() + s.size() / 3);
+ }
+ result.append(s.begin() + srcPos, s.begin() + off);
+ result.append(to.data(), to.size());
+ srcPos = off + fromSize;
+ ++replacementsCount;
+ }
+ if (replacementsCount) {
+ // append tail
+ result.append(s.begin() + srcPos, s.end());
+ s = std::move(result);
+ }
+ return replacementsCount;
+ }
+
+ // string will not grow: use inplace algo
+ size_t dstPos = 0;
+ // NOTE(review): unlike SubstCharGlobalImpl, s.begin() is called here before any match is known;
+ // if begin() copies the string's storage, that cost is paid even when nothing is replaced -- confirm.
+ typename TStringType::value_type* ptr = &*s.begin();
+ for (; (off = TStringViewType(s).find(from, off)) != TStringType::npos; off += fromSize) {
+ Y_ASSERT(dstPos <= srcPos);
+ MoveBlock<TStringType, TStringViewType, true>(ptr, srcPos, dstPos, off, to, toSize);
+ srcPos = off + fromSize;
+ ++replacementsCount;
+ }
+
+ if (replacementsCount) {
+ // append tail
+ MoveBlock<TStringType, TStringViewType, false>(ptr, srcPos, dstPos, s.size(), to, toSize);
+ s.resize(dstPos);
+ }
+ return replacementsCount;
+}
+
+/// Replaces all occurrences of the 'from' symbol in a string to the 'to' symbol.
+///
+/// @param s       String to modify in place.
+/// @param from    Character to look for.
+/// @param to      Character written over every match.
+/// @param fromPos Index at which the search starts; characters before it are left untouched.
+///
+/// @return Number of matches replaced; 0 when fromPos is not below s.size() or nothing matches.
+template <class TStringType>
+inline size_t SubstCharGlobalImpl(TStringType& s, typename TStringType::value_type from, typename TStringType::value_type to, size_t fromPos = 0) {
+ if (fromPos >= s.size()) {
+ return 0;
+ }
+
+ fromPos = s.find(from, fromPos);
+ if (fromPos == TStringType::npos) {
+ return 0;
+ }
+
+ // s.begin() might cause memory copying, so it is called only once a match is known to exist
+ auto* it = &*s.begin() + fromPos;
+ // at this point string is copied and it's safe to use constant s.end() to iterate
+ const auto* const sEnd = &*s.end();
+
+ // The first match is replaced here and then skipped. Examining it again in the
+ // loops below would count it twice whenever `from == to`.
+ *it++ = to;
+ size_t result = 1;
+
+ const auto substitute = [&result, from, to](auto* p) {
+ if (*p == from) {
+ *p = to;
+ ++result;
+ }
+ };
+
+ // unrolled loop goes first because it is more likely that `it` will be properly aligned
+ for (const auto* const end = sEnd - (sEnd - it) % 4; it < end; it += 4) {
+ substitute(it);
+ substitute(it + 1);
+ substitute(it + 2);
+ substitute(it + 3);
+ }
+ // fewer than four characters are left at this point
+ for (; it < sEnd; ++it) {
+ substitute(it);
+ }
+
+ return result;
+}
+
+/* Standard says that `char16_t` is a distinct type and has same size, signedness and alignment as
+ * `std::uint_least16_t`, so we check if `char16_t` has same signedness and size as `wchar16` to be
+ * sure that we can make safe casts between values of these types and pointers.
+ */
+static_assert(sizeof(wchar16) == sizeof(char16_t), "");
+static_assert(sizeof(wchar32) == sizeof(char32_t), "");
+static_assert(std::is_unsigned<wchar16>::value == std::is_unsigned<char16_t>::value, "");
+static_assert(std::is_unsigned<wchar32>::value == std::is_unsigned<char32_t>::value, "");
+
+// Substring overloads: replace every occurrence of `what` with `with`, searching from index `from`.
+// Each forwards to SubstGlobalImpl and returns the number of replacements it reports.
+size_t SubstGlobal(TCowString& text, const TStringBuf what, const TStringBuf with, size_t from) {
+ return SubstGlobalImpl(text, what, with, from);
+}
+
+size_t SubstGlobal(TUtf16CowString& text, const TWtringBuf what, const TWtringBuf with, size_t from) {
+ return SubstGlobalImpl(text, what, with, from);
+}
+
+size_t SubstGlobal(TUtf32CowString& text, const TUtf32StringBuf what, const TUtf32StringBuf with, size_t from) {
+ return SubstGlobalImpl(text, what, with, from);
+}
+
+// Single-character overloads: replace every `what` with `with`, searching from index `from`.
+// Each forwards to SubstCharGlobalImpl and returns the number of replacements it reports.
+size_t SubstGlobal(TCowString& text, char what, char with, size_t from) {
+ return SubstCharGlobalImpl(text, what, with, from);
+}
+
+size_t SubstGlobal(TUtf16CowString& text, wchar16 what, wchar16 with, size_t from) {
+ // wchar16 and char16_t are checked by the static_asserts in this file to agree in size and signedness
+ return SubstCharGlobalImpl(text, static_cast<char16_t>(what), static_cast<char16_t>(with), from);
+}
+
+size_t SubstGlobal(TUtf32CowString& text, wchar32 what, wchar32 with, size_t from) {
+ // wchar32 and char32_t are checked by the static_asserts in this file to agree in size and signedness
+ return SubstCharGlobalImpl(text, static_cast<char32_t>(what), static_cast<char32_t>(with), from);
+}
diff --git a/library/cpp/containers/cow_string/subst.h b/library/cpp/containers/cow_string/subst.h
new file mode 100644
index 00000000000..6090ba54b25
--- /dev/null
+++ b/library/cpp/containers/cow_string/subst.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include <library/cpp/containers/cow_string/cow_string.h>
+
+#include <util/string/subst.h>
+
+/* Replace all occurrences of substring `what` with string `with` starting from position `from`.
+ *
+ * @param text String to modify.
+ * @param what Substring to replace.
+ * @param with Substring to use as replacement.
+ * @param from Position at which to start replacement.
+ *
+ * @return Number of replacements that occurred.
+ */
+size_t SubstGlobal(TCowString& text, TStringBuf what, TStringBuf with, size_t from = 0);
+size_t SubstGlobal(TUtf16CowString& text, TWtringBuf what, TWtringBuf with, size_t from = 0);
+size_t SubstGlobal(TUtf32CowString& text, TUtf32StringBuf what, TUtf32StringBuf with, size_t from = 0);
+
+/* Replace all occurrences of character `what` with character `with` starting from position `from`.
+ *
+ * @param text String to modify.
+ * @param what Character to replace.
+ * @param with Character to use as replacement.
+ * @param from Position at which to start replacement.
+ *
+ * @return Number of replacements that occurred.
+ */
+size_t SubstGlobal(TCowString& text, char what, char with, size_t from = 0);
+size_t SubstGlobal(TUtf16CowString& text, wchar16 what, wchar16 with, size_t from = 0);
+size_t SubstGlobal(TUtf32CowString& text, wchar32 what, wchar32 with, size_t from = 0);
diff --git a/library/cpp/containers/cow_string/ut/ya.make b/library/cpp/containers/cow_string/ut/ya.make
new file mode 100644
index 00000000000..1a54646dc77
--- /dev/null
+++ b/library/cpp/containers/cow_string/ut/ya.make
@@ -0,0 +1,7 @@
+UNITTEST_FOR(library/cpp/containers/cow_string)
+
+SRCS(
+ cow_string_ut.cpp
+)
+
+END()
diff --git a/library/cpp/containers/cow_string/ut_medium/cow_string_medium_ut.cpp b/library/cpp/containers/cow_string/ut_medium/cow_string_medium_ut.cpp
new file mode 100644
index 00000000000..a9a37db776d
--- /dev/null
+++ b/library/cpp/containers/cow_string/ut_medium/cow_string_medium_ut.cpp
@@ -0,0 +1,55 @@
+#include <library/cpp/containers/cow_string/cow_string.h>
+
+#include <library/cpp/testing/unittest/registar.h>
+
+#include <util/generic/strbuf.h>
+#include <util/generic/yexception.h>
+#include <util/stream/output.h>
+#include <util/system/thread.h>
+
+#include <string>
+#include <barrier>
+
+static_assert(sizeof(TCowString) == sizeof(const char*), "expect sizeof(TCowString) == sizeof(const char*)");
+
+// Multi-threaded stress test for converting a TCowString that has a second
+// reference into a std::string from several threads at once.
+Y_UNIT_TEST_SUITE(CowPitfalls) {
+ Y_UNIT_TEST(ParallelDetach) {
+ // best results with thread-sanitizer
+ std::vector<std::unique_ptr<TThread>> threads;
+ TCowString a = "the string";
+ TCowString b = a;
+ // Passed to the barrier as its completion function, so it runs once per
+ // iteration after all threads have arrived.
+ auto makeRefToA = [&a, &b]() {
+ b = a; // make second reference to the same string
+ };
+ constexpr int nThreads = 8;
+ // fewer iterations when built with thread-sanitizer
+#ifdef _tsan_enabled_
+ constexpr i64 retries = 1'000;
+#else
+ constexpr i64 retries = 1'000'000;
+#endif
+ std::barrier iterationSyncPoint(nThreads, makeRefToA);
+ std::atomic<i64> totalLen = 0;
+ // Takes std::string by value: every call converts the TCowString argument.
+ auto addLen = [](std::string a, std::atomic<i64>& len) {
+ len += a.length();
+ };
+ auto workload = [&a, &addLen, &totalLen, &iterationSyncPoint]() {
+ std::atomic<i64> len = 0;
+ for (i64 j = 0; j < retries; ++j) {
+ addLen(a, len); // possibility of bad implicit conversion
+ iterationSyncPoint.arrive_and_wait();
+ }
+ totalLen += len.load();
+ };
+ for (int i = 0; i < nThreads; ++i) {
+ threads.push_back(std::make_unique<TThread>(workload));
+ }
+ for (auto& t : threads) {
+ t->Start();
+ }
+ for (auto& t : threads) {
+ t->Join();
+ }
+ // Every conversion in every thread must have seen the full string length.
+ UNIT_ASSERT_VALUES_EQUAL(totalLen.load(), b.size() * nThreads * retries);
+ }
+
+} // Y_UNIT_TEST_SUITE(CowPitfalls)
diff --git a/library/cpp/containers/cow_string/ut_medium/ya.make b/library/cpp/containers/cow_string/ut_medium/ya.make
new file mode 100644
index 00000000000..f8420a8c688
--- /dev/null
+++ b/library/cpp/containers/cow_string/ut_medium/ya.make
@@ -0,0 +1,9 @@
+UNITTEST_FOR(library/cpp/containers/cow_string)
+
+SRCS(
+ cow_string_medium_ut.cpp
+)
+
+SIZE(medium)
+
+END()
diff --git a/library/cpp/containers/cow_string/ya.make b/library/cpp/containers/cow_string/ya.make
new file mode 100644
index 00000000000..5df2d2de830
--- /dev/null
+++ b/library/cpp/containers/cow_string/ya.make
@@ -0,0 +1,15 @@
+LIBRARY()
+
+SRCS(
+ cow_string.cpp
+ output.cpp
+ reverse.cpp
+ subst.cpp
+)
+
+END()
+
+RECURSE_FOR_TESTS(
+ ut
+ ut_medium
+)
diff --git a/library/cpp/yt/yson_string/string.cpp b/library/cpp/yt/yson_string/string.cpp
index 45a6aa7f099..c21783b204a 100644
--- a/library/cpp/yt/yson_string/string.cpp
+++ b/library/cpp/yt/yson_string/string.cpp
@@ -91,25 +91,14 @@ TYsonString::TYsonString(
: TYsonString(TYsonStringBuf(data, type))
{ }
-#ifdef TSTRING_IS_STD_STRING
TYsonString::TYsonString(
const TString& data,
EYsonType type)
- : TYsonString(TYsonStringBuf(data, type))
+ : Payload_(TCowString(data))
+ , Begin_(std::get<TCowString>(Payload_).data())
+ , Size_(data.length())
+ , Type_(type)
{ }
-#else
-TYsonString::TYsonString(
- const TString& data,
- EYsonType type)
-{
- // NOTE: CoW TString implementation is assumed
- // Moving the payload MUST NOT invalidate its internal pointers
- Payload_ = data;
- Begin_ = data.data();
- Size_ = data.length();
- Type_ = type;
-}
-#endif
TYsonString::TYsonString(
const TSharedRef& data,
@@ -148,8 +137,8 @@ TString TYsonString::ToString() const
[&] (const TSharedRangeHolderPtr&) {
return TString(AsStringBuf());
},
- [] (const TString& payload) {
- return payload;
+ [] (const TCowString& payload) {
+ return TString(payload);
});
}
@@ -163,7 +152,7 @@ TSharedRef TYsonString::ToSharedRef() const
[&] (const TSharedRangeHolderPtr& holder) {
return TSharedRef(Begin_, Size_, holder);
},
- [] (const TString& payload) {
+ [] (const TCowString& payload) {
return TSharedRef::FromString(payload);
});
}
diff --git a/library/cpp/yt/yson_string/string.h b/library/cpp/yt/yson_string/string.h
index 8fbe415e0fd..b9f44e6dccd 100644
--- a/library/cpp/yt/yson_string/string.h
+++ b/library/cpp/yt/yson_string/string.h
@@ -6,6 +6,8 @@
#include <library/cpp/yt/string/format.h>
+#include <library/cpp/containers/cow_string/cow_string.h>
+
#include <variant>
namespace NYT::NYson {
@@ -117,7 +119,7 @@ private:
struct TNullPayload
{ };
- std::variant<TNullPayload, TSharedRangeHolderPtr, TString> Payload_;
+ std::variant<TNullPayload, TSharedRangeHolderPtr, TCowString> Payload_;
const char* Begin_;
ui64 Size_ : 56;
diff --git a/library/cpp/yt/yson_string/ya.make b/library/cpp/yt/yson_string/ya.make
index ba693760f10..da180de5471 100644
--- a/library/cpp/yt/yson_string/ya.make
+++ b/library/cpp/yt/yson_string/ya.make
@@ -8,6 +8,7 @@ SRCS(
)
PEERDIR(
+ library/cpp/containers/cow_string
library/cpp/yt/assert
library/cpp/yt/coding
library/cpp/yt/exception