aboutsummaryrefslogtreecommitdiffstats
path: root/util/string/split.h
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /util/string/split.h
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'util/string/split.h')
-rw-r--r--util/string/split.h1085
1 files changed, 1085 insertions, 0 deletions
diff --git a/util/string/split.h b/util/string/split.h
new file mode 100644
index 0000000000..80f8c787dc
--- /dev/null
+++ b/util/string/split.h
@@ -0,0 +1,1085 @@
+#pragma once
+
+#include "strspn.h"
+#include "cast.h"
+
+#include <util/generic/algorithm.h>
+#include <util/generic/fwd.h>
+#include <util/generic/iterator.h>
+#include <util/generic/iterator_range.h>
+#include <util/generic/store_policy.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+#include <util/generic/typetraits.h>
+#include <util/generic/vector.h>
+#include <util/generic/ylimits.h>
+#include <util/system/compat.h>
+#include <util/system/defaults.h>
+
+#include <utility>
+#include <stlfwd>
+
+// NOTE: Check StringSplitter below to get more convenient split string interface.
+
+namespace NStringSplitPrivate {
+
+ template <class T, class I, class = void>
+ struct TIsConsumer: std::false_type {};
+
+ template <class T, class I>
+ struct TIsConsumer<
+ T, I,
+ TVoidT<decltype(std::declval<T>().Consume(
+ std::declval<I>(), std::declval<I>(), std::declval<I>()))>>
+ : std::true_type {};
+
+ template <class T, class I>
+ constexpr bool TIsConsumerV = TIsConsumer<T, I>::value;
+
+ template <class T>
+ T* Find(T* str, std::common_type_t<T> ch) {
+ for (; *str; ++str) {
+ if (*str == ch) {
+ return str;
+ }
+ }
+
+ return nullptr;
+ }
+
+}
+
+template <class I, class TDelim, class TConsumer>
+std::enable_if_t<::NStringSplitPrivate::TIsConsumerV<TConsumer, I>>
+SplitString(I b, I e, const TDelim& d, TConsumer&& c) {
+ I l, i;
+
+ do {
+ l = b;
+ i = d.Find(b, e);
+ } while (c.Consume(l, i, b) && (b != i));
+}
+
+template <class I, class TDelim, class TConsumer>
+std::enable_if_t<::NStringSplitPrivate::TIsConsumerV<TConsumer, I>>
+SplitString(I b, const TDelim& d, TConsumer&& c) {
+ I l, i;
+
+ do {
+ l = b;
+ i = d.Find(b);
+ } while (c.Consume(l, i, b) && (b != i));
+}
+
+template <class I1, class I2>
+static inline I1* FastStrChr(I1* str, I2 f) noexcept {
+ I1* ret = NStringSplitPrivate::Find(str, f);
+
+ if (!ret) {
+ ret = str + std::char_traits<I1>::length(str);
+ }
+
+ return ret;
+}
+
+template <class I>
+static inline I* FastStrStr(I* str, I* f, size_t l) noexcept {
+ std::basic_string_view<I> strView(str);
+ const auto ret = strView.find(*f);
+
+ if (ret != std::string::npos) {
+ std::basic_string_view<I> fView(f, l);
+ strView = strView.substr(ret);
+ for (; strView.size() >= l; strView = strView.substr(1)) {
+ if (strView.substr(0, l) == fView) {
+ break;
+ }
+ }
+
+ return strView.size() >= l ? strView.data() : strView.data() + strView.size();
+ } else {
+ return strView.data() + strView.size();
+ }
+}
+
+template <class Char>
+struct TStringDelimiter {
+ inline TStringDelimiter(Char* delim) noexcept
+ : Delim(delim)
+ , Len(std::char_traits<Char>::length(delim))
+ {
+ }
+
+ inline TStringDelimiter(Char* delim, size_t len) noexcept
+ : Delim(delim)
+ , Len(len)
+ {
+ }
+
+ inline Char* Find(Char*& b, Char* e) const noexcept {
+ const auto ret = std::basic_string_view<Char>(b, e - b).find(Delim, 0, Len);
+
+ if (ret != std::string::npos) {
+ const auto result = b + ret;
+ b = result + Len;
+ return result;
+ }
+
+ return (b = e);
+ }
+
+ inline Char* Find(Char*& b) const noexcept {
+ Char* ret = FastStrStr(b, Delim, Len);
+
+ b = *ret ? ret + Len : ret;
+
+ return ret;
+ }
+
+ Char* Delim;
+ const size_t Len;
+};
+
+template <class Char>
+struct TCharDelimiter {
+ inline TCharDelimiter(Char ch) noexcept
+ : Ch(ch)
+ {
+ }
+
+ inline Char* Find(Char*& b, Char* e) const noexcept {
+ const auto ret = std::basic_string_view<Char>(b, e - b).find(Ch);
+
+ if (ret != std::string::npos) {
+ const auto result = b + ret;
+ b = result + 1;
+ return result;
+ }
+
+ return (b = e);
+ }
+
+ inline Char* Find(Char*& b) const noexcept {
+ Char* ret = FastStrChr(b, Ch);
+
+ if (*ret) {
+ b = ret + 1;
+ } else {
+ b = ret;
+ }
+
+ return ret;
+ }
+
+ Char Ch;
+};
+
+template <class Iterator, class Condition>
+struct TFuncDelimiter {
+public:
+ template <class... Args>
+ TFuncDelimiter(Args&&... args)
+ : Fn(std::forward<Args>(args)...)
+ {
+ }
+
+ inline Iterator Find(Iterator& b, Iterator e) const noexcept {
+ if ((b = std::find_if(b, e, Fn)) != e) {
+ return b++;
+ }
+
+ return b;
+ }
+
+private:
+ Condition Fn;
+};
+
+template <class Char>
+struct TFindFirstOf {
+ inline TFindFirstOf(Char* set)
+ : Set(set)
+ {
+ }
+
+ inline Char* FindFirstOf(Char* b, Char* e) const noexcept {
+ Char* ret = b;
+ for (; ret != e; ++ret) {
+ if (NStringSplitPrivate::Find(Set, *ret))
+ break;
+ }
+ return ret;
+ }
+
+ inline Char* FindFirstOf(Char* b) const noexcept {
+ const std::basic_string_view<Char> bView(b);
+ const auto ret = bView.find_first_of(Set);
+ return ret != std::string::npos ? b + ret : b + bView.size();
+ }
+
+ Char* Set;
+};
+
+template <>
+struct TFindFirstOf<const char>: public TCompactStrSpn {
+ inline TFindFirstOf(const char* set, const char* e)
+ : TCompactStrSpn(set, e)
+ {
+ }
+
+ inline TFindFirstOf(const char* set)
+ : TCompactStrSpn(set)
+ {
+ }
+};
+
+template <class Char>
+struct TSetDelimiter: private TFindFirstOf<const Char> {
+ using TFindFirstOf<const Char>::TFindFirstOf;
+
+ inline Char* Find(Char*& b, Char* e) const noexcept {
+ Char* ret = const_cast<Char*>(this->FindFirstOf(b, e));
+
+ if (ret != e) {
+ b = ret + 1;
+ return ret;
+ }
+
+ return (b = e);
+ }
+
+ inline Char* Find(Char*& b) const noexcept {
+ Char* ret = const_cast<Char*>(this->FindFirstOf(b));
+
+ if (*ret) {
+ b = ret + 1;
+ return ret;
+ }
+
+ return (b = ret);
+ }
+};
+
+namespace NSplitTargetHasPushBack {
+ Y_HAS_MEMBER(push_back, PushBack);
+}
+
+template <class T, class = void>
+struct TConsumerBackInserter;
+
+template <class T>
+struct TConsumerBackInserter<T, std::enable_if_t<NSplitTargetHasPushBack::TClassHasPushBack<T>::value>> {
+ static void DoInsert(T* C, const typename T::value_type& i) {
+ C->push_back(i);
+ }
+};
+
+template <class T>
+struct TConsumerBackInserter<T, std::enable_if_t<!NSplitTargetHasPushBack::TClassHasPushBack<T>::value>> {
+ static void DoInsert(T* C, const typename T::value_type& i) {
+ C->insert(C->end(), i);
+ }
+};
+
+template <class T>
+struct TContainerConsumer {
+ inline TContainerConsumer(T* c) noexcept
+ : C(c)
+ {
+ }
+
+ template <class I>
+ inline bool Consume(I* b, I* d, I* /*e*/) {
+ TConsumerBackInserter<T>::DoInsert(C, typename T::value_type(b, d));
+
+ return true;
+ }
+
+ T* C;
+};
+
+template <class T>
+struct TContainerConvertingConsumer {
+ inline TContainerConvertingConsumer(T* c) noexcept
+ : C(c)
+ {
+ }
+
+ template <class I>
+ inline bool Consume(I* b, I* d, I* /*e*/) {
+ TConsumerBackInserter<T>::DoInsert(C, FromString<typename T::value_type>(TStringBuf(b, d)));
+
+ return true;
+ }
+
+ T* C;
+};
+
+template <class S, class I>
+struct TLimitingConsumer {
+ inline TLimitingConsumer(size_t cnt, S* slave) noexcept
+ : Cnt(cnt ? cnt - 1 : Max<size_t>())
+ , Slave(slave)
+ , Last(nullptr)
+ {
+ }
+
+ inline bool Consume(I* b, I* d, I* e) {
+ if (!Cnt) {
+ Last = b;
+
+ return false;
+ }
+
+ --Cnt;
+
+ return Slave->Consume(b, d, e);
+ }
+
+ size_t Cnt;
+ S* Slave;
+ I* Last;
+};
+
+template <class S>
+struct TSkipEmptyTokens {
+ inline TSkipEmptyTokens(S* slave) noexcept
+ : Slave(slave)
+ {
+ }
+
+ template <class I>
+ inline bool Consume(I* b, I* d, I* e) {
+ if (b != d) {
+ return Slave->Consume(b, d, e);
+ }
+
+ return true;
+ }
+
+ S* Slave;
+};
+
+template <class S>
+struct TKeepDelimiters {
+ inline TKeepDelimiters(S* slave) noexcept
+ : Slave(slave)
+ {
+ }
+
+ template <class I>
+ inline bool Consume(I* b, I* d, I* e) {
+ if (Slave->Consume(b, d, d)) {
+ if (d != e) {
+ return Slave->Consume(d, e, e);
+ }
+
+ return true;
+ }
+
+ return false;
+ }
+
+ S* Slave;
+};
+
+template <class T>
+struct TSimplePusher {
+ inline bool Consume(char* b, char* d, char*) {
+ *d = 0;
+ C->push_back(b);
+
+ return true;
+ }
+
+ T* C;
+};
+
+template <class T>
+static inline void Split(char* buf, char ch, T* res) {
+ res->resize(0);
+ if (*buf == 0)
+ return;
+
+ TCharDelimiter<char> delim(ch);
+ TSimplePusher<T> pusher = {res};
+
+ SplitString(buf, delim, pusher);
+}
+
+/// Split string into res vector. Res vector is cleared before split.
+/// Old good slow split function.
+/// Field delimter is any number of symbols specified in delim (no empty strings in res vector)
+/// @return number of elements created
+size_t Split(const char* in, const char* delim, TVector<TString>& res);
+size_t Split(const TString& in, const TString& delim, TVector<TString>& res);
+
+/// Old split reimplemented for TStringBuf using the new code
+/// Note that delim can be constructed from char* automatically (it is not cheap though)
+inline size_t Split(const TStringBuf s, const TSetDelimiter<const char>& delim, TVector<TStringBuf>& res) {
+ res.clear();
+ TContainerConsumer<TVector<TStringBuf>> res1(&res);
+ TSkipEmptyTokens<TContainerConsumer<TVector<TStringBuf>>> consumer(&res1);
+ SplitString(s.data(), s.data() + s.size(), delim, consumer);
+ return res.size();
+}
+
+template <class P, class D>
+void GetNext(TStringBuf& s, D delim, P& param) {
+ TStringBuf next = s.NextTok(delim);
+ Y_ENSURE(next.IsInited(), TStringBuf("Split: number of fields less than number of Split output arguments"));
+ param = FromString<P>(next);
+}
+
+template <class P, class D>
+void GetNext(TStringBuf& s, D delim, TMaybe<P>& param) {
+ TStringBuf next = s.NextTok(delim);
+ if (next.IsInited()) {
+ param = FromString<P>(next);
+ } else {
+ param.Clear();
+ }
+}
+
+// example:
+// Split(TStringBuf("Sherlock,2014,36.6"), ',', name, year, temperature);
+template <class D, class P1, class P2>
+void Split(TStringBuf s, D delim, P1& p1, P2& p2) {
+ GetNext(s, delim, p1);
+ GetNext(s, delim, p2);
+ Y_ENSURE(!s.IsInited(), TStringBuf("Split: number of fields more than number of Split output arguments"));
+}
+
+template <class D, class P1, class P2, class... Other>
+void Split(TStringBuf s, D delim, P1& p1, P2& p2, Other&... other) {
+ GetNext(s, delim, p1);
+ Split(s, delim, p2, other...);
+}
+
+/**
+ * \fn auto StringSplitter(...)
+ *
+ * Creates a string splitter object. The only use for it is to call one of its
+ * `Split*` methods, and then do something with the resulting proxy range.
+ *
+ * Some examples:
+ * \code
+ * TVector<TStringBuf> values = StringSplitter("1\t2\t3").Split('\t');
+ *
+ * for(TStringBuf part: StringSplitter("1::2::::3").SplitByString("::").SkipEmpty()) {
+ * Cerr << part;
+ * }
+ *
+ * TVector<TString> firstTwoValues = StringSplitter("1\t2\t3").Split('\t').Take(2);
+ * \endcode
+ *
+ * Use `Collect` or `AddTo` to store split results into an existing container:
+ * \code
+ * TVector<TStringBuf> values = {"0"};
+ * StringSplitter("1\t2\t3").Split('\t').AddTo(&values);
+ * \endcode
+ * Note that `Collect` clears target container, while `AddTo` just inserts values.
+ * You can use these methods with any container that has `emplace` / `emplace_back`.
+ *
+ * Use `ParseInto` to also perform string conversions before inserting values
+ * into target container:
+ * \code
+ * TSet<int> values;
+ * StringSplitter("1\t2\t3").Split('\t').ParseInto(&values);
+ * \endcode
+ */
+
+namespace NStringSplitPrivate {
+ Y_HAS_MEMBER(push_back, PushBack);
+ Y_HAS_MEMBER(insert, Insert);
+ Y_HAS_MEMBER(data, Data);
+
+ /**
+ * This one is needed here so that `std::string_view -> std::string_view`
+ * conversion works.
+ */
+ template <class Src, class Dst>
+ inline void DoFromString(const Src& src, Dst* dst) {
+ *dst = ::FromString<Dst>(src);
+ }
+
+ template <class T>
+ inline void DoFromString(const T& src, T* dst) noexcept {
+ *dst = src;
+ }
+
+ template <class T>
+ inline void DoFromString(const T& src, decltype(std::ignore)* dst) noexcept {
+ *dst = src;
+ }
+
+ template <class Src, class Dst>
+ inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const Src& src, Dst* dst) noexcept {
+ return ::TryFromString(src, *dst);
+ }
+
+ template <class T>
+ inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const T& src, T* dst) noexcept {
+ *dst = src;
+ return true;
+ }
+
+ template <class T>
+ inline Y_WARN_UNUSED_RESULT bool TryDoFromString(const T& src, decltype(std::ignore)* dst) noexcept {
+ *dst = src;
+ return true;
+ }
+
+ /**
+ * Consumer that places provided elements into a container. Not using
+ * `emplace(iterator)` for efficiency.
+ */
+ template <class Container>
+ struct TContainerConsumer {
+ using value_type = typename Container::value_type;
+
+ TContainerConsumer(Container* c)
+ : C_(c)
+ {
+ }
+
+ // TODO: return bool (continue)
+ template <class StringBuf>
+ void operator()(StringBuf e) const {
+ this->operator()(C_, e);
+ }
+
+ private:
+ template <class OtherContainer, class StringBuf>
+ auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace_back()) {
+ return c->emplace_back(value_type(e));
+ }
+
+ template <class OtherContainer, class StringBuf>
+ auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace()) {
+ return c->emplace(value_type(e));
+ }
+
+ Container* C_;
+ };
+
+ /**
+ * Consumer that converts provided elements via `FromString` and places them
+ * into a container.
+ */
+ template <class Container>
+ struct TContainerConvertingConsumer {
+ using value_type = typename Container::value_type;
+
+ TContainerConvertingConsumer(Container* c)
+ : C_(c)
+ {
+ }
+
+ template <class StringBuf>
+ void operator()(StringBuf e) const {
+ this->operator()(C_, e);
+ }
+
+ private:
+ template <class OtherContainer, class StringBuf>
+ auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace_back()) {
+ value_type v;
+ DoFromString(e, &v);
+ return c->emplace_back(std::move(v));
+ }
+
+ template <class OtherContainer, class StringBuf>
+ auto operator()(OtherContainer* c, StringBuf e) const -> decltype(c->emplace()) {
+ value_type v;
+ DoFromString(e, &v);
+ return c->emplace(std::move(v));
+ }
+
+ Container* C_;
+ };
+
+ template <class String>
+ struct TStringBufOfImpl {
+ using type = std::conditional_t<
+ THasData<String>::value,
+ TBasicStringBuf<typename String::value_type>,
+ TIteratorRange<typename String::const_iterator>>;
+ };
+
+ template <class Char, class Traits, class Allocator>
+ struct TStringBufOfImpl<std::basic_string<Char, Traits, Allocator>> {
+ using type = std::basic_string_view<Char, Traits>;
+ };
+
+ template <class Char, class Traits>
+ struct TStringBufOfImpl<std::basic_string_view<Char, Traits>> {
+ using type = std::basic_string_view<Char, Traits>;
+ };
+
+ /**
+ * Metafunction that returns a string buffer for the given type. This is to
+ * make sure that splitting `std::string` returns `std::string_view`.
+ */
+ template <class String>
+ using TStringBufOf = typename TStringBufOfImpl<String>::type;
+
+ template <class StringBuf, class Iterator>
+ StringBuf DoMakeStringBuf(Iterator b, Iterator e, StringBuf*) {
+ return StringBuf(b, e);
+ }
+
+ template <class Char, class Traits, class Iterator>
+ std::basic_string_view<Char, Traits> DoMakeStringBuf(Iterator b, Iterator e, std::basic_string_view<Char, Traits>*) {
+ return std::basic_string_view<Char, Traits>(b, e - b);
+ }
+
+ template <class StringBuf, class Iterator>
+ StringBuf MakeStringBuf(Iterator b, Iterator e) {
+ return DoMakeStringBuf(b, e, static_cast<StringBuf*>(nullptr));
+ }
+
+ template <class String>
+ struct TIteratorOfImpl {
+ using type = std::conditional_t<
+ THasData<String>::value,
+ const typename String::value_type*,
+ typename String::const_iterator>;
+ };
+
+ template <class String>
+ using TIteratorOf = typename TIteratorOfImpl<String>::type;
+
+ template <class String>
+ struct TIterState {
+ using TStringBufType = TStringBufOf<String>;
+ using TIterator = TIteratorOf<String>;
+
+ TIterState(const String& string) noexcept
+ : TokS()
+ , TokD()
+ {
+ Init(string, THasData<String>());
+ }
+
+ operator TStringBufType() const noexcept {
+ return Token();
+ }
+
+ template <
+ typename Other,
+ class = typename std::enable_if<
+ std::is_convertible<Other, TStringBufType>::value,
+ void>::type>
+ bool operator==(const Other& toCompare) const {
+ return TStringBufType(*this) == TStringBufType(toCompare);
+ }
+
+ explicit operator bool() const {
+ return !Empty();
+ }
+
+ TIterator TokenStart() const noexcept {
+ return TokS;
+ }
+
+ TIterator TokenDelim() const noexcept {
+ return TokD;
+ }
+
+ TIterator TokenEnd() const noexcept {
+ return B;
+ }
+
+ Y_PURE_FUNCTION bool Empty() const noexcept {
+ return TokenStart() == TokenDelim();
+ }
+
+ TStringBufType Token() const noexcept {
+ return MakeStringBuf<TStringBufType>(TokenStart(), TokenDelim());
+ }
+
+ TStringBufType Delim() const noexcept {
+ return MakeStringBuf<TStringBufType>(TokenDelim(), TokenEnd());
+ }
+
+ TIterator B;
+ TIterator E;
+
+ TIterator TokS;
+ TIterator TokD;
+
+ private:
+ void Init(const String& string, std::true_type) {
+ B = string.data();
+ E = string.data() + string.size();
+ }
+
+ void Init(const String& string, std::false_type) {
+ B = string.begin();
+ E = string.end();
+ }
+ };
+
+ template <class Base>
+ class TSplitRange: public Base, public TInputRangeAdaptor<TSplitRange<Base>> {
+ using TStringBufType = decltype(std::declval<Base>().Next()->Token());
+
+ public:
+ template <typename... Args>
+ inline TSplitRange(Args&&... args)
+ : Base(std::forward<Args>(args)...)
+ {
+ }
+
+ template <class Consumer, std::enable_if_t<std::is_same<decltype(std::declval<Consumer>()(std::declval<TStringBufType>())), void>::value, int>* = nullptr>
+ inline void Consume(Consumer&& f) {
+ for (auto&& it : *this) {
+ f(it.Token());
+ }
+ }
+
+ template <class Consumer, std::enable_if_t<std::is_same<decltype(std::declval<Consumer>()(std::declval<TStringBufType>())), bool>::value, int>* = nullptr>
+ inline bool Consume(Consumer&& f) {
+ for (auto&& it : *this) {
+ if (!f(it.Token())) {
+ return false;
+ }
+ }
+ return true;
+ }
+
+ template <class Container, class = std::enable_if_t<THasInsert<Container>::value || THasPushBack<Container>::value>>
+ operator Container() {
+ Container result;
+ AddTo(&result);
+ return result;
+ }
+
+ template <class S>
+ inline TVector<S> ToList() {
+ TVector<S> result;
+ for (auto&& it : *this) {
+ result.push_back(S(it.Token()));
+ }
+ return result;
+ }
+
+ template <class Container>
+ inline void Collect(Container* c) {
+ Y_ASSERT(c);
+ c->clear();
+ AddTo(c);
+ }
+
+ template <class Container>
+ inline void AddTo(Container* c) {
+ Y_ASSERT(c);
+ TContainerConsumer<Container> consumer(c);
+ Consume(consumer);
+ }
+
+ template <class Container>
+ inline void ParseInto(Container* c) {
+ Y_ASSERT(c);
+ TContainerConvertingConsumer<Container> consumer(c);
+ Consume(consumer);
+ }
+
+ // TODO: this is actually TryParseInto
+ /**
+ * Same as `CollectInto`, just doesn't throw.
+ *
+ * \param[out] args Output arguments.
+ * \returns Whether parsing was successful.
+ */
+ template <typename... Args>
+ inline bool TryCollectInto(Args*... args) noexcept {
+ size_t successfullyFilled = 0;
+ auto it = this->begin();
+
+ //FIXME: actually, some kind of TryApplyToMany is needed in order to stop iteration upon first failure
+ ApplyToMany([&](auto&& arg) {
+ if (it != this->end()) {
+ if (TryDoFromString(it->Token(), arg)) {
+ ++successfullyFilled;
+ }
+ ++it;
+ }
+ }, args...);
+
+ return successfullyFilled == sizeof...(args) && it == this->end();
+ }
+
+ // TODO: this is actually ParseInto
+ /**
+ * Splits and parses everything that's in this splitter into `args`.
+ *
+ * Example usage:
+ * \code
+ * int l, r;
+ * StringSplitter("100*200").Split('*').CollectInto(&l, &r);
+ * \endcode
+ *
+ * \param[out] args Output arguments.
+ * \throws If not all items were parsed, or
+ * if there were too many items in the split.
+ */
+ template <typename... Args>
+ inline void CollectInto(Args*... args) {
+ Y_ENSURE(TryCollectInto<Args...>(args...));
+ }
+
+ inline size_t Count() const {
+ size_t cnt = 0;
+ for (auto&& it : *this) {
+ Y_UNUSED(it);
+ ++cnt;
+ }
+ return cnt;
+ }
+ };
+
+ template <class String>
+ class TStringSplitter {
+ using TStringType = String;
+ using TStringBufType = TStringBufOf<TStringType>;
+ using TChar = typename TStringType::value_type;
+ using TIterator = TIteratorOf<TStringType>;
+ using TIteratorState = TIterState<TStringType>;
+
+ /**
+ * Base class for all split ranges that actually does the splitting.
+ */
+ template <class DelimStorage>
+ struct TSplitRangeBase {
+ template <class OtherString, class... Args>
+ inline TSplitRangeBase(OtherString&& s, Args&&... args)
+ : String_(std::forward<OtherString>(s))
+ , State_(String_)
+ , Delim_(std::forward<Args>(args)...)
+ {
+ }
+
+ inline TIteratorState* Next() {
+ if (State_.TokD == State_.B) {
+ return nullptr;
+ }
+
+ State_.TokS = State_.B;
+ State_.TokD = Delim_.Ptr()->Find(State_.B, State_.E);
+
+ return &State_;
+ }
+
+ private:
+ TStringType String_;
+ TIteratorState State_;
+ DelimStorage Delim_;
+ };
+
+ template <class Base, class Filter>
+ struct TFilterRange: public Base {
+ template <class... Args>
+ inline TFilterRange(const Base& base, Args&&... args)
+ : Base(base)
+ , Filter_(std::forward<Args>(args)...)
+ {
+ }
+
+ inline TIteratorState* Next() {
+ TIteratorState* ret;
+
+ do {
+ ret = Base::Next();
+ } while (ret && !Filter_.Accept(ret));
+
+ return ret;
+ }
+
+ Filter Filter_;
+ };
+
+ struct TNonEmptyFilter {
+ template <class TToken>
+ inline bool Accept(const TToken* token) noexcept {
+ return !token->Empty();
+ }
+ };
+
+ template <class TIter>
+ struct TStopIteration;
+
+ template <class Base>
+ struct TFilters: public Base {
+ template <class TFilter>
+ using TIt = TSplitRange<TStopIteration<TFilters<TFilterRange<Base, TFilter>>>>;
+
+ template <typename... Args>
+ inline TFilters(Args&&... args)
+ : Base(std::forward<Args>(args)...)
+ {
+ }
+
+ inline TIt<TNonEmptyFilter> SkipEmpty() const {
+ return {*this};
+ }
+ };
+
+ template <class Base, class Stopper>
+ struct TStopRange: public Base {
+ template <typename... Args>
+ inline TStopRange(const Base& base, Args&&... args)
+ : Base(base)
+ , Stopper_(std::forward<Args>(args)...)
+ {
+ }
+
+ inline TIteratorState* Next() {
+ TIteratorState* ret = Base::Next();
+ if (!ret || Stopper_.Stop(ret)) {
+ return nullptr;
+ }
+ return ret;
+ }
+
+ Stopper Stopper_;
+ };
+
+ struct TTake {
+ TTake() = default;
+
+ TTake(size_t count)
+ : Count(count)
+ {
+ }
+
+ template <class TToken>
+ inline bool Stop(TToken*) noexcept {
+ if (Count > 0) {
+ --Count;
+ return false;
+ } else {
+ return true;
+ }
+ }
+
+ size_t Count = 0;
+ };
+
+ struct TLimit {
+ TLimit() = default;
+
+ TLimit(size_t count)
+ : Count(count)
+ {
+ Y_ASSERT(Count > 0);
+ }
+
+ template <class TToken>
+ inline bool Stop(TToken* token) noexcept {
+ if (Count > 1) {
+ --Count;
+ return false;
+ } else if (Count == 1) {
+ token->TokD = token->B = token->E;
+ return false;
+ }
+ return true;
+ }
+
+ size_t Count = 0;
+ };
+
+ template <class Base>
+ struct TStopIteration: public Base {
+ template <class TStopper>
+ using TIt = TSplitRange<TStopIteration<TFilters<TStopRange<Base, TStopper>>>>;
+
+ template <typename... Args>
+ inline TStopIteration(Args&&... args)
+ : Base(std::forward<Args>(args)...)
+ {
+ }
+
+ inline TIt<TTake> Take(size_t count) {
+ return {*this, count};
+ }
+
+ inline TIt<TLimit> Limit(size_t count) {
+ return {*this, count};
+ }
+ };
+
+ template <class TPolicy>
+ using TIt = TSplitRange<TStopIteration<TFilters<TSplitRangeBase<TPolicy>>>>;
+
+ public:
+ template <class OtherString>
+ explicit TStringSplitter(OtherString&& s)
+ : String_(std::forward<OtherString>(s))
+ {
+ }
+
+ //does not own TDelim
+ template <class TDelim>
+ inline TIt<TPtrPolicy<const TDelim>> Split(const TDelim& d) const noexcept {
+ return {String_, &d};
+ }
+
+ inline TIt<TEmbedPolicy<TCharDelimiter<const TChar>>> Split(TChar ch) const noexcept {
+ return {String_, ch};
+ }
+
+ inline TIt<TSimpleRefPolicy<TSetDelimiter<const TChar>>> SplitBySet(const TChar* set) const noexcept {
+ return {String_, set};
+ }
+
+ inline TIt<TEmbedPolicy<TStringDelimiter<const TChar>>> SplitByString(const TStringBufType& str) const noexcept {
+ return {String_, str.data(), str.size()};
+ }
+
+ template <class TFunc>
+ inline TIt<TEmbedPolicy<TFuncDelimiter<TIterator, TFunc>>> SplitByFunc(TFunc f) const noexcept {
+ return {String_, f};
+ }
+
+ private:
+ TStringType String_;
+ };
+
+ template <class String>
+ auto MakeStringSplitter(String&& s) {
+ return TStringSplitter<std::remove_reference_t<String>>(std::forward<String>(s));
+ }
+}
+
+template <class Iterator>
+auto StringSplitter(Iterator begin, Iterator end) {
+ return ::NStringSplitPrivate::MakeStringSplitter(TIteratorRange<Iterator>(begin, end));
+}
+
+template <class Char>
+auto StringSplitter(const Char* begin, const Char* end) {
+ return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf<Char>(begin, end));
+}
+
+template <class Char>
+auto StringSplitter(const Char* begin, size_t len) {
+ return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf<Char>(begin, len));
+}
+
+template <class Char>
+auto StringSplitter(const Char* str) {
+ return ::NStringSplitPrivate::MakeStringSplitter(TBasicStringBuf<Char>(str));
+}
+
+template <class String, std::enable_if_t<!std::is_pointer<std::remove_reference_t<String>>::value, int> = 0>
+auto StringSplitter(String& s) {
+ return ::NStringSplitPrivate::MakeStringSplitter(::NStringSplitPrivate::TStringBufOf<String>(s.data(), s.size()));
+}
+
+template <class String, std::enable_if_t<!std::is_pointer<std::remove_reference_t<String>>::value, int> = 0>
+auto StringSplitter(String&& s) {
+ return ::NStringSplitPrivate::MakeStringSplitter(std::move(s));
+}