aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/deprecated/split
diff options
context:
space:
mode:
authorDevtools Arcadia <arcadia-devtools@yandex-team.ru>2022-02-07 18:08:42 +0300
committerDevtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net>2022-02-07 18:08:42 +0300
commit1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch)
treee26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/deprecated/split
downloadydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/deprecated/split')
-rw-r--r--library/cpp/deprecated/split/delim_string_iter.cpp45
-rw-r--r--library/cpp/deprecated/split/delim_string_iter.h185
-rw-r--r--library/cpp/deprecated/split/delim_string_iter_ut.cpp99
-rw-r--r--library/cpp/deprecated/split/split_iterator.cpp318
-rw-r--r--library/cpp/deprecated/split/split_iterator.h317
-rw-r--r--library/cpp/deprecated/split/split_iterator_ut.cpp152
-rw-r--r--library/cpp/deprecated/split/ya.make14
7 files changed, 1130 insertions, 0 deletions
diff --git a/library/cpp/deprecated/split/delim_string_iter.cpp b/library/cpp/deprecated/split/delim_string_iter.cpp
new file mode 100644
index 0000000000..af418c5bfb
--- /dev/null
+++ b/library/cpp/deprecated/split/delim_string_iter.cpp
@@ -0,0 +1,45 @@
+#include "delim_string_iter.h"
+
+//
+// TKeyValueDelimStringIter
+//
+
+// Splits the token DelimIter currently points at on its first '='.
+// Text before '=' becomes the key and text after it the value; a token
+// without '=' is stored as the key with an empty value.
+void TKeyValueDelimStringIter::ReadKeyAndValue() {
+    const TStringBuf token = *DelimIter;
+    const size_t eqPos = token.find('=');
+
+    if (eqPos != TStringBuf::npos) {
+        ChunkKey = token.SubStr(0, eqPos);
+        ChunkValue = token.SubStr(eqPos + 1);
+        return;
+    }
+
+    ChunkKey = token;
+    ChunkValue.Clear();
+}
+
+// Starts iterating over str split by delim and parses the first token
+// into key/value right away when there is one.
+TKeyValueDelimStringIter::TKeyValueDelimStringIter(const TStringBuf str, const TStringBuf delim)
+    : DelimIter(str, delim)
+{
+    if (!DelimIter.Valid()) {
+        return;
+    }
+    ReadKeyAndValue();
+}
+
+// True while the underlying delimiter iterator still points at a token.
+bool TKeyValueDelimStringIter::Valid() const {
+    return static_cast<bool>(DelimIter);
+}
+
+// Moves to the next token and re-parses key/value from it.
+TKeyValueDelimStringIter& TKeyValueDelimStringIter::operator++() {
+    ++DelimIter;
+    if (!DelimIter.Valid()) {
+        // past the end: ChunkKey/ChunkValue are left as they were
+        return *this;
+    }
+    ReadKeyAndValue();
+    return *this;
+}
+
+// Key of the most recently parsed token: the text before its first '=',
+// or the whole token when it contains no '='.
+const TStringBuf& TKeyValueDelimStringIter::Key() const {
+    return this->ChunkKey;
+}
+
+// Value of the most recently parsed token: the text after its first '=',
+// empty when the token contains no '='.
+const TStringBuf& TKeyValueDelimStringIter::Value() const {
+    return this->ChunkValue;
+}
diff --git a/library/cpp/deprecated/split/delim_string_iter.h b/library/cpp/deprecated/split/delim_string_iter.h
new file mode 100644
index 0000000000..8e4ca171a0
--- /dev/null
+++ b/library/cpp/deprecated/split/delim_string_iter.h
@@ -0,0 +1,185 @@
+#pragma once
+
+#include <util/generic/algorithm.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/yexception.h>
+#include <util/string/cast.h>
+#include <util/system/yassert.h>
+
+#include <iterator>
+
+// Iterator over the pieces of a string separated by a fixed (possibly
+// multi-character) delimiter. Only TStringBuf members are stored; no copy of
+// the string or of the delimiter is made here.
+class TDelimStringIter {
+public:
+    using value_type = TStringBuf;
+    using difference_type = ptrdiff_t;
+    using pointer = const TStringBuf*;
+    using reference = const TStringBuf&;
+    using iterator_category = std::forward_iterator_tag;
+
+    // Iterates over [begin, strEnd) split by delim.
+    inline TDelimStringIter(const char* begin, const char* strEnd, TStringBuf delim)
+        : TDelimStringIter(TStringBuf(begin, strEnd), delim)
+    {
+    }
+
+    // Iterates over str split by delim. The iterator always starts valid:
+    // an empty str yields a single empty token.
+    inline TDelimStringIter(TStringBuf str, TStringBuf delim)
+        : IsValid(true)
+        , Str(str)
+        , Delim(delim)
+    {
+        UpdateCurrent();
+    }
+
+    // Invalid iterator; serves as the end sentinel.
+    inline TDelimStringIter()
+        : IsValid(false)
+    {
+    }
+
+    inline explicit operator bool() const {
+        return IsValid;
+    }
+
+    // NOTE: this is a potentially unsafe operation (no overrun check)
+    inline TDelimStringIter& operator++() {
+        if (Current.end() != Str.end()) {
+            // a delimiter follows the current token: step over both
+            Str.Skip(Current.length() + Delim.length());
+            UpdateCurrent();
+        } else {
+            // the current token was the last one
+            Str.Clear();
+            Current.Clear();
+            IsValid = false;
+        }
+        return *this;
+    }
+
+    // Advances n times.
+    inline void operator+=(size_t n) {
+        for (; n > 0; --n) {
+            ++(*this);
+        }
+    }
+
+    // Two invalid iterators are equal; two valid ones are equal when their
+    // current tokens start at the same address.
+    inline bool operator==(const TDelimStringIter& rhs) const {
+        return (IsValid == rhs.IsValid) && (!IsValid || (Current.begin() == rhs.Current.begin()));
+    }
+
+    inline bool operator!=(const TDelimStringIter& rhs) const {
+        return !(*this == rhs);
+    }
+
+    inline TStringBuf operator*() const {
+        return Current;
+    }
+
+    inline const TStringBuf* operator->() const {
+        return &Current;
+    }
+
+    // Get & advance: converts the current token with FromString<T>, stores it
+    // in t and moves on. Returns false (t untouched) when the iterator is invalid.
+    template <class T>
+    inline bool TryNext(T& t) {
+        if (IsValid) {
+            t = FromString<T>(Current);
+            operator++();
+            return true;
+        } else {
+            return false;
+        }
+    }
+
+    // Get & advance; throws yexception when there is no current token.
+    template <class T>
+    inline TDelimStringIter& Next(T& t) // Get & advance
+    {
+        if (!TryNext(t))
+            ythrow yexception() << "No valid field";
+        return *this;
+    }
+
+    // Get & advance, returning the converted token by value.
+    template <class T>
+    inline T GetNext() {
+        T res;
+        Next(res);
+        return res;
+    }
+
+    inline const char* GetBegin() const {
+        return Current.begin();
+    }
+
+    inline const char* GetEnd() const {
+        return Current.end();
+    }
+
+    inline bool Valid() const {
+        return IsValid;
+    }
+
+    // contents from next token to the end of string
+    inline TStringBuf Cdr() const {
+        return Str.SubStr(Current.length() + Delim.length());
+    }
+
+    inline TDelimStringIter IterEnd() const {
+        return TDelimStringIter();
+    }
+
+private:
+    // Recomputes Current as the prefix of Str up to the first delimiter
+    // (the whole Str when no delimiter is found).
+    inline void UpdateCurrent() {
+        // An empty delimiter matches at offset 0 of every string, so operator++
+        // would skip zero characters and never finish. Treat the remaining
+        // string as a single token instead.
+        if (Delim.empty()) {
+            Current = Str;
+            return;
+        }
+        // it is much faster than TStringBuf::find
+        size_t pos = std::search(Str.begin(), Str.end(), Delim.begin(), Delim.end()) - Str.begin();
+        Current = Str.Head(pos);
+    }
+
+private:
+    bool IsValid;
+
+    TStringBuf Str;     // not yet consumed part of the input, starting at Current
+    TStringBuf Current; // current token, always a prefix of Str
+    TStringBuf Delim;
+};
+
+// Range wrapper that makes TDelimStringIter usable in a range-based for loop.
+// Example: for (TStringBuf field : TDelimStroka(line, "@@")) { ... }
+struct TDelimStroka {
+    TStringBuf S;
+    TStringBuf Delim;
+
+    inline TDelimStroka(TStringBuf s, TStringBuf delim)
+        : S(s)
+        , Delim(delim)
+    {
+    }
+
+    // Iterator positioned at the first token of S
+    inline TDelimStringIter begin() const {
+        return {S, Delim};
+    }
+
+    // Default-constructed (invalid) iterator used as the end sentinel
+    inline TDelimStringIter end() const {
+        return {};
+    }
+};
+
+// Iterator over the tokens of a TString split by delim.
+inline TDelimStringIter begin_delim(const TString& str, TStringBuf delim) {
+    return TDelimStringIter(TStringBuf(str), delim);
+}
+
+// Iterator over the tokens of a string view split by delim.
+inline TDelimStringIter begin_delim(TStringBuf str, TStringBuf delim) {
+    return TDelimStringIter(str, delim);
+}
+
+// End sentinel matching begin_delim(); both arguments are ignored.
+inline TDelimStringIter end_delim(const TString& /*str*/, TStringBuf /*delim*/) {
+    return {};
+}
+
+// Iterates over delimiter-separated "key=value" tokens of a string.
+class TKeyValueDelimStringIter {
+public:
+    TKeyValueDelimStringIter(const TStringBuf str, const TStringBuf delim);
+
+    // True while there is a current token
+    bool Valid() const;
+
+    // Advances to the next token
+    TKeyValueDelimStringIter& operator++();
+
+    // Key / value parts of the current token
+    const TStringBuf& Key() const;
+    const TStringBuf& Value() const;
+
+private:
+    // Fills ChunkKey and ChunkValue from the token DelimIter points at
+    void ReadKeyAndValue();
+
+private:
+    TDelimStringIter DelimIter;
+    TStringBuf ChunkKey;
+    TStringBuf ChunkValue;
+};
diff --git a/library/cpp/deprecated/split/delim_string_iter_ut.cpp b/library/cpp/deprecated/split/delim_string_iter_ut.cpp
new file mode 100644
index 0000000000..18a8b2a160
--- /dev/null
+++ b/library/cpp/deprecated/split/delim_string_iter_ut.cpp
@@ -0,0 +1,99 @@
+#include "delim_string_iter.h"
+#include <util/generic/vector.h>
+#include <library/cpp/testing/unittest/registar.h>
+
+/// Checks that a TDelimStringIter built on top of the given string and delimiter
+/// produces exactly the expected sequence of tokens.
+static void AssertStringSplit(const TString& str, const TString& delim, const TVector<TString>& expected) {
+    TDelimStringIter it(str, delim);
+
+    // test iterator invariants
+    for (size_t i = 0; i < expected.size(); ++i, ++it) {
+        UNIT_ASSERT(it.Valid());
+        UNIT_ASSERT(bool(it));
+        UNIT_ASSERT_STRINGS_EQUAL(it->ToString(), expected[i]);
+    }
+
+    // the iterator must run out together with the expected tokens
+    UNIT_ASSERT(!it.Valid());
+}
+
+Y_UNIT_TEST_SUITE(TDelimStrokaIterTestSuite) {
+    Y_UNIT_TEST(SingleCharacterAsDelimiter) {
+        const TVector<TString> tokens = {"Hello", "words!"};
+        AssertStringSplit("Hello words!", " ", tokens);
+    }
+
+    Y_UNIT_TEST(MultipleCharactersAsDelimiter) {
+        // adjacent delimiters produce an empty token between them
+        const TVector<TString> tokens = {"0, ", "", "2, 3, 5, 8, 13, 2", "34"};
+        AssertStringSplit("0, 1, 1, 2, 3, 5, 8, 13, 21, 34", "1, ", tokens);
+    }
+
+    Y_UNIT_TEST(NoDelimitersPresent) {
+        const TVector<TString> tokens = {"This string could be yours"};
+        AssertStringSplit("This string could be yours", "\t", tokens);
+    }
+
+    Y_UNIT_TEST(Cdr) {
+        TDelimStringIter iter("a\tc\t", "\t");
+        UNIT_ASSERT_STRINGS_EQUAL(*iter, "a");
+        UNIT_ASSERT_STRINGS_EQUAL(iter.Cdr(), "c\t");
+        ++iter;
+        UNIT_ASSERT_STRINGS_EQUAL(iter.Cdr(), "");
+    }
+
+    Y_UNIT_TEST(ForIter) {
+        const TVector<TStringBuf> expected = {"1", "", "3@4", ""};
+        TVector<TStringBuf> got;
+
+        const TDelimStroka fields("1@@@@3@4@@", "@@");
+        for (auto it = fields.begin(); it != fields.end(); ++it) {
+            got.push_back(*it);
+        }
+
+        UNIT_ASSERT_EQUAL(got, expected);
+    }
+}
+
+// Checks that TKeyValueDelimStringIter yields exactly the expected (key, value)
+// pairs, in order, and then becomes invalid.
+static void AssertKeyValueStringSplit(
+    const TStringBuf str,
+    const TStringBuf delim,
+    const TVector<std::pair<TStringBuf, TStringBuf>>& expected) {
+    TKeyValueDelimStringIter it(str, delim);
+
+    for (size_t i = 0; i < expected.size(); ++i, ++it) {
+        UNIT_ASSERT(it.Valid());
+        UNIT_ASSERT_STRINGS_EQUAL(it.Key(), expected[i].first);
+        UNIT_ASSERT_STRINGS_EQUAL(it.Value(), expected[i].second);
+    }
+
+    UNIT_ASSERT(!it.Valid());
+}
+
+Y_UNIT_TEST_SUITE(TKeyValueDelimStringIterTestSuite) {
+    using TExpectedPairs = TVector<std::pair<TStringBuf, TStringBuf>>;
+
+    Y_UNIT_TEST(SingleCharacterAsDelimiter) {
+        const TExpectedPairs expected = {
+            {"abc", "123"},
+            {"cde", "qwer"}};
+        AssertKeyValueStringSplit("abc=123,cde=qwer", ",", expected);
+    }
+
+    Y_UNIT_TEST(MultipleCharactersAsDelimiter) {
+        const TExpectedPairs expected = {
+            {"abc", "xyz"},
+            {"qwerty", "zxcv"}};
+        AssertKeyValueStringSplit("abc=xyz@@qwerty=zxcv", "@@", expected);
+    }
+
+    Y_UNIT_TEST(NoDelimiters) {
+        const TExpectedPairs expected = {
+            {"abc", "zz"}};
+        AssertKeyValueStringSplit("abc=zz", ",", expected);
+    }
+
+    Y_UNIT_TEST(EmptyElements) {
+        // empty tokens give an empty key and an empty value
+        const TExpectedPairs expected = {
+            {"", ""},
+            {"abc", "zxy"},
+            {"", ""},
+            {"qwerty", "y"},
+            {"", ""}};
+        AssertKeyValueStringSplit("@@abc=zxy@@@@qwerty=y@@", "@@", expected);
+    }
+}
diff --git a/library/cpp/deprecated/split/split_iterator.cpp b/library/cpp/deprecated/split/split_iterator.cpp
new file mode 100644
index 0000000000..32262d25bd
--- /dev/null
+++ b/library/cpp/deprecated/split/split_iterator.cpp
@@ -0,0 +1,318 @@
+#include "split_iterator.h"
+
+#include <util/system/yassert.h>
+
+#include <cctype>
+#include <cstring>
+#include <cstdlib>
+
+/****************** TSplitDelimiters ******************/
+
+// Builds the lookup table: every byte that occurs in the NUL-terminated
+// string s is marked as a delimiter, all other bytes are not.
+TSplitDelimiters::TSplitDelimiters(const char* s) {
+    for (bool& flag : Delims) {
+        flag = false;
+    }
+    for (const char* p = s; *p != '\0'; ++p) {
+        // index by the unsigned byte value so that chars >= 0x80 stay in range
+        Delims[static_cast<ui8>(*p)] = true;
+    }
+}
+
+/****************** TSplitBase ******************/
+// Remembers the buffer to split; the characters are not copied.
+TSplitBase::TSplitBase(const char* str, size_t length)
+    : Str(str)
+    , Len(length)
+{
+}
+
+// Same as above, taking the buffer and its length from s.
+TSplitBase::TSplitBase(const TString& s)
+    : TSplitBase(s.data(), s.size())
+{
+}
+
+/****************** TDelimitersSplit ******************/
+
+// Splitter over [str, str + length); delimiters is kept by reference.
+TDelimitersSplit::TDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters)
+    : TSplitBase(str, length)
+    , Delimiters(delimiters)
+{
+}
+
+// Splitter over the contents of s; delimiters is kept by reference.
+TDelimitersSplit::TDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters)
+    : TDelimitersSplit(s.data(), s.size(), delimiters)
+{
+}
+
+// Offset of the first non-delimiter character (Len when there is none).
+size_t TDelimitersSplit::Begin() const {
+    size_t first = 0;
+    while (first < Len) {
+        if (!Delimiters.IsDelimiter(Str[first])) {
+            break;
+        }
+        ++first;
+    }
+    return first;
+}
+
+// Returns the token that starts at pos as a [begin, end) pair of offsets and
+// advances pos past the token and the whole run of delimiters after it.
+TSizeTRegion TDelimitersSplit::Next(size_t& pos) const {
+    const size_t tokenBegin = pos;
+    for (; pos < Len; ++pos) {
+        if (Delimiters.IsDelimiter(Str[pos])) {
+            break;
+        }
+    }
+    const TSizeTRegion token(tokenBegin, pos);
+
+    for (; pos < Len; ++pos) {
+        if (!Delimiters.IsDelimiter(Str[pos])) {
+            break;
+        }
+    }
+
+    return token;
+}
+
+// Iterator bound to this splitter (it stores a reference to *this).
+TDelimitersSplit::TIterator TDelimitersSplit::Iterator() const {
+    return TIterator(*this);
+}
+
+/****************** TDelimitersStrictSplit ******************/
+
+// Splitter over [str, str + length); delimiters is kept by reference.
+TDelimitersStrictSplit::TDelimitersStrictSplit(const char* str, size_t length, const TSplitDelimiters& delimiters)
+    : TSplitBase(str, length)
+    , Delimiters(delimiters)
+{
+}
+
+// Splitter over the contents of s; delimiters is kept by reference.
+TDelimitersStrictSplit::TDelimitersStrictSplit(const TString& s, const TSplitDelimiters& delimiters)
+    : TDelimitersStrictSplit(s.data(), s.size(), delimiters)
+{
+}
+
+// Iterator bound to this splitter (it stores a reference to *this).
+TDelimitersStrictSplit::TIterator TDelimitersStrictSplit::Iterator() const {
+    return TIterator(*this);
+}
+
+// Returns the token that starts at pos as [begin, end) offsets and advances
+// pos past exactly one delimiter, so consecutive delimiters yield empty tokens.
+TSizeTRegion TDelimitersStrictSplit::Next(size_t& pos) const {
+    const size_t tokenBegin = pos;
+    for (; pos < Len; ++pos) {
+        if (Delimiters.IsDelimiter(Str[pos])) {
+            break;
+        }
+    }
+    const size_t tokenEnd = pos;
+
+    if (tokenEnd < Len) {
+        // step over the single delimiter that terminated the token
+        pos = tokenEnd + 1;
+    }
+
+    return TSizeTRegion(tokenBegin, tokenEnd);
+}
+
+// Splitting always starts at the very beginning of the string.
+size_t TDelimitersStrictSplit::Begin() const {
+    const size_t start = 0;
+    return start;
+}
+
+/****************** TScreenedDelimitersSplit ******************/
+
+// Splitter over the contents of s; delimiters and screens are kept by reference.
+TScreenedDelimitersSplit::TScreenedDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens)
+    : TScreenedDelimitersSplit(s.data(), s.size(), delimiters, screens)
+{
+}
+
+// Splitter over [str, str + length); delimiters and screens are kept by reference.
+TScreenedDelimitersSplit::TScreenedDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens)
+    : TSplitBase(str, length)
+    , Delimiters(delimiters)
+    , Screens(screens)
+{
+}
+
+// Iterator bound to this splitter (it stores a reference to *this).
+TScreenedDelimitersSplit::TIterator TScreenedDelimitersSplit::Iterator() const {
+    return TIterator(*this);
+}
+
+// Returns the token that starts at pos as [begin, end) offsets. Delimiters
+// are ignored while "screened"; every screen character toggles that state
+// (opening and closing characters are not told apart). The screen characters
+// stay part of the token. pos is advanced past one terminating delimiter.
+TSizeTRegion TScreenedDelimitersSplit::Next(size_t& pos) const {
+    const size_t tokenBegin = pos;
+    bool insideScreen = false;
+
+    for (; pos < Len; ++pos) {
+        const char ch = Str[pos];
+        if (Screens.IsDelimiter(ch)) {
+            insideScreen = !insideScreen;
+        }
+        if (!insideScreen && Delimiters.IsDelimiter(ch)) {
+            break;
+        }
+    }
+    const size_t tokenEnd = pos;
+
+    if (tokenEnd < Len) {
+        pos = tokenEnd + 1;
+    }
+
+    return TSizeTRegion(tokenBegin, tokenEnd);
+}
+
+// Splitting always starts at the very beginning of the string.
+size_t TScreenedDelimitersSplit::Begin() const {
+    const size_t start = 0;
+    return start;
+}
+
+/****************** TDelimitersSplitWithoutTags ******************/
+
+// Splitter over [str, str + length); delimiters is kept by reference.
+TDelimitersSplitWithoutTags::TDelimitersSplitWithoutTags(const char* str, size_t length, const TSplitDelimiters& delimiters)
+    : TSplitBase(str, length)
+    , Delimiters(delimiters)
+{
+}
+
+// Splitter over the contents of s; delimiters is kept by reference.
+TDelimitersSplitWithoutTags::TDelimitersSplitWithoutTags(const TString& s, const TSplitDelimiters& delimiters)
+    : TDelimitersSplitWithoutTags(s.data(), s.size(), delimiters)
+{
+}
+
+// Skips a tag that starts with '<' at pos and returns the offset just past
+// its closing '>'. If the tag is never closed, returns Len.
+size_t TDelimitersSplitWithoutTags::SkipTag(size_t pos) const {
+    Y_ASSERT('<' == Str[pos]);
+    while ((pos < Len) && ('>' != Str[pos]))
+        ++pos;
+    // For an unterminated tag pos == Len here; returning pos + 1 would leave the
+    // iterator one past the end of the buffer (GetTail() would read from there).
+    return (pos < Len) ? pos + 1 : Len;
+}
+
+// Skips any mix of delimiter characters and <...> tags starting at pos and
+// returns the offset of the first character that belongs to neither.
+size_t TDelimitersSplitWithoutTags::SkipDelimiters(size_t pos) const {
+    for (;;) {
+        while ((pos < Len) && ('<' != Str[pos]) && Delimiters.IsDelimiter(Str[pos])) {
+            ++pos;
+        }
+        if ((pos >= Len) || ('<' != Str[pos])) {
+            return pos;
+        }
+        pos = SkipTag(pos);
+    }
+}
+
+// Offset of the first token: leading delimiters and tags are skipped.
+size_t TDelimitersSplitWithoutTags::Begin() const {
+    return SkipDelimiters(0);
+}
+
+// Returns the token that starts at pos as [begin, end) offsets; a token ends
+// at a delimiter or at '<'. pos is advanced past the following delimiters and tags.
+TSizeTRegion TDelimitersSplitWithoutTags::Next(size_t& pos) const {
+    const size_t tokenBegin = pos;
+    for (; pos < Len; ++pos) {
+        if (Delimiters.IsDelimiter(Str[pos]) || ('<' == Str[pos])) {
+            break;
+        }
+    }
+    const TSizeTRegion token(tokenBegin, pos);
+
+    pos = SkipDelimiters(pos);
+
+    return token;
+}
+
+// Iterator bound to this splitter (it stores a reference to *this).
+TDelimitersSplitWithoutTags::TIterator TDelimitersSplitWithoutTags::Iterator() const {
+    return TIterator(*this);
+}
+
+/****************** TCharSplit ******************/
+
+// Splitter over [str, str + length).
+TCharSplit::TCharSplit(const char* str, size_t length)
+    : TSplitBase(str, length)
+{
+}
+
+// Splitter over the contents of s.
+TCharSplit::TCharSplit(const TString& s)
+    : TCharSplit(s.data(), s.size())
+{
+}
+
+// Iterator bound to this splitter (it stores a reference to *this).
+TCharSplit::TIterator TCharSplit::Iterator() const {
+    return TIterator(*this);
+}
+
+// Every token is the single character at pos; pos moves one character forward.
+TSizeTRegion TCharSplit::Next(size_t& pos) const {
+    const size_t charBegin = pos++;
+    return TSizeTRegion(charBegin, pos);
+}
+
+// Splitting always starts at the very beginning of the string.
+size_t TCharSplit::Begin() const {
+    const size_t start = 0;
+    return start;
+}
+
+/****************** TCharSplitWithoutTags ******************/
+
+// Splitter over [str, str + length).
+TCharSplitWithoutTags::TCharSplitWithoutTags(const char* str, size_t length)
+    : TSplitBase(str, length)
+{
+}
+
+// Splitter over the contents of s.
+TCharSplitWithoutTags::TCharSplitWithoutTags(const TString& s)
+    : TCharSplitWithoutTags(s.data(), s.size())
+{
+}
+
+// Skips a tag that starts with '<' at pos and returns the offset just past
+// its closing '>'. If the tag is never closed, returns Len.
+size_t TCharSplitWithoutTags::SkipTag(size_t pos) const {
+    Y_ASSERT('<' == Str[pos]);
+    while ((pos < Len) && ('>' != Str[pos]))
+        ++pos;
+    // For an unterminated tag pos == Len here; returning pos + 1 would leave the
+    // iterator one past the end of the buffer (GetTail() would read from there).
+    return (pos < Len) ? pos + 1 : Len;
+}
+
+// Skips every <...> tag that starts at pos and returns the offset of the
+// first character that does not open a tag (or the end of the string).
+size_t TCharSplitWithoutTags::SkipDelimiters(size_t pos) const {
+    while ((pos < Len) && ('<' == Str[pos])) {
+        pos = SkipTag(pos);
+    }
+    return pos;
+}
+
+// Offset of the first character outside of leading tags.
+size_t TCharSplitWithoutTags::Begin() const {
+    return SkipDelimiters(0);
+}
+
+// Every token is the single character at pos; pos is advanced past that
+// character and past any tags that immediately follow it.
+TSizeTRegion TCharSplitWithoutTags::Next(size_t& pos) const {
+    const size_t charBegin = pos;
+    const TSizeTRegion single(charBegin, charBegin + 1);
+
+    pos = SkipDelimiters(charBegin + 1);
+
+    return single;
+}
+
+// Iterator bound to this splitter (it stores a reference to *this).
+TCharSplitWithoutTags::TIterator TCharSplitWithoutTags::Iterator() const {
+    return TIterator(*this);
+}
+
+// Prepares a matcher for the delimiter string s and remembers its length.
+TSubstringSplitDelimiter::TSubstringSplitDelimiter(const TString& s)
+    : Matcher(s)
+    , Len(s.length())
+{
+}
+
+/****************** TSubstringSplit ******************/
+
+// Splitter over [str, str + length); delimiter is kept by reference.
+TSubstringSplit::TSubstringSplit(const char* str, size_t length, const TSubstringSplitDelimiter& delimiter)
+    : TSplitBase(str, length)
+    , Delimiter(delimiter)
+{
+}
+
+// Splitter over the contents of str; delimiter is kept by reference.
+TSubstringSplit::TSubstringSplit(const TString& str, const TSubstringSplitDelimiter& delimiter)
+    : TSubstringSplit(str.data(), str.size(), delimiter)
+{
+}
+
+// Iterator bound to this splitter (it stores a reference to *this).
+TSubstringSplit::TIterator TSubstringSplit::Iterator() const {
+    return TIterator(*this);
+}
+
+// Returns the token that starts at pos as [begin, end) offsets. The token
+// runs up to the next occurrence of the delimiter substring, or to the end
+// of the string when there is none; pos is advanced past the token and the
+// delimiter (if one was found).
+TSizeTRegion TSubstringSplit::Next(size_t& pos) const {
+    const char* const tokenBegin = Str + pos;
+    const char* const strEnd = Str + Len;
+
+    const char* match = nullptr;
+    const bool found = Delimiter.Matcher.SubStr(tokenBegin, strEnd, match);
+
+    const char* const tokenEnd = found ? match : strEnd;
+    const size_t tokenLen = tokenEnd - tokenBegin;
+
+    const TSizeTRegion token(pos, pos + tokenLen);
+    pos += tokenLen + (found ? Delimiter.Len : 0);
+    return token;
+}
+
+// Splitting always starts at the very beginning of the string.
+size_t TSubstringSplit::Begin() const {
+    const size_t start = 0;
+    return start;
+}
diff --git a/library/cpp/deprecated/split/split_iterator.h b/library/cpp/deprecated/split/split_iterator.h
new file mode 100644
index 0000000000..0eacc29228
--- /dev/null
+++ b/library/cpp/deprecated/split/split_iterator.h
@@ -0,0 +1,317 @@
+#pragma once
+
+#include <library/cpp/deprecated/kmp/kmp.h>
+#include <util/string/cast.h>
+#include <util/string/util.h>
+#include <util/string/builder.h>
+
+#include <util/system/yassert.h>
+#include <util/system/defaults.h>
+#include <util/generic/strbuf.h>
+#include <util/generic/string.h>
+#include <util/generic/vector.h>
+#include <util/generic/yexception.h>
+
+#include <cstdio>
+
+// Pair of numeric bounds. The two-argument constructor asserts Begin <= End.
+template <typename T>
+struct TNumPair {
+    T Begin;
+    T End;
+
+    // Defaulted: Begin and End are not given any value here
+    TNumPair() = default;
+
+    TNumPair(T begin, T end)
+        : Begin(begin)
+        , End(end)
+    {
+        Y_ASSERT(begin <= end);
+    }
+
+    // End - Begin + 1, i.e. the number of values in the closed range [Begin, End]
+    T Length() const {
+        const T span = End - Begin;
+        return span + 1;
+    }
+
+    bool operator==(const TNumPair& r) const {
+        if (!(Begin == r.Begin)) {
+            return false;
+        }
+        return End == r.End;
+    }
+
+    bool operator!=(const TNumPair& r) const {
+        return !(*this == r);
+    }
+};
+
+using TSizeTRegion = TNumPair<size_t>;
+using TUi32Region = TNumPair<ui32>;
+
+// Formats a region as "(Begin, End)".
+template <>
+inline TString ToString(const TUi32Region& r) {
+    TStringBuilder out;
+    out << "(" << r.Begin << ", " << r.End << ")";
+    return out;
+}
+
+// Parses a region written as "(Begin, End)" — the format produced by
+// ToString(const TUi32Region&).
+// Throws TFromStringException when s does not contain both numbers; previously
+// the sscanf result was ignored and a region with uninitialized fields was returned.
+template <>
+inline TUi32Region FromString(const TString& s) {
+    TUi32Region result;
+    // SCNu32 is the scanf counterpart of PRIu32 (both come from <inttypes.h>)
+    const int parsed = sscanf(s.data(), "(%" SCNu32 ", %" SCNu32 ")", &result.Begin, &result.End);
+    if (parsed != 2) {
+        ythrow TFromStringException() << "Cannot parse TUi32Region from \"" << s << "\"";
+    }
+    return result;
+}
+
+// Set of single-byte delimiters stored as a 256-entry lookup table.
+class TSplitDelimiters {
+    bool Delims[256]; // Delims[b] is true when byte b is a delimiter
+
+public:
+    // s is a NUL-terminated list of delimiter characters
+    explicit TSplitDelimiters(const char* s);
+
+    Y_FORCE_INLINE bool IsDelimiter(ui8 ch) const {
+        return Delims[ch];
+    }
+};
+
+template <class Split>
+class TSplitIterator;
+
+// Common base of all splitters: a pointer to the text being split and its length.
+// The text is not copied.
+class TSplitBase {
+protected:
+    const char* Str;
+    size_t Len;
+
+public:
+    TSplitBase(const char* str, size_t length);
+    TSplitBase(const TString& s);
+
+    // Start of the text being split
+    Y_FORCE_INLINE const char* GetString() const {
+        return Str;
+    }
+
+    // Length of the text being split
+    Y_FORCE_INLINE size_t GetLength() const {
+        return Len;
+    }
+
+private:
+    // we don't own Str, make sure that no one calls us with temporary object
+    TSplitBase(TString&& s) = delete;
+};
+
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4512)
+#endif
+
+// Splitter on single-character delimiters that treats a run of consecutive
+// delimiters as one separator (see Begin()/Next() in split_iterator.cpp).
+class TDelimitersSplit: public TSplitBase {
+    const TSplitDelimiters& Delimiters; // not owned
+
+public:
+    using TIterator = TSplitIterator<TDelimitersSplit>;
+    friend class TSplitIterator<TDelimitersSplit>;
+
+    TDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters);
+    TDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters);
+
+    TIterator Iterator() const;
+    // Returns the next token as [begin, end) offsets and advances pos
+    TSizeTRegion Next(size_t& pos) const;
+    // Offset at which iteration starts
+    size_t Begin() const;
+
+private:
+    // Neither the string nor Delimiters is owned, so construction from
+    // temporaries is forbidden
+    TDelimitersSplit(const char* str, size_t length, TSplitDelimiters&& delimiters) = delete;
+    TDelimitersSplit(const TString& s, TSplitDelimiters&& delimiters) = delete;
+    TDelimitersSplit(TString&& s, const TSplitDelimiters& delimiters) = delete;
+};
+
+// Splitter on single-character delimiters where every delimiter ends a
+// token, so consecutive delimiters produce empty tokens
+// (see Next() in split_iterator.cpp).
+class TDelimitersStrictSplit: public TSplitBase {
+    const TSplitDelimiters& Delimiters; // not owned
+
+public:
+    using TIterator = TSplitIterator<TDelimitersStrictSplit>;
+    friend class TSplitIterator<TDelimitersStrictSplit>;
+
+    TDelimitersStrictSplit(const char* str, size_t length, const TSplitDelimiters& delimiters);
+    TDelimitersStrictSplit(const TString& s, const TSplitDelimiters& delimiters);
+
+    TIterator Iterator() const;
+    // Returns the next token as [begin, end) offsets and advances pos
+    TSizeTRegion Next(size_t& pos) const;
+    // Offset at which iteration starts
+    size_t Begin() const;
+
+private:
+    // Neither the string nor Delimiters is owned, so construction from
+    // temporaries is forbidden
+    TDelimitersStrictSplit(const char* str, size_t length, TSplitDelimiters&& delimiters) = delete;
+    TDelimitersStrictSplit(const TString& s, TSplitDelimiters&& delimiters) = delete;
+    TDelimitersStrictSplit(TString&& s, const TSplitDelimiters& delimiters) = delete;
+};
+
+// Strict splitter that ignores delimiters located between "screen"
+// characters such as quotes or brackets (see Next() in split_iterator.cpp).
+class TScreenedDelimitersSplit: public TSplitBase {
+    const TSplitDelimiters& Delimiters; // not owned
+    const TSplitDelimiters& Screens;    // not owned
+
+public:
+    using TIterator = TSplitIterator<TScreenedDelimitersSplit>;
+    friend class TSplitIterator<TScreenedDelimitersSplit>;
+
+    TScreenedDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens);
+    TScreenedDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens);
+
+    TIterator Iterator() const;
+    // Returns the next token as [begin, end) offsets and advances pos
+    TSizeTRegion Next(size_t& pos) const;
+    // Offset at which iteration starts
+    size_t Begin() const;
+
+private:
+    // The string, Delimiters and Screens are not owned, so construction from
+    // temporaries is forbidden
+    TScreenedDelimitersSplit(TString&& s, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens) = delete;
+    TScreenedDelimitersSplit(const TString& s, TSplitDelimiters&& delimiters, const TSplitDelimiters& screens) = delete;
+    TScreenedDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters, TSplitDelimiters&& screens) = delete;
+};
+
+// Splitter on single-character delimiters that additionally skips <...>
+// tags between tokens (see split_iterator.cpp).
+class TDelimitersSplitWithoutTags: public TSplitBase {
+    const TSplitDelimiters& Delimiters; // not owned
+
+    // Skips the tag starting at pos, returns the offset after it
+    size_t SkipTag(size_t pos) const;
+    // Skips delimiters and tags starting at pos
+    size_t SkipDelimiters(size_t pos) const;
+
+public:
+    using TIterator = TSplitIterator<TDelimitersSplitWithoutTags>;
+    friend class TSplitIterator<TDelimitersSplitWithoutTags>;
+
+    TDelimitersSplitWithoutTags(const char* str, size_t length, const TSplitDelimiters& delimiters);
+    TDelimitersSplitWithoutTags(const TString& s, const TSplitDelimiters& delimiters);
+
+    TIterator Iterator() const;
+    // Returns the next token as [begin, end) offsets and advances pos
+    TSizeTRegion Next(size_t& pos) const;
+    // Offset at which iteration starts
+    size_t Begin() const;
+
+private:
+    // Neither the string nor Delimiters is owned, so construction from
+    // temporaries is forbidden
+    TDelimitersSplitWithoutTags(const char* str, size_t length, TSplitDelimiters&& delimiters) = delete;
+    TDelimitersSplitWithoutTags(const TString& s, TSplitDelimiters&& delimiters) = delete;
+    TDelimitersSplitWithoutTags(TString&& s, const TSplitDelimiters& delimiters) = delete;
+};
+
+// Splitter that yields every character of the string as a separate token.
+class TCharSplit: public TSplitBase {
+public:
+    using TIterator = TSplitIterator<TCharSplit>;
+    friend class TSplitIterator<TCharSplit>;
+
+    TCharSplit(const char* str, size_t length);
+    TCharSplit(const TString& s);
+
+    TIterator Iterator() const;
+    // Returns the one-character region at pos and advances pos
+    TSizeTRegion Next(size_t& pos) const;
+    // Offset at which iteration starts
+    size_t Begin() const;
+
+private:
+    // we don't own Str, make sure that no one calls us with temporary object
+    TCharSplit(TString&& s) = delete;
+};
+
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
+// Splitter that yields every character as a separate token while skipping
+// <...> tags (see split_iterator.cpp).
+class TCharSplitWithoutTags: public TSplitBase {
+    // Skips the tag starting at pos, returns the offset after it
+    size_t SkipTag(size_t pos) const;
+    // Skips all tags starting at pos
+    size_t SkipDelimiters(size_t pos) const;
+
+public:
+    using TIterator = TSplitIterator<TCharSplitWithoutTags>;
+    friend class TSplitIterator<TCharSplitWithoutTags>;
+
+    TCharSplitWithoutTags(const char* str, size_t length);
+    TCharSplitWithoutTags(const TString& s);
+
+    TIterator Iterator() const;
+    // Returns the one-character region at pos and advances pos
+    TSizeTRegion Next(size_t& pos) const;
+    // Offset at which iteration starts
+    size_t Begin() const;
+
+private:
+    // we don't own Str, make sure that no one calls us with temporary object
+    TCharSplitWithoutTags(TString&& s) = delete;
+};
+
+// Multi-character delimiter for TSubstringSplit: a KMP matcher built from
+// the delimiter string plus the delimiter length.
+class TSubstringSplitDelimiter {
+public:
+    TKMPMatcher Matcher;
+    size_t Len; // length of the delimiter string
+
+    TSubstringSplitDelimiter(const TString& s);
+};
+
+// Splitter that separates tokens by occurrences of a delimiter substring.
+class TSubstringSplit: public TSplitBase {
+    const TSubstringSplitDelimiter& Delimiter; // not owned
+
+public:
+    using TIterator = TSplitIterator<TSubstringSplit>;
+    friend class TSplitIterator<TSubstringSplit>;
+
+    TSubstringSplit(const char* str, size_t length, const TSubstringSplitDelimiter& delimiter);
+    TSubstringSplit(const TString& str, const TSubstringSplitDelimiter& delimiter);
+
+    TIterator Iterator() const;
+    // Returns the next token as [begin, end) offsets and advances pos
+    TSizeTRegion Next(size_t& pos) const;
+    // Offset at which iteration starts
+    size_t Begin() const;
+
+private:
+    // Neither the string nor Delimiter is owned, so construction from
+    // temporaries is forbidden
+    TSubstringSplit(TString&& str, const TSubstringSplitDelimiter& delimiter) = delete;
+    TSubstringSplit(const TString& str, TSubstringSplitDelimiter&& delimiter) = delete;
+};
+
+// Cursor over the tokens produced by a splitter. TSplit must provide
+// Begin(), Next(size_t&) and give this template access to its Str / Len
+// members. The splitter is stored by reference.
+template <class TSplit>
+class TSplitIterator {
+protected:
+    const TSplit& Split;
+    size_t Pos;
+    TString* CurrentStroka; // owned; allocated on the first NextString() call
+
+public:
+    TSplitIterator(const TSplit& split)
+        : Split(split)
+        , Pos(Split.Begin())
+        , CurrentStroka(nullptr)
+    {
+    }
+
+    // The destructor deletes CurrentStroka, so a copy must get its own buffer:
+    // the implicit member-wise copy would make both objects delete the same pointer.
+    TSplitIterator(const TSplitIterator& other)
+        : Split(other.Split)
+        , Pos(other.Pos)
+        , CurrentStroka(other.CurrentStroka ? new TString(*other.CurrentStroka) : nullptr)
+    {
+    }
+
+    // The Split reference cannot be re-bound, so assignment is not available.
+    TSplitIterator& operator=(const TSplitIterator&) = delete;
+
+    virtual ~TSplitIterator() {
+        delete CurrentStroka;
+    }
+
+    // Returns the next token as [Begin, End) offsets into the split string.
+    // Throws (Y_ENSURE) when called at the end.
+    inline TSizeTRegion Next() {
+        Y_ENSURE(!Eof(), TStringBuf("eof reached"));
+        return Split.Next(Pos);
+    }
+
+    // Returns the next token as a view into the split string, or an empty
+    // TStringBuf at the end.
+    TStringBuf NextTok() {
+        if (Eof())
+            return TStringBuf();
+        TSizeTRegion region = Next();
+        return TStringBuf(Split.Str + region.Begin, region.End - region.Begin);
+    }
+
+    // Returns the next token copied into an internal buffer; the reference
+    // stays valid until the next NextString() call. Throws at the end.
+    const TString& NextString() {
+        if (!CurrentStroka)
+            CurrentStroka = new TString();
+        TSizeTRegion region = Next();
+        // Copy exactly the token bytes. Passing Split.Str as a C string would
+        // measure the whole NUL-terminated buffer for every token and ignore Len.
+        CurrentStroka->assign(Split.Str + region.Begin, region.End - region.Begin);
+        return *CurrentStroka;
+    }
+
+    inline bool Eof() const {
+        return Pos >= Split.Len;
+    }
+
+    // Returns the not yet consumed part of the split string, bounded by Len
+    // (the buffer is not required to be NUL-terminated).
+    TString GetTail() const {
+        if (Pos >= Split.Len)
+            return TString();
+        return TString(Split.Str + Pos, Split.Len - Pos);
+    }
+
+    // Discards the next count tokens; throws if the end is reached first.
+    void Skip(size_t count) {
+        for (size_t i = 0; i < count; ++i)
+            Next();
+    }
+};
+
+using TSplitTokens = TVector<TString>;
+
+// Replaces the contents of *words with all tokens produced by split.
+template <typename TSplit>
+void Split(const TSplit& split, TSplitTokens* words) {
+    words->clear();
+    for (TSplitIterator<TSplit> it(split); !it.Eof();) {
+        words->push_back(it.NextString());
+    }
+}
diff --git a/library/cpp/deprecated/split/split_iterator_ut.cpp b/library/cpp/deprecated/split/split_iterator_ut.cpp
new file mode 100644
index 0000000000..be5069c4be
--- /dev/null
+++ b/library/cpp/deprecated/split/split_iterator_ut.cpp
@@ -0,0 +1,152 @@
+#include "split_iterator.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+// Unit tests for the splitters and TSplitIterator from split_iterator.h.
+class TSplitIteratorTest: public TTestBase {
+    UNIT_TEST_SUITE(TSplitIteratorTest);
+    UNIT_TEST(TestDelimiters);
+    UNIT_TEST(TestDelimitersSplit);
+    UNIT_TEST(TestDelimitersStrictSplit);
+    UNIT_TEST(TestTail);
+    UNIT_TEST(TestScreenedDelimitersSplit);
+    UNIT_TEST(TestSubstringDelimiter);
+    UNIT_TEST_SUITE_END();
+
+public:
+    // TSplitDelimiters lookup table
+    void TestDelimiters();
+
+    // TDelimitersSplit and TDelimitersStrictSplit tokenization
+    void TestDelimitersSplit();
+    void TestDelimitersStrictSplit();
+
+    // TSplitIterator::GetTail()
+    void TestTail();
+
+    // TScreenedDelimitersSplit and TSubstringSplit tokenization
+    void TestScreenedDelimitersSplit();
+    void TestSubstringDelimiter();
+};
+
+// Only '@' must be reported as a delimiter among all 256 byte values.
+void TSplitIteratorTest::TestDelimiters() {
+    const TSplitDelimiters delims("@");
+    for (int code = 0; code < 256; ++code) {
+        const bool isDelimiter = delims.IsDelimiter((ui8)code);
+        if (code == '@') {
+            UNIT_ASSERT(isDelimiter);
+        } else {
+            UNIT_ASSERT(!isDelimiter);
+        }
+    }
+}
+
+void TSplitIteratorTest::TestDelimitersSplit() {
+    const TSplitDelimiters delims("abcd");
+
+    {
+        // delimiter runs are collapsed; the trailing run yields no empty token
+        const TString input = "1a3b45cd";
+        const TDelimitersSplit split(input, delims);
+        TSplitTokens actual;
+        Split(split, &actual);
+        const TSplitTokens expected = {"1", "3", "45"};
+        UNIT_ASSERT(actual == expected);
+    }
+
+    {
+        // a string made of delimiters only yields no tokens at all
+        const TString input = "aaaaaa";
+        const TDelimitersSplit split(input, delims);
+        TSplitTokens actual;
+        Split(split, &actual);
+        const TSplitTokens expected = {};
+        UNIT_ASSERT(actual == expected);
+    }
+}
+
+void TSplitIteratorTest::TestDelimitersStrictSplit() {
+    const TSplitDelimiters delims("@");
+
+    {
+        const TString input = "grp@2";
+        const TDelimitersStrictSplit split(input, delims);
+        TSplitTokens actual;
+        Split(split, &actual);
+        const TSplitTokens expected = {"grp", "2"};
+        UNIT_ASSERT(actual == expected);
+    }
+
+    {
+        // leading and doubled delimiters yield empty tokens
+        const TString input = "@grp@2@@";
+        const TDelimitersStrictSplit split(input, delims);
+        TSplitTokens actual;
+        Split(split, &actual);
+        const TSplitTokens expected = {"", "grp", "2", ""};
+        UNIT_ASSERT(actual == expected);
+    }
+}
+
+// GetTail() must return what is left of the string after each Next().
+void TSplitIteratorTest::TestTail() {
+    const TString input = "grp@2@4";
+    const TSplitDelimiters delims("@");
+    const TDelimitersSplit split(input, delims);
+
+    TDelimitersSplit::TIterator it = split.Iterator();
+    UNIT_ASSERT_EQUAL(it.GetTail(), "grp@2@4");
+
+    it.Next();
+    UNIT_ASSERT_EQUAL(it.GetTail(), "2@4");
+
+    it.Next();
+    UNIT_ASSERT_EQUAL(it.GetTail(), "4");
+
+    it.Next();
+    UNIT_ASSERT_EQUAL(it.GetTail(), "");
+}
+
+// Splits an access-log line on spaces while keeping quoted and bracketed
+// fields intact, through both constructors of TScreenedDelimitersSplit.
+void TSplitIteratorTest::TestScreenedDelimitersSplit() {
+    const TString s = "77.88.58.91 - - [28/Aug/2008:00:08:07 +0400] \"GET /export/mordashka.tgz HTTP/1.1\" 304 - \"-\" \"libwww-perl/5.805\" \"news.yandex.ru,80\" \"-\" \"-\" 1219867687 \"0\" 3283 2";
+    const TSplitDelimiters delims(" ");
+    const TSplitDelimiters screens("\"[]");
+
+    static const char* const expectedTokens[] = {
+        "77.88.58.91",
+        "-",
+        "-",
+        "[28/Aug/2008:00:08:07 +0400]",
+        "\"GET /export/mordashka.tgz HTTP/1.1\"",
+        "304",
+        "-",
+        "\"-\"",
+        "\"libwww-perl/5.805\"",
+        "\"news.yandex.ru,80\"",
+        "\"-\"",
+        "\"-\"",
+        "1219867687",
+        "\"0\"",
+        "3283",
+        "2",
+    };
+
+    {
+        // constructed from a TString
+        const TScreenedDelimitersSplit splitter(s, delims, screens);
+        TScreenedDelimitersSplit::TIterator it = splitter.Iterator();
+        for (const char* token : expectedTokens) {
+            UNIT_ASSERT_EQUAL(it.NextString(), token);
+        }
+    }
+    {
+        // constructed from a pointer and a length
+        const TScreenedDelimitersSplit splitter(s.Data(), s.Size(), delims, screens);
+        TScreenedDelimitersSplit::TIterator it = splitter.Iterator();
+        for (const char* token : expectedTokens) {
+            UNIT_ASSERT_EQUAL(it.NextString(), token);
+        }
+    }
+}
+
+// A single '@' inside a token must not be taken for the "@@" delimiter.
+void TSplitIteratorTest::TestSubstringDelimiter() {
+    const TString s = "a@@bb@@cc@c.d@@r";
+    static const TSubstringSplitDelimiter delimiter("@@");
+    const TSubstringSplit splitter(s, delimiter);
+
+    static const char* const expectedTokens[] = {"a", "bb", "cc@c.d", "r"};
+
+    TSubstringSplit::TIterator it = splitter.Iterator();
+    for (const char* token : expectedTokens) {
+        UNIT_ASSERT_EQUAL(it.NextString(), token);
+    }
+    UNIT_ASSERT(it.Eof());
+}
+
+UNIT_TEST_SUITE_REGISTRATION(TSplitIteratorTest);
diff --git a/library/cpp/deprecated/split/ya.make b/library/cpp/deprecated/split/ya.make
new file mode 100644
index 0000000000..946e685ac8
--- /dev/null
+++ b/library/cpp/deprecated/split/ya.make
@@ -0,0 +1,14 @@
+LIBRARY()
+
+OWNER(wrg0ababd)
+
+SRCS(
+ delim_string_iter.cpp
+ split_iterator.cpp
+)
+
+PEERDIR(
+ library/cpp/deprecated/kmp
+)
+
+END()