diff options
author | Devtools Arcadia <arcadia-devtools@yandex-team.ru> | 2022-02-07 18:08:42 +0300 |
---|---|---|
committer | Devtools Arcadia <arcadia-devtools@mous.vla.yp-c.yandex.net> | 2022-02-07 18:08:42 +0300 |
commit | 1110808a9d39d4b808aef724c861a2e1a38d2a69 (patch) | |
tree | e26c9fed0de5d9873cce7e00bc214573dc2195b7 /library/cpp/deprecated/split/split_iterator.h | |
download | ydb-1110808a9d39d4b808aef724c861a2e1a38d2a69.tar.gz |
intermediate changes
ref:cde9a383711a11544ce7e107a78147fb96cc4029
Diffstat (limited to 'library/cpp/deprecated/split/split_iterator.h')
-rw-r--r-- | library/cpp/deprecated/split/split_iterator.h | 317 |
1 files changed, 317 insertions, 0 deletions
diff --git a/library/cpp/deprecated/split/split_iterator.h b/library/cpp/deprecated/split/split_iterator.h new file mode 100644 index 0000000000..0eacc29228 --- /dev/null +++ b/library/cpp/deprecated/split/split_iterator.h @@ -0,0 +1,317 @@ +#pragma once + +#include <library/cpp/deprecated/kmp/kmp.h> +#include <util/string/cast.h> +#include <util/string/util.h> +#include <util/string/builder.h> + +#include <util/system/yassert.h> +#include <util/system/defaults.h> +#include <util/generic/strbuf.h> +#include <util/generic/string.h> +#include <util/generic/vector.h> +#include <util/generic/yexception.h> + +#include <cstdio> + +template <typename T> +struct TNumPair { + T Begin; + T End; + + TNumPair() = default; + + TNumPair(T begin, T end) + : Begin(begin) + , End(end) + { + Y_ASSERT(begin <= end); + } + + T Length() const { + return End - Begin + 1; + } + + bool operator==(const TNumPair& r) const { + return (Begin == r.Begin) && (End == r.End); + } + + bool operator!=(const TNumPair& r) const { + return (Begin != r.Begin) || (End != r.End); + } +}; + +using TSizeTRegion = TNumPair<size_t>; +using TUi32Region = TNumPair<ui32>; + +template <> +inline TString ToString(const TUi32Region& r) { + return TStringBuilder() << "(" << r.Begin << ", " << r.End << ")"; +} + +template <> +inline TUi32Region FromString(const TString& s) { + TUi32Region result; + sscanf(s.data(), "(%" PRIu32 ", %" PRIu32 ")", &result.Begin, &result.End); + return result; +} + +class TSplitDelimiters { +private: + bool Delims[256]; + +public: + explicit TSplitDelimiters(const char* s); + + Y_FORCE_INLINE bool IsDelimiter(ui8 ch) const { + return Delims[ch]; + } +}; + +template <class Split> +class TSplitIterator; + +class TSplitBase { +protected: + const char* Str; + size_t Len; + +public: + TSplitBase(const char* str, size_t length); + TSplitBase(const TString& s); + + Y_FORCE_INLINE const char* GetString() const { + return Str; + } + + Y_FORCE_INLINE size_t GetLength() const { + return Len; + } + +private: + // we don't own Str, make sure that no one calls us with temporary object + TSplitBase(TString&&) = delete; +}; + +#ifdef _MSC_VER +#pragma warning(push) +#pragma warning(disable : 4512) +#endif + +class TDelimitersSplit: public TSplitBase { +private: + const TSplitDelimiters& Delimiters; + +public: + using TIterator = TSplitIterator<TDelimitersSplit>; + friend class TSplitIterator<TDelimitersSplit>; + + TDelimitersSplit(const char* str, size_t length, const TSplitDelimiters& delimiters); + TDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters); + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; + +private: + // we don't own Delimiters, make sure that no one calls us with temporary object + TDelimitersSplit(const char*, size_t, TSplitDelimiters&&) = delete; + TDelimitersSplit(const TString&, TSplitDelimiters&&) = delete; + TDelimitersSplit(TString&&, const TSplitDelimiters&) = delete; +}; + +class TDelimitersStrictSplit: public TSplitBase { +private: + const TSplitDelimiters& Delimiters; + +public: + using TIterator = TSplitIterator<TDelimitersStrictSplit>; + friend class TSplitIterator<TDelimitersStrictSplit>; + + TDelimitersStrictSplit(const char* str, size_t length, const TSplitDelimiters& delimiters); + TDelimitersStrictSplit(const TString& s, const TSplitDelimiters& delimiters); + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; + +private: + // we don't own Delimiters, make sure that no one calls us with temporary object + TDelimitersStrictSplit(const char*, size_t, TSplitDelimiters&&) = delete; + TDelimitersStrictSplit(const TString&, TSplitDelimiters&&) = delete; + TDelimitersStrictSplit(TString&&, const TSplitDelimiters&) = delete; +}; + +class TScreenedDelimitersSplit: public TSplitBase { +private: + const TSplitDelimiters& Delimiters; + const TSplitDelimiters& Screens; + +public: + using TIterator = TSplitIterator<TScreenedDelimitersSplit>; + friend class TSplitIterator<TScreenedDelimitersSplit>; + + TScreenedDelimitersSplit(const char*, size_t, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens); + TScreenedDelimitersSplit(const TString& s, const TSplitDelimiters& delimiters, const TSplitDelimiters& screens); + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; + +private: + // we don't own Delimiters and Screens, make sure that no one calls us with temporary object + TScreenedDelimitersSplit(TString&&, const TSplitDelimiters&, const TSplitDelimiters&) = delete; + TScreenedDelimitersSplit(const TString&, TSplitDelimiters&&, const TSplitDelimiters&) = delete; + TScreenedDelimitersSplit(const TString&, const TSplitDelimiters&, TSplitDelimiters&&) = delete; +}; + +class TDelimitersSplitWithoutTags: public TSplitBase { +private: + const TSplitDelimiters& Delimiters; + size_t SkipTag(size_t pos) const; + size_t SkipDelimiters(size_t pos) const; + +public: + using TIterator = TSplitIterator<TDelimitersSplitWithoutTags>; + friend class TSplitIterator<TDelimitersSplitWithoutTags>; + + TDelimitersSplitWithoutTags(const char* str, size_t length, const TSplitDelimiters& delimiters); + TDelimitersSplitWithoutTags(const TString& s, const TSplitDelimiters& delimiters); + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; + +private: + // we don't own Delimiters, make sure that no one calls us with temporary object + TDelimitersSplitWithoutTags(const char*, size_t, TSplitDelimiters&&) = delete; + TDelimitersSplitWithoutTags(const TString&, TSplitDelimiters&&) = delete; + TDelimitersSplitWithoutTags(TString&&, const TSplitDelimiters&) = delete; +}; + +class TCharSplit: public TSplitBase { +public: + using TIterator = TSplitIterator<TCharSplit>; + friend class TSplitIterator<TCharSplit>; + + TCharSplit(const char* str, size_t length); + TCharSplit(const TString& s); + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; + +private: + // we don't own Str, make sure that no one calls us with temporary object + TCharSplit(TString&&) = delete; +}; + +#ifdef _MSC_VER +#pragma warning(pop) +#endif + +class TCharSplitWithoutTags: public TSplitBase { +private: + size_t SkipTag(size_t pos) const; + size_t SkipDelimiters(size_t pos) const; + +public: + using TIterator = TSplitIterator<TCharSplitWithoutTags>; + friend class TSplitIterator<TCharSplitWithoutTags>; + + TCharSplitWithoutTags(const char* str, size_t length); + TCharSplitWithoutTags(const TString& s); + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; + +private: + // we don't own Str, make sure that no one calls us with temporary object + TCharSplitWithoutTags(TString&&) = delete; +}; + +class TSubstringSplitDelimiter { +public: + TKMPMatcher Matcher; + size_t Len; + + TSubstringSplitDelimiter(const TString& s); +}; + +class TSubstringSplit: public TSplitBase { +private: + const TSubstringSplitDelimiter& Delimiter; + +public: + using TIterator = TSplitIterator<TSubstringSplit>; + friend class TSplitIterator<TSubstringSplit>; + + TSubstringSplit(const char* str, size_t length, const TSubstringSplitDelimiter& delimiter); + TSubstringSplit(const TString& str, const TSubstringSplitDelimiter& delimiter); + TIterator Iterator() const; + TSizeTRegion Next(size_t& pos) const; + size_t Begin() const; + +private: + // we don't own Delimiters, make sure that no one calls us with temporary object + TSubstringSplit(TString&&, const TSubstringSplitDelimiter&) = delete; + TSubstringSplit(const TString&, TSubstringSplitDelimiter&&) = delete; +}; + +template <class TSplit> +class TSplitIterator { +protected: + const TSplit& Split; + size_t Pos; + TString* CurrentStroka; + +public: + TSplitIterator(const TSplit& split) + : Split(split) + , Pos(Split.Begin()) + , CurrentStroka(nullptr) + { + } + + virtual ~TSplitIterator() { + delete CurrentStroka; + } + + inline TSizeTRegion Next() { + Y_ENSURE(!Eof(), TStringBuf("eof reached")); + return Split.Next(Pos); + } + + TStringBuf NextTok() { + if (Eof()) + return TStringBuf(); + TSizeTRegion region = Next(); + return TStringBuf(Split.Str + region.Begin, region.End - region.Begin); + } + + const TString& NextString() { + if (!CurrentStroka) + CurrentStroka = new TString(); + TSizeTRegion region = Next(); + CurrentStroka->assign(Split.Str, region.Begin, region.Length() - 1); + return *CurrentStroka; + } + + inline bool Eof() const { + return Pos >= Split.Len; + } + + TString GetTail() const { + return TString(Split.Str + Pos); + } + + void Skip(size_t count) { + for (size_t i = 0; i < count; ++i) + Next(); + } +}; + +using TSplitTokens = TVector<TString>; + +template <typename TSplit> +void Split(const TSplit& split, TSplitTokens* words) { + words->clear(); + TSplitIterator<TSplit> it(split); + while (!it.Eof()) + words->push_back(it.NextString()); +} |